1/* strcpy with SSSE3
2 Copyright (C) 2011-2020 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <https://www.gnu.org/licenses/>. */
19
20#if IS_IN (libc)
21
22# ifndef USE_AS_STRCAT
23# include <sysdep.h>
24
25# ifndef STRCPY
26# define STRCPY __strcpy_ssse3
27# endif
28
29 .section .text.ssse3,"ax",@progbits
/* Entry point.  The incoming %rsi is copied to %rcx and the incoming
   %rdi to %rdx; the code below only uses the copies.  When
   USE_AS_STRNCPY is defined the incoming %rdx is kept in %r8 and is
   treated as a byte limit.
   The first 16 bytes at (%rcx) are then tested for NUL one at a time;
   a NUL at byte offset k branches to L(Exit<k+1>).  The L(Exit*) and
   L(StrncpyExit*) labels are not defined in this part of the file.  */
30ENTRY (STRCPY)
31
32 mov %rsi, %rcx
33# ifdef USE_AS_STRNCPY
34 mov %RDX_LP, %R8_LP
35# endif
36 mov %rdi, %rdx
37# ifdef USE_AS_STRNCPY
 /* limit == 0 and limit <= 8 leave through dedicated exits before any
    source byte is looked at.  */
38 test %R8_LP, %R8_LP
39 jz L(Exit0)
40 cmp $8, %R8_LP
41 jbe L(StrncpyExit8Bytes)
42# endif
 /* Bytes 0..7.  */
43 cmpb $0, (%rcx)
44 jz L(Exit1)
45 cmpb $0, 1(%rcx)
46 jz L(Exit2)
47 cmpb $0, 2(%rcx)
48 jz L(Exit3)
49 cmpb $0, 3(%rcx)
50 jz L(Exit4)
51 cmpb $0, 4(%rcx)
52 jz L(Exit5)
53 cmpb $0, 5(%rcx)
54 jz L(Exit6)
55 cmpb $0, 6(%rcx)
56 jz L(Exit7)
57 cmpb $0, 7(%rcx)
58 jz L(Exit8)
59# ifdef USE_AS_STRNCPY
 /* No NUL in bytes 0..7 and limit < 16 (unsigned compare).  */
60 cmp $16, %r8
61 jb L(StrncpyExit15Bytes)
62# endif
 /* Bytes 8..14.  */
63 cmpb $0, 8(%rcx)
64 jz L(Exit9)
65 cmpb $0, 9(%rcx)
66 jz L(Exit10)
67 cmpb $0, 10(%rcx)
68 jz L(Exit11)
69 cmpb $0, 11(%rcx)
70 jz L(Exit12)
71 cmpb $0, 12(%rcx)
72 jz L(Exit13)
73 cmpb $0, 13(%rcx)
74 jz L(Exit14)
75 cmpb $0, 14(%rcx)
76 jz L(Exit15)
77# ifdef USE_AS_STRNCPY
 /* limit == 16: go to L(Exit16) without testing byte 15.  */
78 cmp $16, %r8
79 je L(Exit16)
80# endif
81 cmpb $0, 15(%rcx)
82 jz L(Exit16)
83# endif
84
/* Start of the 16-byte block copy.  This part lies outside the
   "# ifndef USE_AS_STRCAT" region, so it is also assembled when
   USE_AS_STRCAT is defined (presumably an including file then provides
   %rcx, %rdx and, for the limited variant, %r8 -- confirm there).
   Steps:
     1. copy the first 16 bytes with two 8-byte moves through %r9;
     2. test the first 16-byte-aligned block above %rcx for NUL;
     3. round %rdx up to the next 16-byte boundary and advance %rcx by
	the same number of bytes;
     4. dispatch on (%rcx & 15): 0 -> L(Align16Both), N -> L(ShlN).  */
85# ifdef USE_AS_STRNCPY
86 mov %rcx, %rsi
87 sub $16, %r8
88 and $0xf, %rsi
89
90/* r8 = limit - 16 + (rcx & 15) */
91
92 add %rsi, %r8
93# endif
 /* rsi = (rcx + 16) & -16: the first 16-byte-aligned address strictly
    above rcx.  */
94 lea 16(%rcx), %rsi
95 and $-16, %rsi
96 pxor %xmm0, %xmm0
97 mov (%rcx), %r9
98 mov %r9, (%rdx)
 /* xmm0 was just zeroed, so each result byte is 0xff where the aligned
    block at (%rsi) holds a NUL.  */
99 pcmpeqb (%rsi), %xmm0
100 mov 8(%rcx), %r9
101 mov %r9, 8(%rdx)
102
103/* convert byte mask in xmm0 to bit mask */
104
105 pmovmskb %xmm0, %rax
 /* rsi becomes the distance from rcx to that aligned block (1..16).  */
106 sub %rcx, %rsi
107
108# ifdef USE_AS_STRNCPY
109 sub $16, %r8
110 jbe L(CopyFrom1To16BytesCase2OrCase3)
111# endif
112 test %rax, %rax
113 jnz L(CopyFrom1To16Bytes)
114
 /* rdx = (rdx + 16) & -16;  rax = old rdx - new rdx, i.e. minus the
    number of bytes (1..16) rdx moved forward.  */
115 mov %rdx, %rax
116 lea 16(%rdx), %rdx
117 and $-16, %rdx
118 sub %rdx, %rax
119
120# ifdef USE_AS_STRNCPY
 /* Tests bit 31 of (rsi + rax - 1), i.e. of (source distance -
    destination distance - 1).  When the bit is clear, 16 is added
    back to r8.  */
121 add %rax, %rsi
122 lea -1(%rsi), %rsi
123 and $1<<31, %esi
124 test %rsi, %rsi
125 jnz L(ContinueCopy)
126 lea 16(%r8), %r8
127
128L(ContinueCopy):
129# endif
 /* rax is negative, so this moves rcx forward by as many bytes as rdx
    moved.  */
130 sub %rax, %rcx
131 mov %rcx, %rax
132 and $0xf, %rax
 /* mov leaves EFLAGS alone, so the jz below still acts on the result
    of the "and" above; xor would overwrite the flags.  */
133 mov $0, %rsi
134
135/* case: (rcx & 15) == 0 -- rcx is 16-byte aligned, as rdx already is */
136
137 jz L(Align16Both)
138
 /* rax = 1..15.  Values >= 8 continue at L(ShlHigh8), which reuses the
    flags of this compare.  */
139 cmp $8, %rax
140 jae L(ShlHigh8)
141 cmp $1, %rax
142 je L(Shl1)
143 cmp $2, %rax
144 je L(Shl2)
145 cmp $3, %rax
146 je L(Shl3)
147 cmp $4, %rax
148 je L(Shl4)
149 cmp $5, %rax
150 je L(Shl5)
151 cmp $6, %rax
152 je L(Shl6)
153 jmp L(Shl7)
154
/* Second half of the dispatch on rax = (rcx & 15), for values 8..15.
   The only branch to this label follows "cmp $8, %rax", so the first
   je below tests rax == 8 without a new compare.  */
155L(ShlHigh8):
156 je L(Shl8)
157 cmp $9, %rax
158 je L(Shl9)
159 cmp $10, %rax
160 je L(Shl10)
161 cmp $11, %rax
162 je L(Shl11)
163 cmp $12, %rax
164 je L(Shl12)
165 cmp $13, %rax
166 je L(Shl13)
167 cmp $14, %rax
168 je L(Shl14)
 /* Only 15 is left.  */
169 jmp L(Shl15)
170
/* Both pointers are 16-byte aligned (movaps is used on (%rcx) and on
   (%rdx)).  %rsi is 0 on entry and serves as the running byte offset.
   Each step below:
     - loads the next source block,
     - stores the block loaded by the previous step,
     - compares the new block against xmm0 and extracts the mask,
     - adds 16 to %rsi,
     - (strncpy) subtracts 16 from %r8 and leaves when it reaches or
       passes zero,
     - leaves through L(CopyFrom1To16Bytes) when the mask is non-zero.
   xmm0 is zero on entry and a compare that finds no NUL leaves it
   zero, so it is reused as the comparand at every step.
   After six such steps the pointers are re-based for the 64-byte
   loop.  */
171L(Align16Both):
172 movaps (%rcx), %xmm1
173 movaps 16(%rcx), %xmm2
174 movaps %xmm1, (%rdx)
175 pcmpeqb %xmm2, %xmm0
176 pmovmskb %xmm0, %rax
177 lea 16(%rsi), %rsi
178# ifdef USE_AS_STRNCPY
179 sub $16, %r8
180 jbe L(CopyFrom1To16BytesCase2OrCase3)
181# endif
182 test %rax, %rax
183 jnz L(CopyFrom1To16Bytes)
184
185 movaps 16(%rcx, %rsi), %xmm3
186 movaps %xmm2, (%rdx, %rsi)
187 pcmpeqb %xmm3, %xmm0
188 pmovmskb %xmm0, %rax
189 lea 16(%rsi), %rsi
190# ifdef USE_AS_STRNCPY
191 sub $16, %r8
192 jbe L(CopyFrom1To16BytesCase2OrCase3)
193# endif
194 test %rax, %rax
195 jnz L(CopyFrom1To16Bytes)
196
197 movaps 16(%rcx, %rsi), %xmm4
198 movaps %xmm3, (%rdx, %rsi)
199 pcmpeqb %xmm4, %xmm0
200 pmovmskb %xmm0, %rax
201 lea 16(%rsi), %rsi
202# ifdef USE_AS_STRNCPY
203 sub $16, %r8
204 jbe L(CopyFrom1To16BytesCase2OrCase3)
205# endif
206 test %rax, %rax
207 jnz L(CopyFrom1To16Bytes)
208
209 movaps 16(%rcx, %rsi), %xmm1
210 movaps %xmm4, (%rdx, %rsi)
211 pcmpeqb %xmm1, %xmm0
212 pmovmskb %xmm0, %rax
213 lea 16(%rsi), %rsi
214# ifdef USE_AS_STRNCPY
215 sub $16, %r8
216 jbe L(CopyFrom1To16BytesCase2OrCase3)
217# endif
218 test %rax, %rax
219 jnz L(CopyFrom1To16Bytes)
220
221 movaps 16(%rcx, %rsi), %xmm2
222 movaps %xmm1, (%rdx, %rsi)
223 pcmpeqb %xmm2, %xmm0
224 pmovmskb %xmm0, %rax
225 lea 16(%rsi), %rsi
226# ifdef USE_AS_STRNCPY
227 sub $16, %r8
228 jbe L(CopyFrom1To16BytesCase2OrCase3)
229# endif
230 test %rax, %rax
231 jnz L(CopyFrom1To16Bytes)
232
233 movaps 16(%rcx, %rsi), %xmm3
234 movaps %xmm2, (%rdx, %rsi)
235 pcmpeqb %xmm3, %xmm0
236 pmovmskb %xmm0, %rax
237 lea 16(%rsi), %rsi
238# ifdef USE_AS_STRNCPY
239 sub $16, %r8
240 jbe L(CopyFrom1To16BytesCase2OrCase3)
241# endif
242 test %rax, %rax
243 jnz L(CopyFrom1To16Bytes)
244
 /* Store the last tested block, then re-base for the 64-byte loop:
    rcx = (rcx + rsi + 16) & -64, rax = old rcx - new rcx, and rdx is
    moved by the same distance as rcx (rdx -= rax).  For strncpy, r8
    becomes r8 + rax + 112.  rsi is set to -64 for the code that runs
    after the loop.  */
245 movaps %xmm3, (%rdx, %rsi)
246 mov %rcx, %rax
247 lea 16(%rcx, %rsi), %rcx
248 and $-0x40, %rcx
249 sub %rcx, %rax
250 sub %rax, %rdx
251# ifdef USE_AS_STRNCPY
252 lea 112(%r8, %rax), %r8
253# endif
254 mov $-0x40, %rsi
255
/* 64 bytes per iteration, source and destination both accessed with
   movaps.  Four blocks are loaded into xmm2, xmm5, xmm3 and xmm7, and
   xmm4 / xmm6 keep unmodified copies of the first and third.  pminub
   folds the byte-wise minimum of all four into xmm3, so one compare
   against xmm0 tells whether any of the 64 bytes is NUL.  Both
   pointers are advanced before the checks, which is why the stores use
   negative offsets from %rdx.  */
256 .p2align 4
257L(Aligned64Loop):
258 movaps (%rcx), %xmm2
259 movaps %xmm2, %xmm4
260 movaps 16(%rcx), %xmm5
261 movaps 32(%rcx), %xmm3
262 movaps %xmm3, %xmm6
263 movaps 48(%rcx), %xmm7
264 pminub %xmm5, %xmm2
265 pminub %xmm7, %xmm3
266 pminub %xmm2, %xmm3
 /* The result goes to xmm3; xmm0 is left unchanged.  */
267 pcmpeqb %xmm0, %xmm3
268 pmovmskb %xmm3, %rax
269 lea 64(%rdx), %rdx
270 lea 64(%rcx), %rcx
271# ifdef USE_AS_STRNCPY
272 sub $64, %r8
273 jbe L(StrncpyLeaveCase2OrCase3)
274# endif
275 test %rax, %rax
276 jnz L(Aligned64Leave)
 /* No NUL in this group: store all four blocks and continue.  */
277 movaps %xmm4, -64(%rdx)
278 movaps %xmm5, -48(%rdx)
279 movaps %xmm6, -32(%rdx)
280 movaps %xmm7, -16(%rdx)
281 jmp L(Aligned64Loop)
282
/* Reached from L(Aligned64Loop) when the last 64-byte group contains a
   NUL.  The four blocks are still in xmm4, xmm5, xmm6, xmm7 and
   %rcx / %rdx already point 64 bytes past the start of the group.
   The blocks are tested in order; each block found clean is stored,
   and %rsi (-64 on entry, set before the loop) is stepped by 16, so
   it tracks the offset of the block under test relative to the
   advanced pointers.  For strncpy, %r8 gets +48 up front and -16 per
   block that is passed.  */
283L(Aligned64Leave):
284# ifdef USE_AS_STRNCPY
285 lea 48(%r8), %r8
286# endif
287 pcmpeqb %xmm4, %xmm0
288 pmovmskb %xmm0, %rax
289 test %rax, %rax
290 jnz L(CopyFrom1To16Bytes)
291
292 pcmpeqb %xmm5, %xmm0
293# ifdef USE_AS_STRNCPY
294 lea -16(%r8), %r8
295# endif
296 pmovmskb %xmm0, %rax
297 movaps %xmm4, -64(%rdx)
298 test %rax, %rax
 /* lea does not modify EFLAGS; jnz still acts on the test above.  */
299 lea 16(%rsi), %rsi
300 jnz L(CopyFrom1To16Bytes)
301
302 pcmpeqb %xmm6, %xmm0
303# ifdef USE_AS_STRNCPY
304 lea -16(%r8), %r8
305# endif
306 pmovmskb %xmm0, %rax
307 movaps %xmm5, -48(%rdx)
308 test %rax, %rax
309 lea 16(%rsi), %rsi
310 jnz L(CopyFrom1To16Bytes)
311
 /* The first three blocks are clean, so the NUL is in xmm7: the jump
    is unconditional.  */
312 movaps %xmm6, -32(%rdx)
313 pcmpeqb %xmm7, %xmm0
314# ifdef USE_AS_STRNCPY
315 lea -16(%r8), %r8
316# endif
317 pmovmskb %xmm0, %rax
318 lea 16(%rsi), %rsi
319 jmp L(CopyFrom1To16Bytes)
320
/* L(Shl1): the dispatch branches here when (%rcx & 15) == 1, so
   -1(%rcx) is a 16-byte-aligned address.  Every source load is an
   aligned movaps; palignr $1 joins two neighbouring aligned blocks
   into the 16 bytes that start at %rcx, and those are stored to
   (%rdx) with movaps.  xmm0 is the zero comparand: a compare that
   finds no NUL leaves it zero, so it is reused throughout.  */
321 .p2align 4
322L(Shl1):
323 movaps -1(%rcx), %xmm1
324 movaps 15(%rcx), %xmm2
 /* Also entered from L(Shl1LoopStart) when a 64-byte group holds a
    NUL.  xmm1 = aligned block at -1(%rcx), xmm2 = block at 15(%rcx),
    which is the one tested first.  */
325L(Shl1Start):
326 pcmpeqb %xmm2, %xmm0
327 pmovmskb %xmm0, %rax
328 movaps %xmm2, %xmm3
329# ifdef USE_AS_STRNCPY
330 sub $16, %r8
331 jbe L(StrncpyExit1Case2OrCase3)
332# endif
333 test %rax, %rax
334 jnz L(Shl1LoopExit)
335
 /* Four unrolled 16-byte steps.  xmm1 and xmm3 alternate as the saved
    copy of the previous aligned block.  */
336 palignr $1, %xmm1, %xmm2
337 movaps %xmm2, (%rdx)
338 movaps 31(%rcx), %xmm2
339
340 pcmpeqb %xmm2, %xmm0
341 lea 16(%rdx), %rdx
342 pmovmskb %xmm0, %rax
343 lea 16(%rcx), %rcx
344 movaps %xmm2, %xmm1
345# ifdef USE_AS_STRNCPY
346 sub $16, %r8
347 jbe L(StrncpyExit1Case2OrCase3)
348# endif
349 test %rax, %rax
350 jnz L(Shl1LoopExit)
351
352 palignr $1, %xmm3, %xmm2
353 movaps %xmm2, (%rdx)
354 movaps 31(%rcx), %xmm2
355
356 pcmpeqb %xmm2, %xmm0
357 lea 16(%rdx), %rdx
358 pmovmskb %xmm0, %rax
359 lea 16(%rcx), %rcx
360 movaps %xmm2, %xmm3
361# ifdef USE_AS_STRNCPY
362 sub $16, %r8
363 jbe L(StrncpyExit1Case2OrCase3)
364# endif
365 test %rax, %rax
366 jnz L(Shl1LoopExit)
367
368 palignr $1, %xmm1, %xmm2
369 movaps %xmm2, (%rdx)
370 movaps 31(%rcx), %xmm2
371
372 pcmpeqb %xmm2, %xmm0
373 lea 16(%rdx), %rdx
374 pmovmskb %xmm0, %rax
375 lea 16(%rcx), %rcx
376# ifdef USE_AS_STRNCPY
377 sub $16, %r8
378 jbe L(StrncpyExit1Case2OrCase3)
379# endif
380 test %rax, %rax
381 jnz L(Shl1LoopExit)
382
383 palignr $1, %xmm3, %xmm2
384 movaps %xmm2, (%rdx)
385 lea 31(%rcx), %rcx
386 lea 16(%rdx), %rdx
387
 /* Re-base for the 64-byte loop.  rcx (the next aligned block to read)
    is rounded down to a 64-byte boundary; rax = bytes stepped back
    (0, 16, 32 or 48).  rcx is then biased by -15 so that 15(%rcx) is
    the 64-byte-aligned block, rdx steps back by rax as well, and for
    strncpy rax is added back to r8.  */
388 mov %rcx, %rax
389 and $-0x40, %rcx
390 sub %rcx, %rax
391 lea -15(%rcx), %rcx
392 sub %rax, %rdx
393# ifdef USE_AS_STRNCPY
394 add %rax, %r8
395# endif
396 movaps -1(%rcx), %xmm1
397
398/* 64 bytes loop */
399 .p2align 4
400L(Shl1LoopStart):
401 movaps 15(%rcx), %xmm2
402 movaps 31(%rcx), %xmm3
403 movaps %xmm3, %xmm6
404 movaps 47(%rcx), %xmm4
405 movaps %xmm4, %xmm7
406 movaps 63(%rcx), %xmm5
 /* Byte-wise minimum of the four blocks ends up in xmm7; one compare
    against zero then covers all 64 bytes.  */
407 pminub %xmm2, %xmm6
408 pminub %xmm5, %xmm7
409 pminub %xmm6, %xmm7
410 pcmpeqb %xmm0, %xmm7
411 pmovmskb %xmm7, %rax
 /* xmm7 = raw last block, kept as "previous block" for the next
    iteration.  */
412 movaps %xmm5, %xmm7
413 palignr $1, %xmm4, %xmm5
414 test %rax, %rax
 /* palignr does not modify EFLAGS, so jnz acts on the test above.
    xmm1 and xmm2 are still untouched, as L(Shl1Start) expects.  */
415 palignr $1, %xmm3, %xmm4
416 jnz L(Shl1Start)
417# ifdef USE_AS_STRNCPY
418 sub $64, %r8
419 jbe L(StrncpyLeave1)
420# endif
421 palignr $1, %xmm2, %xmm3
422 lea 64(%rcx), %rcx
423 palignr $1, %xmm1, %xmm2
424 movaps %xmm7, %xmm1
425 movaps %xmm5, 48(%rdx)
426 movaps %xmm4, 32(%rdx)
427 movaps %xmm3, 16(%rdx)
428 movaps %xmm2, (%rdx)
429 lea 64(%rdx), %rdx
430 jmp L(Shl1LoopStart)
431
 /* NUL found in the block at 15(%rcx).  One unaligned 16-byte copy from
    -1(%rcx) to -1(%rdx) covers the 15 bytes at %rcx that precede that
    block.  %rsi = 15 is handed to L(CopyFrom1To16Bytes), which is not
    defined in this part of the file (presumably the offset of the
    block holding the NUL -- confirm there).  */
432L(Shl1LoopExit):
433 movdqu -1(%rcx), %xmm1
434 mov $15, %rsi
435 movdqu %xmm1, -1(%rdx)
436 jmp L(CopyFrom1To16Bytes)
437
/* L(Shl2): the dispatch branches here when (%rcx & 15) == 2, so
   -2(%rcx) is a 16-byte-aligned address.  Every source load is an
   aligned movaps; palignr $2 joins two neighbouring aligned blocks
   into the 16 bytes that start at %rcx, and those are stored to
   (%rdx) with movaps.  xmm0 is the zero comparand: a compare that
   finds no NUL leaves it zero, so it is reused throughout.  */
438 .p2align 4
439L(Shl2):
440 movaps -2(%rcx), %xmm1
441 movaps 14(%rcx), %xmm2
 /* Also entered from L(Shl2LoopStart) when a 64-byte group holds a
    NUL.  xmm1 = aligned block at -2(%rcx), xmm2 = block at 14(%rcx),
    which is the one tested first.  */
442L(Shl2Start):
443 pcmpeqb %xmm2, %xmm0
444 pmovmskb %xmm0, %rax
445 movaps %xmm2, %xmm3
446# ifdef USE_AS_STRNCPY
447 sub $16, %r8
448 jbe L(StrncpyExit2Case2OrCase3)
449# endif
450 test %rax, %rax
451 jnz L(Shl2LoopExit)
452
 /* Four unrolled 16-byte steps.  xmm1 and xmm3 alternate as the saved
    copy of the previous aligned block.  */
453 palignr $2, %xmm1, %xmm2
454 movaps %xmm2, (%rdx)
455 movaps 30(%rcx), %xmm2
456
457 pcmpeqb %xmm2, %xmm0
458 lea 16(%rdx), %rdx
459 pmovmskb %xmm0, %rax
460 lea 16(%rcx), %rcx
461 movaps %xmm2, %xmm1
462# ifdef USE_AS_STRNCPY
463 sub $16, %r8
464 jbe L(StrncpyExit2Case2OrCase3)
465# endif
466 test %rax, %rax
467 jnz L(Shl2LoopExit)
468
469 palignr $2, %xmm3, %xmm2
470 movaps %xmm2, (%rdx)
471 movaps 30(%rcx), %xmm2
472
473 pcmpeqb %xmm2, %xmm0
474 lea 16(%rdx), %rdx
475 pmovmskb %xmm0, %rax
476 lea 16(%rcx), %rcx
477 movaps %xmm2, %xmm3
478# ifdef USE_AS_STRNCPY
479 sub $16, %r8
480 jbe L(StrncpyExit2Case2OrCase3)
481# endif
482 test %rax, %rax
483 jnz L(Shl2LoopExit)
484
485 palignr $2, %xmm1, %xmm2
486 movaps %xmm2, (%rdx)
487 movaps 30(%rcx), %xmm2
488
489 pcmpeqb %xmm2, %xmm0
490 lea 16(%rdx), %rdx
491 pmovmskb %xmm0, %rax
492 lea 16(%rcx), %rcx
493# ifdef USE_AS_STRNCPY
494 sub $16, %r8
495 jbe L(StrncpyExit2Case2OrCase3)
496# endif
497 test %rax, %rax
498 jnz L(Shl2LoopExit)
499
500 palignr $2, %xmm3, %xmm2
501 movaps %xmm2, (%rdx)
502 lea 30(%rcx), %rcx
503 lea 16(%rdx), %rdx
504
 /* Re-base for the 64-byte loop.  rcx (the next aligned block to read)
    is rounded down to a 64-byte boundary; rax = bytes stepped back
    (0, 16, 32 or 48).  rcx is then biased by -14 so that 14(%rcx) is
    the 64-byte-aligned block, rdx steps back by rax as well, and for
    strncpy rax is added back to r8.  */
505 mov %rcx, %rax
506 and $-0x40, %rcx
507 sub %rcx, %rax
508 lea -14(%rcx), %rcx
509 sub %rax, %rdx
510# ifdef USE_AS_STRNCPY
511 add %rax, %r8
512# endif
513 movaps -2(%rcx), %xmm1
514
515/* 64 bytes loop */
516 .p2align 4
517L(Shl2LoopStart):
518 movaps 14(%rcx), %xmm2
519 movaps 30(%rcx), %xmm3
520 movaps %xmm3, %xmm6
521 movaps 46(%rcx), %xmm4
522 movaps %xmm4, %xmm7
523 movaps 62(%rcx), %xmm5
 /* Byte-wise minimum of the four blocks ends up in xmm7; one compare
    against zero then covers all 64 bytes.  */
524 pminub %xmm2, %xmm6
525 pminub %xmm5, %xmm7
526 pminub %xmm6, %xmm7
527 pcmpeqb %xmm0, %xmm7
528 pmovmskb %xmm7, %rax
 /* xmm7 = raw last block, kept as "previous block" for the next
    iteration.  */
529 movaps %xmm5, %xmm7
530 palignr $2, %xmm4, %xmm5
531 test %rax, %rax
 /* palignr does not modify EFLAGS, so jnz acts on the test above.
    xmm1 and xmm2 are still untouched, as L(Shl2Start) expects.  */
532 palignr $2, %xmm3, %xmm4
533 jnz L(Shl2Start)
534# ifdef USE_AS_STRNCPY
535 sub $64, %r8
536 jbe L(StrncpyLeave2)
537# endif
538 palignr $2, %xmm2, %xmm3
539 lea 64(%rcx), %rcx
540 palignr $2, %xmm1, %xmm2
541 movaps %xmm7, %xmm1
542 movaps %xmm5, 48(%rdx)
543 movaps %xmm4, 32(%rdx)
544 movaps %xmm3, 16(%rdx)
545 movaps %xmm2, (%rdx)
546 lea 64(%rdx), %rdx
547 jmp L(Shl2LoopStart)
548
 /* NUL found in the block at 14(%rcx).  One unaligned 16-byte copy from
    -2(%rcx) to -2(%rdx) covers the 14 bytes at %rcx that precede that
    block.  %rsi = 14 is handed to L(CopyFrom1To16Bytes), which is not
    defined in this part of the file (presumably the offset of the
    block holding the NUL -- confirm there).  */
549L(Shl2LoopExit):
550 movdqu -2(%rcx), %xmm1
551 mov $14, %rsi
552 movdqu %xmm1, -2(%rdx)
553 jmp L(CopyFrom1To16Bytes)
554
/* L(Shl3): the dispatch branches here when (%rcx & 15) == 3, so
   -3(%rcx) is a 16-byte-aligned address.  Every source load is an
   aligned movaps; palignr $3 joins two neighbouring aligned blocks
   into the 16 bytes that start at %rcx, and those are stored to
   (%rdx) with movaps.  xmm0 is the zero comparand: a compare that
   finds no NUL leaves it zero, so it is reused throughout.  */
555 .p2align 4
556L(Shl3):
557 movaps -3(%rcx), %xmm1
558 movaps 13(%rcx), %xmm2
 /* Also entered from L(Shl3LoopStart) when a 64-byte group holds a
    NUL.  xmm1 = aligned block at -3(%rcx), xmm2 = block at 13(%rcx),
    which is the one tested first.  */
559L(Shl3Start):
560 pcmpeqb %xmm2, %xmm0
561 pmovmskb %xmm0, %rax
562 movaps %xmm2, %xmm3
563# ifdef USE_AS_STRNCPY
564 sub $16, %r8
565 jbe L(StrncpyExit3Case2OrCase3)
566# endif
567 test %rax, %rax
568 jnz L(Shl3LoopExit)
569
 /* Four unrolled 16-byte steps.  xmm1 and xmm3 alternate as the saved
    copy of the previous aligned block.  */
570 palignr $3, %xmm1, %xmm2
571 movaps %xmm2, (%rdx)
572 movaps 29(%rcx), %xmm2
573
574 pcmpeqb %xmm2, %xmm0
575 lea 16(%rdx), %rdx
576 pmovmskb %xmm0, %rax
577 lea 16(%rcx), %rcx
578 movaps %xmm2, %xmm1
579# ifdef USE_AS_STRNCPY
580 sub $16, %r8
581 jbe L(StrncpyExit3Case2OrCase3)
582# endif
583 test %rax, %rax
584 jnz L(Shl3LoopExit)
585
586 palignr $3, %xmm3, %xmm2
587 movaps %xmm2, (%rdx)
588 movaps 29(%rcx), %xmm2
589
590 pcmpeqb %xmm2, %xmm0
591 lea 16(%rdx), %rdx
592 pmovmskb %xmm0, %rax
593 lea 16(%rcx), %rcx
594 movaps %xmm2, %xmm3
595# ifdef USE_AS_STRNCPY
596 sub $16, %r8
597 jbe L(StrncpyExit3Case2OrCase3)
598# endif
599 test %rax, %rax
600 jnz L(Shl3LoopExit)
601
602 palignr $3, %xmm1, %xmm2
603 movaps %xmm2, (%rdx)
604 movaps 29(%rcx), %xmm2
605
606 pcmpeqb %xmm2, %xmm0
607 lea 16(%rdx), %rdx
608 pmovmskb %xmm0, %rax
609 lea 16(%rcx), %rcx
610# ifdef USE_AS_STRNCPY
611 sub $16, %r8
612 jbe L(StrncpyExit3Case2OrCase3)
613# endif
614 test %rax, %rax
615 jnz L(Shl3LoopExit)
616
617 palignr $3, %xmm3, %xmm2
618 movaps %xmm2, (%rdx)
619 lea 29(%rcx), %rcx
620 lea 16(%rdx), %rdx
621
 /* Re-base for the 64-byte loop.  rcx (the next aligned block to read)
    is rounded down to a 64-byte boundary; rax = bytes stepped back
    (0, 16, 32 or 48).  rcx is then biased by -13 so that 13(%rcx) is
    the 64-byte-aligned block, rdx steps back by rax as well, and for
    strncpy rax is added back to r8.  */
622 mov %rcx, %rax
623 and $-0x40, %rcx
624 sub %rcx, %rax
625 lea -13(%rcx), %rcx
626 sub %rax, %rdx
627# ifdef USE_AS_STRNCPY
628 add %rax, %r8
629# endif
630 movaps -3(%rcx), %xmm1
631
632/* 64 bytes loop */
633 .p2align 4
634L(Shl3LoopStart):
635 movaps 13(%rcx), %xmm2
636 movaps 29(%rcx), %xmm3
637 movaps %xmm3, %xmm6
638 movaps 45(%rcx), %xmm4
639 movaps %xmm4, %xmm7
640 movaps 61(%rcx), %xmm5
 /* Byte-wise minimum of the four blocks ends up in xmm7; one compare
    against zero then covers all 64 bytes.  */
641 pminub %xmm2, %xmm6
642 pminub %xmm5, %xmm7
643 pminub %xmm6, %xmm7
644 pcmpeqb %xmm0, %xmm7
645 pmovmskb %xmm7, %rax
 /* xmm7 = raw last block, kept as "previous block" for the next
    iteration.  */
646 movaps %xmm5, %xmm7
647 palignr $3, %xmm4, %xmm5
648 test %rax, %rax
 /* palignr does not modify EFLAGS, so jnz acts on the test above.
    xmm1 and xmm2 are still untouched, as L(Shl3Start) expects.  */
649 palignr $3, %xmm3, %xmm4
650 jnz L(Shl3Start)
651# ifdef USE_AS_STRNCPY
652 sub $64, %r8
653 jbe L(StrncpyLeave3)
654# endif
655 palignr $3, %xmm2, %xmm3
656 lea 64(%rcx), %rcx
657 palignr $3, %xmm1, %xmm2
658 movaps %xmm7, %xmm1
659 movaps %xmm5, 48(%rdx)
660 movaps %xmm4, 32(%rdx)
661 movaps %xmm3, 16(%rdx)
662 movaps %xmm2, (%rdx)
663 lea 64(%rdx), %rdx
664 jmp L(Shl3LoopStart)
665
 /* NUL found in the block at 13(%rcx).  One unaligned 16-byte copy from
    -3(%rcx) to -3(%rdx) covers the 13 bytes at %rcx that precede that
    block.  %rsi = 13 is handed to L(CopyFrom1To16Bytes), which is not
    defined in this part of the file (presumably the offset of the
    block holding the NUL -- confirm there).  */
666L(Shl3LoopExit):
667 movdqu -3(%rcx), %xmm1
668 mov $13, %rsi
669 movdqu %xmm1, -3(%rdx)
670 jmp L(CopyFrom1To16Bytes)
671
/* L(Shl4): the dispatch branches here when (%rcx & 15) == 4, so
   -4(%rcx) is a 16-byte-aligned address.  Every source load is an
   aligned movaps; palignr $4 joins two neighbouring aligned blocks
   into the 16 bytes that start at %rcx, and those are stored to
   (%rdx) with movaps.  xmm0 is the zero comparand: a compare that
   finds no NUL leaves it zero, so it is reused throughout.  */
672 .p2align 4
673L(Shl4):
674 movaps -4(%rcx), %xmm1
675 movaps 12(%rcx), %xmm2
 /* Also entered from L(Shl4LoopStart) when a 64-byte group holds a
    NUL.  xmm1 = aligned block at -4(%rcx), xmm2 = block at 12(%rcx),
    which is the one tested first.  */
676L(Shl4Start):
677 pcmpeqb %xmm2, %xmm0
678 pmovmskb %xmm0, %rax
679 movaps %xmm2, %xmm3
680# ifdef USE_AS_STRNCPY
681 sub $16, %r8
682 jbe L(StrncpyExit4Case2OrCase3)
683# endif
684 test %rax, %rax
685 jnz L(Shl4LoopExit)
686
 /* Four unrolled 16-byte steps.  xmm1 and xmm3 alternate as the saved
    copy of the previous aligned block.  */
687 palignr $4, %xmm1, %xmm2
688 movaps %xmm2, (%rdx)
689 movaps 28(%rcx), %xmm2
690
691 pcmpeqb %xmm2, %xmm0
692 lea 16(%rdx), %rdx
693 pmovmskb %xmm0, %rax
694 lea 16(%rcx), %rcx
695 movaps %xmm2, %xmm1
696# ifdef USE_AS_STRNCPY
697 sub $16, %r8
698 jbe L(StrncpyExit4Case2OrCase3)
699# endif
700 test %rax, %rax
701 jnz L(Shl4LoopExit)
702
703 palignr $4, %xmm3, %xmm2
704 movaps %xmm2, (%rdx)
705 movaps 28(%rcx), %xmm2
706
707 pcmpeqb %xmm2, %xmm0
708 lea 16(%rdx), %rdx
709 pmovmskb %xmm0, %rax
710 lea 16(%rcx), %rcx
711 movaps %xmm2, %xmm3
712# ifdef USE_AS_STRNCPY
713 sub $16, %r8
714 jbe L(StrncpyExit4Case2OrCase3)
715# endif
716 test %rax, %rax
717 jnz L(Shl4LoopExit)
718
719 palignr $4, %xmm1, %xmm2
720 movaps %xmm2, (%rdx)
721 movaps 28(%rcx), %xmm2
722
723 pcmpeqb %xmm2, %xmm0
724 lea 16(%rdx), %rdx
725 pmovmskb %xmm0, %rax
726 lea 16(%rcx), %rcx
727# ifdef USE_AS_STRNCPY
728 sub $16, %r8
729 jbe L(StrncpyExit4Case2OrCase3)
730# endif
731 test %rax, %rax
732 jnz L(Shl4LoopExit)
733
734 palignr $4, %xmm3, %xmm2
735 movaps %xmm2, (%rdx)
736 lea 28(%rcx), %rcx
737 lea 16(%rdx), %rdx
738
 /* Re-base for the 64-byte loop.  rcx (the next aligned block to read)
    is rounded down to a 64-byte boundary; rax = bytes stepped back
    (0, 16, 32 or 48).  rcx is then biased by -12 so that 12(%rcx) is
    the 64-byte-aligned block, rdx steps back by rax as well, and for
    strncpy rax is added back to r8.  */
739 mov %rcx, %rax
740 and $-0x40, %rcx
741 sub %rcx, %rax
742 lea -12(%rcx), %rcx
743 sub %rax, %rdx
744# ifdef USE_AS_STRNCPY
745 add %rax, %r8
746# endif
747 movaps -4(%rcx), %xmm1
748
749/* 64 bytes loop */
750 .p2align 4
751L(Shl4LoopStart):
752 movaps 12(%rcx), %xmm2
753 movaps 28(%rcx), %xmm3
754 movaps %xmm3, %xmm6
755 movaps 44(%rcx), %xmm4
756 movaps %xmm4, %xmm7
757 movaps 60(%rcx), %xmm5
 /* Byte-wise minimum of the four blocks ends up in xmm7; one compare
    against zero then covers all 64 bytes.  */
758 pminub %xmm2, %xmm6
759 pminub %xmm5, %xmm7
760 pminub %xmm6, %xmm7
761 pcmpeqb %xmm0, %xmm7
762 pmovmskb %xmm7, %rax
 /* xmm7 = raw last block, kept as "previous block" for the next
    iteration.  */
763 movaps %xmm5, %xmm7
764 palignr $4, %xmm4, %xmm5
765 test %rax, %rax
 /* palignr does not modify EFLAGS, so jnz acts on the test above.
    xmm1 and xmm2 are still untouched, as L(Shl4Start) expects.  */
766 palignr $4, %xmm3, %xmm4
767 jnz L(Shl4Start)
768# ifdef USE_AS_STRNCPY
769 sub $64, %r8
770 jbe L(StrncpyLeave4)
771# endif
772 palignr $4, %xmm2, %xmm3
773 lea 64(%rcx), %rcx
774 palignr $4, %xmm1, %xmm2
775 movaps %xmm7, %xmm1
776 movaps %xmm5, 48(%rdx)
777 movaps %xmm4, 32(%rdx)
778 movaps %xmm3, 16(%rdx)
779 movaps %xmm2, (%rdx)
780 lea 64(%rdx), %rdx
781 jmp L(Shl4LoopStart)
782
 /* NUL found in the block at 12(%rcx).  One unaligned 16-byte copy from
    -4(%rcx) to -4(%rdx) covers the 12 bytes at %rcx that precede that
    block.  %rsi = 12 is handed to L(CopyFrom1To16Bytes), which is not
    defined in this part of the file (presumably the offset of the
    block holding the NUL -- confirm there).  */
783L(Shl4LoopExit):
784 movdqu -4(%rcx), %xmm1
785 mov $12, %rsi
786 movdqu %xmm1, -4(%rdx)
787 jmp L(CopyFrom1To16Bytes)
788
/* L(Shl5): the dispatch branches here when (%rcx & 15) == 5, so
   -5(%rcx) is a 16-byte-aligned address.  Every source load is an
   aligned movaps; palignr $5 joins two neighbouring aligned blocks
   into the 16 bytes that start at %rcx, and those are stored to
   (%rdx) with movaps.  xmm0 is the zero comparand: a compare that
   finds no NUL leaves it zero, so it is reused throughout.  */
789 .p2align 4
790L(Shl5):
791 movaps -5(%rcx), %xmm1
792 movaps 11(%rcx), %xmm2
 /* Also entered from L(Shl5LoopStart) when a 64-byte group holds a
    NUL.  xmm1 = aligned block at -5(%rcx), xmm2 = block at 11(%rcx),
    which is the one tested first.  */
793L(Shl5Start):
794 pcmpeqb %xmm2, %xmm0
795 pmovmskb %xmm0, %rax
796 movaps %xmm2, %xmm3
797# ifdef USE_AS_STRNCPY
798 sub $16, %r8
799 jbe L(StrncpyExit5Case2OrCase3)
800# endif
801 test %rax, %rax
802 jnz L(Shl5LoopExit)
803
 /* Four unrolled 16-byte steps.  xmm1 and xmm3 alternate as the saved
    copy of the previous aligned block.  */
804 palignr $5, %xmm1, %xmm2
805 movaps %xmm2, (%rdx)
806 movaps 27(%rcx), %xmm2
807
808 pcmpeqb %xmm2, %xmm0
809 lea 16(%rdx), %rdx
810 pmovmskb %xmm0, %rax
811 lea 16(%rcx), %rcx
812 movaps %xmm2, %xmm1
813# ifdef USE_AS_STRNCPY
814 sub $16, %r8
815 jbe L(StrncpyExit5Case2OrCase3)
816# endif
817 test %rax, %rax
818 jnz L(Shl5LoopExit)
819
820 palignr $5, %xmm3, %xmm2
821 movaps %xmm2, (%rdx)
822 movaps 27(%rcx), %xmm2
823
824 pcmpeqb %xmm2, %xmm0
825 lea 16(%rdx), %rdx
826 pmovmskb %xmm0, %rax
827 lea 16(%rcx), %rcx
828 movaps %xmm2, %xmm3
829# ifdef USE_AS_STRNCPY
830 sub $16, %r8
831 jbe L(StrncpyExit5Case2OrCase3)
832# endif
833 test %rax, %rax
834 jnz L(Shl5LoopExit)
835
836 palignr $5, %xmm1, %xmm2
837 movaps %xmm2, (%rdx)
838 movaps 27(%rcx), %xmm2
839
840 pcmpeqb %xmm2, %xmm0
841 lea 16(%rdx), %rdx
842 pmovmskb %xmm0, %rax
843 lea 16(%rcx), %rcx
844# ifdef USE_AS_STRNCPY
845 sub $16, %r8
846 jbe L(StrncpyExit5Case2OrCase3)
847# endif
848 test %rax, %rax
849 jnz L(Shl5LoopExit)
850
851 palignr $5, %xmm3, %xmm2
852 movaps %xmm2, (%rdx)
853 lea 27(%rcx), %rcx
854 lea 16(%rdx), %rdx
855
 /* Re-base for the 64-byte loop.  rcx (the next aligned block to read)
    is rounded down to a 64-byte boundary; rax = bytes stepped back
    (0, 16, 32 or 48).  rcx is then biased by -11 so that 11(%rcx) is
    the 64-byte-aligned block, rdx steps back by rax as well, and for
    strncpy rax is added back to r8.  */
856 mov %rcx, %rax
857 and $-0x40, %rcx
858 sub %rcx, %rax
859 lea -11(%rcx), %rcx
860 sub %rax, %rdx
861# ifdef USE_AS_STRNCPY
862 add %rax, %r8
863# endif
864 movaps -5(%rcx), %xmm1
865
866/* 64 bytes loop */
867 .p2align 4
868L(Shl5LoopStart):
869 movaps 11(%rcx), %xmm2
870 movaps 27(%rcx), %xmm3
871 movaps %xmm3, %xmm6
872 movaps 43(%rcx), %xmm4
873 movaps %xmm4, %xmm7
874 movaps 59(%rcx), %xmm5
 /* Byte-wise minimum of the four blocks ends up in xmm7; one compare
    against zero then covers all 64 bytes.  */
875 pminub %xmm2, %xmm6
876 pminub %xmm5, %xmm7
877 pminub %xmm6, %xmm7
878 pcmpeqb %xmm0, %xmm7
879 pmovmskb %xmm7, %rax
 /* xmm7 = raw last block, kept as "previous block" for the next
    iteration.  */
880 movaps %xmm5, %xmm7
881 palignr $5, %xmm4, %xmm5
882 test %rax, %rax
 /* palignr does not modify EFLAGS, so jnz acts on the test above.
    xmm1 and xmm2 are still untouched, as L(Shl5Start) expects.  */
883 palignr $5, %xmm3, %xmm4
884 jnz L(Shl5Start)
885# ifdef USE_AS_STRNCPY
886 sub $64, %r8
887 jbe L(StrncpyLeave5)
888# endif
889 palignr $5, %xmm2, %xmm3
890 lea 64(%rcx), %rcx
891 palignr $5, %xmm1, %xmm2
892 movaps %xmm7, %xmm1
893 movaps %xmm5, 48(%rdx)
894 movaps %xmm4, 32(%rdx)
895 movaps %xmm3, 16(%rdx)
896 movaps %xmm2, (%rdx)
897 lea 64(%rdx), %rdx
898 jmp L(Shl5LoopStart)
899
 /* NUL found in the block at 11(%rcx).  One unaligned 16-byte copy from
    -5(%rcx) to -5(%rdx) covers the 11 bytes at %rcx that precede that
    block.  %rsi = 11 is handed to L(CopyFrom1To16Bytes), which is not
    defined in this part of the file (presumably the offset of the
    block holding the NUL -- confirm there).  */
900L(Shl5LoopExit):
901 movdqu -5(%rcx), %xmm1
902 mov $11, %rsi
903 movdqu %xmm1, -5(%rdx)
904 jmp L(CopyFrom1To16Bytes)
905
/* L(Shl6): the dispatch branches here when (%rcx & 15) == 6, so
   -6(%rcx) is a 16-byte-aligned address.  Every source load is an
   aligned movaps; palignr $6 joins two neighbouring aligned blocks
   into the 16 bytes that start at %rcx, and those are stored to
   (%rdx) with movaps.  xmm0 is the zero comparand: a compare that
   finds no NUL leaves it zero, so it is reused throughout.  */
906 .p2align 4
907L(Shl6):
908 movaps -6(%rcx), %xmm1
909 movaps 10(%rcx), %xmm2
 /* Also entered from L(Shl6LoopStart) when a 64-byte group holds a
    NUL.  xmm1 = aligned block at -6(%rcx), xmm2 = block at 10(%rcx),
    which is the one tested first.  */
910L(Shl6Start):
911 pcmpeqb %xmm2, %xmm0
912 pmovmskb %xmm0, %rax
913 movaps %xmm2, %xmm3
914# ifdef USE_AS_STRNCPY
915 sub $16, %r8
916 jbe L(StrncpyExit6Case2OrCase3)
917# endif
918 test %rax, %rax
919 jnz L(Shl6LoopExit)
920
 /* Four unrolled 16-byte steps.  xmm1 and xmm3 alternate as the saved
    copy of the previous aligned block.  */
921 palignr $6, %xmm1, %xmm2
922 movaps %xmm2, (%rdx)
923 movaps 26(%rcx), %xmm2
924
925 pcmpeqb %xmm2, %xmm0
926 lea 16(%rdx), %rdx
927 pmovmskb %xmm0, %rax
928 lea 16(%rcx), %rcx
929 movaps %xmm2, %xmm1
930# ifdef USE_AS_STRNCPY
931 sub $16, %r8
932 jbe L(StrncpyExit6Case2OrCase3)
933# endif
934 test %rax, %rax
935 jnz L(Shl6LoopExit)
936
937 palignr $6, %xmm3, %xmm2
938 movaps %xmm2, (%rdx)
939 movaps 26(%rcx), %xmm2
940
941 pcmpeqb %xmm2, %xmm0
942 lea 16(%rdx), %rdx
943 pmovmskb %xmm0, %rax
944 lea 16(%rcx), %rcx
945 movaps %xmm2, %xmm3
946# ifdef USE_AS_STRNCPY
947 sub $16, %r8
948 jbe L(StrncpyExit6Case2OrCase3)
949# endif
950 test %rax, %rax
951 jnz L(Shl6LoopExit)
952
953 palignr $6, %xmm1, %xmm2
954 movaps %xmm2, (%rdx)
955 movaps 26(%rcx), %xmm2
956
957 pcmpeqb %xmm2, %xmm0
958 lea 16(%rdx), %rdx
959 pmovmskb %xmm0, %rax
960 lea 16(%rcx), %rcx
961# ifdef USE_AS_STRNCPY
962 sub $16, %r8
963 jbe L(StrncpyExit6Case2OrCase3)
964# endif
965 test %rax, %rax
966 jnz L(Shl6LoopExit)
967
968 palignr $6, %xmm3, %xmm2
969 movaps %xmm2, (%rdx)
970 lea 26(%rcx), %rcx
971 lea 16(%rdx), %rdx
972
 /* Re-base for the 64-byte loop.  rcx (the next aligned block to read)
    is rounded down to a 64-byte boundary; rax = bytes stepped back
    (0, 16, 32 or 48).  rcx is then biased by -10 so that 10(%rcx) is
    the 64-byte-aligned block, rdx steps back by rax as well, and for
    strncpy rax is added back to r8.  */
973 mov %rcx, %rax
974 and $-0x40, %rcx
975 sub %rcx, %rax
976 lea -10(%rcx), %rcx
977 sub %rax, %rdx
978# ifdef USE_AS_STRNCPY
979 add %rax, %r8
980# endif
981 movaps -6(%rcx), %xmm1
982
983/* 64 bytes loop */
984 .p2align 4
985L(Shl6LoopStart):
986 movaps 10(%rcx), %xmm2
987 movaps 26(%rcx), %xmm3
988 movaps %xmm3, %xmm6
989 movaps 42(%rcx), %xmm4
990 movaps %xmm4, %xmm7
991 movaps 58(%rcx), %xmm5
 /* Byte-wise minimum of the four blocks ends up in xmm7; one compare
    against zero then covers all 64 bytes.  */
992 pminub %xmm2, %xmm6
993 pminub %xmm5, %xmm7
994 pminub %xmm6, %xmm7
995 pcmpeqb %xmm0, %xmm7
996 pmovmskb %xmm7, %rax
 /* xmm7 = raw last block, kept as "previous block" for the next
    iteration.  */
997 movaps %xmm5, %xmm7
998 palignr $6, %xmm4, %xmm5
999 test %rax, %rax
 /* palignr does not modify EFLAGS, so jnz acts on the test above.
    xmm1 and xmm2 are still untouched, as L(Shl6Start) expects.  */
1000 palignr $6, %xmm3, %xmm4
1001 jnz L(Shl6Start)
1002# ifdef USE_AS_STRNCPY
1003 sub $64, %r8
1004 jbe L(StrncpyLeave6)
1005# endif
1006 palignr $6, %xmm2, %xmm3
1007 lea 64(%rcx), %rcx
1008 palignr $6, %xmm1, %xmm2
1009 movaps %xmm7, %xmm1
1010 movaps %xmm5, 48(%rdx)
1011 movaps %xmm4, 32(%rdx)
1012 movaps %xmm3, 16(%rdx)
1013 movaps %xmm2, (%rdx)
1014 lea 64(%rdx), %rdx
1015 jmp L(Shl6LoopStart)
1016
 /* NUL found in the block at 10(%rcx).  The 10 bytes at %rcx that
    precede it are copied with an 8-byte move at offset 0 and an
    overlapping 4-byte move at offset 6.  %esi is used as the scratch
    register for the second move and is then overwritten with 10, the
    value handed to L(CopyFrom1To16Bytes) (not defined in this part of
    the file; presumably the offset of the block holding the NUL --
    confirm there).  */
1017L(Shl6LoopExit):
1018 mov (%rcx), %r9
1019 mov 6(%rcx), %esi
1020 mov %r9, (%rdx)
1021 mov %esi, 6(%rdx)
1022 mov $10, %rsi
1023 jmp L(CopyFrom1To16Bytes)
1024
/* L(Shl7): the dispatch falls through to a jmp here when (%rcx & 15)
   is 7, so -7(%rcx) is a 16-byte-aligned address.  Every source load
   is an aligned movaps; palignr $7 joins two neighbouring aligned
   blocks into the 16 bytes that start at %rcx, and those are stored to
   (%rdx) with movaps.  xmm0 is the zero comparand: a compare that
   finds no NUL leaves it zero, so it is reused throughout.  */
1025 .p2align 4
1026L(Shl7):
1027 movaps -7(%rcx), %xmm1
1028 movaps 9(%rcx), %xmm2
 /* Also entered from L(Shl7LoopStart) when a 64-byte group holds a
    NUL.  xmm1 = aligned block at -7(%rcx), xmm2 = block at 9(%rcx),
    which is the one tested first.  */
1029L(Shl7Start):
1030 pcmpeqb %xmm2, %xmm0
1031 pmovmskb %xmm0, %rax
1032 movaps %xmm2, %xmm3
1033# ifdef USE_AS_STRNCPY
1034 sub $16, %r8
1035 jbe L(StrncpyExit7Case2OrCase3)
1036# endif
1037 test %rax, %rax
1038 jnz L(Shl7LoopExit)
1039
 /* Four unrolled 16-byte steps.  xmm1 and xmm3 alternate as the saved
    copy of the previous aligned block.  */
1040 palignr $7, %xmm1, %xmm2
1041 movaps %xmm2, (%rdx)
1042 movaps 25(%rcx), %xmm2
1043
1044 pcmpeqb %xmm2, %xmm0
1045 lea 16(%rdx), %rdx
1046 pmovmskb %xmm0, %rax
1047 lea 16(%rcx), %rcx
1048 movaps %xmm2, %xmm1
1049# ifdef USE_AS_STRNCPY
1050 sub $16, %r8
1051 jbe L(StrncpyExit7Case2OrCase3)
1052# endif
1053 test %rax, %rax
1054 jnz L(Shl7LoopExit)
1055
1056 palignr $7, %xmm3, %xmm2
1057 movaps %xmm2, (%rdx)
1058 movaps 25(%rcx), %xmm2
1059
1060 pcmpeqb %xmm2, %xmm0
1061 lea 16(%rdx), %rdx
1062 pmovmskb %xmm0, %rax
1063 lea 16(%rcx), %rcx
1064 movaps %xmm2, %xmm3
1065# ifdef USE_AS_STRNCPY
1066 sub $16, %r8
1067 jbe L(StrncpyExit7Case2OrCase3)
1068# endif
1069 test %rax, %rax
1070 jnz L(Shl7LoopExit)
1071
1072 palignr $7, %xmm1, %xmm2
1073 movaps %xmm2, (%rdx)
1074 movaps 25(%rcx), %xmm2
1075
1076 pcmpeqb %xmm2, %xmm0
1077 lea 16(%rdx), %rdx
1078 pmovmskb %xmm0, %rax
1079 lea 16(%rcx), %rcx
1080# ifdef USE_AS_STRNCPY
1081 sub $16, %r8
1082 jbe L(StrncpyExit7Case2OrCase3)
1083# endif
1084 test %rax, %rax
1085 jnz L(Shl7LoopExit)
1086
1087 palignr $7, %xmm3, %xmm2
1088 movaps %xmm2, (%rdx)
1089 lea 25(%rcx), %rcx
1090 lea 16(%rdx), %rdx
1091
 /* Re-base for the 64-byte loop.  rcx (the next aligned block to read)
    is rounded down to a 64-byte boundary; rax = bytes stepped back
    (0, 16, 32 or 48).  rcx is then biased by -9 so that 9(%rcx) is
    the 64-byte-aligned block, rdx steps back by rax as well, and for
    strncpy rax is added back to r8.  */
1092 mov %rcx, %rax
1093 and $-0x40, %rcx
1094 sub %rcx, %rax
1095 lea -9(%rcx), %rcx
1096 sub %rax, %rdx
1097# ifdef USE_AS_STRNCPY
1098 add %rax, %r8
1099# endif
1100 movaps -7(%rcx), %xmm1
1101
1102/* 64 bytes loop */
1103 .p2align 4
1104L(Shl7LoopStart):
1105 movaps 9(%rcx), %xmm2
1106 movaps 25(%rcx), %xmm3
1107 movaps %xmm3, %xmm6
1108 movaps 41(%rcx), %xmm4
1109 movaps %xmm4, %xmm7
1110 movaps 57(%rcx), %xmm5
 /* Byte-wise minimum of the four blocks ends up in xmm7; one compare
    against zero then covers all 64 bytes.  */
1111 pminub %xmm2, %xmm6
1112 pminub %xmm5, %xmm7
1113 pminub %xmm6, %xmm7
1114 pcmpeqb %xmm0, %xmm7
1115 pmovmskb %xmm7, %rax
 /* xmm7 = raw last block, kept as "previous block" for the next
    iteration.  */
1116 movaps %xmm5, %xmm7
1117 palignr $7, %xmm4, %xmm5
1118 test %rax, %rax
 /* palignr does not modify EFLAGS, so jnz acts on the test above.
    xmm1 and xmm2 are still untouched, as L(Shl7Start) expects.  */
1119 palignr $7, %xmm3, %xmm4
1120 jnz L(Shl7Start)
1121# ifdef USE_AS_STRNCPY
1122 sub $64, %r8
1123 jbe L(StrncpyLeave7)
1124# endif
1125 palignr $7, %xmm2, %xmm3
1126 lea 64(%rcx), %rcx
1127 palignr $7, %xmm1, %xmm2
1128 movaps %xmm7, %xmm1
1129 movaps %xmm5, 48(%rdx)
1130 movaps %xmm4, 32(%rdx)
1131 movaps %xmm3, 16(%rdx)
1132 movaps %xmm2, (%rdx)
1133 lea 64(%rdx), %rdx
1134 jmp L(Shl7LoopStart)
1135
 /* NUL found in the block at 9(%rcx).  The 9 bytes at %rcx that
    precede it are copied with an 8-byte move at offset 0 and an
    overlapping 4-byte move at offset 5.  %esi is used as the scratch
    register for the second move and is then overwritten with 9, the
    value handed to L(CopyFrom1To16Bytes) (not defined in this part of
    the file; presumably the offset of the block holding the NUL --
    confirm there).  */
1136L(Shl7LoopExit):
1137 mov (%rcx), %r9
1138 mov 5(%rcx), %esi
1139 mov %r9, (%rdx)
1140 mov %esi, 5(%rdx)
1141 mov $9, %rsi
1142 jmp L(CopyFrom1To16Bytes)
1143
/* L(Shl8): L(ShlHigh8) branches here when (%rcx & 15) == 8, so
   -8(%rcx) is a 16-byte-aligned address.  Every source load is an
   aligned movaps; palignr $8 joins two neighbouring aligned blocks
   into the 16 bytes that start at %rcx, and those are stored to
   (%rdx) with movaps.  xmm0 is the zero comparand: a compare that
   finds no NUL leaves it zero, so it is reused throughout.  */
1144 .p2align 4
1145L(Shl8):
1146 movaps -8(%rcx), %xmm1
1147 movaps 8(%rcx), %xmm2
 /* Also entered from L(Shl8LoopStart) when a 64-byte group holds a
    NUL.  xmm1 = aligned block at -8(%rcx), xmm2 = block at 8(%rcx),
    which is the one tested first.  */
1148L(Shl8Start):
1149 pcmpeqb %xmm2, %xmm0
1150 pmovmskb %xmm0, %rax
1151 movaps %xmm2, %xmm3
1152# ifdef USE_AS_STRNCPY
1153 sub $16, %r8
1154 jbe L(StrncpyExit8Case2OrCase3)
1155# endif
1156 test %rax, %rax
1157 jnz L(Shl8LoopExit)
1158
 /* Four unrolled 16-byte steps.  xmm1 and xmm3 alternate as the saved
    copy of the previous aligned block.  */
1159 palignr $8, %xmm1, %xmm2
1160 movaps %xmm2, (%rdx)
1161 movaps 24(%rcx), %xmm2
1162
1163 pcmpeqb %xmm2, %xmm0
1164 lea 16(%rdx), %rdx
1165 pmovmskb %xmm0, %rax
1166 lea 16(%rcx), %rcx
1167 movaps %xmm2, %xmm1
1168# ifdef USE_AS_STRNCPY
1169 sub $16, %r8
1170 jbe L(StrncpyExit8Case2OrCase3)
1171# endif
1172 test %rax, %rax
1173 jnz L(Shl8LoopExit)
1174
1175 palignr $8, %xmm3, %xmm2
1176 movaps %xmm2, (%rdx)
1177 movaps 24(%rcx), %xmm2
1178
1179 pcmpeqb %xmm2, %xmm0
1180 lea 16(%rdx), %rdx
1181 pmovmskb %xmm0, %rax
1182 lea 16(%rcx), %rcx
1183 movaps %xmm2, %xmm3
1184# ifdef USE_AS_STRNCPY
1185 sub $16, %r8
1186 jbe L(StrncpyExit8Case2OrCase3)
1187# endif
1188 test %rax, %rax
1189 jnz L(Shl8LoopExit)
1190
1191 palignr $8, %xmm1, %xmm2
1192 movaps %xmm2, (%rdx)
1193 movaps 24(%rcx), %xmm2
1194
1195 pcmpeqb %xmm2, %xmm0
1196 lea 16(%rdx), %rdx
1197 pmovmskb %xmm0, %rax
1198 lea 16(%rcx), %rcx
1199# ifdef USE_AS_STRNCPY
1200 sub $16, %r8
1201 jbe L(StrncpyExit8Case2OrCase3)
1202# endif
1203 test %rax, %rax
1204 jnz L(Shl8LoopExit)
1205
1206 palignr $8, %xmm3, %xmm2
1207 movaps %xmm2, (%rdx)
1208 lea 24(%rcx), %rcx
1209 lea 16(%rdx), %rdx
1210
 /* Re-base for the 64-byte loop.  rcx (the next aligned block to read)
    is rounded down to a 64-byte boundary; rax = bytes stepped back
    (0, 16, 32 or 48).  rcx is then biased by -8 so that 8(%rcx) is
    the 64-byte-aligned block, rdx steps back by rax as well, and for
    strncpy rax is added back to r8.  */
1211 mov %rcx, %rax
1212 and $-0x40, %rcx
1213 sub %rcx, %rax
1214 lea -8(%rcx), %rcx
1215 sub %rax, %rdx
1216# ifdef USE_AS_STRNCPY
1217 add %rax, %r8
1218# endif
1219 movaps -8(%rcx), %xmm1
1220
1221/* 64 bytes loop */
1222 .p2align 4
1223L(Shl8LoopStart):
1224 movaps 8(%rcx), %xmm2
1225 movaps 24(%rcx), %xmm3
1226 movaps %xmm3, %xmm6
1227 movaps 40(%rcx), %xmm4
1228 movaps %xmm4, %xmm7
1229 movaps 56(%rcx), %xmm5
 /* Byte-wise minimum of the four blocks ends up in xmm7; one compare
    against zero then covers all 64 bytes.  */
1230 pminub %xmm2, %xmm6
1231 pminub %xmm5, %xmm7
1232 pminub %xmm6, %xmm7
1233 pcmpeqb %xmm0, %xmm7
1234 pmovmskb %xmm7, %rax
 /* xmm7 = raw last block, kept as "previous block" for the next
    iteration.  */
1235 movaps %xmm5, %xmm7
1236 palignr $8, %xmm4, %xmm5
1237 test %rax, %rax
 /* palignr does not modify EFLAGS, so jnz acts on the test above.
    xmm1 and xmm2 are still untouched, as L(Shl8Start) expects.  */
1238 palignr $8, %xmm3, %xmm4
1239 jnz L(Shl8Start)
1240# ifdef USE_AS_STRNCPY
1241 sub $64, %r8
1242 jbe L(StrncpyLeave8)
1243# endif
1244 palignr $8, %xmm2, %xmm3
1245 lea 64(%rcx), %rcx
1246 palignr $8, %xmm1, %xmm2
1247 movaps %xmm7, %xmm1
1248 movaps %xmm5, 48(%rdx)
1249 movaps %xmm4, 32(%rdx)
1250 movaps %xmm3, 16(%rdx)
1251 movaps %xmm2, (%rdx)
1252 lea 64(%rdx), %rdx
1253 jmp L(Shl8LoopStart)
1254
 /* NUL found in the block at 8(%rcx).  A single 8-byte move copies the
    8 bytes at %rcx that precede it.  %rsi = 8 is handed to
    L(CopyFrom1To16Bytes), which is not defined in this part of the
    file (presumably the offset of the block holding the NUL --
    confirm there).  */
1255L(Shl8LoopExit):
1256 mov (%rcx), %r9
1257 mov $8, %rsi
1258 mov %r9, (%rdx)
1259 jmp L(CopyFrom1To16Bytes)
1260
/* L(Shl9): L(ShlHigh8) branches here when (%rcx & 15) == 9, so
   -9(%rcx) is a 16-byte-aligned address.  Every source load is an
   aligned movaps; palignr $9 joins two neighbouring aligned blocks
   into the 16 bytes that start at %rcx, and those are stored to
   (%rdx) with movaps.  xmm0 is the zero comparand: a compare that
   finds no NUL leaves it zero, so it is reused throughout.  */
1261 .p2align 4
1262L(Shl9):
1263 movaps -9(%rcx), %xmm1
1264 movaps 7(%rcx), %xmm2
 /* Also entered from L(Shl9LoopStart) when a 64-byte group holds a
    NUL.  xmm1 = aligned block at -9(%rcx), xmm2 = block at 7(%rcx),
    which is the one tested first.  */
1265L(Shl9Start):
1266 pcmpeqb %xmm2, %xmm0
1267 pmovmskb %xmm0, %rax
1268 movaps %xmm2, %xmm3
1269# ifdef USE_AS_STRNCPY
1270 sub $16, %r8
1271 jbe L(StrncpyExit9Case2OrCase3)
1272# endif
1273 test %rax, %rax
1274 jnz L(Shl9LoopExit)
1275
 /* Four unrolled 16-byte steps.  xmm1 and xmm3 alternate as the saved
    copy of the previous aligned block.  */
1276 palignr $9, %xmm1, %xmm2
1277 movaps %xmm2, (%rdx)
1278 movaps 23(%rcx), %xmm2
1279
1280 pcmpeqb %xmm2, %xmm0
1281 lea 16(%rdx), %rdx
1282 pmovmskb %xmm0, %rax
1283 lea 16(%rcx), %rcx
1284 movaps %xmm2, %xmm1
1285# ifdef USE_AS_STRNCPY
1286 sub $16, %r8
1287 jbe L(StrncpyExit9Case2OrCase3)
1288# endif
1289 test %rax, %rax
1290 jnz L(Shl9LoopExit)
1291
1292 palignr $9, %xmm3, %xmm2
1293 movaps %xmm2, (%rdx)
1294 movaps 23(%rcx), %xmm2
1295
1296 pcmpeqb %xmm2, %xmm0
1297 lea 16(%rdx), %rdx
1298 pmovmskb %xmm0, %rax
1299 lea 16(%rcx), %rcx
1300 movaps %xmm2, %xmm3
1301# ifdef USE_AS_STRNCPY
1302 sub $16, %r8
1303 jbe L(StrncpyExit9Case2OrCase3)
1304# endif
1305 test %rax, %rax
1306 jnz L(Shl9LoopExit)
1307
1308 palignr $9, %xmm1, %xmm2
1309 movaps %xmm2, (%rdx)
1310 movaps 23(%rcx), %xmm2
1311
1312 pcmpeqb %xmm2, %xmm0
1313 lea 16(%rdx), %rdx
1314 pmovmskb %xmm0, %rax
1315 lea 16(%rcx), %rcx
1316# ifdef USE_AS_STRNCPY
1317 sub $16, %r8
1318 jbe L(StrncpyExit9Case2OrCase3)
1319# endif
1320 test %rax, %rax
1321 jnz L(Shl9LoopExit)
1322
1323 palignr $9, %xmm3, %xmm2
1324 movaps %xmm2, (%rdx)
1325 lea 23(%rcx), %rcx
1326 lea 16(%rdx), %rdx
1327
 /* Re-base for the 64-byte loop.  rcx (the next aligned block to read)
    is rounded down to a 64-byte boundary; rax = bytes stepped back
    (0, 16, 32 or 48).  rcx is then biased by -7 so that 7(%rcx) is
    the 64-byte-aligned block, rdx steps back by rax as well, and for
    strncpy rax is added back to r8.  */
1328 mov %rcx, %rax
1329 and $-0x40, %rcx
1330 sub %rcx, %rax
1331 lea -7(%rcx), %rcx
1332 sub %rax, %rdx
1333# ifdef USE_AS_STRNCPY
1334 add %rax, %r8
1335# endif
1336 movaps -9(%rcx), %xmm1
1337
1338/* 64 bytes loop */
1339 .p2align 4
1340L(Shl9LoopStart):
1341 movaps 7(%rcx), %xmm2
1342 movaps 23(%rcx), %xmm3
1343 movaps %xmm3, %xmm6
1344 movaps 39(%rcx), %xmm4
1345 movaps %xmm4, %xmm7
1346 movaps 55(%rcx), %xmm5
 /* Byte-wise minimum of the four blocks ends up in xmm7; one compare
    against zero then covers all 64 bytes.  */
1347 pminub %xmm2, %xmm6
1348 pminub %xmm5, %xmm7
1349 pminub %xmm6, %xmm7
1350 pcmpeqb %xmm0, %xmm7
1351 pmovmskb %xmm7, %rax
 /* xmm7 = raw last block, kept as "previous block" for the next
    iteration.  */
1352 movaps %xmm5, %xmm7
1353 palignr $9, %xmm4, %xmm5
1354 test %rax, %rax
 /* palignr does not modify EFLAGS, so jnz acts on the test above.
    xmm1 and xmm2 are still untouched, as L(Shl9Start) expects.  */
1355 palignr $9, %xmm3, %xmm4
1356 jnz L(Shl9Start)
1357# ifdef USE_AS_STRNCPY
1358 sub $64, %r8
1359 jbe L(StrncpyLeave9)
1360# endif
1361 palignr $9, %xmm2, %xmm3
1362 lea 64(%rcx), %rcx
1363 palignr $9, %xmm1, %xmm2
1364 movaps %xmm7, %xmm1
1365 movaps %xmm5, 48(%rdx)
1366 movaps %xmm4, 32(%rdx)
1367 movaps %xmm3, 16(%rdx)
1368 movaps %xmm2, (%rdx)
1369 lea 64(%rdx), %rdx
1370 jmp L(Shl9LoopStart)
1371
 /* NUL found in the block at 7(%rcx).  One 8-byte move from -1(%rcx)
    to -1(%rdx) covers the 7 bytes at %rcx that precede that block.
    %rsi = 7 is handed to L(CopyFrom1To16Bytes), which is not defined
    in this part of the file (presumably the offset of the block
    holding the NUL -- confirm there).  */
1372L(Shl9LoopExit):
1373 mov -1(%rcx), %r9
1374 mov $7, %rsi
1375 mov %r9, -1(%rdx)
1376 jmp L(CopyFrom1To16Bytes)
1377
/* L(Shl10): L(ShlHigh8) branches here when (%rcx & 15) == 10, so
   -10(%rcx) is a 16-byte-aligned address.  Every source load is an
   aligned movaps; palignr $10 joins two neighbouring aligned blocks
   into the 16 bytes that start at %rcx, and those are stored to
   (%rdx) with movaps.  xmm0 is the zero comparand: a compare that
   finds no NUL leaves it zero, so it is reused throughout.  */
1378 .p2align 4
1379L(Shl10):
1380 movaps -10(%rcx), %xmm1
1381 movaps 6(%rcx), %xmm2
 /* Also entered from L(Shl10LoopStart) when a 64-byte group holds a
    NUL.  xmm1 = aligned block at -10(%rcx), xmm2 = block at 6(%rcx),
    which is the one tested first.  */
1382L(Shl10Start):
1383 pcmpeqb %xmm2, %xmm0
1384 pmovmskb %xmm0, %rax
1385 movaps %xmm2, %xmm3
1386# ifdef USE_AS_STRNCPY
1387 sub $16, %r8
1388 jbe L(StrncpyExit10Case2OrCase3)
1389# endif
1390 test %rax, %rax
1391 jnz L(Shl10LoopExit)
1392
 /* Four unrolled 16-byte steps.  xmm1 and xmm3 alternate as the saved
    copy of the previous aligned block.  */
1393 palignr $10, %xmm1, %xmm2
1394 movaps %xmm2, (%rdx)
1395 movaps 22(%rcx), %xmm2
1396
1397 pcmpeqb %xmm2, %xmm0
1398 lea 16(%rdx), %rdx
1399 pmovmskb %xmm0, %rax
1400 lea 16(%rcx), %rcx
1401 movaps %xmm2, %xmm1
1402# ifdef USE_AS_STRNCPY
1403 sub $16, %r8
1404 jbe L(StrncpyExit10Case2OrCase3)
1405# endif
1406 test %rax, %rax
1407 jnz L(Shl10LoopExit)
1408
1409 palignr $10, %xmm3, %xmm2
1410 movaps %xmm2, (%rdx)
1411 movaps 22(%rcx), %xmm2
1412
1413 pcmpeqb %xmm2, %xmm0
1414 lea 16(%rdx), %rdx
1415 pmovmskb %xmm0, %rax
1416 lea 16(%rcx), %rcx
1417 movaps %xmm2, %xmm3
1418# ifdef USE_AS_STRNCPY
1419 sub $16, %r8
1420 jbe L(StrncpyExit10Case2OrCase3)
1421# endif
1422 test %rax, %rax
1423 jnz L(Shl10LoopExit)
1424
1425 palignr $10, %xmm1, %xmm2
1426 movaps %xmm2, (%rdx)
1427 movaps 22(%rcx), %xmm2
1428
1429 pcmpeqb %xmm2, %xmm0
1430 lea 16(%rdx), %rdx
1431 pmovmskb %xmm0, %rax
1432 lea 16(%rcx), %rcx
1433# ifdef USE_AS_STRNCPY
1434 sub $16, %r8
1435 jbe L(StrncpyExit10Case2OrCase3)
1436# endif
1437 test %rax, %rax
1438 jnz L(Shl10LoopExit)
1439
1440 palignr $10, %xmm3, %xmm2
1441 movaps %xmm2, (%rdx)
1442 lea 22(%rcx), %rcx
1443 lea 16(%rdx), %rdx
1444
 /* Re-base for the 64-byte loop.  rcx (the next aligned block to read)
    is rounded down to a 64-byte boundary; rax = bytes stepped back
    (0, 16, 32 or 48).  rcx is then biased by -6 so that 6(%rcx) is
    the 64-byte-aligned block, rdx steps back by rax as well, and for
    strncpy rax is added back to r8.  */
1445 mov %rcx, %rax
1446 and $-0x40, %rcx
1447 sub %rcx, %rax
1448 lea -6(%rcx), %rcx
1449 sub %rax, %rdx
1450# ifdef USE_AS_STRNCPY
1451 add %rax, %r8
1452# endif
1453 movaps -10(%rcx), %xmm1
1454
1455/* 64 bytes loop */
1456 .p2align 4
1457L(Shl10LoopStart):
1458 movaps 6(%rcx), %xmm2
1459 movaps 22(%rcx), %xmm3
1460 movaps %xmm3, %xmm6
1461 movaps 38(%rcx), %xmm4
1462 movaps %xmm4, %xmm7
1463 movaps 54(%rcx), %xmm5
 /* Byte-wise minimum of the four blocks ends up in xmm7; one compare
    against zero then covers all 64 bytes.  */
1464 pminub %xmm2, %xmm6
1465 pminub %xmm5, %xmm7
1466 pminub %xmm6, %xmm7
1467 pcmpeqb %xmm0, %xmm7
1468 pmovmskb %xmm7, %rax
 /* xmm7 = raw last block, kept as "previous block" for the next
    iteration.  */
1469 movaps %xmm5, %xmm7
1470 palignr $10, %xmm4, %xmm5
1471 test %rax, %rax
 /* palignr does not modify EFLAGS, so jnz acts on the test above.
    xmm1 and xmm2 are still untouched, as L(Shl10Start) expects.  */
1472 palignr $10, %xmm3, %xmm4
1473 jnz L(Shl10Start)
1474# ifdef USE_AS_STRNCPY
1475 sub $64, %r8
1476 jbe L(StrncpyLeave10)
1477# endif
1478 palignr $10, %xmm2, %xmm3
1479 lea 64(%rcx), %rcx
1480 palignr $10, %xmm1, %xmm2
1481 movaps %xmm7, %xmm1
1482 movaps %xmm5, 48(%rdx)
1483 movaps %xmm4, 32(%rdx)
1484 movaps %xmm3, 16(%rdx)
1485 movaps %xmm2, (%rdx)
1486 lea 64(%rdx), %rdx
1487 jmp L(Shl10LoopStart)
1488
 /* NUL found in the block at 6(%rcx).  One 8-byte move from -2(%rcx)
    to -2(%rdx) covers the 6 bytes at %rcx that precede that block.
    %rsi = 6 is handed to L(CopyFrom1To16Bytes), which is not defined
    in this part of the file (presumably the offset of the block
    holding the NUL -- confirm there).  */
1489L(Shl10LoopExit):
1490 mov -2(%rcx), %r9
1491 mov $6, %rsi
1492 mov %r9, -2(%rdx)
1493 jmp L(CopyFrom1To16Bytes)
1494
1495 .p2align 4
/* Shl11: copy path that uses an 11-byte PALIGNR shift.
   All vector accesses through %rcx and %rdx are MOVAPS, i.e. the source
   is read at the 16-byte-aligned addresses 5(%rcx) + 16*k and the
   destination is written at aligned addresses; PALIGNR $11 stitches two
   consecutive source vectors into the 16 bytes that start at (%rcx).
   NOTE(review): %xmm0 is the comparand of the byte search and is
   presumably all-zero on entry (set before this part of the file) --
   confirm.  With USE_AS_STRNCPY, %r8 counts the bytes still to be
   written.  */
L(Shl11):
	movaps	-11(%rcx), %xmm1
	movaps	5(%rcx), %xmm2
L(Shl11Start):
	/* %rax = mask of the byte lanes of %xmm2 that equal %xmm0.  */
	pcmpeqb	%xmm2, %xmm0
	pmovmskb	%xmm0, %rax
	movaps	%xmm2, %xmm3		/* unshifted copy for the next PALIGNR */
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit11Case2OrCase3)	/* 16 or fewer bytes were left */
# endif
	test	%rax, %rax
	jnz	L(Shl11LoopExit)

	/* The mask was zero, so the PCMPEQB result left %xmm0 all-zero.  */
	palignr	$11, %xmm1, %xmm2	/* 16 source bytes starting at (%rcx) */
	movaps	%xmm2, (%rdx)
	movaps	21(%rcx), %xmm2

	/* Second 16-byte step; the pointers advance while the mask forms.  */
	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb	%xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm1
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit11Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl11LoopExit)

	palignr	$11, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	21(%rcx), %xmm2

	/* Third 16-byte step.  */
	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb	%xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit11Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl11LoopExit)

	palignr	$11, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	21(%rcx), %xmm2

	/* Fourth 16-byte step.  */
	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb	%xmm0, %rax
	lea	16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit11Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl11LoopExit)

	palignr	$11, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	lea	21(%rcx), %rcx
	lea	16(%rdx), %rdx

	/* Round %rcx down to a 64-byte boundary and re-bias it by -5.
	   %rax is the number of bytes stepped back; %rdx steps back by the
	   same amount and, for strncpy, %r8 gets those bytes back.  */
	mov	%rcx, %rax
	and	$-0x40, %rcx
	sub	%rcx, %rax
	lea	-5(%rcx), %rcx
	sub	%rax, %rdx
# ifdef USE_AS_STRNCPY
	add	%rax, %r8
# endif
	movaps	-11(%rcx), %xmm1

/* 64 bytes loop */
	.p2align 4
L(Shl11LoopStart):
	/* Load four aligned vectors and fold them with PMINUB: a lane of
	   the folded minimum is zero iff one of the four vectors has a zero
	   byte in that lane.  The fold is then compared against %xmm0.  */
	movaps	5(%rcx), %xmm2
	movaps	21(%rcx), %xmm3
	movaps	%xmm3, %xmm6
	movaps	37(%rcx), %xmm4
	movaps	%xmm4, %xmm7
	movaps	53(%rcx), %xmm5
	pminub	%xmm2, %xmm6
	pminub	%xmm5, %xmm7
	pminub	%xmm6, %xmm7
	pcmpeqb	%xmm0, %xmm7
	pmovmskb	%xmm7, %rax
	movaps	%xmm5, %xmm7		/* keep the unshifted last vector */
	palignr	$11, %xmm4, %xmm5
	test	%rax, %rax
	palignr	$11, %xmm3, %xmm4	/* SSE op: flags of the TEST survive */
	jnz	L(Shl11Start)		/* a lane matched: redo 16 bytes at a time */
# ifdef USE_AS_STRNCPY
	sub	$64, %r8
	jbe	L(StrncpyLeave11)
# endif
	palignr	$11, %xmm2, %xmm3
	lea	64(%rcx), %rcx
	palignr	$11, %xmm1, %xmm2
	movaps	%xmm7, %xmm1		/* carry the last vector into the next round */
	movaps	%xmm5, 48(%rdx)
	movaps	%xmm4, 32(%rdx)
	movaps	%xmm3, 16(%rdx)
	movaps	%xmm2, (%rdx)
	lea	64(%rdx), %rdx
	jmp	L(Shl11LoopStart)

L(Shl11LoopExit):
	/* Copy 8 bytes from -3(%rcx); this covers the 5 source bytes in
	   front of the vector that produced the mask.  %rsi = 5 is what
	   L(CopyFrom1To16Bytes) adds to both pointers.  */
	mov	-3(%rcx), %r9
	mov	$5, %rsi
	mov	%r9, -3(%rdx)
	jmp	L(CopyFrom1To16Bytes)
1611
1612 .p2align 4
/* Shl12: copy path that uses a 12-byte PALIGNR shift.
   All vector accesses through %rcx and %rdx are MOVAPS, i.e. the source
   is read at the 16-byte-aligned addresses 4(%rcx) + 16*k and the
   destination is written at aligned addresses; PALIGNR $12 stitches two
   consecutive source vectors into the 16 bytes that start at (%rcx).
   NOTE(review): %xmm0 is the comparand of the byte search and is
   presumably all-zero on entry (set before this part of the file) --
   confirm.  With USE_AS_STRNCPY, %r8 counts the bytes still to be
   written.  */
L(Shl12):
	movaps	-12(%rcx), %xmm1
	movaps	4(%rcx), %xmm2
L(Shl12Start):
	/* %rax = mask of the byte lanes of %xmm2 that equal %xmm0.  */
	pcmpeqb	%xmm2, %xmm0
	pmovmskb	%xmm0, %rax
	movaps	%xmm2, %xmm3		/* unshifted copy for the next PALIGNR */
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit12Case2OrCase3)	/* 16 or fewer bytes were left */
# endif
	test	%rax, %rax
	jnz	L(Shl12LoopExit)

	/* The mask was zero, so the PCMPEQB result left %xmm0 all-zero.  */
	palignr	$12, %xmm1, %xmm2	/* 16 source bytes starting at (%rcx) */
	movaps	%xmm2, (%rdx)
	movaps	20(%rcx), %xmm2

	/* Second 16-byte step; the pointers advance while the mask forms.  */
	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb	%xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm1
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit12Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl12LoopExit)

	palignr	$12, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	20(%rcx), %xmm2

	/* Third 16-byte step.  */
	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb	%xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit12Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl12LoopExit)

	palignr	$12, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	20(%rcx), %xmm2

	/* Fourth 16-byte step.  */
	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb	%xmm0, %rax
	lea	16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit12Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl12LoopExit)

	palignr	$12, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	lea	20(%rcx), %rcx
	lea	16(%rdx), %rdx

	/* Round %rcx down to a 64-byte boundary and re-bias it by -4.
	   %rax is the number of bytes stepped back; %rdx steps back by the
	   same amount and, for strncpy, %r8 gets those bytes back.  */
	mov	%rcx, %rax
	and	$-0x40, %rcx
	sub	%rcx, %rax
	lea	-4(%rcx), %rcx
	sub	%rax, %rdx
# ifdef USE_AS_STRNCPY
	add	%rax, %r8
# endif
	movaps	-12(%rcx), %xmm1

/* 64 bytes loop */
	.p2align 4
L(Shl12LoopStart):
	/* Load four aligned vectors and fold them with PMINUB: a lane of
	   the folded minimum is zero iff one of the four vectors has a zero
	   byte in that lane.  The fold is then compared against %xmm0.  */
	movaps	4(%rcx), %xmm2
	movaps	20(%rcx), %xmm3
	movaps	%xmm3, %xmm6
	movaps	36(%rcx), %xmm4
	movaps	%xmm4, %xmm7
	movaps	52(%rcx), %xmm5
	pminub	%xmm2, %xmm6
	pminub	%xmm5, %xmm7
	pminub	%xmm6, %xmm7
	pcmpeqb	%xmm0, %xmm7
	pmovmskb	%xmm7, %rax
	movaps	%xmm5, %xmm7		/* keep the unshifted last vector */
	palignr	$12, %xmm4, %xmm5
	test	%rax, %rax
	palignr	$12, %xmm3, %xmm4	/* SSE op: flags of the TEST survive */
	jnz	L(Shl12Start)		/* a lane matched: redo 16 bytes at a time */
# ifdef USE_AS_STRNCPY
	sub	$64, %r8
	jbe	L(StrncpyLeave12)
# endif
	palignr	$12, %xmm2, %xmm3
	lea	64(%rcx), %rcx
	palignr	$12, %xmm1, %xmm2
	movaps	%xmm7, %xmm1		/* carry the last vector into the next round */
	movaps	%xmm5, 48(%rdx)
	movaps	%xmm4, 32(%rdx)
	movaps	%xmm3, 16(%rdx)
	movaps	%xmm2, (%rdx)
	lea	64(%rdx), %rdx
	jmp	L(Shl12LoopStart)

L(Shl12LoopExit):
	/* Copy the 4 source bytes at (%rcx), which sit in front of the
	   vector that produced the mask.  %rsi = 4 is what
	   L(CopyFrom1To16Bytes) adds to both pointers.  */
	mov	(%rcx), %r9d
	mov	$4, %rsi
	mov	%r9d, (%rdx)
	jmp	L(CopyFrom1To16Bytes)
1728
1729 .p2align 4
/* Shl13: copy path that uses a 13-byte PALIGNR shift.
   All vector accesses through %rcx and %rdx are MOVAPS, i.e. the source
   is read at the 16-byte-aligned addresses 3(%rcx) + 16*k and the
   destination is written at aligned addresses; PALIGNR $13 stitches two
   consecutive source vectors into the 16 bytes that start at (%rcx).
   NOTE(review): %xmm0 is the comparand of the byte search and is
   presumably all-zero on entry (set before this part of the file) --
   confirm.  With USE_AS_STRNCPY, %r8 counts the bytes still to be
   written.  */
L(Shl13):
	movaps	-13(%rcx), %xmm1
	movaps	3(%rcx), %xmm2
L(Shl13Start):
	/* %rax = mask of the byte lanes of %xmm2 that equal %xmm0.  */
	pcmpeqb	%xmm2, %xmm0
	pmovmskb	%xmm0, %rax
	movaps	%xmm2, %xmm3		/* unshifted copy for the next PALIGNR */
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit13Case2OrCase3)	/* 16 or fewer bytes were left */
# endif
	test	%rax, %rax
	jnz	L(Shl13LoopExit)

	/* The mask was zero, so the PCMPEQB result left %xmm0 all-zero.  */
	palignr	$13, %xmm1, %xmm2	/* 16 source bytes starting at (%rcx) */
	movaps	%xmm2, (%rdx)
	movaps	19(%rcx), %xmm2

	/* Second 16-byte step; the pointers advance while the mask forms.  */
	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb	%xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm1
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit13Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl13LoopExit)

	palignr	$13, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	19(%rcx), %xmm2

	/* Third 16-byte step.  */
	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb	%xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit13Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl13LoopExit)

	palignr	$13, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	19(%rcx), %xmm2

	/* Fourth 16-byte step.  */
	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb	%xmm0, %rax
	lea	16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit13Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl13LoopExit)

	palignr	$13, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	lea	19(%rcx), %rcx
	lea	16(%rdx), %rdx

	/* Round %rcx down to a 64-byte boundary and re-bias it by -3.
	   %rax is the number of bytes stepped back; %rdx steps back by the
	   same amount and, for strncpy, %r8 gets those bytes back.  */
	mov	%rcx, %rax
	and	$-0x40, %rcx
	sub	%rcx, %rax
	lea	-3(%rcx), %rcx
	sub	%rax, %rdx
# ifdef USE_AS_STRNCPY
	add	%rax, %r8
# endif
	movaps	-13(%rcx), %xmm1

/* 64 bytes loop */
	.p2align 4
L(Shl13LoopStart):
	/* Load four aligned vectors and fold them with PMINUB: a lane of
	   the folded minimum is zero iff one of the four vectors has a zero
	   byte in that lane.  The fold is then compared against %xmm0.  */
	movaps	3(%rcx), %xmm2
	movaps	19(%rcx), %xmm3
	movaps	%xmm3, %xmm6
	movaps	35(%rcx), %xmm4
	movaps	%xmm4, %xmm7
	movaps	51(%rcx), %xmm5
	pminub	%xmm2, %xmm6
	pminub	%xmm5, %xmm7
	pminub	%xmm6, %xmm7
	pcmpeqb	%xmm0, %xmm7
	pmovmskb	%xmm7, %rax
	movaps	%xmm5, %xmm7		/* keep the unshifted last vector */
	palignr	$13, %xmm4, %xmm5
	test	%rax, %rax
	palignr	$13, %xmm3, %xmm4	/* SSE op: flags of the TEST survive */
	jnz	L(Shl13Start)		/* a lane matched: redo 16 bytes at a time */
# ifdef USE_AS_STRNCPY
	sub	$64, %r8
	jbe	L(StrncpyLeave13)
# endif
	palignr	$13, %xmm2, %xmm3
	lea	64(%rcx), %rcx
	palignr	$13, %xmm1, %xmm2
	movaps	%xmm7, %xmm1		/* carry the last vector into the next round */
	movaps	%xmm5, 48(%rdx)
	movaps	%xmm4, 32(%rdx)
	movaps	%xmm3, 16(%rdx)
	movaps	%xmm2, (%rdx)
	lea	64(%rdx), %rdx
	jmp	L(Shl13LoopStart)

L(Shl13LoopExit):
	/* Copy 4 bytes from -1(%rcx); this covers the 3 source bytes in
	   front of the vector that produced the mask.  %rsi = 3 is what
	   L(CopyFrom1To16Bytes) adds to both pointers.  */
	mov	-1(%rcx), %r9d
	mov	$3, %rsi
	mov	%r9d, -1(%rdx)
	jmp	L(CopyFrom1To16Bytes)
1845
1846 .p2align 4
/* Shl14: copy path that uses a 14-byte PALIGNR shift.
   All vector accesses through %rcx and %rdx are MOVAPS, i.e. the source
   is read at the 16-byte-aligned addresses 2(%rcx) + 16*k and the
   destination is written at aligned addresses; PALIGNR $14 stitches two
   consecutive source vectors into the 16 bytes that start at (%rcx).
   NOTE(review): %xmm0 is the comparand of the byte search and is
   presumably all-zero on entry (set before this part of the file) --
   confirm.  With USE_AS_STRNCPY, %r8 counts the bytes still to be
   written.  */
L(Shl14):
	movaps	-14(%rcx), %xmm1
	movaps	2(%rcx), %xmm2
L(Shl14Start):
	/* %rax = mask of the byte lanes of %xmm2 that equal %xmm0.  */
	pcmpeqb	%xmm2, %xmm0
	pmovmskb	%xmm0, %rax
	movaps	%xmm2, %xmm3		/* unshifted copy for the next PALIGNR */
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit14Case2OrCase3)	/* 16 or fewer bytes were left */
# endif
	test	%rax, %rax
	jnz	L(Shl14LoopExit)

	/* The mask was zero, so the PCMPEQB result left %xmm0 all-zero.  */
	palignr	$14, %xmm1, %xmm2	/* 16 source bytes starting at (%rcx) */
	movaps	%xmm2, (%rdx)
	movaps	18(%rcx), %xmm2

	/* Second 16-byte step; the pointers advance while the mask forms.  */
	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb	%xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm1
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit14Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl14LoopExit)

	palignr	$14, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	18(%rcx), %xmm2

	/* Third 16-byte step.  */
	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb	%xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit14Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl14LoopExit)

	palignr	$14, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	18(%rcx), %xmm2

	/* Fourth 16-byte step.  */
	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb	%xmm0, %rax
	lea	16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit14Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl14LoopExit)

	palignr	$14, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	lea	18(%rcx), %rcx
	lea	16(%rdx), %rdx

	/* Round %rcx down to a 64-byte boundary and re-bias it by -2.
	   %rax is the number of bytes stepped back; %rdx steps back by the
	   same amount and, for strncpy, %r8 gets those bytes back.  */
	mov	%rcx, %rax
	and	$-0x40, %rcx
	sub	%rcx, %rax
	lea	-2(%rcx), %rcx
	sub	%rax, %rdx
# ifdef USE_AS_STRNCPY
	add	%rax, %r8
# endif
	movaps	-14(%rcx), %xmm1

/* 64 bytes loop */
	.p2align 4
L(Shl14LoopStart):
	/* Load four aligned vectors and fold them with PMINUB: a lane of
	   the folded minimum is zero iff one of the four vectors has a zero
	   byte in that lane.  The fold is then compared against %xmm0.  */
	movaps	2(%rcx), %xmm2
	movaps	18(%rcx), %xmm3
	movaps	%xmm3, %xmm6
	movaps	34(%rcx), %xmm4
	movaps	%xmm4, %xmm7
	movaps	50(%rcx), %xmm5
	pminub	%xmm2, %xmm6
	pminub	%xmm5, %xmm7
	pminub	%xmm6, %xmm7
	pcmpeqb	%xmm0, %xmm7
	pmovmskb	%xmm7, %rax
	movaps	%xmm5, %xmm7		/* keep the unshifted last vector */
	palignr	$14, %xmm4, %xmm5
	test	%rax, %rax
	palignr	$14, %xmm3, %xmm4	/* SSE op: flags of the TEST survive */
	jnz	L(Shl14Start)		/* a lane matched: redo 16 bytes at a time */
# ifdef USE_AS_STRNCPY
	sub	$64, %r8
	jbe	L(StrncpyLeave14)
# endif
	palignr	$14, %xmm2, %xmm3
	lea	64(%rcx), %rcx
	palignr	$14, %xmm1, %xmm2
	movaps	%xmm7, %xmm1		/* carry the last vector into the next round */
	movaps	%xmm5, 48(%rdx)
	movaps	%xmm4, 32(%rdx)
	movaps	%xmm3, 16(%rdx)
	movaps	%xmm2, (%rdx)
	lea	64(%rdx), %rdx
	jmp	L(Shl14LoopStart)

L(Shl14LoopExit):
	/* Copy 4 bytes from -2(%rcx); this covers the 2 source bytes in
	   front of the vector that produced the mask.  %rsi = 2 is what
	   L(CopyFrom1To16Bytes) adds to both pointers.  */
	mov	-2(%rcx), %r9d
	mov	$2, %rsi
	mov	%r9d, -2(%rdx)
	jmp	L(CopyFrom1To16Bytes)
1962
1963 .p2align 4
/* Shl15: copy path that uses a 15-byte PALIGNR shift.
   All vector accesses through %rcx and %rdx are MOVAPS, i.e. the source
   is read at the 16-byte-aligned addresses 1(%rcx) + 16*k and the
   destination is written at aligned addresses; PALIGNR $15 stitches two
   consecutive source vectors into the 16 bytes that start at (%rcx).
   NOTE(review): %xmm0 is the comparand of the byte search and is
   presumably all-zero on entry (set before this part of the file) --
   confirm.  With USE_AS_STRNCPY, %r8 counts the bytes still to be
   written.  */
L(Shl15):
	movaps	-15(%rcx), %xmm1
	movaps	1(%rcx), %xmm2
L(Shl15Start):
	/* %rax = mask of the byte lanes of %xmm2 that equal %xmm0.  */
	pcmpeqb	%xmm2, %xmm0
	pmovmskb	%xmm0, %rax
	movaps	%xmm2, %xmm3		/* unshifted copy for the next PALIGNR */
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit15Case2OrCase3)	/* 16 or fewer bytes were left */
# endif
	test	%rax, %rax
	jnz	L(Shl15LoopExit)

	/* The mask was zero, so the PCMPEQB result left %xmm0 all-zero.  */
	palignr	$15, %xmm1, %xmm2	/* 16 source bytes starting at (%rcx) */
	movaps	%xmm2, (%rdx)
	movaps	17(%rcx), %xmm2

	/* Second 16-byte step; the pointers advance while the mask forms.  */
	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb	%xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm1
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit15Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl15LoopExit)

	palignr	$15, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	17(%rcx), %xmm2

	/* Third 16-byte step.  */
	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb	%xmm0, %rax
	lea	16(%rcx), %rcx
	movaps	%xmm2, %xmm3
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit15Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl15LoopExit)

	palignr	$15, %xmm1, %xmm2
	movaps	%xmm2, (%rdx)
	movaps	17(%rcx), %xmm2

	/* Fourth 16-byte step.  */
	pcmpeqb	%xmm2, %xmm0
	lea	16(%rdx), %rdx
	pmovmskb	%xmm0, %rax
	lea	16(%rcx), %rcx
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	jbe	L(StrncpyExit15Case2OrCase3)
# endif
	test	%rax, %rax
	jnz	L(Shl15LoopExit)

	palignr	$15, %xmm3, %xmm2
	movaps	%xmm2, (%rdx)
	lea	17(%rcx), %rcx
	lea	16(%rdx), %rdx

	/* Round %rcx down to a 64-byte boundary and re-bias it by -1.
	   %rax is the number of bytes stepped back; %rdx steps back by the
	   same amount and, for strncpy, %r8 gets those bytes back.  */
	mov	%rcx, %rax
	and	$-0x40, %rcx
	sub	%rcx, %rax
	lea	-1(%rcx), %rcx
	sub	%rax, %rdx
# ifdef USE_AS_STRNCPY
	add	%rax, %r8
# endif
	movaps	-15(%rcx), %xmm1

/* 64 bytes loop */
	.p2align 4
L(Shl15LoopStart):
	/* Load four aligned vectors and fold them with PMINUB: a lane of
	   the folded minimum is zero iff one of the four vectors has a zero
	   byte in that lane.  The fold is then compared against %xmm0.  */
	movaps	1(%rcx), %xmm2
	movaps	17(%rcx), %xmm3
	movaps	%xmm3, %xmm6
	movaps	33(%rcx), %xmm4
	movaps	%xmm4, %xmm7
	movaps	49(%rcx), %xmm5
	pminub	%xmm2, %xmm6
	pminub	%xmm5, %xmm7
	pminub	%xmm6, %xmm7
	pcmpeqb	%xmm0, %xmm7
	pmovmskb	%xmm7, %rax
	movaps	%xmm5, %xmm7		/* keep the unshifted last vector */
	palignr	$15, %xmm4, %xmm5
	test	%rax, %rax
	palignr	$15, %xmm3, %xmm4	/* SSE op: flags of the TEST survive */
	jnz	L(Shl15Start)		/* a lane matched: redo 16 bytes at a time */
# ifdef USE_AS_STRNCPY
	sub	$64, %r8
	jbe	L(StrncpyLeave15)
# endif
	palignr	$15, %xmm2, %xmm3
	lea	64(%rcx), %rcx
	palignr	$15, %xmm1, %xmm2
	movaps	%xmm7, %xmm1		/* carry the last vector into the next round */
	movaps	%xmm5, 48(%rdx)
	movaps	%xmm4, 32(%rdx)
	movaps	%xmm3, 16(%rdx)
	movaps	%xmm2, (%rdx)
	lea	64(%rdx), %rdx
	jmp	L(Shl15LoopStart)

L(Shl15LoopExit):
	/* Copy 4 bytes from -3(%rcx); this covers the 1 source byte in
	   front of the vector that produced the mask.  %rsi = 1 is what
	   L(CopyFrom1To16Bytes) adds to both pointers.  */
	mov	-3(%rcx), %r9d
	mov	$1, %rsi
	mov	%r9d, -3(%rdx)
	/* The explicit jump is only assembled for USE_AS_STRCAT; otherwise
	   L(CopyFrom1To16Bytes) is defined directly below and execution
	   falls through into it.  */
# ifdef USE_AS_STRCAT
	jmp	L(CopyFrom1To16Bytes)
# endif
2081
2082# ifndef USE_AS_STRCAT
2083
2084 .p2align 4
/* Common exit once a 16-byte vector produced a non-zero mask.
   In:  %rax = PMOVMSKB mask of that vector, %rsi = distance from the
	current %rcx/%rdx to the vector.
   Advances both pointers by %rsi and dispatches to L(ExitN), where N is
   the 1-based position of the lowest set mask bit.  */
L(CopyFrom1To16Bytes):
# ifdef USE_AS_STRNCPY
	/* The paths visible in this part of the file arrive with %r8 16
	   below the real remaining count; undo that.  */
	add	$16, %r8
# endif
	add	%rsi, %rdx
	add	%rsi, %rcx

	test	%al, %al
	jz	L(ExitHigh)		/* nothing in mask bits 0-7: check 8-15 */
	test	$0x01, %al
	jnz	L(Exit1)
	test	$0x02, %al
	jnz	L(Exit2)
	test	$0x04, %al
	jnz	L(Exit3)
	test	$0x08, %al
	jnz	L(Exit4)
	test	$0x10, %al
	jnz	L(Exit5)
	test	$0x20, %al
	jnz	L(Exit6)
	test	$0x40, %al
	jnz	L(Exit7)
	/* Only bit 7 can be set now: fall through into L(Exit8).  */
2108
2109 .p2align 4
/* Exit8: copy the last 8 bytes from (%rcx) to (%rdx) and return.
   Return value is %rdx + 7 (USE_AS_STPCPY) or %rdi.  With USE_AS_STRNCPY
   %r8 is reduced by 8 and, if anything is left, control goes to
   L(StrncpyFillTailWithZero1) with %rcx = %rdx + 8.  Also entered by
   fall-through from L(CopyFrom1To16Bytes).  */
L(Exit8):
	mov	(%rcx), %rax
	mov	%rax, (%rdx)
# ifdef USE_AS_STPCPY
	lea	7(%rdx), %rax
# else
	mov	%rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
	sub	$8, %r8
	lea	8(%rdx), %rcx		/* LEA keeps the flags of the SUB for the JNZ */
	jnz	L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	/* %rax += 1 unless the byte it points at is zero: CMPB sets CF only
	   for a zero byte and SBB $-1 adds 1 - CF.  */
	cmpb	$1, (%rax)
	sbb	$-1, %rax
# endif
# endif
	ret
2128
2129 .p2align 4
/* Second half of the mask dispatch: the low mask byte was zero, so test
   bits 8-14 (in %ah) from lowest to highest and jump to L(Exit9) ..
   L(Exit15).  If none of them is set, fall through into L(Exit16).  */
L(ExitHigh):
	test	$0x01, %ah
	jnz	L(Exit9)
	test	$0x02, %ah
	jnz	L(Exit10)
	test	$0x04, %ah
	jnz	L(Exit11)
	test	$0x08, %ah
	jnz	L(Exit12)
	test	$0x10, %ah
	jnz	L(Exit13)
	test	$0x20, %ah
	jnz	L(Exit14)
	test	$0x40, %ah
	jnz	L(Exit15)
2145
2146 .p2align 4
/* Exit16: copy the last 16 bytes from (%rcx) to (%rdx) with two 8-byte
   moves and return.  Return value is %rdx + 15 (USE_AS_STPCPY) or %rdi.
   With USE_AS_STRNCPY %r8 is reduced by 16 and, if anything is left,
   control goes to L(StrncpyFillTailWithZero1) with %rcx = %rdx + 16.
   Also entered by fall-through from L(ExitHigh).  */
L(Exit16):
	mov	(%rcx), %rax
	mov	%rax, (%rdx)
	mov	8(%rcx), %rax
	mov	%rax, 8(%rdx)
# ifdef USE_AS_STPCPY
	lea	15(%rdx), %rax
# else
	mov	%rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
	sub	$16, %r8
	lea	16(%rdx), %rcx		/* LEA keeps the flags of the SUB for the JNZ */
	jnz	L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	/* %rax += 1 unless the byte it points at is zero: CMPB sets CF only
	   for a zero byte and SBB $-1 adds 1 - CF.  */
	cmpb	$1, (%rax)
	sbb	$-1, %rax
# endif
# endif
	ret
2167
2168# ifdef USE_AS_STRNCPY
2169
2170 .p2align 4
/* strncpy "case 2": the current vector has a non-zero mask in %rax AND
   the length limit in %r8 ends within the same 16 bytes.  The copy stops
   at whichever comes first, so each step checks the limit before the
   mask bit.
   In:  %rax = mask, %rsi = distance to the vector, %r8 = remaining
	count minus 16 (at most 16 after the correction below on the
	paths visible in this part of the file).  */
L(CopyFrom1To16BytesCase2):
	add	$16, %r8
	add	%rsi, %rcx
	lea	(%rsi, %rdx), %rsi	/* advanced destination, parked in %rsi */
	/* %dh receives bit 15 of (%r8 - 9), set when that difference is
	   negative for the small %r8 seen here, OR-ed with the low mask
	   byte.  It is zero only if %r8 >= 9 and mask bits 0-7 are clear.  */
	lea	-9(%r8), %rdx
	and	$1<<7, %dh
	or	%al, %dh
	test	%dh, %dh
	lea	(%rsi), %rdx		/* restore %rdx; LEA keeps the flags */
	jz	L(ExitHighCase2)

	cmp	$1, %r8
	je	L(Exit1)
	test	$0x01, %al
	jnz	L(Exit1)
	cmp	$2, %r8
	je	L(Exit2)
	test	$0x02, %al
	jnz	L(Exit2)
	cmp	$3, %r8
	je	L(Exit3)
	test	$0x04, %al
	jnz	L(Exit3)
	cmp	$4, %r8
	je	L(Exit4)
	test	$0x08, %al
	jnz	L(Exit4)
	cmp	$5, %r8
	je	L(Exit5)
	test	$0x10, %al
	jnz	L(Exit5)
	cmp	$6, %r8
	je	L(Exit6)
	test	$0x20, %al
	jnz	L(Exit6)
	cmp	$7, %r8
	je	L(Exit7)
	test	$0x40, %al
	jnz	L(Exit7)
	jmp	L(Exit8)
2211
2212 .p2align 4
/* Continuation of L(CopyFrom1To16BytesCase2) for positions 9..16: for
   each N from 9 to 15, leave through L(ExitN) if the limit %r8 equals N
   or if mask bit N-1 (tested in %ah) is set; otherwise use L(Exit16).  */
L(ExitHighCase2):
	cmp	$9, %r8
	je	L(Exit9)
	test	$0x01, %ah
	jnz	L(Exit9)
	cmp	$10, %r8
	je	L(Exit10)
	test	$0x02, %ah
	jnz	L(Exit10)
	cmp	$11, %r8
	je	L(Exit11)
	test	$0x04, %ah
	jnz	L(Exit11)
	cmp	$12, %r8
	je	L(Exit12)
	test	$0x8, %ah
	jnz	L(Exit12)
	cmp	$13, %r8
	je	L(Exit13)
	test	$0x10, %ah
	jnz	L(Exit13)
	cmp	$14, %r8
	je	L(Exit14)
	test	$0x20, %ah
	jnz	L(Exit14)
	cmp	$15, %r8
	je	L(Exit15)
	test	$0x40, %ah
	jnz	L(Exit15)
	jmp	L(Exit16)
2243
/* Selector between the two strncpy exits: a non-zero mask in %rax goes
   to L(CopyFrom1To16BytesCase2); a zero mask falls through into
   L(CopyFrom1To16BytesCase3), which is defined directly below.  */
L(CopyFrom1To16BytesCase2OrCase3):
	test	%rax, %rax
	jnz	L(CopyFrom1To16BytesCase2)
2247
2248 .p2align 4
/* strncpy "case 3": the length limit ends within the current 16 bytes
   and the mask for them is zero, so exactly %r8 bytes are copied.
   In:  %rsi = distance from the current %rcx/%rdx to the bytes to copy,
	%r8 = remaining count minus 16.
   Adds the 16 back to %r8, advances both pointers by %rsi and picks
   L(ExitN) with N = %r8 (1..16 on the paths visible in this part of the
   file) through a small compare tree.  The last branch of the tree is an
   unconditional jump, like the one that ends L(FillLess12), so the block
   never relies on flags to avoid running into the code that follows.  */
L(CopyFrom1To16BytesCase3):
	add	$16, %r8
	add	%rsi, %rdx
	add	%rsi, %rcx

	cmp	$16, %r8
	je	L(Exit16)
	cmp	$8, %r8
	je	L(Exit8)
	jg	L(More8Case3)
	cmp	$4, %r8
	je	L(Exit4)
	jg	L(More4Case3)
	cmp	$2, %r8
	jl	L(Exit1)
	je	L(Exit2)
	jg	L(Exit3)
L(More8Case3): /* but less than 16 */
	cmp	$12, %r8
	je	L(Exit12)
	jl	L(Less12Case3)
	cmp	$14, %r8
	jl	L(Exit13)
	je	L(Exit14)
	jg	L(Exit15)
L(More4Case3): /* but less than 8 */
	cmp	$6, %r8
	jl	L(Exit5)
	je	L(Exit6)
	jg	L(Exit7)
L(Less12Case3): /* but more than 8 */
	cmp	$10, %r8
	jl	L(Exit9)
	je	L(Exit10)
	/* Neither less nor equal, so %r8 is 11.  */
	jmp	L(Exit11)
2284# endif
2285
2286 .p2align 4
/* Exit1: copy the last byte from (%rcx) to (%rdx) and return.
   Return value is %rdx (USE_AS_STPCPY) or %rdi.  With USE_AS_STRNCPY
   %r8 is reduced by 1 and, if anything is left, control goes to
   L(StrncpyFillTailWithZero1) with %rcx = %rdx + 1.  */
L(Exit1):
	movb	(%rcx), %al
	movb	%al, (%rdx)
# ifdef USE_AS_STPCPY
	lea	(%rdx), %rax
# else
	mov	%rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
	sub	$1, %r8
	lea	1(%rdx), %rcx		/* LEA keeps the flags of the SUB for the JNZ */
	jnz	L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	/* %rax += 1 unless the byte it points at is zero: CMPB sets CF only
	   for a zero byte and SBB $-1 adds 1 - CF.  */
	cmpb	$1, (%rax)
	sbb	$-1, %rax
# endif
# endif
	ret
2305
2306 .p2align 4
/* Exit2: copy the last 2 bytes from (%rcx) to (%rdx) and return.
   Return value is %rdx + 1 (USE_AS_STPCPY) or %rdi.  With USE_AS_STRNCPY
   %r8 is reduced by 2 and, if anything is left, control goes to
   L(StrncpyFillTailWithZero1) with %rcx = %rdx + 2.  */
L(Exit2):
	movw	(%rcx), %ax
	movw	%ax, (%rdx)
# ifdef USE_AS_STPCPY
	lea	1(%rdx), %rax
# else
	mov	%rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
	sub	$2, %r8
	lea	2(%rdx), %rcx		/* LEA keeps the flags of the SUB for the JNZ */
	jnz	L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	/* %rax += 1 unless the byte it points at is zero: CMPB sets CF only
	   for a zero byte and SBB $-1 adds 1 - CF.  */
	cmpb	$1, (%rax)
	sbb	$-1, %rax
# endif
# endif
	ret
2325
2326 .p2align 4
/* Exit3: copy the last 3 bytes from (%rcx) to (%rdx) (a 2-byte move plus
   a 1-byte move) and return.  Return value is %rdx + 2 (USE_AS_STPCPY)
   or %rdi.  With USE_AS_STRNCPY %r8 is reduced by 3 and, if anything is
   left, control goes to L(StrncpyFillTailWithZero1) with
   %rcx = %rdx + 3.  */
L(Exit3):
	movw	(%rcx), %ax
	movw	%ax, (%rdx)
	movb	2(%rcx), %al
	movb	%al, 2(%rdx)
# ifdef USE_AS_STPCPY
	lea	2(%rdx), %rax
# else
	mov	%rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
	sub	$3, %r8
	lea	3(%rdx), %rcx		/* LEA keeps the flags of the SUB for the JNZ */
	jnz	L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	/* %rax += 1 unless the byte it points at is zero: CMPB sets CF only
	   for a zero byte and SBB $-1 adds 1 - CF.  */
	cmpb	$1, (%rax)
	sbb	$-1, %rax
# endif
# endif
	ret
2347
2348 .p2align 4
/* Exit4: copy the last 4 bytes from (%rcx) to (%rdx) and return.
   Return value is %rdx + 3 (USE_AS_STPCPY) or %rdi.  With USE_AS_STRNCPY
   %r8 is reduced by 4 and, if anything is left, control goes to
   L(StrncpyFillTailWithZero1) with %rcx = %rdx + 4.  */
L(Exit4):
	movl	(%rcx), %eax
	movl	%eax, (%rdx)
# ifdef USE_AS_STPCPY
	lea	3(%rdx), %rax
# else
	mov	%rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
	sub	$4, %r8
	lea	4(%rdx), %rcx		/* LEA keeps the flags of the SUB for the JNZ */
	jnz	L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	/* %rax += 1 unless the byte it points at is zero: CMPB sets CF only
	   for a zero byte and SBB $-1 adds 1 - CF.  */
	cmpb	$1, (%rax)
	sbb	$-1, %rax
# endif
# endif
	ret
2367
2368 .p2align 4
/* Exit5: copy the last 5 bytes from (%rcx) to (%rdx) (a 4-byte move plus
   a 1-byte move) and return.  Return value is %rdx + 4 (USE_AS_STPCPY)
   or %rdi.  With USE_AS_STRNCPY %r8 is reduced by 5 and, if anything is
   left, control goes to L(StrncpyFillTailWithZero1) with
   %rcx = %rdx + 5.  */
L(Exit5):
	movl	(%rcx), %eax
	movl	%eax, (%rdx)
	movb	4(%rcx), %al
	movb	%al, 4(%rdx)
# ifdef USE_AS_STPCPY
	lea	4(%rdx), %rax
# else
	mov	%rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
	sub	$5, %r8
	lea	5(%rdx), %rcx		/* LEA keeps the flags of the SUB for the JNZ */
	jnz	L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	/* %rax += 1 unless the byte it points at is zero: CMPB sets CF only
	   for a zero byte and SBB $-1 adds 1 - CF.  */
	cmpb	$1, (%rax)
	sbb	$-1, %rax
# endif
# endif
	ret
2389
2390 .p2align 4
/* Exit6: copy the last 6 bytes from (%rcx) to (%rdx) (a 4-byte move plus
   a 2-byte move) and return.  Return value is %rdx + 5 (USE_AS_STPCPY)
   or %rdi.  With USE_AS_STRNCPY %r8 is reduced by 6 and, if anything is
   left, control goes to L(StrncpyFillTailWithZero1) with
   %rcx = %rdx + 6.  */
L(Exit6):
	movl	(%rcx), %eax
	movl	%eax, (%rdx)
	movw	4(%rcx), %ax
	movw	%ax, 4(%rdx)
# ifdef USE_AS_STPCPY
	lea	5(%rdx), %rax
# else
	mov	%rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
	sub	$6, %r8
	lea	6(%rdx), %rcx		/* LEA keeps the flags of the SUB for the JNZ */
	jnz	L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	/* %rax += 1 unless the byte it points at is zero: CMPB sets CF only
	   for a zero byte and SBB $-1 adds 1 - CF.  */
	cmpb	$1, (%rax)
	sbb	$-1, %rax
# endif
# endif
	ret
2411
2412 .p2align 4
/* Exit7: copy the last 7 bytes from (%rcx) to (%rdx) with two
   overlapping 4-byte moves (offsets 0 and 3) and return.  Return value
   is %rdx + 6 (USE_AS_STPCPY) or %rdi.  With USE_AS_STRNCPY %r8 is
   reduced by 7 and, if anything is left, control goes to
   L(StrncpyFillTailWithZero1) with %rcx = %rdx + 7.  */
L(Exit7):
	movl	(%rcx), %eax
	movl	%eax, (%rdx)
	movl	3(%rcx), %eax
	movl	%eax, 3(%rdx)
# ifdef USE_AS_STPCPY
	lea	6(%rdx), %rax
# else
	mov	%rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
	sub	$7, %r8
	lea	7(%rdx), %rcx		/* LEA keeps the flags of the SUB for the JNZ */
	jnz	L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	/* %rax += 1 unless the byte it points at is zero: CMPB sets CF only
	   for a zero byte and SBB $-1 adds 1 - CF.  */
	cmpb	$1, (%rax)
	sbb	$-1, %rax
# endif
# endif
	ret
2433
2434 .p2align 4
/* Exit9: copy the last 9 bytes from (%rcx) to (%rdx) with an 8-byte move
   at offset 0 and an overlapping 4-byte move at offset 5, then return.
   Return value is %rdx + 8 (USE_AS_STPCPY) or %rdi.  With USE_AS_STRNCPY
   %r8 is reduced by 9 and, if anything is left, control goes to
   L(StrncpyFillTailWithZero1) with %rcx = %rdx + 9.  */
L(Exit9):
	mov	(%rcx), %rax
	mov	%rax, (%rdx)
	mov	5(%rcx), %eax
	mov	%eax, 5(%rdx)
# ifdef USE_AS_STPCPY
	lea	8(%rdx), %rax
# else
	mov	%rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
	sub	$9, %r8
	lea	9(%rdx), %rcx		/* LEA keeps the flags of the SUB for the JNZ */
	jnz	L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	/* %rax += 1 unless the byte it points at is zero: CMPB sets CF only
	   for a zero byte and SBB $-1 adds 1 - CF.  */
	cmpb	$1, (%rax)
	sbb	$-1, %rax
# endif
# endif
	ret
2455
2456 .p2align 4
/* Exit10: copy the last 10 bytes from (%rcx) to (%rdx) with an 8-byte
   move at offset 0 and an overlapping 4-byte move at offset 6, then
   return.  Return value is %rdx + 9 (USE_AS_STPCPY) or %rdi.  With
   USE_AS_STRNCPY %r8 is reduced by 10 and, if anything is left, control
   goes to L(StrncpyFillTailWithZero1) with %rcx = %rdx + 10.  */
L(Exit10):
	mov	(%rcx), %rax
	mov	%rax, (%rdx)
	mov	6(%rcx), %eax
	mov	%eax, 6(%rdx)
# ifdef USE_AS_STPCPY
	lea	9(%rdx), %rax
# else
	mov	%rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
	sub	$10, %r8
	lea	10(%rdx), %rcx		/* LEA keeps the flags of the SUB for the JNZ */
	jnz	L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	/* %rax += 1 unless the byte it points at is zero: CMPB sets CF only
	   for a zero byte and SBB $-1 adds 1 - CF.  */
	cmpb	$1, (%rax)
	sbb	$-1, %rax
# endif
# endif
	ret
2477
2478 .p2align 4
/* Exit11: copy the last 11 bytes from (%rcx) to (%rdx) with an 8-byte
   move at offset 0 and an overlapping 4-byte move at offset 7, then
   return.  Return value is %rdx + 10 (USE_AS_STPCPY) or %rdi.  With
   USE_AS_STRNCPY %r8 is reduced by 11 and, if anything is left, control
   goes to L(StrncpyFillTailWithZero1) with %rcx = %rdx + 11.  */
L(Exit11):
	mov	(%rcx), %rax
	mov	%rax, (%rdx)
	mov	7(%rcx), %eax
	mov	%eax, 7(%rdx)
# ifdef USE_AS_STPCPY
	lea	10(%rdx), %rax
# else
	mov	%rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
	sub	$11, %r8
	lea	11(%rdx), %rcx		/* LEA keeps the flags of the SUB for the JNZ */
	jnz	L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	/* %rax += 1 unless the byte it points at is zero: CMPB sets CF only
	   for a zero byte and SBB $-1 adds 1 - CF.  */
	cmpb	$1, (%rax)
	sbb	$-1, %rax
# endif
# endif
	ret
2499
2500 .p2align 4
/* Exit12: copy the last 12 bytes from (%rcx) to (%rdx) with an 8-byte
   move at offset 0 and a 4-byte move at offset 8, then return.  Return
   value is %rdx + 11 (USE_AS_STPCPY) or %rdi.  With USE_AS_STRNCPY %r8
   is reduced by 12 and, if anything is left, control goes to
   L(StrncpyFillTailWithZero1) with %rcx = %rdx + 12.  */
L(Exit12):
	mov	(%rcx), %rax
	mov	%rax, (%rdx)
	mov	8(%rcx), %eax
	mov	%eax, 8(%rdx)
# ifdef USE_AS_STPCPY
	lea	11(%rdx), %rax
# else
	mov	%rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
	sub	$12, %r8
	lea	12(%rdx), %rcx		/* LEA keeps the flags of the SUB for the JNZ */
	jnz	L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	/* %rax += 1 unless the byte it points at is zero: CMPB sets CF only
	   for a zero byte and SBB $-1 adds 1 - CF.  */
	cmpb	$1, (%rax)
	sbb	$-1, %rax
# endif
# endif
	ret
2521
2522 .p2align 4
/* Exit13: copy the last 13 bytes from (%rcx) to (%rdx) with two
   overlapping 8-byte moves (offsets 0 and 5), then return.  Return value
   is %rdx + 12 (USE_AS_STPCPY) or %rdi.  With USE_AS_STRNCPY %r8 is
   reduced by 13 and, if anything is left, control goes to
   L(StrncpyFillTailWithZero1) with %rcx = %rdx + 13.  */
L(Exit13):
	mov	(%rcx), %rax
	mov	%rax, (%rdx)
	mov	5(%rcx), %rax
	mov	%rax, 5(%rdx)
# ifdef USE_AS_STPCPY
	lea	12(%rdx), %rax
# else
	mov	%rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
	sub	$13, %r8
	lea	13(%rdx), %rcx		/* LEA keeps the flags of the SUB for the JNZ */
	jnz	L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	/* %rax += 1 unless the byte it points at is zero: CMPB sets CF only
	   for a zero byte and SBB $-1 adds 1 - CF.  */
	cmpb	$1, (%rax)
	sbb	$-1, %rax
# endif
# endif
	ret
2543
2544 .p2align 4
/* Exit14: copy the last 14 bytes from (%rcx) to (%rdx) with two
   overlapping 8-byte moves (offsets 0 and 6), then return.  Return value
   is %rdx + 13 (USE_AS_STPCPY) or %rdi.  With USE_AS_STRNCPY %r8 is
   reduced by 14 and, if anything is left, control goes to
   L(StrncpyFillTailWithZero1) with %rcx = %rdx + 14.  */
L(Exit14):
	mov	(%rcx), %rax
	mov	%rax, (%rdx)
	mov	6(%rcx), %rax
	mov	%rax, 6(%rdx)
# ifdef USE_AS_STPCPY
	lea	13(%rdx), %rax
# else
	mov	%rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
	sub	$14, %r8
	lea	14(%rdx), %rcx		/* LEA keeps the flags of the SUB for the JNZ */
	jnz	L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	/* %rax += 1 unless the byte it points at is zero: CMPB sets CF only
	   for a zero byte and SBB $-1 adds 1 - CF.  */
	cmpb	$1, (%rax)
	sbb	$-1, %rax
# endif
# endif
	ret
2565
2566 .p2align 4
/* Exit15: copy the last 15 bytes from (%rcx) to (%rdx) with two
   overlapping 8-byte moves (offsets 0 and 7), then return.  Return value
   is %rdx + 14 (USE_AS_STPCPY) or %rdi.  With USE_AS_STRNCPY %r8 is
   reduced by 15 and, if anything is left, control goes to
   L(StrncpyFillTailWithZero1) with %rcx = %rdx + 15.  */
L(Exit15):
	mov	(%rcx), %rax
	mov	%rax, (%rdx)
	mov	7(%rcx), %rax
	mov	%rax, 7(%rdx)
# ifdef USE_AS_STPCPY
	lea	14(%rdx), %rax
# else
	mov	%rdi, %rax
# endif
# ifdef USE_AS_STRNCPY
	sub	$15, %r8
	lea	15(%rdx), %rcx		/* LEA keeps the flags of the SUB for the JNZ */
	jnz	L(StrncpyFillTailWithZero1)
# ifdef USE_AS_STPCPY
	/* %rax += 1 unless the byte it points at is zero: CMPB sets CF only
	   for a zero byte and SBB $-1 adds 1 - CF.  */
	cmpb	$1, (%rax)
	sbb	$-1, %rax
# endif
# endif
	ret
2587
2588# ifdef USE_AS_STRNCPY
2589 .p2align 4
/* Fill0: nothing left to store; return.  */
L(Fill0):
	ret
2592
2593 .p2align 4
/* Fill1: store 1 byte of %rdx at (%rcx) and return.  %rdx is zero on the
   paths visible here (cleared in L(StrncpyFillTailWithZero1)).  */
L(Fill1):
	movb	%dl, (%rcx)
	ret
2597
2598 .p2align 4
/* Fill2: store 2 bytes of %rdx at (%rcx) and return.  %rdx is zero on
   the paths visible here (cleared in L(StrncpyFillTailWithZero1)).  */
L(Fill2):
	movw	%dx, (%rcx)
	ret
2602
2603 .p2align 4
/* Fill3: store 3 bytes from %rdx at (%rcx) (2 + 1) and return.  %rdx is
   zero on the paths visible here (cleared in
   L(StrncpyFillTailWithZero1)).  */
L(Fill3):
	movw	%dx, (%rcx)
	movb	%dl, 2(%rcx)
	ret
2608
2609 .p2align 4
/* Fill4: store 4 bytes of %rdx at (%rcx) and return.  %rdx is zero on
   the paths visible here (cleared in L(StrncpyFillTailWithZero1)).  */
L(Fill4):
	movl	%edx, (%rcx)
	ret
2613
2614 .p2align 4
/* Fill5: store 5 bytes from %rdx at (%rcx) (4 + 1) and return.  %rdx is
   zero on the paths visible here (cleared in
   L(StrncpyFillTailWithZero1)).  */
L(Fill5):
	movl	%edx, (%rcx)
	movb	%dl, 4(%rcx)
	ret
2619
2620 .p2align 4
/* Fill6: store 6 bytes from %rdx at (%rcx) (4 + 2) and return.  %rdx is
   zero on the paths visible here (cleared in
   L(StrncpyFillTailWithZero1)).  */
L(Fill6):
	movl	%edx, (%rcx)
	movw	%dx, 4(%rcx)
	ret
2625
2626 .p2align 4
/* Fill7: cover 7 bytes at (%rcx) with two overlapping 4-byte stores of
   %edx (offsets 0 and 3) and return.  %rdx is zero on the paths visible
   here (cleared in L(StrncpyFillTailWithZero1)).  */
L(Fill7):
	movl	%edx, (%rcx)
	movl	%edx, 3(%rcx)
	ret
2631
2632 .p2align 4
/* Fill8: store the 8 bytes of %rdx at (%rcx) and return.  %rdx is zero
   on the paths visible here (cleared in L(StrncpyFillTailWithZero1)).  */
L(Fill8):
	mov	%rdx, (%rcx)
	ret
2636
2637 .p2align 4
/* Fill9: store 9 bytes from %rdx at (%rcx) (8 + 1) and return.  %rdx is
   zero on the paths visible here (cleared in
   L(StrncpyFillTailWithZero1)).  */
L(Fill9):
	mov	%rdx, (%rcx)
	movb	%dl, 8(%rcx)
	ret
2642
2643 .p2align 4
/* Fill10: store 10 bytes from %rdx at (%rcx) (8 + 2) and return.  %rdx
   is zero on the paths visible here (cleared in
   L(StrncpyFillTailWithZero1)).  */
L(Fill10):
	mov	%rdx, (%rcx)
	movw	%dx, 8(%rcx)
	ret
2648
2649 .p2align 4
/* Fill11: cover 11 bytes at (%rcx) with an 8-byte store at offset 0 and
   an overlapping 4-byte store at offset 7, then return.  %rdx is zero on
   the paths visible here (cleared in L(StrncpyFillTailWithZero1)).  */
L(Fill11):
	mov	%rdx, (%rcx)
	movl	%edx, 7(%rcx)
	ret
2654
2655 .p2align 4
/* Fill12: store 12 bytes from %rdx at (%rcx) (8 + 4) and return.  %rdx
   is zero on the paths visible here (cleared in
   L(StrncpyFillTailWithZero1)).  */
L(Fill12):
	mov	%rdx, (%rcx)
	movl	%edx, 8(%rcx)
	ret
2660
2661 .p2align 4
/* Fill13: cover 13 bytes at (%rcx) with two overlapping 8-byte stores of
   %rdx (offsets 0 and 5) and return.  %rdx is zero on the paths visible
   here (cleared in L(StrncpyFillTailWithZero1)).  */
L(Fill13):
	mov	%rdx, (%rcx)
	mov	%rdx, 5(%rcx)
	ret
2666
2667 .p2align 4
/* Fill14: cover 14 bytes at (%rcx) with two overlapping 8-byte stores of
   %rdx (offsets 0 and 6) and return.  %rdx is zero on the paths visible
   here (cleared in L(StrncpyFillTailWithZero1)).  */
L(Fill14):
	mov	%rdx, (%rcx)
	mov	%rdx, 6(%rcx)
	ret
2672
2673 .p2align 4
/* Fill15: cover 15 bytes at (%rcx) with two overlapping 8-byte stores of
   %rdx (offsets 0 and 7) and return.  %rdx is zero on the paths visible
   here (cleared in L(StrncpyFillTailWithZero1)).  */
L(Fill15):
	mov	%rdx, (%rcx)
	mov	%rdx, 7(%rcx)
	ret
2678
2679 .p2align 4
/* Fill16: store 16 bytes at (%rcx) with two 8-byte stores of %rdx and
   return.  %rdx is zero on the paths visible here (cleared in
   L(StrncpyFillTailWithZero1)).  */
L(Fill16):
	mov	%rdx, (%rcx)
	mov	%rdx, 8(%rcx)
	ret
2684
2685 .p2align 4
/* Final dispatch of the strncpy zero fill.
   L(StrncpyFillExit1) adds back the 16 that the fill code subtracted
   from %r8 and falls into L(FillFrom1To16Bytes), which jumps to
   L(FillN) with N = %r8 (0..16 on the paths visible here) through a
   compare tree.  %rcx is the address the selected L(FillN) writes to.  */
L(StrncpyFillExit1):
	lea	16(%r8), %r8
L(FillFrom1To16Bytes):
	test	%r8, %r8
	jz	L(Fill0)
	cmp	$16, %r8
	je	L(Fill16)
	cmp	$8, %r8
	je	L(Fill8)
	jg	L(FillMore8)
	cmp	$4, %r8
	je	L(Fill4)
	jg	L(FillMore4)
	cmp	$2, %r8
	jl	L(Fill1)
	je	L(Fill2)
	jg	L(Fill3)
L(FillMore8): /* but less than 16 */
	cmp	$12, %r8
	je	L(Fill12)
	jl	L(FillLess12)
	cmp	$14, %r8
	jl	L(Fill13)
	je	L(Fill14)
	jg	L(Fill15)
L(FillMore4): /* but less than 8 */
	cmp	$6, %r8
	jl	L(Fill5)
	je	L(Fill6)
	jg	L(Fill7)
L(FillLess12): /* but more than 8 */
	cmp	$10, %r8
	jl	L(Fill9)
	je	L(Fill10)
	jmp	L(Fill11)
2721
2722 .p2align 4
/* strncpy zero fill: write %r8 zero bytes starting at (%rcx).
   Entered from the L(ExitN) blocks when bytes remain after the copy.
   Up to 16 bytes go straight to the L(FillN) dispatch.  Otherwise 16
   bytes are stored unaligned, %rcx is rounded down to a 16-byte boundary
   (the bytes stepped back are added to %r8 again) and the rest is
   written with aligned MOVDQA stores, 64 bytes per loop iteration.
   %rdx is kept at zero for the L(FillN) stores; %xmm0 is cleared.  */
L(StrncpyFillTailWithZero1):
	xor	%rdx, %rdx
	sub	$16, %r8
	jbe	L(StrncpyFillExit1)	/* 16 or fewer bytes to fill */

	pxor	%xmm0, %xmm0
	mov	%rdx, (%rcx)
	mov	%rdx, 8(%rcx)

	lea	16(%rcx), %rcx

	/* Align %rcx down to 16; %rdx = bytes stepped back.  */
	mov	%rcx, %rdx
	and	$0xf, %rdx
	sub	%rdx, %rcx
	add	%rdx, %r8
	xor	%rdx, %rdx		/* zero again for the L(FillN) stores */
	sub	$64, %r8
	jb	L(StrncpyFillLess64)

L(StrncpyFillLoopMovdqa):
	movdqa	%xmm0, (%rcx)
	movdqa	%xmm0, 16(%rcx)
	movdqa	%xmm0, 32(%rcx)
	movdqa	%xmm0, 48(%rcx)
	lea	64(%rcx), %rcx
	sub	$64, %r8
	jae	L(StrncpyFillLoopMovdqa)

L(StrncpyFillLess64):
	/* %r8 is negative here (remaining - 64); the signed JL tests below
	   rely on that.  */
	add	$32, %r8
	jl	L(StrncpyFillLess32)	/* fewer than 32 bytes remain */
	movdqa	%xmm0, (%rcx)
	movdqa	%xmm0, 16(%rcx)
	lea	32(%rcx), %rcx
	sub	$16, %r8
	jl	L(StrncpyFillExit1)
	movdqa	%xmm0, (%rcx)
	lea	16(%rcx), %rcx
	jmp	L(FillFrom1To16Bytes)

L(StrncpyFillLess32):
	add	$16, %r8
	jl	L(StrncpyFillExit1)	/* fewer than 16 bytes remain */
	movdqa	%xmm0, (%rcx)
	lea	16(%rcx), %rcx
	jmp	L(FillFrom1To16Bytes)
2769
2770 .p2align 4
/* Exit0: strncpy with a zero count (jumped to from the entry code when
   %r8 is zero).  Nothing is copied; the return value is %rdx, which the
   entry code loaded from %rdi.  */
L(Exit0):
	mov	%rdx, %rax
	ret
2774
2775 .p2align 4
/* strncpy with a count below 16, reached from the entry code after
   source bytes 0-7 were found to be non-zero.  For N = 9..14, leave
   through L(ExitN) if the count %r8 equals N or if source byte N-1 is
   zero.  Otherwise copy 15 bytes with two overlapping 8-byte moves and
   return without any zero fill.  */
L(StrncpyExit15Bytes):
	cmp	$9, %r8
	je	L(Exit9)
	cmpb	$0, 8(%rcx)
	jz	L(Exit9)
	cmp	$10, %r8
	je	L(Exit10)
	cmpb	$0, 9(%rcx)
	jz	L(Exit10)
	cmp	$11, %r8
	je	L(Exit11)
	cmpb	$0, 10(%rcx)
	jz	L(Exit11)
	cmp	$12, %r8
	je	L(Exit12)
	cmpb	$0, 11(%rcx)
	jz	L(Exit12)
	cmp	$13, %r8
	je	L(Exit13)
	cmpb	$0, 12(%rcx)
	jz	L(Exit13)
	cmp	$14, %r8
	je	L(Exit14)
	cmpb	$0, 13(%rcx)
	jz	L(Exit14)
	mov	(%rcx), %rax
	mov	%rax, (%rdx)
	mov	7(%rcx), %rax
	mov	%rax, 7(%rdx)
# ifdef USE_AS_STPCPY
	lea	14(%rdx), %rax
	/* %rax += 1 unless the byte it points at is zero: CMPB sets CF only
	   for a zero byte and SBB $-1 adds 1 - CF.  */
	cmpb	$1, (%rax)
	sbb	$-1, %rax
# else
	mov	%rdi, %rax
# endif
	ret
2813
2814 .p2align 4
/* strncpy with a count of at most 8 (the entry code jumps here on
   `cmp $8, %r8; jbe`, after the zero-count case went to L(Exit0)).
   For N = 1..7, leave through L(ExitN) if the count %r8 equals N or if
   source byte N-1 is zero.  Otherwise copy 8 bytes and return without
   any zero fill.  */
L(StrncpyExit8Bytes):
	cmp	$1, %r8
	je	L(Exit1)
	cmpb	$0, (%rcx)
	jz	L(Exit1)
	cmp	$2, %r8
	je	L(Exit2)
	cmpb	$0, 1(%rcx)
	jz	L(Exit2)
	cmp	$3, %r8
	je	L(Exit3)
	cmpb	$0, 2(%rcx)
	jz	L(Exit3)
	cmp	$4, %r8
	je	L(Exit4)
	cmpb	$0, 3(%rcx)
	jz	L(Exit4)
	cmp	$5, %r8
	je	L(Exit5)
	cmpb	$0, 4(%rcx)
	jz	L(Exit5)
	cmp	$6, %r8
	je	L(Exit6)
	cmpb	$0, 5(%rcx)
	jz	L(Exit6)
	cmp	$7, %r8
	je	L(Exit7)
	cmpb	$0, 6(%rcx)
	jz	L(Exit7)
	mov	(%rcx), %rax
	mov	%rax, (%rdx)
# ifdef USE_AS_STPCPY
	lea	7(%rdx), %rax
	/* %rax += 1 unless the byte it points at is zero: CMPB sets CF only
	   for a zero byte and SBB $-1 adds 1 - CF.  */
	cmpb	$1, (%rax)
	sbb	$-1, %rax
# else
	mov	%rdi, %rax
# endif
	ret
2854
2855# endif
2856# endif
2857
2858# ifdef USE_AS_STRNCPY
2859 .p2align 4
/* strncpy exit taken when the count runs out inside a 64-byte step.
   A non-zero mask in %rax selects L(Aligned64LeaveCase2); otherwise
   L(Aligned64LeaveCase3) stores %xmm4, %xmm5 and %xmm6 to -64(%rdx),
   -48(%rdx) and -32(%rdx) one at a time, adding 16 to %rsi per store,
   and leaves through L(CopyFrom1To16BytesCase3) as soon as the running
   count in %r8 drops to zero or below.
   NOTE(review): %xmm4-%xmm6, %rsi and the 64 added back to %r8 are set
   up by code outside this part of the file (presumably the aligned
   64-byte loop) -- confirm against that loop.  */
L(StrncpyLeaveCase2OrCase3):
	test	%rax, %rax
	jnz	L(Aligned64LeaveCase2)

L(Aligned64LeaveCase3):
	lea	64(%r8), %r8
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase3)
	movaps	%xmm4, -64(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase3)
	movaps	%xmm5, -48(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase3)
	movaps	%xmm6, -32(%rdx)
	lea	16(%rsi), %rsi
	/* Pre-subtract the 16 that L(CopyFrom1To16BytesCase3) adds back.  */
	lea	-16(%r8), %r8
	jmp	L(CopyFrom1To16BytesCase3)
2880
/* Companion of L(Aligned64LeaveCase3) for a non-zero combined mask: the
   vectors %xmm4, %xmm5, %xmm6 and %xmm7 are compared against %xmm0 one
   after another.  Each round first checks the running count in %r8
   (leaving through L(CopyFrom1To16BytesCase2OrCase3) when it is used
   up), then the mask (leaving through L(CopyFrom1To16Bytes) on a hit).
   A vector that passed both checks is stored in the following round and
   %rsi grows by 16.
   NOTE(review): %xmm0 must be all-zero at each PCMPEQB for the masks to
   mean "zero byte"; that holds after a round whose mask was zero, the
   initial value comes from outside this part of the file -- confirm.  */
L(Aligned64LeaveCase2):
	pcmpeqb	%xmm4, %xmm0
	pmovmskb	%xmm0, %rax
	add	$48, %r8
	jle	L(CopyFrom1To16BytesCase2OrCase3)
	test	%rax, %rax
	jnz	L(CopyFrom1To16Bytes)

	pcmpeqb	%xmm5, %xmm0
	pmovmskb	%xmm0, %rax
	movaps	%xmm4, -64(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%rax, %rax
	jnz	L(CopyFrom1To16Bytes)

	pcmpeqb	%xmm6, %xmm0
	pmovmskb	%xmm0, %rax
	movaps	%xmm5, -48(%rdx)
	lea	16(%rsi), %rsi
	sub	$16, %r8
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%rax, %rax
	jnz	L(CopyFrom1To16Bytes)

	pcmpeqb	%xmm7, %xmm0
	pmovmskb	%xmm0, %rax
	movaps	%xmm6, -32(%rdx)
	lea	16(%rsi), %rsi
	/* Pre-subtract the 16 that L(CopyFrom1To16BytesCase2) adds back.  */
	lea	-16(%r8), %r8
	jmp	L(CopyFrom1To16BytesCase2)
2913/*--------------------------------------------------*/
2914 .p2align 4
/* strncpy exit of the 1-byte-shift path: copy 16 bytes from -1(%rcx) to
   -1(%rdx) with unaligned moves (clobbers %xmm0), set %rsi = 15 and
   finish in L(CopyFrom1To16BytesCase2) if the mask in %rax is non-zero,
   else in L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit1Case2OrCase3):
	movdqu	-1(%rcx), %xmm0
	movdqu	%xmm0, -1(%rdx)
	mov	$15, %rsi
	test	%rax, %rax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
2922
2923 .p2align 4
/* strncpy exit of the 2-byte-shift path: copy 16 bytes from -2(%rcx) to
   -2(%rdx) with unaligned moves (clobbers %xmm0), set %rsi = 14 and
   finish in L(CopyFrom1To16BytesCase2) if the mask in %rax is non-zero,
   else in L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit2Case2OrCase3):
	movdqu	-2(%rcx), %xmm0
	movdqu	%xmm0, -2(%rdx)
	mov	$14, %rsi
	test	%rax, %rax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
2931
2932 .p2align 4
/* strncpy exit of the 3-byte-shift path: copy 16 bytes from -3(%rcx) to
   -3(%rdx) with unaligned moves (clobbers %xmm0), set %rsi = 13 and
   finish in L(CopyFrom1To16BytesCase2) if the mask in %rax is non-zero,
   else in L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit3Case2OrCase3):
	movdqu	-3(%rcx), %xmm0
	movdqu	%xmm0, -3(%rdx)
	mov	$13, %rsi
	test	%rax, %rax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
2940
2941 .p2align 4
/* strncpy exit of the 4-byte-shift path: copy 16 bytes from -4(%rcx) to
   -4(%rdx) with unaligned moves (clobbers %xmm0), set %rsi = 12 and
   finish in L(CopyFrom1To16BytesCase2) if the mask in %rax is non-zero,
   else in L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit4Case2OrCase3):
	movdqu	-4(%rcx), %xmm0
	movdqu	%xmm0, -4(%rdx)
	mov	$12, %rsi
	test	%rax, %rax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
2949
2950 .p2align 4
/* strncpy exit of the 5-byte-shift path: copy 16 bytes from -5(%rcx) to
   -5(%rdx) with unaligned moves (clobbers %xmm0), set %rsi = 11 and
   finish in L(CopyFrom1To16BytesCase2) if the mask in %rax is non-zero,
   else in L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit5Case2OrCase3):
	movdqu	-5(%rcx), %xmm0
	movdqu	%xmm0, -5(%rdx)
	mov	$11, %rsi
	test	%rax, %rax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
2958
2959 .p2align 4
/* strncpy exit of the 6-byte-shift path: copy 10 bytes from (%rcx) to
   (%rdx) with an 8-byte move and an overlapping 4-byte move at offset 6,
   set %rsi = 10 and finish in L(CopyFrom1To16BytesCase2) if the mask in
   %rax is non-zero, else in L(CopyFrom1To16BytesCase3).  %rsi serves as
   a scratch register for the 8-byte move first.  */
L(StrncpyExit6Case2OrCase3):
	mov	(%rcx), %rsi
	mov	6(%rcx), %r9d
	mov	%r9d, 6(%rdx)
	mov	%rsi, (%rdx)
	test	%rax, %rax
	mov	$10, %rsi		/* MOV keeps the flags of the TEST */
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
2969
2970 .p2align 4
/* strncpy exit of the 7-byte-shift path: copy 9 bytes from (%rcx) to
   (%rdx) with an 8-byte move and an overlapping 4-byte move at offset 5,
   set %rsi = 9 and finish in L(CopyFrom1To16BytesCase2) if the mask in
   %rax is non-zero, else in L(CopyFrom1To16BytesCase3).  %rsi serves as
   a scratch register for the 8-byte move first.  */
L(StrncpyExit7Case2OrCase3):
	mov	(%rcx), %rsi
	mov	5(%rcx), %r9d
	mov	%r9d, 5(%rdx)
	mov	%rsi, (%rdx)
	test	%rax, %rax
	mov	$9, %rsi		/* MOV keeps the flags of the TEST */
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
2980
2981 .p2align 4
/* strncpy exit of the 8-byte-shift path: copy 8 bytes from (%rcx) to
   (%rdx), set %rsi = 8 and finish in L(CopyFrom1To16BytesCase2) if the
   mask in %rax is non-zero, else in L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit8Case2OrCase3):
	mov	(%rcx), %r9
	mov	$8, %rsi
	mov	%r9, (%rdx)
	test	%rax, %rax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
2989
2990 .p2align 4
/* strncpy exit of the 9-byte-shift path: copy 8 bytes from -1(%rcx) to
   -1(%rdx), set %rsi = 7 and finish in L(CopyFrom1To16BytesCase2) if
   the mask in %rax is non-zero, else in L(CopyFrom1To16BytesCase3).  */
L(StrncpyExit9Case2OrCase3):
	mov	-1(%rcx), %r9
	mov	$7, %rsi
	mov	%r9, -1(%rdx)
	test	%rax, %rax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
2998
/* Strncpy tail, variant 10.  Copies the 8 bytes that start 2 bytes below
   rcx to the same offset below rdx, and sets rsi = 6 (the number of
   those bytes at or above rcx).  Dispatches on rax: non-zero goes to
   L(CopyFrom1To16BytesCase2), zero to L(CopyFrom1To16BytesCase3).
   NOTE(review): rax is only tested here; presumably it is the NUL-byte
   mask computed before the jump to this label -- confirm at the callers.  */
2999 .p2align 4
3000L(StrncpyExit10Case2OrCase3):
3001 mov -2(%rcx), %r9
3002 mov $6, %rsi
3003 mov %r9, -2(%rdx)
3004 test %rax, %rax
3005 jnz L(CopyFrom1To16BytesCase2)
3006 jmp L(CopyFrom1To16BytesCase3)
3007
/* Strncpy tail, variant 11.  Copies the 8 bytes that start 3 bytes below
   rcx to the same offset below rdx, and sets rsi = 5 (the number of
   those bytes at or above rcx).  Dispatches on rax: non-zero goes to
   L(CopyFrom1To16BytesCase2), zero to L(CopyFrom1To16BytesCase3).
   NOTE(review): rax is only tested here; presumably it is the NUL-byte
   mask computed before the jump to this label -- confirm at the callers.  */
3008 .p2align 4
3009L(StrncpyExit11Case2OrCase3):
3010 mov -3(%rcx), %r9
3011 mov $5, %rsi
3012 mov %r9, -3(%rdx)
3013 test %rax, %rax
3014 jnz L(CopyFrom1To16BytesCase2)
3015 jmp L(CopyFrom1To16BytesCase3)
3016
/* Strncpy tail, variant 12.  Copies the 4 bytes at (rcx) to (rdx) through
   r9d and sets rsi = 4.  Dispatches on rax: non-zero goes to
   L(CopyFrom1To16BytesCase2), zero to L(CopyFrom1To16BytesCase3).
   NOTE(review): rax is only tested here; presumably it is the NUL-byte
   mask computed before the jump to this label -- confirm at the callers.  */
3017 .p2align 4
3018L(StrncpyExit12Case2OrCase3):
3019 mov (%rcx), %r9d
3020 mov $4, %rsi
3021 mov %r9d, (%rdx)
3022 test %rax, %rax
3023 jnz L(CopyFrom1To16BytesCase2)
3024 jmp L(CopyFrom1To16BytesCase3)
3025
/* Strncpy tail, variant 13.  Copies the 4 bytes that start 1 byte below
   rcx to the same offset below rdx, and sets rsi = 3 (the number of
   those bytes at or above rcx).  Dispatches on rax: non-zero goes to
   L(CopyFrom1To16BytesCase2), zero to L(CopyFrom1To16BytesCase3).
   NOTE(review): rax is only tested here; presumably it is the NUL-byte
   mask computed before the jump to this label -- confirm at the callers.  */
3026 .p2align 4
3027L(StrncpyExit13Case2OrCase3):
3028 mov -1(%rcx), %r9d
3029 mov $3, %rsi
3030 mov %r9d, -1(%rdx)
3031 test %rax, %rax
3032 jnz L(CopyFrom1To16BytesCase2)
3033 jmp L(CopyFrom1To16BytesCase3)
3034
/* Strncpy tail, variant 14.  Copies the 4 bytes that start 2 bytes below
   rcx to the same offset below rdx, and sets rsi = 2 (the number of
   those bytes at or above rcx).  Dispatches on rax: non-zero goes to
   L(CopyFrom1To16BytesCase2), zero to L(CopyFrom1To16BytesCase3).
   NOTE(review): rax is only tested here; presumably it is the NUL-byte
   mask computed before the jump to this label -- confirm at the callers.  */
3035 .p2align 4
3036L(StrncpyExit14Case2OrCase3):
3037 mov -2(%rcx), %r9d
3038 mov $2, %rsi
3039 mov %r9d, -2(%rdx)
3040 test %rax, %rax
3041 jnz L(CopyFrom1To16BytesCase2)
3042 jmp L(CopyFrom1To16BytesCase3)
3043
/* Strncpy tail, variant 15.  Copies the 4 bytes that start 3 bytes below
   rcx to the same offset below rdx, and sets rsi = 1 (the number of
   those bytes at or above rcx).  Dispatches on rax: non-zero goes to
   L(CopyFrom1To16BytesCase2), zero to L(CopyFrom1To16BytesCase3).
   NOTE(review): rax is only tested here; presumably it is the NUL-byte
   mask computed before the jump to this label -- confirm at the callers.  */
3044 .p2align 4
3045L(StrncpyExit15Case2OrCase3):
3046 mov -3(%rcx), %r9d
3047 mov $1, %rsi
3048 mov %r9d, -3(%rdx)
3049 test %rax, %rax
3050 jnz L(CopyFrom1To16BytesCase2)
3051 jmp L(CopyFrom1To16BytesCase3)
3052
/* Leave path for the byte-shift-1 strncpy copy.  Stores up to four
   16-byte chunks at (rdx), 16(rdx), 32(rdx) and 48(rdx), branching to
   L(StrncpyExit1) as soon as the r8 counter runs out; rsi grows by 16
   for every chunk stored.  If all four are stored, control falls
   through into L(StrncpyExit1), which follows directly.
   The movaps memory operands must be 16-byte aligned, so this path
   relies on rdx and rcx + 31 being 16-byte aligned.
   NOTE(review): xmm1, xmm2, xmm4, xmm5, rsi and the bias of r8 are set
   up outside this chunk, presumably by the shift-1 main loop -- confirm
   there.  */
3053 .p2align 4
3054L(StrncpyLeave1):
3055 movaps %xmm2, %xmm3  /* keep the incoming xmm2 for the second palignr */
3056 add $48, %r8  /* re-bias the counter */
3057 jle L(StrncpyExit1)  /* signed: result <= 0, nothing to store here */
3058 palignr $1, %xmm1, %xmm2  /* xmm2 = bytes 1..16 of the pair xmm2:xmm1 */
3059 movaps %xmm2, (%rdx)
3060 movaps 31(%rcx), %xmm2  /* aligned load of the next source block */
3061 lea 16(%rsi), %rsi
3062 sub $16, %r8
3063 jbe L(StrncpyExit1)  /* unsigned: r8 was <= 16 */
3064 palignr $1, %xmm3, %xmm2  /* xmm2 = bytes 1..16 of the pair xmm2:xmm3 */
3065 movaps %xmm2, 16(%rdx)
3066 lea 16(%rsi), %rsi
3067 sub $16, %r8
3068 jbe L(StrncpyExit1)
3069 movaps %xmm4, 32(%rdx)  /* xmm4 is stored unmodified */
3070 lea 16(%rsi), %rsi
3071 sub $16, %r8
3072 jbe L(StrncpyExit1)
3073 movaps %xmm5, 48(%rdx)  /* xmm5 is stored unmodified */
3074 lea 16(%rsi), %rsi
3075 lea -16(%r8), %r8  /* r8 -= 16 without modifying flags */
3076
/* Advances rdx and rcx by rsi + 15, then copies the 15 bytes ending at
   the new pointers with two 8-byte moves at offsets -15 and -8 (they
   overlap by 1 byte).  rsi and rax are used as temporaries; rsi is
   zeroed before the jump to L(CopyFrom1To16BytesCase3).  Also entered
   by fall-through from L(StrncpyLeave1).  */
3077L(StrncpyExit1):
3078 lea 15(%rdx, %rsi), %rdx
3079 lea 15(%rcx, %rsi), %rcx
3080 mov -15(%rcx), %rsi
3081 mov -8(%rcx), %rax
3082 mov %rsi, -15(%rdx)
3083 mov %rax, -8(%rdx)
3084 xor %rsi, %rsi
3085 jmp L(CopyFrom1To16BytesCase3)
3086
/* Leave path for the byte-shift-2 strncpy copy.  Stores up to four
   16-byte chunks at (rdx), 16(rdx), 32(rdx) and 48(rdx), branching to
   L(StrncpyExit2) as soon as the r8 counter runs out; rsi grows by 16
   for every chunk stored.  If all four are stored, control falls
   through into L(StrncpyExit2), which follows directly.
   The movaps memory operands must be 16-byte aligned, so this path
   relies on rdx and rcx + 30 being 16-byte aligned.
   NOTE(review): xmm1, xmm2, xmm4, xmm5, rsi and the bias of r8 are set
   up outside this chunk, presumably by the shift-2 main loop -- confirm
   there.  */
3087 .p2align 4
3088L(StrncpyLeave2):
3089 movaps %xmm2, %xmm3  /* keep the incoming xmm2 for the second palignr */
3090 add $48, %r8  /* re-bias the counter */
3091 jle L(StrncpyExit2)  /* signed: result <= 0, nothing to store here */
3092 palignr $2, %xmm1, %xmm2  /* xmm2 = bytes 2..17 of the pair xmm2:xmm1 */
3093 movaps %xmm2, (%rdx)
3094 movaps 30(%rcx), %xmm2  /* aligned load of the next source block */
3095 lea 16(%rsi), %rsi
3096 sub $16, %r8
3097 jbe L(StrncpyExit2)  /* unsigned: r8 was <= 16 */
3098 palignr $2, %xmm3, %xmm2  /* xmm2 = bytes 2..17 of the pair xmm2:xmm3 */
3099 movaps %xmm2, 16(%rdx)
3100 lea 16(%rsi), %rsi
3101 sub $16, %r8
3102 jbe L(StrncpyExit2)
3103 movaps %xmm4, 32(%rdx)  /* xmm4 is stored unmodified */
3104 lea 16(%rsi), %rsi
3105 sub $16, %r8
3106 jbe L(StrncpyExit2)
3107 movaps %xmm5, 48(%rdx)  /* xmm5 is stored unmodified */
3108 lea 16(%rsi), %rsi
3109 lea -16(%r8), %r8  /* r8 -= 16 without modifying flags */
3110
/* Advances rdx and rcx by rsi + 14, then copies the 14 bytes ending at
   the new pointers with two 8-byte moves at offsets -14 and -8 (they
   overlap by 2 bytes).  rsi and rax are used as temporaries; rsi is
   zeroed before the jump to L(CopyFrom1To16BytesCase3).  Also entered
   by fall-through from L(StrncpyLeave2).  */
3111L(StrncpyExit2):
3112 lea 14(%rdx, %rsi), %rdx
3113 lea 14(%rcx, %rsi), %rcx
3114 mov -14(%rcx), %rsi
3115 mov -8(%rcx), %rax
3116 mov %rsi, -14(%rdx)
3117 mov %rax, -8(%rdx)
3118 xor %rsi, %rsi
3119 jmp L(CopyFrom1To16BytesCase3)
3120
/* Leave path for the byte-shift-3 strncpy copy.  Stores up to four
   16-byte chunks at (rdx), 16(rdx), 32(rdx) and 48(rdx), branching to
   L(StrncpyExit3) as soon as the r8 counter runs out; rsi grows by 16
   for every chunk stored.  If all four are stored, control falls
   through into L(StrncpyExit3), which follows directly.
   The movaps memory operands must be 16-byte aligned, so this path
   relies on rdx and rcx + 29 being 16-byte aligned.
   NOTE(review): xmm1, xmm2, xmm4, xmm5, rsi and the bias of r8 are set
   up outside this chunk, presumably by the shift-3 main loop -- confirm
   there.  */
3121 .p2align 4
3122L(StrncpyLeave3):
3123 movaps %xmm2, %xmm3  /* keep the incoming xmm2 for the second palignr */
3124 add $48, %r8  /* re-bias the counter */
3125 jle L(StrncpyExit3)  /* signed: result <= 0, nothing to store here */
3126 palignr $3, %xmm1, %xmm2  /* xmm2 = bytes 3..18 of the pair xmm2:xmm1 */
3127 movaps %xmm2, (%rdx)
3128 movaps 29(%rcx), %xmm2  /* aligned load of the next source block */
3129 lea 16(%rsi), %rsi
3130 sub $16, %r8
3131 jbe L(StrncpyExit3)  /* unsigned: r8 was <= 16 */
3132 palignr $3, %xmm3, %xmm2  /* xmm2 = bytes 3..18 of the pair xmm2:xmm3 */
3133 movaps %xmm2, 16(%rdx)
3134 lea 16(%rsi), %rsi
3135 sub $16, %r8
3136 jbe L(StrncpyExit3)
3137 movaps %xmm4, 32(%rdx)  /* xmm4 is stored unmodified */
3138 lea 16(%rsi), %rsi
3139 sub $16, %r8
3140 jbe L(StrncpyExit3)
3141 movaps %xmm5, 48(%rdx)  /* xmm5 is stored unmodified */
3142 lea 16(%rsi), %rsi
3143 lea -16(%r8), %r8  /* r8 -= 16 without modifying flags */
3144
/* Advances rdx and rcx by rsi + 13, then copies the 13 bytes ending at
   the new pointers with two 8-byte moves at offsets -13 and -8 (they
   overlap by 3 bytes).  rsi and rax are used as temporaries; rsi is
   zeroed before the jump to L(CopyFrom1To16BytesCase3).  Also entered
   by fall-through from L(StrncpyLeave3).  */
3145L(StrncpyExit3):
3146 lea 13(%rdx, %rsi), %rdx
3147 lea 13(%rcx, %rsi), %rcx
3148 mov -13(%rcx), %rsi
3149 mov -8(%rcx), %rax
3150 mov %rsi, -13(%rdx)
3151 mov %rax, -8(%rdx)
3152 xor %rsi, %rsi
3153 jmp L(CopyFrom1To16BytesCase3)
3154
/* Leave path for the byte-shift-4 strncpy copy.  Stores up to four
   16-byte chunks at (rdx), 16(rdx), 32(rdx) and 48(rdx), branching to
   L(StrncpyExit4) as soon as the r8 counter runs out; rsi grows by 16
   for every chunk stored.  If all four are stored, control falls
   through into L(StrncpyExit4), which follows directly.
   The movaps memory operands must be 16-byte aligned, so this path
   relies on rdx and rcx + 28 being 16-byte aligned.
   NOTE(review): xmm1, xmm2, xmm4, xmm5, rsi and the bias of r8 are set
   up outside this chunk, presumably by the shift-4 main loop -- confirm
   there.  */
3155 .p2align 4
3156L(StrncpyLeave4):
3157 movaps %xmm2, %xmm3  /* keep the incoming xmm2 for the second palignr */
3158 add $48, %r8  /* re-bias the counter */
3159 jle L(StrncpyExit4)  /* signed: result <= 0, nothing to store here */
3160 palignr $4, %xmm1, %xmm2  /* xmm2 = bytes 4..19 of the pair xmm2:xmm1 */
3161 movaps %xmm2, (%rdx)
3162 movaps 28(%rcx), %xmm2  /* aligned load of the next source block */
3163 lea 16(%rsi), %rsi
3164 sub $16, %r8
3165 jbe L(StrncpyExit4)  /* unsigned: r8 was <= 16 */
3166 palignr $4, %xmm3, %xmm2  /* xmm2 = bytes 4..19 of the pair xmm2:xmm3 */
3167 movaps %xmm2, 16(%rdx)
3168 lea 16(%rsi), %rsi
3169 sub $16, %r8
3170 jbe L(StrncpyExit4)
3171 movaps %xmm4, 32(%rdx)  /* xmm4 is stored unmodified */
3172 lea 16(%rsi), %rsi
3173 sub $16, %r8
3174 jbe L(StrncpyExit4)
3175 movaps %xmm5, 48(%rdx)  /* xmm5 is stored unmodified */
3176 lea 16(%rsi), %rsi
3177 lea -16(%r8), %r8  /* r8 -= 16 without modifying flags */
3178
/* Advances rdx and rcx by rsi + 12, then copies the 12 bytes ending at
   the new pointers with an 8-byte move at offset -12 and a 4-byte move
   at offset -4.  rsi and rax are used as temporaries; rsi is zeroed
   before the jump to L(CopyFrom1To16BytesCase3).  Also entered by
   fall-through from L(StrncpyLeave4).  */
3179L(StrncpyExit4):
3180 lea 12(%rdx, %rsi), %rdx
3181 lea 12(%rcx, %rsi), %rcx
3182 mov -12(%rcx), %rsi
3183 mov -4(%rcx), %eax
3184 mov %rsi, -12(%rdx)
3185 mov %eax, -4(%rdx)
3186 xor %rsi, %rsi
3187 jmp L(CopyFrom1To16BytesCase3)
3188
/* Leave path for the byte-shift-5 strncpy copy.  Stores up to four
   16-byte chunks at (rdx), 16(rdx), 32(rdx) and 48(rdx), branching to
   L(StrncpyExit5) as soon as the r8 counter runs out; rsi grows by 16
   for every chunk stored.  If all four are stored, control falls
   through into L(StrncpyExit5), which follows directly.
   The movaps memory operands must be 16-byte aligned, so this path
   relies on rdx and rcx + 27 being 16-byte aligned.
   NOTE(review): xmm1, xmm2, xmm4, xmm5, rsi and the bias of r8 are set
   up outside this chunk, presumably by the shift-5 main loop -- confirm
   there.  */
3189 .p2align 4
3190L(StrncpyLeave5):
3191 movaps %xmm2, %xmm3  /* keep the incoming xmm2 for the second palignr */
3192 add $48, %r8  /* re-bias the counter */
3193 jle L(StrncpyExit5)  /* signed: result <= 0, nothing to store here */
3194 palignr $5, %xmm1, %xmm2  /* xmm2 = bytes 5..20 of the pair xmm2:xmm1 */
3195 movaps %xmm2, (%rdx)
3196 movaps 27(%rcx), %xmm2  /* aligned load of the next source block */
3197 lea 16(%rsi), %rsi
3198 sub $16, %r8
3199 jbe L(StrncpyExit5)  /* unsigned: r8 was <= 16 */
3200 palignr $5, %xmm3, %xmm2  /* xmm2 = bytes 5..20 of the pair xmm2:xmm3 */
3201 movaps %xmm2, 16(%rdx)
3202 lea 16(%rsi), %rsi
3203 sub $16, %r8
3204 jbe L(StrncpyExit5)
3205 movaps %xmm4, 32(%rdx)  /* xmm4 is stored unmodified */
3206 lea 16(%rsi), %rsi
3207 sub $16, %r8
3208 jbe L(StrncpyExit5)
3209 movaps %xmm5, 48(%rdx)  /* xmm5 is stored unmodified */
3210 lea 16(%rsi), %rsi
3211 lea -16(%r8), %r8  /* r8 -= 16 without modifying flags */
3212
/* Advances rdx and rcx by rsi + 11, then copies the 11 bytes ending at
   the new pointers with an 8-byte move at offset -11 and a 4-byte move
   at offset -4 (they overlap by 1 byte).  rsi and rax are used as
   temporaries; rsi is zeroed before the jump to
   L(CopyFrom1To16BytesCase3).  Also entered by fall-through from
   L(StrncpyLeave5).  */
3213L(StrncpyExit5):
3214 lea 11(%rdx, %rsi), %rdx
3215 lea 11(%rcx, %rsi), %rcx
3216 mov -11(%rcx), %rsi
3217 mov -4(%rcx), %eax
3218 mov %rsi, -11(%rdx)
3219 mov %eax, -4(%rdx)
3220 xor %rsi, %rsi
3221 jmp L(CopyFrom1To16BytesCase3)
3222
/* Leave path for the byte-shift-6 strncpy copy.  Stores up to four
   16-byte chunks at (rdx), 16(rdx), 32(rdx) and 48(rdx), branching to
   L(StrncpyExit6) as soon as the r8 counter runs out; rsi grows by 16
   for every chunk stored.  If all four are stored, control falls
   through into L(StrncpyExit6), which follows directly.
   The movaps memory operands must be 16-byte aligned, so this path
   relies on rdx and rcx + 26 being 16-byte aligned.
   NOTE(review): xmm1, xmm2, xmm4, xmm5, rsi and the bias of r8 are set
   up outside this chunk, presumably by the shift-6 main loop -- confirm
   there.  */
3223 .p2align 4
3224L(StrncpyLeave6):
3225 movaps %xmm2, %xmm3  /* keep the incoming xmm2 for the second palignr */
3226 add $48, %r8  /* re-bias the counter */
3227 jle L(StrncpyExit6)  /* signed: result <= 0, nothing to store here */
3228 palignr $6, %xmm1, %xmm2  /* xmm2 = bytes 6..21 of the pair xmm2:xmm1 */
3229 movaps %xmm2, (%rdx)
3230 movaps 26(%rcx), %xmm2  /* aligned load of the next source block */
3231 lea 16(%rsi), %rsi
3232 sub $16, %r8
3233 jbe L(StrncpyExit6)  /* unsigned: r8 was <= 16 */
3234 palignr $6, %xmm3, %xmm2  /* xmm2 = bytes 6..21 of the pair xmm2:xmm3 */
3235 movaps %xmm2, 16(%rdx)
3236 lea 16(%rsi), %rsi
3237 sub $16, %r8
3238 jbe L(StrncpyExit6)
3239 movaps %xmm4, 32(%rdx)  /* xmm4 is stored unmodified */
3240 lea 16(%rsi), %rsi
3241 sub $16, %r8
3242 jbe L(StrncpyExit6)
3243 movaps %xmm5, 48(%rdx)  /* xmm5 is stored unmodified */
3244 lea 16(%rsi), %rsi
3245 lea -16(%r8), %r8  /* r8 -= 16 without modifying flags */
3246
/* Advances rdx and rcx by rsi + 10, then copies the 10 bytes ending at
   the new pointers with an 8-byte move at offset -10 and a 2-byte move
   at offset -2.  rsi and ax are used as temporaries; rsi is zeroed
   before the jump to L(CopyFrom1To16BytesCase3).  Also entered by
   fall-through from L(StrncpyLeave6).  */
3247L(StrncpyExit6):
3248 lea 10(%rdx, %rsi), %rdx
3249 lea 10(%rcx, %rsi), %rcx
3250 mov -10(%rcx), %rsi
3251 movw -2(%rcx), %ax
3252 mov %rsi, -10(%rdx)
3253 movw %ax, -2(%rdx)
3254 xor %rsi, %rsi
3255 jmp L(CopyFrom1To16BytesCase3)
3256
/* Leave path for the byte-shift-7 strncpy copy.  Stores up to four
   16-byte chunks at (rdx), 16(rdx), 32(rdx) and 48(rdx), branching to
   L(StrncpyExit7) as soon as the r8 counter runs out; rsi grows by 16
   for every chunk stored.  If all four are stored, control falls
   through into L(StrncpyExit7), which follows directly.
   The movaps memory operands must be 16-byte aligned, so this path
   relies on rdx and rcx + 25 being 16-byte aligned.
   NOTE(review): xmm1, xmm2, xmm4, xmm5, rsi and the bias of r8 are set
   up outside this chunk, presumably by the shift-7 main loop -- confirm
   there.  */
3257 .p2align 4
3258L(StrncpyLeave7):
3259 movaps %xmm2, %xmm3  /* keep the incoming xmm2 for the second palignr */
3260 add $48, %r8  /* re-bias the counter */
3261 jle L(StrncpyExit7)  /* signed: result <= 0, nothing to store here */
3262 palignr $7, %xmm1, %xmm2  /* xmm2 = bytes 7..22 of the pair xmm2:xmm1 */
3263 movaps %xmm2, (%rdx)
3264 movaps 25(%rcx), %xmm2  /* aligned load of the next source block */
3265 lea 16(%rsi), %rsi
3266 sub $16, %r8
3267 jbe L(StrncpyExit7)  /* unsigned: r8 was <= 16 */
3268 palignr $7, %xmm3, %xmm2  /* xmm2 = bytes 7..22 of the pair xmm2:xmm3 */
3269 movaps %xmm2, 16(%rdx)
3270 lea 16(%rsi), %rsi
3271 sub $16, %r8
3272 jbe L(StrncpyExit7)
3273 movaps %xmm4, 32(%rdx)  /* xmm4 is stored unmodified */
3274 lea 16(%rsi), %rsi
3275 sub $16, %r8
3276 jbe L(StrncpyExit7)
3277 movaps %xmm5, 48(%rdx)  /* xmm5 is stored unmodified */
3278 lea 16(%rsi), %rsi
3279 lea -16(%r8), %r8  /* r8 -= 16 without modifying flags */
3280
/* Advances rdx and rcx by rsi + 9, then copies the 9 bytes ending at
   the new pointers with an 8-byte move at offset -9 and a 1-byte move
   at offset -1.  rsi and ah are used as temporaries (only bits 8..15 of
   rax are written); rsi is zeroed before the jump to
   L(CopyFrom1To16BytesCase3).  Also entered by fall-through from
   L(StrncpyLeave7).  */
3281L(StrncpyExit7):
3282 lea 9(%rdx, %rsi), %rdx
3283 lea 9(%rcx, %rsi), %rcx
3284 mov -9(%rcx), %rsi
3285 movb -1(%rcx), %ah
3286 mov %rsi, -9(%rdx)
3287 movb %ah, -1(%rdx)
3288 xor %rsi, %rsi
3289 jmp L(CopyFrom1To16BytesCase3)
3290
/* Leave path for the byte-shift-8 strncpy copy.  Stores up to four
   16-byte chunks at (rdx), 16(rdx), 32(rdx) and 48(rdx), branching to
   L(StrncpyExit8) as soon as the r8 counter runs out; rsi grows by 16
   for every chunk stored.  If all four are stored, control falls
   through into L(StrncpyExit8), which follows directly.
   The movaps memory operands must be 16-byte aligned, so this path
   relies on rdx and rcx + 24 being 16-byte aligned.
   NOTE(review): xmm1, xmm2, xmm4, xmm5, rsi and the bias of r8 are set
   up outside this chunk, presumably by the shift-8 main loop -- confirm
   there.  */
3291 .p2align 4
3292L(StrncpyLeave8):
3293 movaps %xmm2, %xmm3  /* keep the incoming xmm2 for the second palignr */
3294 add $48, %r8  /* re-bias the counter */
3295 jle L(StrncpyExit8)  /* signed: result <= 0, nothing to store here */
3296 palignr $8, %xmm1, %xmm2  /* xmm2 = bytes 8..23 of the pair xmm2:xmm1 */
3297 movaps %xmm2, (%rdx)
3298 movaps 24(%rcx), %xmm2  /* aligned load of the next source block */
3299 lea 16(%rsi), %rsi
3300 sub $16, %r8
3301 jbe L(StrncpyExit8)  /* unsigned: r8 was <= 16 */
3302 palignr $8, %xmm3, %xmm2  /* xmm2 = bytes 8..23 of the pair xmm2:xmm3 */
3303 movaps %xmm2, 16(%rdx)
3304 lea 16(%rsi), %rsi
3305 sub $16, %r8
3306 jbe L(StrncpyExit8)
3307 movaps %xmm4, 32(%rdx)  /* xmm4 is stored unmodified */
3308 lea 16(%rsi), %rsi
3309 sub $16, %r8
3310 jbe L(StrncpyExit8)
3311 movaps %xmm5, 48(%rdx)  /* xmm5 is stored unmodified */
3312 lea 16(%rsi), %rsi
3313 lea -16(%r8), %r8  /* r8 -= 16 without modifying flags */
3314
/* Advances rdx and rcx by rsi + 8, then copies the 8 bytes ending at
   the new pointers through rax.  rsi is zeroed before the jump to
   L(CopyFrom1To16BytesCase3).  Also entered by fall-through from
   L(StrncpyLeave8).  */
3315L(StrncpyExit8):
3316 lea 8(%rdx, %rsi), %rdx
3317 lea 8(%rcx, %rsi), %rcx
3318 mov -8(%rcx), %rax
3319 xor %rsi, %rsi
3320 mov %rax, -8(%rdx)
3321 jmp L(CopyFrom1To16BytesCase3)
3322
/* Leave path for the byte-shift-9 strncpy copy.  Stores up to four
   16-byte chunks at (rdx), 16(rdx), 32(rdx) and 48(rdx), branching to
   L(StrncpyExit9) as soon as the r8 counter runs out; rsi grows by 16
   for every chunk stored.  If all four are stored, control falls
   through into L(StrncpyExit9), which follows directly.
   The movaps memory operands must be 16-byte aligned, so this path
   relies on rdx and rcx + 23 being 16-byte aligned.
   NOTE(review): xmm1, xmm2, xmm4, xmm5, rsi and the bias of r8 are set
   up outside this chunk, presumably by the shift-9 main loop -- confirm
   there.  */
3323 .p2align 4
3324L(StrncpyLeave9):
3325 movaps %xmm2, %xmm3  /* keep the incoming xmm2 for the second palignr */
3326 add $48, %r8  /* re-bias the counter */
3327 jle L(StrncpyExit9)  /* signed: result <= 0, nothing to store here */
3328 palignr $9, %xmm1, %xmm2  /* xmm2 = bytes 9..24 of the pair xmm2:xmm1 */
3329 movaps %xmm2, (%rdx)
3330 movaps 23(%rcx), %xmm2  /* aligned load of the next source block */
3331 lea 16(%rsi), %rsi
3332 sub $16, %r8
3333 jbe L(StrncpyExit9)  /* unsigned: r8 was <= 16 */
3334 palignr $9, %xmm3, %xmm2  /* xmm2 = bytes 9..24 of the pair xmm2:xmm3 */
3335 movaps %xmm2, 16(%rdx)
3336 lea 16(%rsi), %rsi
3337 sub $16, %r8
3338 jbe L(StrncpyExit9)
3339 movaps %xmm4, 32(%rdx)  /* xmm4 is stored unmodified */
3340 lea 16(%rsi), %rsi
3341 sub $16, %r8
3342 jbe L(StrncpyExit9)
3343 movaps %xmm5, 48(%rdx)  /* xmm5 is stored unmodified */
3344 lea 16(%rsi), %rsi
3345 lea -16(%r8), %r8  /* r8 -= 16 without modifying flags */
3346
/* Advances rdx and rcx by rsi + 7, then copies the 8 bytes ending at
   the new pointers through rax.  The 8-byte move therefore starts
   1 byte below the old rcx + rsi / rdx + rsi (presumably bytes that were
   already copied -- confirm against the callers).  rsi is zeroed before
   the jump to L(CopyFrom1To16BytesCase3).  Also entered by fall-through
   from L(StrncpyLeave9).  */
3347L(StrncpyExit9):
3348 lea 7(%rdx, %rsi), %rdx
3349 lea 7(%rcx, %rsi), %rcx
3350 mov -8(%rcx), %rax
3351 xor %rsi, %rsi
3352 mov %rax, -8(%rdx)
3353 jmp L(CopyFrom1To16BytesCase3)
3354
/* Leave path for the byte-shift-10 strncpy copy.  Stores up to four
   16-byte chunks at (rdx), 16(rdx), 32(rdx) and 48(rdx), branching to
   L(StrncpyExit10) as soon as the r8 counter runs out; rsi grows by 16
   for every chunk stored.  If all four are stored, control falls
   through into L(StrncpyExit10), which follows directly.
   The movaps memory operands must be 16-byte aligned, so this path
   relies on rdx and rcx + 22 being 16-byte aligned.
   NOTE(review): xmm1, xmm2, xmm4, xmm5, rsi and the bias of r8 are set
   up outside this chunk, presumably by the shift-10 main loop -- confirm
   there.  */
3355 .p2align 4
3356L(StrncpyLeave10):
3357 movaps %xmm2, %xmm3  /* keep the incoming xmm2 for the second palignr */
3358 add $48, %r8  /* re-bias the counter */
3359 jle L(StrncpyExit10)  /* signed: result <= 0, nothing to store here */
3360 palignr $10, %xmm1, %xmm2  /* xmm2 = bytes 10..25 of the pair xmm2:xmm1 */
3361 movaps %xmm2, (%rdx)
3362 movaps 22(%rcx), %xmm2  /* aligned load of the next source block */
3363 lea 16(%rsi), %rsi
3364 sub $16, %r8
3365 jbe L(StrncpyExit10)  /* unsigned: r8 was <= 16 */
3366 palignr $10, %xmm3, %xmm2  /* xmm2 = bytes 10..25 of the pair xmm2:xmm3 */
3367 movaps %xmm2, 16(%rdx)
3368 lea 16(%rsi), %rsi
3369 sub $16, %r8
3370 jbe L(StrncpyExit10)
3371 movaps %xmm4, 32(%rdx)  /* xmm4 is stored unmodified */
3372 lea 16(%rsi), %rsi
3373 sub $16, %r8
3374 jbe L(StrncpyExit10)
3375 movaps %xmm5, 48(%rdx)  /* xmm5 is stored unmodified */
3376 lea 16(%rsi), %rsi
3377 lea -16(%r8), %r8  /* r8 -= 16 without modifying flags */
3378
/* Advances rdx and rcx by rsi + 6, then copies the 8 bytes ending at
   the new pointers through rax.  The 8-byte move therefore starts
   2 bytes below the old rcx + rsi / rdx + rsi (presumably bytes that
   were already copied -- confirm against the callers).  rsi is zeroed
   before the jump to L(CopyFrom1To16BytesCase3).  Also entered by
   fall-through from L(StrncpyLeave10).  */
3379L(StrncpyExit10):
3380 lea 6(%rdx, %rsi), %rdx
3381 lea 6(%rcx, %rsi), %rcx
3382 mov -8(%rcx), %rax
3383 xor %rsi, %rsi
3384 mov %rax, -8(%rdx)
3385 jmp L(CopyFrom1To16BytesCase3)
3386
/* Leave path for the byte-shift-11 strncpy copy.  Stores up to four
   16-byte chunks at (rdx), 16(rdx), 32(rdx) and 48(rdx), branching to
   L(StrncpyExit11) as soon as the r8 counter runs out; rsi grows by 16
   for every chunk stored.  If all four are stored, control falls
   through into L(StrncpyExit11), which follows directly.
   The movaps memory operands must be 16-byte aligned, so this path
   relies on rdx and rcx + 21 being 16-byte aligned.
   NOTE(review): xmm1, xmm2, xmm4, xmm5, rsi and the bias of r8 are set
   up outside this chunk, presumably by the shift-11 main loop -- confirm
   there.  */
3387 .p2align 4
3388L(StrncpyLeave11):
3389 movaps %xmm2, %xmm3  /* keep the incoming xmm2 for the second palignr */
3390 add $48, %r8  /* re-bias the counter */
3391 jle L(StrncpyExit11)  /* signed: result <= 0, nothing to store here */
3392 palignr $11, %xmm1, %xmm2  /* xmm2 = bytes 11..26 of the pair xmm2:xmm1 */
3393 movaps %xmm2, (%rdx)
3394 movaps 21(%rcx), %xmm2  /* aligned load of the next source block */
3395 lea 16(%rsi), %rsi
3396 sub $16, %r8
3397 jbe L(StrncpyExit11)  /* unsigned: r8 was <= 16 */
3398 palignr $11, %xmm3, %xmm2  /* xmm2 = bytes 11..26 of the pair xmm2:xmm3 */
3399 movaps %xmm2, 16(%rdx)
3400 lea 16(%rsi), %rsi
3401 sub $16, %r8
3402 jbe L(StrncpyExit11)
3403 movaps %xmm4, 32(%rdx)  /* xmm4 is stored unmodified */
3404 lea 16(%rsi), %rsi
3405 sub $16, %r8
3406 jbe L(StrncpyExit11)
3407 movaps %xmm5, 48(%rdx)  /* xmm5 is stored unmodified */
3408 lea 16(%rsi), %rsi
3409 lea -16(%r8), %r8  /* r8 -= 16 without modifying flags */
3410
/* Advances rdx and rcx by rsi + 5, then copies the 8 bytes ending at
   the new pointers through rax.  The 8-byte move therefore starts
   3 bytes below the old rcx + rsi / rdx + rsi (presumably bytes that
   were already copied -- confirm against the callers).  rsi is zeroed
   before the jump to L(CopyFrom1To16BytesCase3).  Also entered by
   fall-through from L(StrncpyLeave11).  */
3411L(StrncpyExit11):
3412 lea 5(%rdx, %rsi), %rdx
3413 lea 5(%rcx, %rsi), %rcx
3414 mov -8(%rcx), %rax
3415 xor %rsi, %rsi
3416 mov %rax, -8(%rdx)
3417 jmp L(CopyFrom1To16BytesCase3)
3418
/* Leave path for the byte-shift-12 strncpy copy.  Stores up to four
   16-byte chunks at (rdx), 16(rdx), 32(rdx) and 48(rdx), branching to
   L(StrncpyExit12) as soon as the r8 counter runs out; rsi grows by 16
   for every chunk stored.  If all four are stored, control falls
   through into L(StrncpyExit12), which follows directly.
   The movaps memory operands must be 16-byte aligned, so this path
   relies on rdx and rcx + 20 being 16-byte aligned.
   NOTE(review): xmm1, xmm2, xmm4, xmm5, rsi and the bias of r8 are set
   up outside this chunk, presumably by the shift-12 main loop -- confirm
   there.  */
3419 .p2align 4
3420L(StrncpyLeave12):
3421 movaps %xmm2, %xmm3  /* keep the incoming xmm2 for the second palignr */
3422 add $48, %r8  /* re-bias the counter */
3423 jle L(StrncpyExit12)  /* signed: result <= 0, nothing to store here */
3424 palignr $12, %xmm1, %xmm2  /* xmm2 = bytes 12..27 of the pair xmm2:xmm1 */
3425 movaps %xmm2, (%rdx)
3426 movaps 20(%rcx), %xmm2  /* aligned load of the next source block */
3427 lea 16(%rsi), %rsi
3428 sub $16, %r8
3429 jbe L(StrncpyExit12)  /* unsigned: r8 was <= 16 */
3430 palignr $12, %xmm3, %xmm2  /* xmm2 = bytes 12..27 of the pair xmm2:xmm3 */
3431 movaps %xmm2, 16(%rdx)
3432 lea 16(%rsi), %rsi
3433 sub $16, %r8
3434 jbe L(StrncpyExit12)
3435 movaps %xmm4, 32(%rdx)  /* xmm4 is stored unmodified */
3436 lea 16(%rsi), %rsi
3437 sub $16, %r8
3438 jbe L(StrncpyExit12)
3439 movaps %xmm5, 48(%rdx)  /* xmm5 is stored unmodified */
3440 lea 16(%rsi), %rsi
3441 lea -16(%r8), %r8  /* r8 -= 16 without modifying flags */
3442
/* Advances rdx and rcx by rsi + 4, then copies the 4 bytes ending at
   the new pointers through eax.  rsi is zeroed before the jump to
   L(CopyFrom1To16BytesCase3).  Also entered by fall-through from
   L(StrncpyLeave12).  */
3443L(StrncpyExit12):
3444 lea 4(%rdx, %rsi), %rdx
3445 lea 4(%rcx, %rsi), %rcx
3446 mov -4(%rcx), %eax
3447 xor %rsi, %rsi
3448 mov %eax, -4(%rdx)
3449 jmp L(CopyFrom1To16BytesCase3)
3450
/* Leave path for the byte-shift-13 strncpy copy.  Stores up to four
   16-byte chunks at (rdx), 16(rdx), 32(rdx) and 48(rdx), branching to
   L(StrncpyExit13) as soon as the r8 counter runs out; rsi grows by 16
   for every chunk stored.  If all four are stored, control falls
   through into L(StrncpyExit13), which follows directly.
   The movaps memory operands must be 16-byte aligned, so this path
   relies on rdx and rcx + 19 being 16-byte aligned.
   NOTE(review): xmm1, xmm2, xmm4, xmm5, rsi and the bias of r8 are set
   up outside this chunk, presumably by the shift-13 main loop -- confirm
   there.  */
3451 .p2align 4
3452L(StrncpyLeave13):
3453 movaps %xmm2, %xmm3  /* keep the incoming xmm2 for the second palignr */
3454 add $48, %r8  /* re-bias the counter */
3455 jle L(StrncpyExit13)  /* signed: result <= 0, nothing to store here */
3456 palignr $13, %xmm1, %xmm2  /* xmm2 = bytes 13..28 of the pair xmm2:xmm1 */
3457 movaps %xmm2, (%rdx)
3458 movaps 19(%rcx), %xmm2  /* aligned load of the next source block */
3459 lea 16(%rsi), %rsi
3460 sub $16, %r8
3461 jbe L(StrncpyExit13)  /* unsigned: r8 was <= 16 */
3462 palignr $13, %xmm3, %xmm2  /* xmm2 = bytes 13..28 of the pair xmm2:xmm3 */
3463 movaps %xmm2, 16(%rdx)
3464 lea 16(%rsi), %rsi
3465 sub $16, %r8
3466 jbe L(StrncpyExit13)
3467 movaps %xmm4, 32(%rdx)  /* xmm4 is stored unmodified */
3468 lea 16(%rsi), %rsi
3469 sub $16, %r8
3470 jbe L(StrncpyExit13)
3471 movaps %xmm5, 48(%rdx)  /* xmm5 is stored unmodified */
3472 lea 16(%rsi), %rsi
3473 lea -16(%r8), %r8  /* r8 -= 16 without modifying flags */
3474
/* Advances rdx and rcx by rsi + 3, then copies the 4 bytes ending at
   the new pointers through eax.  The 4-byte move therefore starts
   1 byte below the old rcx + rsi / rdx + rsi (presumably a byte that was
   already copied -- confirm against the callers).  rsi is zeroed before
   the jump to L(CopyFrom1To16BytesCase3).  Also entered by fall-through
   from L(StrncpyLeave13).  */
3475L(StrncpyExit13):
3476 lea 3(%rdx, %rsi), %rdx
3477 lea 3(%rcx, %rsi), %rcx
3478 mov -4(%rcx), %eax
3479 xor %rsi, %rsi
3480 mov %eax, -4(%rdx)
3481 jmp L(CopyFrom1To16BytesCase3)
3482
/* Leave path for the byte-shift-14 strncpy copy.  Stores up to four
   16-byte chunks at (rdx), 16(rdx), 32(rdx) and 48(rdx), branching to
   L(StrncpyExit14) as soon as the r8 counter runs out; rsi grows by 16
   for every chunk stored.  If all four are stored, control falls
   through into L(StrncpyExit14), which follows directly.
   The movaps memory operands must be 16-byte aligned, so this path
   relies on rdx and rcx + 18 being 16-byte aligned.
   NOTE(review): xmm1, xmm2, xmm4, xmm5, rsi and the bias of r8 are set
   up outside this chunk, presumably by the shift-14 main loop -- confirm
   there.  */
3483 .p2align 4
3484L(StrncpyLeave14):
3485 movaps %xmm2, %xmm3  /* keep the incoming xmm2 for the second palignr */
3486 add $48, %r8  /* re-bias the counter */
3487 jle L(StrncpyExit14)  /* signed: result <= 0, nothing to store here */
3488 palignr $14, %xmm1, %xmm2  /* xmm2 = bytes 14..29 of the pair xmm2:xmm1 */
3489 movaps %xmm2, (%rdx)
3490 movaps 18(%rcx), %xmm2  /* aligned load of the next source block */
3491 lea 16(%rsi), %rsi
3492 sub $16, %r8
3493 jbe L(StrncpyExit14)  /* unsigned: r8 was <= 16 */
3494 palignr $14, %xmm3, %xmm2  /* xmm2 = bytes 14..29 of the pair xmm2:xmm3 */
3495 movaps %xmm2, 16(%rdx)
3496 lea 16(%rsi), %rsi
3497 sub $16, %r8
3498 jbe L(StrncpyExit14)
3499 movaps %xmm4, 32(%rdx)  /* xmm4 is stored unmodified */
3500 lea 16(%rsi), %rsi
3501 sub $16, %r8
3502 jbe L(StrncpyExit14)
3503 movaps %xmm5, 48(%rdx)  /* xmm5 is stored unmodified */
3504 lea 16(%rsi), %rsi
3505 lea -16(%r8), %r8  /* r8 -= 16 without modifying flags */
3506
/* Advances rdx and rcx by rsi + 2, then copies the 2 bytes ending at
   the new pointers through ax.  rsi is zeroed before the jump to
   L(CopyFrom1To16BytesCase3).  Also entered by fall-through from
   L(StrncpyLeave14).  */
3507L(StrncpyExit14):
3508 lea 2(%rdx, %rsi), %rdx
3509 lea 2(%rcx, %rsi), %rcx
3510 movw -2(%rcx), %ax
3511 xor %rsi, %rsi
3512 movw %ax, -2(%rdx)
3513 jmp L(CopyFrom1To16BytesCase3)
3514
/* Leave path for the byte-shift-15 strncpy copy.  Stores up to four
   16-byte chunks at (rdx), 16(rdx), 32(rdx) and 48(rdx), branching to
   L(StrncpyExit15) as soon as the r8 counter runs out; rsi grows by 16
   for every chunk stored.  If all four are stored, control falls
   through into L(StrncpyExit15), which follows directly.
   The movaps memory operands must be 16-byte aligned, so this path
   relies on rdx and rcx + 17 being 16-byte aligned.
   NOTE(review): xmm1, xmm2, xmm4, xmm5, rsi and the bias of r8 are set
   up outside this chunk, presumably by the shift-15 main loop -- confirm
   there.  */
3515 .p2align 4
3516L(StrncpyLeave15):
3517 movaps %xmm2, %xmm3  /* keep the incoming xmm2 for the second palignr */
3518 add $48, %r8  /* re-bias the counter */
3519 jle L(StrncpyExit15)  /* signed: result <= 0, nothing to store here */
3520 palignr $15, %xmm1, %xmm2  /* xmm2 = bytes 15..30 of the pair xmm2:xmm1 */
3521 movaps %xmm2, (%rdx)
3522 movaps 17(%rcx), %xmm2  /* aligned load of the next source block */
3523 lea 16(%rsi), %rsi
3524 sub $16, %r8
3525 jbe L(StrncpyExit15)  /* unsigned: r8 was <= 16 */
3526 palignr $15, %xmm3, %xmm2  /* xmm2 = bytes 15..30 of the pair xmm2:xmm3 */
3527 movaps %xmm2, 16(%rdx)
3528 lea 16(%rsi), %rsi
3529 sub $16, %r8
3530 jbe L(StrncpyExit15)
3531 movaps %xmm4, 32(%rdx)  /* xmm4 is stored unmodified */
3532 lea 16(%rsi), %rsi
3533 sub $16, %r8
3534 jbe L(StrncpyExit15)
3535 movaps %xmm5, 48(%rdx)  /* xmm5 is stored unmodified */
3536 lea 16(%rsi), %rsi
3537 lea -16(%r8), %r8  /* r8 -= 16 without modifying flags */
3538
/* Advances rdx and rcx by rsi + 1, then copies the single byte just
   below the new pointers through ah (only bits 8..15 of rax are
   written).  rsi is zeroed before the jump to
   L(CopyFrom1To16BytesCase3).  Also entered by fall-through from
   L(StrncpyLeave15).  */
3539L(StrncpyExit15):
3540 lea 1(%rdx, %rsi), %rdx
3541 lea 1(%rcx, %rsi), %rcx
3542 movb -1(%rcx), %ah
3543 xor %rsi, %rsi
3544 movb %ah, -1(%rdx)
3545 jmp L(CopyFrom1To16BytesCase3)
3546
3547# endif
3548# ifndef USE_AS_STRCAT
3549END (STRCPY)
3550# endif
3551#endif
3552