1/* memcpy with SSSE3 and REP string
2 Copyright (C) 2010-2020 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <https://www.gnu.org/licenses/>. */
19
20#include <sysdep.h>
21
22#if IS_IN (libc)
23
24#include "asm-syntax.h"
25
26#ifndef MEMCPY
27# define MEMCPY __memcpy_ssse3_back
28# define MEMCPY_CHK __memcpy_chk_ssse3_back
29# define MEMPCPY __mempcpy_ssse3_back
30# define MEMPCPY_CHK __mempcpy_chk_ssse3_back
31#endif
32
/* Expands to the difference I - B.  Presumably used to build the
   relative-offset jump tables consumed by BRANCH_TO_JMPTBL_ENTRY; the
   tables themselves are outside this excerpt -- confirm.  */
33#define JMPTBL(I, B) I - B
34
35/* Branch to an entry in a jump table.  TABLE is a jump table of 32-bit
   signed offsets that are relative to the address of TABLE itself.
   INDEX is a register that contains the index into the jump table; it
   is overwritten with the absolute branch target.  SCALE is the scale
   applied to INDEX when addressing the table.  %r11 is clobbered (it
   holds the address of TABLE).  The jmp is unconditional, so the ud2
   that follows it is not executed on the normal path.  */
38#define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
39 lea TABLE(%rip), %r11; \
40 movslq (%r11, INDEX, SCALE), INDEX; \
41 lea (%r11, INDEX), INDEX; \
42 _CET_NOTRACK jmp *INDEX; \
43 ud2
44
45 .section .text.ssse3,"ax",@progbits
46#if !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE
/* Checked mempcpy entry.  Jumps to __chk_fail when %RCX_LP is below
   %RDX_LP (unsigned compare).  Presumably %RCX_LP is the destination
   object size and %RDX_LP the copy length -- confirm against the
   __mempcpy_chk prototype.  There is no ret or jmp on the passing path,
   so execution continues with whatever follows END, i.e. MEMPCPY
   (assuming ENTRY/END emit nothing but alignment padding).  */
47ENTRY (MEMPCPY_CHK)
48 cmp %RDX_LP, %RCX_LP
49 jb HIDDEN_JUMPTARGET (__chk_fail)
50END (MEMPCPY_CHK)
51
/* mempcpy entry point.  Sets the return value in %rax to dst + n
   (%rdi + %rdx) and then shares the memcpy body from L(start) onwards.

   L(start) is located *after* the __ILP32__ "mov %edx, %edx" in MEMCPY,
   so on x32 the length has to be zero-extended here as well; otherwise
   the 64-bit compares and pointer arithmetic on %rdx that follow
   L(start) would see whatever the caller left in the upper 32 bits.  */
52ENTRY (MEMPCPY)
53 mov %RDI_LP, %RAX_LP
54 add %RDX_LP, %RAX_LP
#ifdef __ILP32__
 /* Clear the upper 32 bits of the length.  */
 mov %edx, %edx
#endif
55 jmp L(start)
56END (MEMPCPY)
57#endif
58
59#if !defined USE_AS_BCOPY
/* Checked memcpy entry.  Jumps to __chk_fail when %RCX_LP is below
   %RDX_LP (unsigned compare).  Presumably %RCX_LP is the destination
   object size and %RDX_LP the copy length -- confirm against the
   __memcpy_chk prototype.  There is no ret or jmp on the passing path,
   so execution continues with whatever follows END, i.e. MEMCPY
   (assuming ENTRY/END emit nothing but alignment padding).  */
60ENTRY (MEMCPY_CHK)
61 cmp %RDX_LP, %RCX_LP
62 jb HIDDEN_JUMPTARGET (__chk_fail)
63END (MEMCPY_CHK)
64#endif
65
/* Main entry (memcpy, or mempcpy/memmove when USE_AS_MEMPCPY /
   USE_AS_MEMMOVE is defined).
   In:  %rdi = destination, %rsi = source, %rdx = byte count.
   Out: %rax = destination (destination + count for USE_AS_MEMPCPY).
   This first part sets up the return value and handles counts below
   144 by dispatching on the count through one of the two tail jump
   tables; larger counts go to L(144bytesormore) or L(copy_backward).  */
66ENTRY (MEMCPY)
67 mov %RDI_LP, %RAX_LP
68#ifdef USE_AS_MEMPCPY
69 add %RDX_LP, %RAX_LP
70#endif
71
72#ifdef __ILP32__
73 /* Clear the upper 32 bits. */
74 mov %edx, %edx
75#endif
76
77#ifdef USE_AS_MEMMOVE
 /* Unsigned pointer compare.  dst below src: copy forward.  dst equal
    to src: go to L(bwd_write_0bytes) (defined outside this excerpt).
    Otherwise copy backward: via L(copy_backward) for 144 bytes or
    more, else straight through the backward tail table with the
    pointers left at the start of the buffers.  */
78 cmp %rsi, %rdi
79 jb L(copy_forward)
80 je L(bwd_write_0bytes)
81 cmp $144, %rdx
82 jae L(copy_backward)
83 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
84L(copy_forward):
85#endif
 /* MEMPCPY joins here, after the return value and (on the MEMCPY path)
    the __ILP32__ length fix-up above.  */
86L(start):
87 cmp $144, %rdx
88 jae L(144bytesormore)
89
 /* Fewer than 144 bytes (the "32" in the label name does not match the
    144-byte threshold tested above).  */
90L(fwd_write_less32bytes):
91#ifndef USE_AS_MEMMOVE
 /* Only the low bytes of the two pointers are compared (unsigned):
    when that of src is not above that of dst the backward table is
    used, otherwise the forward one.  */
92 cmp %dil, %sil
93 jbe L(bk_write)
94#endif
 /* Forward tail: both pointers are advanced to the end of the buffers
    before dispatching on the count.  */
95 add %rdx, %rsi
96 add %rdx, %rdi
97 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
98#ifndef USE_AS_MEMMOVE
 /* Backward tail: pointers stay at the start of the buffers.  */
99L(bk_write):
100
101 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
102#endif
103
/* Forward copy of 144 bytes or more.  Saves the first 16 source bytes
   in %xmm0 and the original destination in %r8 (the copy loops store
   them last), rounds %rdi up to the next 16-byte boundary, and then
   selects a loop by the source misalignment (%rsi & 15).  */
104 .p2align 4
105L(144bytesormore):
106
107#ifndef USE_AS_MEMMOVE
 /* Low bytes of src and dst compared as *signed* values here.
    NOTE(review): the <144-byte path uses the unsigned jbe on the same
    compare -- confirm the difference is intentional.  */
108 cmp %dil, %sil
109 jle L(copy_backward)
110#endif
111 movdqu (%rsi), %xmm0
112 mov %rdi, %r8
 /* %rdi = (dst & -16) + 16, i.e. the first 16-byte boundary strictly
    above dst; %r9 = number of bytes skipped (1..16).  The count and
    the source pointer are adjusted by the same amount.  */
113 and $-16, %rdi
114 add $16, %rdi
115 mov %rdi, %r9
116 sub %r8, %r9
117 sub %r9, %rdx
118 add %r9, %rsi
 /* %r9 = source misalignment relative to 16 bytes; zero means both
    pointers are aligned and L(shl_0) can be used.  */
119 mov %rsi, %r9
120 and $0xf, %r9
121 jz L(shl_0)
122#ifdef DATA_CACHE_SIZE
123 mov $DATA_CACHE_SIZE, %RCX_LP
124#else
125 mov __x86_data_cache_size(%rip), %RCX_LP
126#endif
 /* Remaining count at or above the data cache size goes to
    L(gobble_mem_fwd) (outside this excerpt).  */
127 cmp %rcx, %rdx
128 jae L(gobble_mem_fwd)
 /* Otherwise pre-subtract 0x80 from the count and jump to the entry of
    L(shl_table_fwd) selected by the misalignment in %r9.  */
129 lea L(shl_table_fwd)(%rip), %r11
130 sub $0x80, %rdx
131 movslq (%r11, %r9, 4), %r9
132 add %r11, %r9
133 _CET_NOTRACK jmp *%r9
134 ud2
135
/* Backward copy of 144 bytes or more.  Saves the last 16 source bytes
   in %xmm0 and the address of the last 16 destination bytes in %r8
   (the copy loops store them last), rounds the destination end down to
   a 16-byte boundary, and then selects a loop by the source
   misalignment (%rsi & 15).  */
136 .p2align 4
137L(copy_backward):
138#ifdef DATA_CACHE_SIZE
139 mov $DATA_CACHE_SIZE, %RCX_LP
140#else
141 mov __x86_data_cache_size(%rip), %RCX_LP
142#endif
 /* Counts above twice the data cache size go to L(gobble_mem_bwd)
    (outside this excerpt).  */
143 shl $1, %rcx
144 cmp %rcx, %rdx
145 ja L(gobble_mem_bwd)
146
 /* Point both registers one past the end of their buffers.  */
147 add %rdx, %rdi
148 add %rdx, %rsi
149 movdqu -16(%rsi), %xmm0
150 lea -16(%rdi), %r8
 /* %r9 = low four bits of the destination end.  The xor clears exactly
    those bits, rounding %rdi down to a 16-byte boundary; the source
    pointer and the count are reduced by the same amount.  */
151 mov %rdi, %r9
152 and $0xf, %r9
153 xor %r9, %rdi
154 sub %r9, %rsi
155 sub %r9, %rdx
 /* %r9 = source misalignment relative to 16 bytes; zero means both
    pointers are aligned and L(shl_0_bwd) can be used.  */
156 mov %rsi, %r9
157 and $0xf, %r9
158 jz L(shl_0_bwd)
 /* Otherwise pre-subtract 0x80 from the count and jump to the entry of
    L(shl_table_bwd) selected by the misalignment in %r9.  */
159 lea L(shl_table_bwd)(%rip), %r11
160 sub $0x80, %rdx
161 movslq (%r11, %r9, 4), %r9
162 add %r11, %r9
163 _CET_NOTRACK jmp *%r9
164 ud2
165
/* Forward copy with source and destination both 16-byte aligned
   (reached from L(144bytesormore) when %rsi & 15 is zero).  Copies 128
   bytes per iteration with aligned loads and stores.  */
166 .p2align 4
167L(shl_0):
168
 /* %r9 = count + count / 256; at or above half the data cache size the
    copy is handed to L(gobble_mem_fwd) (outside this excerpt).  */
169 mov %rdx, %r9
170 shr $8, %r9
171 add %rdx, %r9
172#ifdef DATA_CACHE_SIZE
173 cmp $DATA_CACHE_SIZE_HALF, %R9_LP
174#else
175 cmp __x86_data_cache_size_half(%rip), %R9_LP
176#endif
177 jae L(gobble_mem_fwd)
 /* Bias the count by -0x80 so the loop can test "at least another 128
    bytes left" with the borrow of a single sub.  */
178 sub $0x80, %rdx
179 .p2align 4
180L(shl_0_loop):
181 movdqa (%rsi), %xmm1
182 movdqa %xmm1, (%rdi)
183 movaps 0x10(%rsi), %xmm2
184 movaps %xmm2, 0x10(%rdi)
185 movaps 0x20(%rsi), %xmm3
186 movaps %xmm3, 0x20(%rdi)
187 movaps 0x30(%rsi), %xmm4
188 movaps %xmm4, 0x30(%rdi)
189 movaps 0x40(%rsi), %xmm1
190 movaps %xmm1, 0x40(%rdi)
191 movaps 0x50(%rsi), %xmm2
192 movaps %xmm2, 0x50(%rdi)
193 movaps 0x60(%rsi), %xmm3
194 movaps %xmm3, 0x60(%rdi)
195 movaps 0x70(%rsi), %xmm4
196 movaps %xmm4, 0x70(%rdi)
 /* lea does not modify flags, so the jae below still tests the borrow
    from this sub.  */
197 sub $0x80, %rdx
198 lea 0x80(%rsi), %rsi
199 lea 0x80(%rdi), %rdi
200 jae L(shl_0_loop)
 /* Loop done: store the saved first 16 source bytes (%xmm0) at the
    original destination (%r8), restore %rdx to the 0..127 leftover
    bytes, advance both pointers past them and dispatch on the leftover
    count.  */
201 movdqu %xmm0, (%r8)
202 add $0x80, %rdx
203 add %rdx, %rsi
204 add %rdx, %rdi
205 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
206
/* Backward copy with source and destination ends both 16-byte aligned
   (reached from L(copy_backward) when %rsi & 15 is zero).  Copies 128
   bytes per iteration, working downwards with aligned loads and
   stores.  */
207 .p2align 4
208L(shl_0_bwd):
 /* Bias the count by -0x80 so the loop can test "at least another 128
    bytes left" with the borrow of a single sub.  */
209 sub $0x80, %rdx
210L(copy_backward_loop):
211 movaps -0x10(%rsi), %xmm1
212 movaps %xmm1, -0x10(%rdi)
213 movaps -0x20(%rsi), %xmm2
214 movaps %xmm2, -0x20(%rdi)
215 movaps -0x30(%rsi), %xmm3
216 movaps %xmm3, -0x30(%rdi)
217 movaps -0x40(%rsi), %xmm4
218 movaps %xmm4, -0x40(%rdi)
219 movaps -0x50(%rsi), %xmm5
220 movaps %xmm5, -0x50(%rdi)
221 movaps -0x60(%rsi), %xmm5
222 movaps %xmm5, -0x60(%rdi)
223 movaps -0x70(%rsi), %xmm5
224 movaps %xmm5, -0x70(%rdi)
225 movaps -0x80(%rsi), %xmm5
226 movaps %xmm5, -0x80(%rdi)
 /* lea does not modify flags, so the jae below still tests the borrow
    from this sub.  */
227 sub $0x80, %rdx
228 lea -0x80(%rdi), %rdi
229 lea -0x80(%rsi), %rsi
230 jae L(copy_backward_loop)
231
 /* Loop done: store the saved last 16 source bytes (%xmm0) over the
    last 16 destination bytes (%r8), restore %rdx to the 0..127
    leftover bytes, move both pointers down to the start of the
    leftover region and dispatch on the leftover count.  */
232 movdqu %xmm0, (%r8)
233 add $0x80, %rdx
234 sub %rdx, %rdi
235 sub %rdx, %rsi
236 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
237
/* Forward 128-byte loop for a source that is 1 byte above a 16-byte
   boundary: every movaps load is at an offset congruent to -1 mod 16,
   so the loads are only legal under that alignment.  Presumably reached
   through L(shl_table_fwd) with index 1 -- the table is outside this
   excerpt.  palignr $1 merges each pair of neighbouring loads into the
   16 bytes that belong at the aligned store address.  */
238 .p2align 4
239L(shl_1):
 /* The borrow from this sub is consumed by the jae at the bottom of the
    loop; movaps, palignr and lea leave the flags untouched.  */
240 sub $0x80, %rdx
241 movaps -0x01(%rsi), %xmm1
242 movaps 0x0f(%rsi), %xmm2
243 movaps 0x1f(%rsi), %xmm3
244 movaps 0x2f(%rsi), %xmm4
245 movaps 0x3f(%rsi), %xmm5
246 movaps 0x4f(%rsi), %xmm6
247 movaps 0x5f(%rsi), %xmm7
248 movaps 0x6f(%rsi), %xmm8
249 movaps 0x7f(%rsi), %xmm9
250 lea 0x80(%rsi), %rsi
251 palignr $1, %xmm8, %xmm9
252 movaps %xmm9, 0x70(%rdi)
253 palignr $1, %xmm7, %xmm8
254 movaps %xmm8, 0x60(%rdi)
255 palignr $1, %xmm6, %xmm7
256 movaps %xmm7, 0x50(%rdi)
257 palignr $1, %xmm5, %xmm6
258 movaps %xmm6, 0x40(%rdi)
259 palignr $1, %xmm4, %xmm5
260 movaps %xmm5, 0x30(%rdi)
261 palignr $1, %xmm3, %xmm4
262 movaps %xmm4, 0x20(%rdi)
263 palignr $1, %xmm2, %xmm3
264 movaps %xmm3, 0x10(%rdi)
265 palignr $1, %xmm1, %xmm2
266 movaps %xmm2, (%rdi)
267 lea 0x80(%rdi), %rdi
268 jae L(shl_1)
 /* Loop done: store the 16 bytes held in %xmm0 at (%r8) (set up in
    L(144bytesormore) as the first source bytes and the original
    destination), restore %rdx to the 0..127 leftover bytes, advance
    both pointers past them and dispatch on the leftover count.  */
269 movdqu %xmm0, (%r8)
270 add $0x80, %rdx
271 add %rdx, %rdi
272 add %rdx, %rsi
273 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
274
/* Backward 128-byte loop for a source that is 1 byte above a 16-byte
   boundary (all movaps loads are at offsets congruent to -1 mod 16).
   Presumably reached through L(shl_table_bwd) with index 1 -- the table
   is outside this excerpt.  Works downwards from %rsi/%rdi: each
   palignr $1 merges two neighbouring aligned loads into the 16 bytes
   stored at -0x10..-0x80(%rdi).  */
275 .p2align 4
276L(shl_1_bwd):
277 movaps -0x01(%rsi), %xmm1
278
279 movaps -0x11(%rsi), %xmm2
280 palignr $1, %xmm2, %xmm1
281 movaps %xmm1, -0x10(%rdi)
282
283 movaps -0x21(%rsi), %xmm3
284 palignr $1, %xmm3, %xmm2
285 movaps %xmm2, -0x20(%rdi)
286
287 movaps -0x31(%rsi), %xmm4
288 palignr $1, %xmm4, %xmm3
289 movaps %xmm3, -0x30(%rdi)
290
291 movaps -0x41(%rsi), %xmm5
292 palignr $1, %xmm5, %xmm4
293 movaps %xmm4, -0x40(%rdi)
294
295 movaps -0x51(%rsi), %xmm6
296 palignr $1, %xmm6, %xmm5
297 movaps %xmm5, -0x50(%rdi)
298
299 movaps -0x61(%rsi), %xmm7
300 palignr $1, %xmm7, %xmm6
301 movaps %xmm6, -0x60(%rdi)
302
303 movaps -0x71(%rsi), %xmm8
304 palignr $1, %xmm8, %xmm7
305 movaps %xmm7, -0x70(%rdi)
306
307 movaps -0x81(%rsi), %xmm9
308 palignr $1, %xmm9, %xmm8
309 movaps %xmm8, -0x80(%rdi)
310
 /* lea does not modify flags, so the jae below still tests the borrow
    from this sub.  */
311 sub $0x80, %rdx
312 lea -0x80(%rdi), %rdi
313 lea -0x80(%rsi), %rsi
314 jae L(shl_1_bwd)
 /* Loop done: store the 16 bytes held in %xmm0 at (%r8) (set up in
    L(copy_backward) as the last source bytes and the last 16
    destination bytes), restore %rdx to the 0..127 leftover bytes, move
    both pointers down to the start of the leftover region and dispatch
    on the leftover count.  */
315 movdqu %xmm0, (%r8)
316 add $0x80, %rdx
317 sub %rdx, %rdi
318 sub %rdx, %rsi
319 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
320
/* Forward 128-byte loop for a source that is 2 bytes above a 16-byte
   boundary: every movaps load is at an offset congruent to -2 mod 16,
   so the loads are only legal under that alignment.  Presumably reached
   through L(shl_table_fwd) with index 2 -- the table is outside this
   excerpt.  palignr $2 merges each pair of neighbouring loads into the
   16 bytes that belong at the aligned store address.  */
321 .p2align 4
322L(shl_2):
 /* The borrow from this sub is consumed by the jae at the bottom of the
    loop; movaps, palignr and lea leave the flags untouched.  */
323 sub $0x80, %rdx
324 movaps -0x02(%rsi), %xmm1
325 movaps 0x0e(%rsi), %xmm2
326 movaps 0x1e(%rsi), %xmm3
327 movaps 0x2e(%rsi), %xmm4
328 movaps 0x3e(%rsi), %xmm5
329 movaps 0x4e(%rsi), %xmm6
330 movaps 0x5e(%rsi), %xmm7
331 movaps 0x6e(%rsi), %xmm8
332 movaps 0x7e(%rsi), %xmm9
333 lea 0x80(%rsi), %rsi
334 palignr $2, %xmm8, %xmm9
335 movaps %xmm9, 0x70(%rdi)
336 palignr $2, %xmm7, %xmm8
337 movaps %xmm8, 0x60(%rdi)
338 palignr $2, %xmm6, %xmm7
339 movaps %xmm7, 0x50(%rdi)
340 palignr $2, %xmm5, %xmm6
341 movaps %xmm6, 0x40(%rdi)
342 palignr $2, %xmm4, %xmm5
343 movaps %xmm5, 0x30(%rdi)
344 palignr $2, %xmm3, %xmm4
345 movaps %xmm4, 0x20(%rdi)
346 palignr $2, %xmm2, %xmm3
347 movaps %xmm3, 0x10(%rdi)
348 palignr $2, %xmm1, %xmm2
349 movaps %xmm2, (%rdi)
350 lea 0x80(%rdi), %rdi
351 jae L(shl_2)
 /* Loop done: store the 16 bytes held in %xmm0 at (%r8) (set up in
    L(144bytesormore) as the first source bytes and the original
    destination), restore %rdx to the 0..127 leftover bytes, advance
    both pointers past them and dispatch on the leftover count.  */
352 movdqu %xmm0, (%r8)
353 add $0x80, %rdx
354 add %rdx, %rdi
355 add %rdx, %rsi
356 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
357
/* Backward 128-byte loop for a source that is 2 bytes above a 16-byte
   boundary (all movaps loads are at offsets congruent to -2 mod 16).
   Presumably reached through L(shl_table_bwd) with index 2 -- the table
   is outside this excerpt.  Works downwards from %rsi/%rdi: each
   palignr $2 merges two neighbouring aligned loads into the 16 bytes
   stored at -0x10..-0x80(%rdi).  */
358 .p2align 4
359L(shl_2_bwd):
360 movaps -0x02(%rsi), %xmm1
361
362 movaps -0x12(%rsi), %xmm2
363 palignr $2, %xmm2, %xmm1
364 movaps %xmm1, -0x10(%rdi)
365
366 movaps -0x22(%rsi), %xmm3
367 palignr $2, %xmm3, %xmm2
368 movaps %xmm2, -0x20(%rdi)
369
370 movaps -0x32(%rsi), %xmm4
371 palignr $2, %xmm4, %xmm3
372 movaps %xmm3, -0x30(%rdi)
373
374 movaps -0x42(%rsi), %xmm5
375 palignr $2, %xmm5, %xmm4
376 movaps %xmm4, -0x40(%rdi)
377
378 movaps -0x52(%rsi), %xmm6
379 palignr $2, %xmm6, %xmm5
380 movaps %xmm5, -0x50(%rdi)
381
382 movaps -0x62(%rsi), %xmm7
383 palignr $2, %xmm7, %xmm6
384 movaps %xmm6, -0x60(%rdi)
385
386 movaps -0x72(%rsi), %xmm8
387 palignr $2, %xmm8, %xmm7
388 movaps %xmm7, -0x70(%rdi)
389
390 movaps -0x82(%rsi), %xmm9
391 palignr $2, %xmm9, %xmm8
392 movaps %xmm8, -0x80(%rdi)
393
 /* lea does not modify flags, so the jae below still tests the borrow
    from this sub.  */
394 sub $0x80, %rdx
395 lea -0x80(%rdi), %rdi
396 lea -0x80(%rsi), %rsi
397 jae L(shl_2_bwd)
 /* Loop done: store the 16 bytes held in %xmm0 at (%r8) (set up in
    L(copy_backward) as the last source bytes and the last 16
    destination bytes), restore %rdx to the 0..127 leftover bytes, move
    both pointers down to the start of the leftover region and dispatch
    on the leftover count.  */
398 movdqu %xmm0, (%r8)
399 add $0x80, %rdx
400 sub %rdx, %rdi
401 sub %rdx, %rsi
402 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
403
/* Forward 128-byte loop for a source that is 3 bytes above a 16-byte
   boundary: every movaps load is at an offset congruent to -3 mod 16,
   so the loads are only legal under that alignment.  Presumably reached
   through L(shl_table_fwd) with index 3 -- the table is outside this
   excerpt.  palignr $3 merges each pair of neighbouring loads into the
   16 bytes that belong at the aligned store address.  */
404 .p2align 4
405L(shl_3):
 /* The borrow from this sub is consumed by the jae at the bottom of the
    loop; movaps, palignr and lea leave the flags untouched.  */
406 sub $0x80, %rdx
407 movaps -0x03(%rsi), %xmm1
408 movaps 0x0d(%rsi), %xmm2
409 movaps 0x1d(%rsi), %xmm3
410 movaps 0x2d(%rsi), %xmm4
411 movaps 0x3d(%rsi), %xmm5
412 movaps 0x4d(%rsi), %xmm6
413 movaps 0x5d(%rsi), %xmm7
414 movaps 0x6d(%rsi), %xmm8
415 movaps 0x7d(%rsi), %xmm9
416 lea 0x80(%rsi), %rsi
417 palignr $3, %xmm8, %xmm9
418 movaps %xmm9, 0x70(%rdi)
419 palignr $3, %xmm7, %xmm8
420 movaps %xmm8, 0x60(%rdi)
421 palignr $3, %xmm6, %xmm7
422 movaps %xmm7, 0x50(%rdi)
423 palignr $3, %xmm5, %xmm6
424 movaps %xmm6, 0x40(%rdi)
425 palignr $3, %xmm4, %xmm5
426 movaps %xmm5, 0x30(%rdi)
427 palignr $3, %xmm3, %xmm4
428 movaps %xmm4, 0x20(%rdi)
429 palignr $3, %xmm2, %xmm3
430 movaps %xmm3, 0x10(%rdi)
431 palignr $3, %xmm1, %xmm2
432 movaps %xmm2, (%rdi)
433 lea 0x80(%rdi), %rdi
434 jae L(shl_3)
 /* Loop done: store the 16 bytes held in %xmm0 at (%r8) (set up in
    L(144bytesormore) as the first source bytes and the original
    destination), restore %rdx to the 0..127 leftover bytes, advance
    both pointers past them and dispatch on the leftover count.  */
435 movdqu %xmm0, (%r8)
436 add $0x80, %rdx
437 add %rdx, %rdi
438 add %rdx, %rsi
439 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
440
/* Backward 128-byte loop for a source that is 3 bytes above a 16-byte
   boundary (all movaps loads are at offsets congruent to -3 mod 16).
   Presumably reached through L(shl_table_bwd) with index 3 -- the table
   is outside this excerpt.  Works downwards from %rsi/%rdi: each
   palignr $3 merges two neighbouring aligned loads into the 16 bytes
   stored at -0x10..-0x80(%rdi).  */
441 .p2align 4
442L(shl_3_bwd):
443 movaps -0x03(%rsi), %xmm1
444
445 movaps -0x13(%rsi), %xmm2
446 palignr $3, %xmm2, %xmm1
447 movaps %xmm1, -0x10(%rdi)
448
449 movaps -0x23(%rsi), %xmm3
450 palignr $3, %xmm3, %xmm2
451 movaps %xmm2, -0x20(%rdi)
452
453 movaps -0x33(%rsi), %xmm4
454 palignr $3, %xmm4, %xmm3
455 movaps %xmm3, -0x30(%rdi)
456
457 movaps -0x43(%rsi), %xmm5
458 palignr $3, %xmm5, %xmm4
459 movaps %xmm4, -0x40(%rdi)
460
461 movaps -0x53(%rsi), %xmm6
462 palignr $3, %xmm6, %xmm5
463 movaps %xmm5, -0x50(%rdi)
464
465 movaps -0x63(%rsi), %xmm7
466 palignr $3, %xmm7, %xmm6
467 movaps %xmm6, -0x60(%rdi)
468
469 movaps -0x73(%rsi), %xmm8
470 palignr $3, %xmm8, %xmm7
471 movaps %xmm7, -0x70(%rdi)
472
473 movaps -0x83(%rsi), %xmm9
474 palignr $3, %xmm9, %xmm8
475 movaps %xmm8, -0x80(%rdi)
476
 /* lea does not modify flags, so the jae below still tests the borrow
    from this sub.  */
477 sub $0x80, %rdx
478 lea -0x80(%rdi), %rdi
479 lea -0x80(%rsi), %rsi
480 jae L(shl_3_bwd)
 /* Loop done: store the 16 bytes held in %xmm0 at (%r8) (set up in
    L(copy_backward) as the last source bytes and the last 16
    destination bytes), restore %rdx to the 0..127 leftover bytes, move
    both pointers down to the start of the leftover region and dispatch
    on the leftover count.  */
481 movdqu %xmm0, (%r8)
482 add $0x80, %rdx
483 sub %rdx, %rdi
484 sub %rdx, %rsi
485 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
486
/* Forward 128-byte loop for a source that is 4 bytes above a 16-byte
   boundary: every movaps load is at an offset congruent to -4 mod 16,
   so the loads are only legal under that alignment.  Presumably reached
   through L(shl_table_fwd) with index 4 -- the table is outside this
   excerpt.  palignr $4 merges each pair of neighbouring loads into the
   16 bytes that belong at the aligned store address.  */
487 .p2align 4
488L(shl_4):
 /* The borrow from this sub is consumed by the jae at the bottom of the
    loop; movaps, palignr and lea leave the flags untouched.  */
489 sub $0x80, %rdx
490 movaps -0x04(%rsi), %xmm1
491 movaps 0x0c(%rsi), %xmm2
492 movaps 0x1c(%rsi), %xmm3
493 movaps 0x2c(%rsi), %xmm4
494 movaps 0x3c(%rsi), %xmm5
495 movaps 0x4c(%rsi), %xmm6
496 movaps 0x5c(%rsi), %xmm7
497 movaps 0x6c(%rsi), %xmm8
498 movaps 0x7c(%rsi), %xmm9
499 lea 0x80(%rsi), %rsi
500 palignr $4, %xmm8, %xmm9
501 movaps %xmm9, 0x70(%rdi)
502 palignr $4, %xmm7, %xmm8
503 movaps %xmm8, 0x60(%rdi)
504 palignr $4, %xmm6, %xmm7
505 movaps %xmm7, 0x50(%rdi)
506 palignr $4, %xmm5, %xmm6
507 movaps %xmm6, 0x40(%rdi)
508 palignr $4, %xmm4, %xmm5
509 movaps %xmm5, 0x30(%rdi)
510 palignr $4, %xmm3, %xmm4
511 movaps %xmm4, 0x20(%rdi)
512 palignr $4, %xmm2, %xmm3
513 movaps %xmm3, 0x10(%rdi)
514 palignr $4, %xmm1, %xmm2
515 movaps %xmm2, (%rdi)
516 lea 0x80(%rdi), %rdi
517 jae L(shl_4)
 /* Loop done: store the 16 bytes held in %xmm0 at (%r8) (set up in
    L(144bytesormore) as the first source bytes and the original
    destination), restore %rdx to the 0..127 leftover bytes, advance
    both pointers past them and dispatch on the leftover count.  */
518 movdqu %xmm0, (%r8)
519 add $0x80, %rdx
520 add %rdx, %rdi
521 add %rdx, %rsi
522 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
523
/* Backward 128-byte loop for a source that is 4 bytes above a 16-byte
   boundary (all movaps loads are at offsets congruent to -4 mod 16).
   Presumably reached through L(shl_table_bwd) with index 4 -- the table
   is outside this excerpt.  Works downwards from %rsi/%rdi: each
   palignr $4 merges two neighbouring aligned loads into the 16 bytes
   stored at -0x10..-0x80(%rdi).  */
524 .p2align 4
525L(shl_4_bwd):
526 movaps -0x04(%rsi), %xmm1
527
528 movaps -0x14(%rsi), %xmm2
529 palignr $4, %xmm2, %xmm1
530 movaps %xmm1, -0x10(%rdi)
531
532 movaps -0x24(%rsi), %xmm3
533 palignr $4, %xmm3, %xmm2
534 movaps %xmm2, -0x20(%rdi)
535
536 movaps -0x34(%rsi), %xmm4
537 palignr $4, %xmm4, %xmm3
538 movaps %xmm3, -0x30(%rdi)
539
540 movaps -0x44(%rsi), %xmm5
541 palignr $4, %xmm5, %xmm4
542 movaps %xmm4, -0x40(%rdi)
543
544 movaps -0x54(%rsi), %xmm6
545 palignr $4, %xmm6, %xmm5
546 movaps %xmm5, -0x50(%rdi)
547
548 movaps -0x64(%rsi), %xmm7
549 palignr $4, %xmm7, %xmm6
550 movaps %xmm6, -0x60(%rdi)
551
552 movaps -0x74(%rsi), %xmm8
553 palignr $4, %xmm8, %xmm7
554 movaps %xmm7, -0x70(%rdi)
555
556 movaps -0x84(%rsi), %xmm9
557 palignr $4, %xmm9, %xmm8
558 movaps %xmm8, -0x80(%rdi)
559
 /* lea does not modify flags, so the jae below still tests the borrow
    from this sub.  */
560 sub $0x80, %rdx
561 lea -0x80(%rdi), %rdi
562 lea -0x80(%rsi), %rsi
563 jae L(shl_4_bwd)
 /* Loop done: store the 16 bytes held in %xmm0 at (%r8) (set up in
    L(copy_backward) as the last source bytes and the last 16
    destination bytes), restore %rdx to the 0..127 leftover bytes, move
    both pointers down to the start of the leftover region and dispatch
    on the leftover count.  */
564 movdqu %xmm0, (%r8)
565 add $0x80, %rdx
566 sub %rdx, %rdi
567 sub %rdx, %rsi
568 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
569
/* Forward 128-byte loop for a source that is 5 bytes above a 16-byte
   boundary: every movaps load is at an offset congruent to -5 mod 16,
   so the loads are only legal under that alignment.  Presumably reached
   through L(shl_table_fwd) with index 5 -- the table is outside this
   excerpt.  palignr $5 merges each pair of neighbouring loads into the
   16 bytes that belong at the aligned store address.  */
570 .p2align 4
571L(shl_5):
 /* The borrow from this sub is consumed by the jae at the bottom of the
    loop; movaps, palignr and lea leave the flags untouched.  */
572 sub $0x80, %rdx
573 movaps -0x05(%rsi), %xmm1
574 movaps 0x0b(%rsi), %xmm2
575 movaps 0x1b(%rsi), %xmm3
576 movaps 0x2b(%rsi), %xmm4
577 movaps 0x3b(%rsi), %xmm5
578 movaps 0x4b(%rsi), %xmm6
579 movaps 0x5b(%rsi), %xmm7
580 movaps 0x6b(%rsi), %xmm8
581 movaps 0x7b(%rsi), %xmm9
582 lea 0x80(%rsi), %rsi
583 palignr $5, %xmm8, %xmm9
584 movaps %xmm9, 0x70(%rdi)
585 palignr $5, %xmm7, %xmm8
586 movaps %xmm8, 0x60(%rdi)
587 palignr $5, %xmm6, %xmm7
588 movaps %xmm7, 0x50(%rdi)
589 palignr $5, %xmm5, %xmm6
590 movaps %xmm6, 0x40(%rdi)
591 palignr $5, %xmm4, %xmm5
592 movaps %xmm5, 0x30(%rdi)
593 palignr $5, %xmm3, %xmm4
594 movaps %xmm4, 0x20(%rdi)
595 palignr $5, %xmm2, %xmm3
596 movaps %xmm3, 0x10(%rdi)
597 palignr $5, %xmm1, %xmm2
598 movaps %xmm2, (%rdi)
599 lea 0x80(%rdi), %rdi
600 jae L(shl_5)
 /* Loop done: store the 16 bytes held in %xmm0 at (%r8) (set up in
    L(144bytesormore) as the first source bytes and the original
    destination), restore %rdx to the 0..127 leftover bytes, advance
    both pointers past them and dispatch on the leftover count.  */
601 movdqu %xmm0, (%r8)
602 add $0x80, %rdx
603 add %rdx, %rdi
604 add %rdx, %rsi
605 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
606
/* Backward 128-byte loop for a source that is 5 bytes above a 16-byte
   boundary (all movaps loads are at offsets congruent to -5 mod 16).
   Presumably reached through L(shl_table_bwd) with index 5 -- the table
   is outside this excerpt.  Works downwards from %rsi/%rdi: each
   palignr $5 merges two neighbouring aligned loads into the 16 bytes
   stored at -0x10..-0x80(%rdi).  */
607 .p2align 4
608L(shl_5_bwd):
609 movaps -0x05(%rsi), %xmm1
610
611 movaps -0x15(%rsi), %xmm2
612 palignr $5, %xmm2, %xmm1
613 movaps %xmm1, -0x10(%rdi)
614
615 movaps -0x25(%rsi), %xmm3
616 palignr $5, %xmm3, %xmm2
617 movaps %xmm2, -0x20(%rdi)
618
619 movaps -0x35(%rsi), %xmm4
620 palignr $5, %xmm4, %xmm3
621 movaps %xmm3, -0x30(%rdi)
622
623 movaps -0x45(%rsi), %xmm5
624 palignr $5, %xmm5, %xmm4
625 movaps %xmm4, -0x40(%rdi)
626
627 movaps -0x55(%rsi), %xmm6
628 palignr $5, %xmm6, %xmm5
629 movaps %xmm5, -0x50(%rdi)
630
631 movaps -0x65(%rsi), %xmm7
632 palignr $5, %xmm7, %xmm6
633 movaps %xmm6, -0x60(%rdi)
634
635 movaps -0x75(%rsi), %xmm8
636 palignr $5, %xmm8, %xmm7
637 movaps %xmm7, -0x70(%rdi)
638
639 movaps -0x85(%rsi), %xmm9
640 palignr $5, %xmm9, %xmm8
641 movaps %xmm8, -0x80(%rdi)
642
 /* lea does not modify flags, so the jae below still tests the borrow
    from this sub.  */
643 sub $0x80, %rdx
644 lea -0x80(%rdi), %rdi
645 lea -0x80(%rsi), %rsi
646 jae L(shl_5_bwd)
 /* Loop done: store the 16 bytes held in %xmm0 at (%r8) (set up in
    L(copy_backward) as the last source bytes and the last 16
    destination bytes), restore %rdx to the 0..127 leftover bytes, move
    both pointers down to the start of the leftover region and dispatch
    on the leftover count.  */
647 movdqu %xmm0, (%r8)
648 add $0x80, %rdx
649 sub %rdx, %rdi
650 sub %rdx, %rsi
651 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
652
/* Forward 128-byte loop for a source that is 6 bytes above a 16-byte
   boundary: every movaps load is at an offset congruent to -6 mod 16,
   so the loads are only legal under that alignment.  Presumably reached
   through L(shl_table_fwd) with index 6 -- the table is outside this
   excerpt.  palignr $6 merges each pair of neighbouring loads into the
   16 bytes that belong at the aligned store address.  */
653 .p2align 4
654L(shl_6):
 /* The borrow from this sub is consumed by the jae at the bottom of the
    loop; movaps, palignr and lea leave the flags untouched.  */
655 sub $0x80, %rdx
656 movaps -0x06(%rsi), %xmm1
657 movaps 0x0a(%rsi), %xmm2
658 movaps 0x1a(%rsi), %xmm3
659 movaps 0x2a(%rsi), %xmm4
660 movaps 0x3a(%rsi), %xmm5
661 movaps 0x4a(%rsi), %xmm6
662 movaps 0x5a(%rsi), %xmm7
663 movaps 0x6a(%rsi), %xmm8
664 movaps 0x7a(%rsi), %xmm9
665 lea 0x80(%rsi), %rsi
666 palignr $6, %xmm8, %xmm9
667 movaps %xmm9, 0x70(%rdi)
668 palignr $6, %xmm7, %xmm8
669 movaps %xmm8, 0x60(%rdi)
670 palignr $6, %xmm6, %xmm7
671 movaps %xmm7, 0x50(%rdi)
672 palignr $6, %xmm5, %xmm6
673 movaps %xmm6, 0x40(%rdi)
674 palignr $6, %xmm4, %xmm5
675 movaps %xmm5, 0x30(%rdi)
676 palignr $6, %xmm3, %xmm4
677 movaps %xmm4, 0x20(%rdi)
678 palignr $6, %xmm2, %xmm3
679 movaps %xmm3, 0x10(%rdi)
680 palignr $6, %xmm1, %xmm2
681 movaps %xmm2, (%rdi)
682 lea 0x80(%rdi), %rdi
683 jae L(shl_6)
 /* Loop done: store the 16 bytes held in %xmm0 at (%r8) (set up in
    L(144bytesormore) as the first source bytes and the original
    destination), restore %rdx to the 0..127 leftover bytes, advance
    both pointers past them and dispatch on the leftover count.  */
684 movdqu %xmm0, (%r8)
685 add $0x80, %rdx
686 add %rdx, %rdi
687 add %rdx, %rsi
688 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
689
/* Backward 128-byte loop for a source that is 6 bytes above a 16-byte
   boundary (all movaps loads are at offsets congruent to -6 mod 16).
   Presumably reached through L(shl_table_bwd) with index 6 -- the table
   is outside this excerpt.  Works downwards from %rsi/%rdi: each
   palignr $6 merges two neighbouring aligned loads into the 16 bytes
   stored at -0x10..-0x80(%rdi).  */
690 .p2align 4
691L(shl_6_bwd):
692 movaps -0x06(%rsi), %xmm1
693
694 movaps -0x16(%rsi), %xmm2
695 palignr $6, %xmm2, %xmm1
696 movaps %xmm1, -0x10(%rdi)
697
698 movaps -0x26(%rsi), %xmm3
699 palignr $6, %xmm3, %xmm2
700 movaps %xmm2, -0x20(%rdi)
701
702 movaps -0x36(%rsi), %xmm4
703 palignr $6, %xmm4, %xmm3
704 movaps %xmm3, -0x30(%rdi)
705
706 movaps -0x46(%rsi), %xmm5
707 palignr $6, %xmm5, %xmm4
708 movaps %xmm4, -0x40(%rdi)
709
710 movaps -0x56(%rsi), %xmm6
711 palignr $6, %xmm6, %xmm5
712 movaps %xmm5, -0x50(%rdi)
713
714 movaps -0x66(%rsi), %xmm7
715 palignr $6, %xmm7, %xmm6
716 movaps %xmm6, -0x60(%rdi)
717
718 movaps -0x76(%rsi), %xmm8
719 palignr $6, %xmm8, %xmm7
720 movaps %xmm7, -0x70(%rdi)
721
722 movaps -0x86(%rsi), %xmm9
723 palignr $6, %xmm9, %xmm8
724 movaps %xmm8, -0x80(%rdi)
725
 /* lea does not modify flags, so the jae below still tests the borrow
    from this sub.  */
726 sub $0x80, %rdx
727 lea -0x80(%rdi), %rdi
728 lea -0x80(%rsi), %rsi
729 jae L(shl_6_bwd)
 /* Loop done: store the 16 bytes held in %xmm0 at (%r8) (set up in
    L(copy_backward) as the last source bytes and the last 16
    destination bytes), restore %rdx to the 0..127 leftover bytes, move
    both pointers down to the start of the leftover region and dispatch
    on the leftover count.  */
730 movdqu %xmm0, (%r8)
731 add $0x80, %rdx
732 sub %rdx, %rdi
733 sub %rdx, %rsi
734 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
735
/* Forward 128-byte loop for a source that is 7 bytes above a 16-byte
   boundary: every movaps load is at an offset congruent to -7 mod 16,
   so the loads are only legal under that alignment.  Presumably reached
   through L(shl_table_fwd) with index 7 -- the table is outside this
   excerpt.  palignr $7 merges each pair of neighbouring loads into the
   16 bytes that belong at the aligned store address.  */
736 .p2align 4
737L(shl_7):
 /* The borrow from this sub is consumed by the jae at the bottom of the
    loop; movaps, palignr and lea leave the flags untouched.  */
738 sub $0x80, %rdx
739 movaps -0x07(%rsi), %xmm1
740 movaps 0x09(%rsi), %xmm2
741 movaps 0x19(%rsi), %xmm3
742 movaps 0x29(%rsi), %xmm4
743 movaps 0x39(%rsi), %xmm5
744 movaps 0x49(%rsi), %xmm6
745 movaps 0x59(%rsi), %xmm7
746 movaps 0x69(%rsi), %xmm8
747 movaps 0x79(%rsi), %xmm9
748 lea 0x80(%rsi), %rsi
749 palignr $7, %xmm8, %xmm9
750 movaps %xmm9, 0x70(%rdi)
751 palignr $7, %xmm7, %xmm8
752 movaps %xmm8, 0x60(%rdi)
753 palignr $7, %xmm6, %xmm7
754 movaps %xmm7, 0x50(%rdi)
755 palignr $7, %xmm5, %xmm6
756 movaps %xmm6, 0x40(%rdi)
757 palignr $7, %xmm4, %xmm5
758 movaps %xmm5, 0x30(%rdi)
759 palignr $7, %xmm3, %xmm4
760 movaps %xmm4, 0x20(%rdi)
761 palignr $7, %xmm2, %xmm3
762 movaps %xmm3, 0x10(%rdi)
763 palignr $7, %xmm1, %xmm2
764 movaps %xmm2, (%rdi)
765 lea 0x80(%rdi), %rdi
766 jae L(shl_7)
 /* Loop done: store the 16 bytes held in %xmm0 at (%r8) (set up in
    L(144bytesormore) as the first source bytes and the original
    destination), restore %rdx to the 0..127 leftover bytes, advance
    both pointers past them and dispatch on the leftover count.  */
767 movdqu %xmm0, (%r8)
768 add $0x80, %rdx
769 add %rdx, %rdi
770 add %rdx, %rsi
771 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
772
/* Backward 128-byte loop for a source that is 7 bytes above a 16-byte
   boundary (all movaps loads are at offsets congruent to -7 mod 16).
   Presumably reached through L(shl_table_bwd) with index 7 -- the table
   is outside this excerpt.  Works downwards from %rsi/%rdi: each
   palignr $7 merges two neighbouring aligned loads into the 16 bytes
   stored at -0x10..-0x80(%rdi).  */
773 .p2align 4
774L(shl_7_bwd):
775 movaps -0x07(%rsi), %xmm1
776
777 movaps -0x17(%rsi), %xmm2
778 palignr $7, %xmm2, %xmm1
779 movaps %xmm1, -0x10(%rdi)
780
781 movaps -0x27(%rsi), %xmm3
782 palignr $7, %xmm3, %xmm2
783 movaps %xmm2, -0x20(%rdi)
784
785 movaps -0x37(%rsi), %xmm4
786 palignr $7, %xmm4, %xmm3
787 movaps %xmm3, -0x30(%rdi)
788
789 movaps -0x47(%rsi), %xmm5
790 palignr $7, %xmm5, %xmm4
791 movaps %xmm4, -0x40(%rdi)
792
793 movaps -0x57(%rsi), %xmm6
794 palignr $7, %xmm6, %xmm5
795 movaps %xmm5, -0x50(%rdi)
796
797 movaps -0x67(%rsi), %xmm7
798 palignr $7, %xmm7, %xmm6
799 movaps %xmm6, -0x60(%rdi)
800
801 movaps -0x77(%rsi), %xmm8
802 palignr $7, %xmm8, %xmm7
803 movaps %xmm7, -0x70(%rdi)
804
805 movaps -0x87(%rsi), %xmm9
806 palignr $7, %xmm9, %xmm8
807 movaps %xmm8, -0x80(%rdi)
808
 /* lea does not modify flags, so the jae below still tests the borrow
    from this sub.  */
809 sub $0x80, %rdx
810 lea -0x80(%rdi), %rdi
811 lea -0x80(%rsi), %rsi
812 jae L(shl_7_bwd)
 /* Loop done: store the 16 bytes held in %xmm0 at (%r8) (set up in
    L(copy_backward) as the last source bytes and the last 16
    destination bytes), restore %rdx to the 0..127 leftover bytes, move
    both pointers down to the start of the leftover region and dispatch
    on the leftover count.  */
813 movdqu %xmm0, (%r8)
814 add $0x80, %rdx
815 sub %rdx, %rdi
816 sub %rdx, %rsi
817 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
818
/* Forward 128-byte loop for a source that is 8 bytes above a 16-byte
   boundary: every movaps load is at an offset congruent to -8 mod 16,
   so the loads are only legal under that alignment.  Presumably reached
   through L(shl_table_fwd) with index 8 -- the table is outside this
   excerpt.  palignr $8 merges each pair of neighbouring loads into the
   16 bytes that belong at the aligned store address.  */
819 .p2align 4
820L(shl_8):
 /* The borrow from this sub is consumed by the jae at the bottom of the
    loop; movaps, palignr and lea leave the flags untouched.  */
821 sub $0x80, %rdx
822 movaps -0x08(%rsi), %xmm1
823 movaps 0x08(%rsi), %xmm2
824 movaps 0x18(%rsi), %xmm3
825 movaps 0x28(%rsi), %xmm4
826 movaps 0x38(%rsi), %xmm5
827 movaps 0x48(%rsi), %xmm6
828 movaps 0x58(%rsi), %xmm7
829 movaps 0x68(%rsi), %xmm8
830 movaps 0x78(%rsi), %xmm9
831 lea 0x80(%rsi), %rsi
832 palignr $8, %xmm8, %xmm9
833 movaps %xmm9, 0x70(%rdi)
834 palignr $8, %xmm7, %xmm8
835 movaps %xmm8, 0x60(%rdi)
836 palignr $8, %xmm6, %xmm7
837 movaps %xmm7, 0x50(%rdi)
838 palignr $8, %xmm5, %xmm6
839 movaps %xmm6, 0x40(%rdi)
840 palignr $8, %xmm4, %xmm5
841 movaps %xmm5, 0x30(%rdi)
842 palignr $8, %xmm3, %xmm4
843 movaps %xmm4, 0x20(%rdi)
844 palignr $8, %xmm2, %xmm3
845 movaps %xmm3, 0x10(%rdi)
846 palignr $8, %xmm1, %xmm2
847 movaps %xmm2, (%rdi)
848 lea 0x80(%rdi), %rdi
849 jae L(shl_8)
 /* Loop done: store the 16 bytes held in %xmm0 at (%r8) (set up in
    L(144bytesormore) as the first source bytes and the original
    destination), restore %rdx to the 0..127 leftover bytes, advance
    both pointers past them and dispatch on the leftover count.  */
850 movdqu %xmm0, (%r8)
851 add $0x80, %rdx
852 add %rdx, %rdi
853 add %rdx, %rsi
854 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
855
/* Backward 128-byte loop for a source that is 8 bytes above a 16-byte
   boundary (all movaps loads are at offsets congruent to -8 mod 16).
   Presumably reached through L(shl_table_bwd) with index 8 -- the table
   is outside this excerpt.  Works downwards from %rsi/%rdi: each
   palignr $8 merges two neighbouring aligned loads into the 16 bytes
   stored at -0x10..-0x80(%rdi).  */
856 .p2align 4
857L(shl_8_bwd):
858 movaps -0x08(%rsi), %xmm1
859
860 movaps -0x18(%rsi), %xmm2
861 palignr $8, %xmm2, %xmm1
862 movaps %xmm1, -0x10(%rdi)
863
864 movaps -0x28(%rsi), %xmm3
865 palignr $8, %xmm3, %xmm2
866 movaps %xmm2, -0x20(%rdi)
867
868 movaps -0x38(%rsi), %xmm4
869 palignr $8, %xmm4, %xmm3
870 movaps %xmm3, -0x30(%rdi)
871
872 movaps -0x48(%rsi), %xmm5
873 palignr $8, %xmm5, %xmm4
874 movaps %xmm4, -0x40(%rdi)
875
876 movaps -0x58(%rsi), %xmm6
877 palignr $8, %xmm6, %xmm5
878 movaps %xmm5, -0x50(%rdi)
879
880 movaps -0x68(%rsi), %xmm7
881 palignr $8, %xmm7, %xmm6
882 movaps %xmm6, -0x60(%rdi)
883
884 movaps -0x78(%rsi), %xmm8
885 palignr $8, %xmm8, %xmm7
886 movaps %xmm7, -0x70(%rdi)
887
888 movaps -0x88(%rsi), %xmm9
889 palignr $8, %xmm9, %xmm8
890 movaps %xmm8, -0x80(%rdi)
891
 /* lea does not modify flags, so the jae below still tests the borrow
    from this sub.  */
892 sub $0x80, %rdx
893 lea -0x80(%rdi), %rdi
894 lea -0x80(%rsi), %rsi
895 jae L(shl_8_bwd)
 /* Nothing in this excerpt branches to the label below; it is reached
    by falling out of the loop.  */
896L(shl_8_end_bwd):
 /* Loop done: store the 16 bytes held in %xmm0 at (%r8) (set up in
    L(copy_backward) as the last source bytes and the last 16
    destination bytes), restore %rdx to the 0..127 leftover bytes, move
    both pointers down to the start of the leftover region and dispatch
    on the leftover count.  */
897 movdqu %xmm0, (%r8)
898 add $0x80, %rdx
899 sub %rdx, %rdi
900 sub %rdx, %rsi
901 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
902
/* Forward 128-byte loop for a source that is 9 bytes above a 16-byte
   boundary: every movaps load is at an offset congruent to -9 mod 16,
   so the loads are only legal under that alignment.  Presumably reached
   through L(shl_table_fwd) with index 9 -- the table is outside this
   excerpt.  palignr $9 merges each pair of neighbouring loads into the
   16 bytes that belong at the aligned store address.  */
903 .p2align 4
904L(shl_9):
 /* The borrow from this sub is consumed by the jae at the bottom of the
    loop; movaps, palignr and lea leave the flags untouched.  */
905 sub $0x80, %rdx
906 movaps -0x09(%rsi), %xmm1
907 movaps 0x07(%rsi), %xmm2
908 movaps 0x17(%rsi), %xmm3
909 movaps 0x27(%rsi), %xmm4
910 movaps 0x37(%rsi), %xmm5
911 movaps 0x47(%rsi), %xmm6
912 movaps 0x57(%rsi), %xmm7
913 movaps 0x67(%rsi), %xmm8
914 movaps 0x77(%rsi), %xmm9
915 lea 0x80(%rsi), %rsi
916 palignr $9, %xmm8, %xmm9
917 movaps %xmm9, 0x70(%rdi)
918 palignr $9, %xmm7, %xmm8
919 movaps %xmm8, 0x60(%rdi)
920 palignr $9, %xmm6, %xmm7
921 movaps %xmm7, 0x50(%rdi)
922 palignr $9, %xmm5, %xmm6
923 movaps %xmm6, 0x40(%rdi)
924 palignr $9, %xmm4, %xmm5
925 movaps %xmm5, 0x30(%rdi)
926 palignr $9, %xmm3, %xmm4
927 movaps %xmm4, 0x20(%rdi)
928 palignr $9, %xmm2, %xmm3
929 movaps %xmm3, 0x10(%rdi)
930 palignr $9, %xmm1, %xmm2
931 movaps %xmm2, (%rdi)
932 lea 0x80(%rdi), %rdi
933 jae L(shl_9)
 /* Loop done: store the 16 bytes held in %xmm0 at (%r8) (set up in
    L(144bytesormore) as the first source bytes and the original
    destination), restore %rdx to the 0..127 leftover bytes, advance
    both pointers past them and dispatch on the leftover count.  */
934 movdqu %xmm0, (%r8)
935 add $0x80, %rdx
936 add %rdx, %rdi
937 add %rdx, %rsi
938 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
939
/* Backward 128-byte loop for a source that is 9 bytes above a 16-byte
   boundary (all movaps loads are at offsets congruent to -9 mod 16).
   Presumably reached through L(shl_table_bwd) with index 9 -- the table
   is outside this excerpt.  Works downwards from %rsi/%rdi: each
   palignr $9 merges two neighbouring aligned loads into the 16 bytes
   stored at -0x10..-0x80(%rdi).  */
940 .p2align 4
941L(shl_9_bwd):
942 movaps -0x09(%rsi), %xmm1
943
944 movaps -0x19(%rsi), %xmm2
945 palignr $9, %xmm2, %xmm1
946 movaps %xmm1, -0x10(%rdi)
947
948 movaps -0x29(%rsi), %xmm3
949 palignr $9, %xmm3, %xmm2
950 movaps %xmm2, -0x20(%rdi)
951
952 movaps -0x39(%rsi), %xmm4
953 palignr $9, %xmm4, %xmm3
954 movaps %xmm3, -0x30(%rdi)
955
956 movaps -0x49(%rsi), %xmm5
957 palignr $9, %xmm5, %xmm4
958 movaps %xmm4, -0x40(%rdi)
959
960 movaps -0x59(%rsi), %xmm6
961 palignr $9, %xmm6, %xmm5
962 movaps %xmm5, -0x50(%rdi)
963
964 movaps -0x69(%rsi), %xmm7
965 palignr $9, %xmm7, %xmm6
966 movaps %xmm6, -0x60(%rdi)
967
968 movaps -0x79(%rsi), %xmm8
969 palignr $9, %xmm8, %xmm7
970 movaps %xmm7, -0x70(%rdi)
971
972 movaps -0x89(%rsi), %xmm9
973 palignr $9, %xmm9, %xmm8
974 movaps %xmm8, -0x80(%rdi)
975
 /* lea does not modify flags, so the jae below still tests the borrow
    from this sub.  */
976 sub $0x80, %rdx
977 lea -0x80(%rdi), %rdi
978 lea -0x80(%rsi), %rsi
979 jae L(shl_9_bwd)
 /* Loop done: store the 16 bytes held in %xmm0 at (%r8) (set up in
    L(copy_backward) as the last source bytes and the last 16
    destination bytes), restore %rdx to the 0..127 leftover bytes, move
    both pointers down to the start of the leftover region and dispatch
    on the leftover count.  */
980 movdqu %xmm0, (%r8)
981 add $0x80, %rdx
982 sub %rdx, %rdi
983 sub %rdx, %rsi
984 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
985
/* Forward 128-byte loop for a source that is 10 bytes above a 16-byte
   boundary: every movaps load is at an offset congruent to -10 mod 16,
   so the loads are only legal under that alignment.  Presumably reached
   through L(shl_table_fwd) with index 10 -- the table is outside this
   excerpt.  palignr $10 merges each pair of neighbouring loads into the
   16 bytes that belong at the aligned store address.  */
986 .p2align 4
987L(shl_10):
 /* The borrow from this sub is consumed by the jae at the bottom of the
    loop; movaps, palignr and lea leave the flags untouched.  */
988 sub $0x80, %rdx
989 movaps -0x0a(%rsi), %xmm1
990 movaps 0x06(%rsi), %xmm2
991 movaps 0x16(%rsi), %xmm3
992 movaps 0x26(%rsi), %xmm4
993 movaps 0x36(%rsi), %xmm5
994 movaps 0x46(%rsi), %xmm6
995 movaps 0x56(%rsi), %xmm7
996 movaps 0x66(%rsi), %xmm8
997 movaps 0x76(%rsi), %xmm9
998 lea 0x80(%rsi), %rsi
999 palignr $10, %xmm8, %xmm9
1000 movaps %xmm9, 0x70(%rdi)
1001 palignr $10, %xmm7, %xmm8
1002 movaps %xmm8, 0x60(%rdi)
1003 palignr $10, %xmm6, %xmm7
1004 movaps %xmm7, 0x50(%rdi)
1005 palignr $10, %xmm5, %xmm6
1006 movaps %xmm6, 0x40(%rdi)
1007 palignr $10, %xmm4, %xmm5
1008 movaps %xmm5, 0x30(%rdi)
1009 palignr $10, %xmm3, %xmm4
1010 movaps %xmm4, 0x20(%rdi)
1011 palignr $10, %xmm2, %xmm3
1012 movaps %xmm3, 0x10(%rdi)
1013 palignr $10, %xmm1, %xmm2
1014 movaps %xmm2, (%rdi)
1015 lea 0x80(%rdi), %rdi
1016 jae L(shl_10)
 /* Loop done: store the 16 bytes held in %xmm0 at (%r8) (set up in
    L(144bytesormore) as the first source bytes and the original
    destination), restore %rdx to the 0..127 leftover bytes, advance
    both pointers past them and dispatch on the leftover count.  */
1017 movdqu %xmm0, (%r8)
1018 add $0x80, %rdx
1019 add %rdx, %rdi
1020 add %rdx, %rsi
1021 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1022
/* Backward 128-byte loop for a source that is 10 bytes above a 16-byte
   boundary (all movaps loads are at offsets congruent to -10 mod 16).
   Presumably reached through L(shl_table_bwd) with index 10 -- the
   table is outside this excerpt.  Works downwards from %rsi/%rdi: each
   palignr $10 merges two neighbouring aligned loads into the 16 bytes
   stored at -0x10..-0x80(%rdi).  */
1023 .p2align 4
1024L(shl_10_bwd):
1025 movaps -0x0a(%rsi), %xmm1
1026
1027 movaps -0x1a(%rsi), %xmm2
1028 palignr $10, %xmm2, %xmm1
1029 movaps %xmm1, -0x10(%rdi)
1030
1031 movaps -0x2a(%rsi), %xmm3
1032 palignr $10, %xmm3, %xmm2
1033 movaps %xmm2, -0x20(%rdi)
1034
1035 movaps -0x3a(%rsi), %xmm4
1036 palignr $10, %xmm4, %xmm3
1037 movaps %xmm3, -0x30(%rdi)
1038
1039 movaps -0x4a(%rsi), %xmm5
1040 palignr $10, %xmm5, %xmm4
1041 movaps %xmm4, -0x40(%rdi)
1042
1043 movaps -0x5a(%rsi), %xmm6
1044 palignr $10, %xmm6, %xmm5
1045 movaps %xmm5, -0x50(%rdi)
1046
1047 movaps -0x6a(%rsi), %xmm7
1048 palignr $10, %xmm7, %xmm6
1049 movaps %xmm6, -0x60(%rdi)
1050
1051 movaps -0x7a(%rsi), %xmm8
1052 palignr $10, %xmm8, %xmm7
1053 movaps %xmm7, -0x70(%rdi)
1054
1055 movaps -0x8a(%rsi), %xmm9
1056 palignr $10, %xmm9, %xmm8
1057 movaps %xmm8, -0x80(%rdi)
1058
 /* lea does not modify flags, so the jae below still tests the borrow
    from this sub.  */
1059 sub $0x80, %rdx
1060 lea -0x80(%rdi), %rdi
1061 lea -0x80(%rsi), %rsi
1062 jae L(shl_10_bwd)
 /* Loop done: store the 16 bytes held in %xmm0 at (%r8) (set up in
    L(copy_backward) as the last source bytes and the last 16
    destination bytes), restore %rdx to the 0..127 leftover bytes, move
    both pointers down to the start of the leftover region and dispatch
    on the leftover count.  */
1063 movdqu %xmm0, (%r8)
1064 add $0x80, %rdx
1065 sub %rdx, %rdi
1066 sub %rdx, %rsi
1067 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1068
/* Forward 128-byte loop for a source that is 11 bytes above a 16-byte
   boundary: every movaps load is at an offset congruent to -11 mod 16,
   so the loads are only legal under that alignment.  Presumably reached
   through L(shl_table_fwd) with index 11 -- the table is outside this
   excerpt.  palignr $11 merges each pair of neighbouring loads into the
   16 bytes that belong at the aligned store address.  */
1069 .p2align 4
1070L(shl_11):
 /* The borrow from this sub is consumed by the jae at the bottom of the
    loop; movaps, palignr and lea leave the flags untouched.  */
1071 sub $0x80, %rdx
1072 movaps -0x0b(%rsi), %xmm1
1073 movaps 0x05(%rsi), %xmm2
1074 movaps 0x15(%rsi), %xmm3
1075 movaps 0x25(%rsi), %xmm4
1076 movaps 0x35(%rsi), %xmm5
1077 movaps 0x45(%rsi), %xmm6
1078 movaps 0x55(%rsi), %xmm7
1079 movaps 0x65(%rsi), %xmm8
1080 movaps 0x75(%rsi), %xmm9
1081 lea 0x80(%rsi), %rsi
1082 palignr $11, %xmm8, %xmm9
1083 movaps %xmm9, 0x70(%rdi)
1084 palignr $11, %xmm7, %xmm8
1085 movaps %xmm8, 0x60(%rdi)
1086 palignr $11, %xmm6, %xmm7
1087 movaps %xmm7, 0x50(%rdi)
1088 palignr $11, %xmm5, %xmm6
1089 movaps %xmm6, 0x40(%rdi)
1090 palignr $11, %xmm4, %xmm5
1091 movaps %xmm5, 0x30(%rdi)
1092 palignr $11, %xmm3, %xmm4
1093 movaps %xmm4, 0x20(%rdi)
1094 palignr $11, %xmm2, %xmm3
1095 movaps %xmm3, 0x10(%rdi)
1096 palignr $11, %xmm1, %xmm2
1097 movaps %xmm2, (%rdi)
1098 lea 0x80(%rdi), %rdi
1099 jae L(shl_11)
 /* Loop done: store the 16 bytes held in %xmm0 at (%r8) (set up in
    L(144bytesormore) as the first source bytes and the original
    destination), restore %rdx to the 0..127 leftover bytes, advance
    both pointers past them and dispatch on the leftover count.  */
1100 movdqu %xmm0, (%r8)
1101 add $0x80, %rdx
1102 add %rdx, %rdi
1103 add %rdx, %rsi
1104 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1105
/* Backward 128-byte loop for a source that is 11 bytes above a 16-byte
   boundary (all movaps loads are at offsets congruent to -11 mod 16).
   Presumably reached through L(shl_table_bwd) with index 11 -- the
   table is outside this excerpt.  Works downwards from %rsi/%rdi: each
   palignr $11 merges two neighbouring aligned loads into the 16 bytes
   stored at -0x10..-0x80(%rdi).  */
1106 .p2align 4
1107L(shl_11_bwd):
1108 movaps -0x0b(%rsi), %xmm1
1109
1110 movaps -0x1b(%rsi), %xmm2
1111 palignr $11, %xmm2, %xmm1
1112 movaps %xmm1, -0x10(%rdi)
1113
1114 movaps -0x2b(%rsi), %xmm3
1115 palignr $11, %xmm3, %xmm2
1116 movaps %xmm2, -0x20(%rdi)
1117
1118 movaps -0x3b(%rsi), %xmm4
1119 palignr $11, %xmm4, %xmm3
1120 movaps %xmm3, -0x30(%rdi)
1121
1122 movaps -0x4b(%rsi), %xmm5
1123 palignr $11, %xmm5, %xmm4
1124 movaps %xmm4, -0x40(%rdi)
1125
1126 movaps -0x5b(%rsi), %xmm6
1127 palignr $11, %xmm6, %xmm5
1128 movaps %xmm5, -0x50(%rdi)
1129
1130 movaps -0x6b(%rsi), %xmm7
1131 palignr $11, %xmm7, %xmm6
1132 movaps %xmm6, -0x60(%rdi)
1133
1134 movaps -0x7b(%rsi), %xmm8
1135 palignr $11, %xmm8, %xmm7
1136 movaps %xmm7, -0x70(%rdi)
1137
1138 movaps -0x8b(%rsi), %xmm9
1139 palignr $11, %xmm9, %xmm8
1140 movaps %xmm8, -0x80(%rdi)
1141
 /* lea does not modify flags, so the jae below still tests the borrow
    from this sub.  */
1142 sub $0x80, %rdx
1143 lea -0x80(%rdi), %rdi
1144 lea -0x80(%rsi), %rsi
1145 jae L(shl_11_bwd)
 /* Loop done: store the 16 bytes held in %xmm0 at (%r8) (set up in
    L(copy_backward) as the last source bytes and the last 16
    destination bytes), restore %rdx to the 0..127 leftover bytes, move
    both pointers down to the start of the leftover region and dispatch
    on the leftover count.  */
1146 movdqu %xmm0, (%r8)
1147 add $0x80, %rdx
1148 sub %rdx, %rdi
1149 sub %rdx, %rsi
1150 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1151
/* Forward 128-byte loop for a source that is 12 bytes above a 16-byte
   boundary: every aligned load is at an offset congruent to -12 mod 16,
   so the loads are only legal under that alignment.  Presumably reached
   through L(shl_table_fwd) with index 12 -- the table is outside this
   excerpt.  palignr $12 merges each pair of neighbouring loads into the
   16 bytes that belong at the aligned store address.  */
1152 .p2align 4
1153L(shl_12):
 /* The borrow from this sub is consumed by the jae at the bottom of the
    loop; the moves, palignr and lea leave the flags untouched.  */
1154 sub $0x80, %rdx
 /* movdqa rather than movaps for the first load, unlike the sibling
    loops; both are aligned 16-byte loads.  */
1155 movdqa -0x0c(%rsi), %xmm1
1156 movaps 0x04(%rsi), %xmm2
1157 movaps 0x14(%rsi), %xmm3
1158 movaps 0x24(%rsi), %xmm4
1159 movaps 0x34(%rsi), %xmm5
1160 movaps 0x44(%rsi), %xmm6
1161 movaps 0x54(%rsi), %xmm7
1162 movaps 0x64(%rsi), %xmm8
1163 movaps 0x74(%rsi), %xmm9
1164 lea 0x80(%rsi), %rsi
1165 palignr $12, %xmm8, %xmm9
1166 movaps %xmm9, 0x70(%rdi)
1167 palignr $12, %xmm7, %xmm8
1168 movaps %xmm8, 0x60(%rdi)
1169 palignr $12, %xmm6, %xmm7
1170 movaps %xmm7, 0x50(%rdi)
1171 palignr $12, %xmm5, %xmm6
1172 movaps %xmm6, 0x40(%rdi)
1173 palignr $12, %xmm4, %xmm5
1174 movaps %xmm5, 0x30(%rdi)
1175 palignr $12, %xmm3, %xmm4
1176 movaps %xmm4, 0x20(%rdi)
1177 palignr $12, %xmm2, %xmm3
1178 movaps %xmm3, 0x10(%rdi)
1179 palignr $12, %xmm1, %xmm2
1180 movaps %xmm2, (%rdi)
1181
1182 lea 0x80(%rdi), %rdi
1183 jae L(shl_12)
 /* Loop done: store the 16 bytes held in %xmm0 at (%r8) (set up in
    L(144bytesormore) as the first source bytes and the original
    destination), restore %rdx to the 0..127 leftover bytes, advance
    both pointers past them and dispatch on the leftover count.  */
1184 movdqu %xmm0, (%r8)
1185 add $0x80, %rdx
1186 add %rdx, %rdi
1187 add %rdx, %rsi
1188 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1189
/* Backward 128-byte loop for a source that is 12 bytes above a 16-byte
   boundary (all movaps loads are at offsets congruent to -12 mod 16).
   Presumably reached through L(shl_table_bwd) with index 12 -- the
   table is outside this excerpt.  Works downwards from %rsi/%rdi: each
   palignr $12 merges two neighbouring aligned loads into the 16 bytes
   stored at -0x10..-0x80(%rdi).  */
1190 .p2align 4
1191L(shl_12_bwd):
1192 movaps -0x0c(%rsi), %xmm1
1193
1194 movaps -0x1c(%rsi), %xmm2
1195 palignr $12, %xmm2, %xmm1
1196 movaps %xmm1, -0x10(%rdi)
1197
1198 movaps -0x2c(%rsi), %xmm3
1199 palignr $12, %xmm3, %xmm2
1200 movaps %xmm2, -0x20(%rdi)
1201
1202 movaps -0x3c(%rsi), %xmm4
1203 palignr $12, %xmm4, %xmm3
1204 movaps %xmm3, -0x30(%rdi)
1205
1206 movaps -0x4c(%rsi), %xmm5
1207 palignr $12, %xmm5, %xmm4
1208 movaps %xmm4, -0x40(%rdi)
1209
1210 movaps -0x5c(%rsi), %xmm6
1211 palignr $12, %xmm6, %xmm5
1212 movaps %xmm5, -0x50(%rdi)
1213
1214 movaps -0x6c(%rsi), %xmm7
1215 palignr $12, %xmm7, %xmm6
1216 movaps %xmm6, -0x60(%rdi)
1217
1218 movaps -0x7c(%rsi), %xmm8
1219 palignr $12, %xmm8, %xmm7
1220 movaps %xmm7, -0x70(%rdi)
1221
1222 movaps -0x8c(%rsi), %xmm9
1223 palignr $12, %xmm9, %xmm8
1224 movaps %xmm8, -0x80(%rdi)
1225
 /* lea does not modify flags, so the jae below still tests the borrow
    from this sub.  */
1226 sub $0x80, %rdx
1227 lea -0x80(%rdi), %rdi
1228 lea -0x80(%rsi), %rsi
1229 jae L(shl_12_bwd)
 /* Loop done: store the 16 bytes held in %xmm0 at (%r8) (set up in
    L(copy_backward) as the last source bytes and the last 16
    destination bytes), restore %rdx to the 0..127 leftover bytes, move
    both pointers down to the start of the leftover region and dispatch
    on the leftover count.  */
1230 movdqu %xmm0, (%r8)
1231 add $0x80, %rdx
1232 sub %rdx, %rdi
1233 sub %rdx, %rsi
1234 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1235
/* Forward 128-byte loop for a source that is 13 bytes above a 16-byte
   boundary: every movaps load is at an offset congruent to -13 mod 16,
   so the loads are only legal under that alignment.  Presumably reached
   through L(shl_table_fwd) with index 13 -- the table is outside this
   excerpt.  palignr $13 merges each pair of neighbouring loads into the
   16 bytes that belong at the aligned store address.  */
1236 .p2align 4
1237L(shl_13):
 /* The borrow from this sub is consumed by the jae at the bottom of the
    loop; movaps, palignr and lea leave the flags untouched.  */
1238 sub $0x80, %rdx
1239 movaps -0x0d(%rsi), %xmm1
1240 movaps 0x03(%rsi), %xmm2
1241 movaps 0x13(%rsi), %xmm3
1242 movaps 0x23(%rsi), %xmm4
1243 movaps 0x33(%rsi), %xmm5
1244 movaps 0x43(%rsi), %xmm6
1245 movaps 0x53(%rsi), %xmm7
1246 movaps 0x63(%rsi), %xmm8
1247 movaps 0x73(%rsi), %xmm9
1248 lea 0x80(%rsi), %rsi
1249 palignr $13, %xmm8, %xmm9
1250 movaps %xmm9, 0x70(%rdi)
1251 palignr $13, %xmm7, %xmm8
1252 movaps %xmm8, 0x60(%rdi)
1253 palignr $13, %xmm6, %xmm7
1254 movaps %xmm7, 0x50(%rdi)
1255 palignr $13, %xmm5, %xmm6
1256 movaps %xmm6, 0x40(%rdi)
1257 palignr $13, %xmm4, %xmm5
1258 movaps %xmm5, 0x30(%rdi)
1259 palignr $13, %xmm3, %xmm4
1260 movaps %xmm4, 0x20(%rdi)
1261 palignr $13, %xmm2, %xmm3
1262 movaps %xmm3, 0x10(%rdi)
1263 palignr $13, %xmm1, %xmm2
1264 movaps %xmm2, (%rdi)
1265 lea 0x80(%rdi), %rdi
1266 jae L(shl_13)
 /* Loop done: store the 16 bytes held in %xmm0 at (%r8) (set up in
    L(144bytesormore) as the first source bytes and the original
    destination), restore %rdx to the 0..127 leftover bytes, advance
    both pointers past them and dispatch on the leftover count.  */
1267 movdqu %xmm0, (%r8)
1268 add $0x80, %rdx
1269 add %rdx, %rdi
1270 add %rdx, %rsi
1271 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1272
/* Backward 128-byte loop for a source that is 13 bytes above a 16-byte
   boundary (all movaps loads are at offsets congruent to -13 mod 16).
   Presumably reached through L(shl_table_bwd) with index 13 -- the
   table is outside this excerpt.  Works downwards from %rsi/%rdi: each
   palignr $13 merges two neighbouring aligned loads into the 16 bytes
   stored at -0x10..-0x80(%rdi).  */
1273 .p2align 4
1274L(shl_13_bwd):
1275 movaps -0x0d(%rsi), %xmm1
1276
1277 movaps -0x1d(%rsi), %xmm2
1278 palignr $13, %xmm2, %xmm1
1279 movaps %xmm1, -0x10(%rdi)
1280
1281 movaps -0x2d(%rsi), %xmm3
1282 palignr $13, %xmm3, %xmm2
1283 movaps %xmm2, -0x20(%rdi)
1284
1285 movaps -0x3d(%rsi), %xmm4
1286 palignr $13, %xmm4, %xmm3
1287 movaps %xmm3, -0x30(%rdi)
1288
1289 movaps -0x4d(%rsi), %xmm5
1290 palignr $13, %xmm5, %xmm4
1291 movaps %xmm4, -0x40(%rdi)
1292
1293 movaps -0x5d(%rsi), %xmm6
1294 palignr $13, %xmm6, %xmm5
1295 movaps %xmm5, -0x50(%rdi)
1296
1297 movaps -0x6d(%rsi), %xmm7
1298 palignr $13, %xmm7, %xmm6
1299 movaps %xmm6, -0x60(%rdi)
1300
1301 movaps -0x7d(%rsi), %xmm8
1302 palignr $13, %xmm8, %xmm7
1303 movaps %xmm7, -0x70(%rdi)
1304
1305 movaps -0x8d(%rsi), %xmm9
1306 palignr $13, %xmm9, %xmm8
1307 movaps %xmm8, -0x80(%rdi)
1308
 /* lea does not modify flags, so the jae below still tests the borrow
    from this sub.  */
1309 sub $0x80, %rdx
1310 lea -0x80(%rdi), %rdi
1311 lea -0x80(%rsi), %rsi
1312 jae L(shl_13_bwd)
 /* Loop done: store the 16 bytes held in %xmm0 at (%r8) (set up in
    L(copy_backward) as the last source bytes and the last 16
    destination bytes), restore %rdx to the 0..127 leftover bytes, move
    both pointers down to the start of the leftover region and dispatch
    on the leftover count.  */
1313 movdqu %xmm0, (%r8)
1314 add $0x80, %rdx
1315 sub %rdx, %rdi
1316 sub %rdx, %rsi
1317 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1318
1319 .p2align 4
/* Forward main loop for a source that sits 14 bytes past a 16-byte
   boundary.  Each iteration copies 0x80 bytes from (%rsi) to (%rdi):
   nine aligned 16-byte loads cover -0x0e(%rsi) .. 0x81(%rsi), palignr $14
   on every adjacent pair extracts the 16 source bytes that straddle the
   two loads, and the eight results are written with aligned stores.
   movaps faults on unaligned addresses, so this path relies on
   (%rsi - 14) and %rdi being 16-byte aligned -- presumably arranged by
   the dispatch code, which is not visible here.
   The loop copies 0x80 bytes before it tests the result of its
   subtraction, so %rdx is expected to arrive already reduced by 0x80
   -- NOTE(review): confirm against the code that branches here.
   %xmm0 and %r8 are set up before entry (not visible here).  */
1320L(shl_14):
/* The carry produced here is what the jae at the bottom tests; the
   movaps, palignr and lea in between do not modify the flags.  */
1321 sub $0x80, %rdx
1322 movaps -0x0e(%rsi), %xmm1
1323 movaps 0x02(%rsi), %xmm2
1324 movaps 0x12(%rsi), %xmm3
1325 movaps 0x22(%rsi), %xmm4
1326 movaps 0x32(%rsi), %xmm5
1327 movaps 0x42(%rsi), %xmm6
1328 movaps 0x52(%rsi), %xmm7
1329 movaps 0x62(%rsi), %xmm8
1330 movaps 0x72(%rsi), %xmm9
1331 lea 0x80(%rsi), %rsi
/* Combine from the highest pair downwards so that each register is
   still unmodified when it serves as the low half of the next pair.  */
1332 palignr $14, %xmm8, %xmm9
1333 movaps %xmm9, 0x70(%rdi)
1334 palignr $14, %xmm7, %xmm8
1335 movaps %xmm8, 0x60(%rdi)
1336 palignr $14, %xmm6, %xmm7
1337 movaps %xmm7, 0x50(%rdi)
1338 palignr $14, %xmm5, %xmm6
1339 movaps %xmm6, 0x40(%rdi)
1340 palignr $14, %xmm4, %xmm5
1341 movaps %xmm5, 0x30(%rdi)
1342 palignr $14, %xmm3, %xmm4
1343 movaps %xmm4, 0x20(%rdi)
1344 palignr $14, %xmm2, %xmm3
1345 movaps %xmm3, 0x10(%rdi)
1346 palignr $14, %xmm1, %xmm2
1347 movaps %xmm2, (%rdi)
1348 lea 0x80(%rdi), %rdi
1349 jae L(shl_14)
/* Loop exit: issue the pending 16-byte store of %xmm0 at (%r8), add the
   0x80 back so %rdx is the 0..0x7f bytes still to copy, move both
   pointers to the end of the data and dispatch on %rdx through the
   forward jump table.  */
1350 movdqu %xmm0, (%r8)
1351 add $0x80, %rdx
1352 add %rdx, %rdi
1353 add %rdx, %rsi
1354 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1355
1356 .p2align 4
/* Backward main loop for a source that sits 14 bytes past a 16-byte
   boundary.  Each iteration copies the 0x80 bytes that end at %rsi to
   the 0x80 bytes that end at %rdi, highest addresses first: nine aligned
   16-byte loads cover -0x8e(%rsi) .. 0x01(%rsi) and palignr $14 on every
   adjacent pair extracts the 16 source bytes that straddle them.
   movaps faults on unaligned addresses, so this path relies on
   (%rsi - 14) and %rdi being 16-byte aligned -- presumably arranged by
   the dispatch code, which is not visible here.
   0x80 bytes are copied before %rdx is tested, so %rdx is expected to
   arrive already reduced by 0x80 -- NOTE(review): confirm against the
   code that branches here.  %xmm0 and %r8 are set up before entry.  */
1357L(shl_14_bwd):
1358 movaps -0x0e(%rsi), %xmm1
1359
1360 movaps -0x1e(%rsi), %xmm2
1361 palignr $14, %xmm2, %xmm1
1362 movaps %xmm1, -0x10(%rdi)
1363
1364 movaps -0x2e(%rsi), %xmm3
1365 palignr $14, %xmm3, %xmm2
1366 movaps %xmm2, -0x20(%rdi)
1367
1368 movaps -0x3e(%rsi), %xmm4
1369 palignr $14, %xmm4, %xmm3
1370 movaps %xmm3, -0x30(%rdi)
1371
1372 movaps -0x4e(%rsi), %xmm5
1373 palignr $14, %xmm5, %xmm4
1374 movaps %xmm4, -0x40(%rdi)
1375
1376 movaps -0x5e(%rsi), %xmm6
1377 palignr $14, %xmm6, %xmm5
1378 movaps %xmm5, -0x50(%rdi)
1379
1380 movaps -0x6e(%rsi), %xmm7
1381 palignr $14, %xmm7, %xmm6
1382 movaps %xmm6, -0x60(%rdi)
1383
1384 movaps -0x7e(%rsi), %xmm8
1385 palignr $14, %xmm8, %xmm7
1386 movaps %xmm7, -0x70(%rdi)
1387
1388 movaps -0x8e(%rsi), %xmm9
1389 palignr $14, %xmm9, %xmm8
1390 movaps %xmm8, -0x80(%rdi)
1391
/* The two lea instructions leave the flags alone, so the jae tests the
   carry of this subtraction.  */
1392 sub $0x80, %rdx
1393 lea -0x80(%rdi), %rdi
1394 lea -0x80(%rsi), %rsi
1395 jae L(shl_14_bwd)
/* Loop exit: issue the pending 16-byte store of %xmm0 at (%r8), add the
   0x80 back so %rdx is the 0..0x7f bytes still to copy, move both
   pointers down to the start of that remainder and dispatch on %rdx
   through the backward jump table.  */
1396 movdqu %xmm0, (%r8)
1397 add $0x80, %rdx
1398 sub %rdx, %rdi
1399 sub %rdx, %rsi
1400 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1401
1402 .p2align 4
/* Forward main loop for a source that sits 15 bytes past a 16-byte
   boundary.  Each iteration copies 0x80 bytes from (%rsi) to (%rdi):
   nine aligned 16-byte loads cover -0x0f(%rsi) .. 0x80(%rsi), palignr $15
   on every adjacent pair extracts the 16 source bytes that straddle the
   two loads, and the eight results are written with aligned stores.
   movaps faults on unaligned addresses, so this path relies on
   (%rsi - 15) and %rdi being 16-byte aligned -- presumably arranged by
   the dispatch code, which is not visible here.
   The loop copies 0x80 bytes before it tests the result of its
   subtraction, so %rdx is expected to arrive already reduced by 0x80
   -- NOTE(review): confirm against the code that branches here.
   %xmm0 and %r8 are set up before entry (not visible here).  */
1403L(shl_15):
/* The carry produced here is what the jae at the bottom tests; the
   movaps, palignr and lea in between do not modify the flags.  */
1404 sub $0x80, %rdx
1405 movaps -0x0f(%rsi), %xmm1
1406 movaps 0x01(%rsi), %xmm2
1407 movaps 0x11(%rsi), %xmm3
1408 movaps 0x21(%rsi), %xmm4
1409 movaps 0x31(%rsi), %xmm5
1410 movaps 0x41(%rsi), %xmm6
1411 movaps 0x51(%rsi), %xmm7
1412 movaps 0x61(%rsi), %xmm8
1413 movaps 0x71(%rsi), %xmm9
1414 lea 0x80(%rsi), %rsi
/* Combine from the highest pair downwards so that each register is
   still unmodified when it serves as the low half of the next pair.  */
1415 palignr $15, %xmm8, %xmm9
1416 movaps %xmm9, 0x70(%rdi)
1417 palignr $15, %xmm7, %xmm8
1418 movaps %xmm8, 0x60(%rdi)
1419 palignr $15, %xmm6, %xmm7
1420 movaps %xmm7, 0x50(%rdi)
1421 palignr $15, %xmm5, %xmm6
1422 movaps %xmm6, 0x40(%rdi)
1423 palignr $15, %xmm4, %xmm5
1424 movaps %xmm5, 0x30(%rdi)
1425 palignr $15, %xmm3, %xmm4
1426 movaps %xmm4, 0x20(%rdi)
1427 palignr $15, %xmm2, %xmm3
1428 movaps %xmm3, 0x10(%rdi)
1429 palignr $15, %xmm1, %xmm2
1430 movaps %xmm2, (%rdi)
1431 lea 0x80(%rdi), %rdi
1432 jae L(shl_15)
/* Loop exit: issue the pending 16-byte store of %xmm0 at (%r8), add the
   0x80 back so %rdx is the 0..0x7f bytes still to copy, move both
   pointers to the end of the data and dispatch on %rdx through the
   forward jump table.  */
1433 movdqu %xmm0, (%r8)
1434 add $0x80, %rdx
1435 add %rdx, %rdi
1436 add %rdx, %rsi
1437 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1438
1439 .p2align 4
/* Backward main loop for a source that sits 15 bytes past a 16-byte
   boundary.  Each iteration copies the 0x80 bytes that end at %rsi to
   the 0x80 bytes that end at %rdi, highest addresses first: nine aligned
   16-byte loads cover -0x8f(%rsi) .. 0x00(%rsi) and palignr $15 on every
   adjacent pair extracts the 16 source bytes that straddle them.
   movaps faults on unaligned addresses, so this path relies on
   (%rsi - 15) and %rdi being 16-byte aligned -- presumably arranged by
   the dispatch code, which is not visible here.
   0x80 bytes are copied before %rdx is tested, so %rdx is expected to
   arrive already reduced by 0x80 -- NOTE(review): confirm against the
   code that branches here.  %xmm0 and %r8 are set up before entry.  */
1440L(shl_15_bwd):
1441 movaps -0x0f(%rsi), %xmm1
1442
1443 movaps -0x1f(%rsi), %xmm2
1444 palignr $15, %xmm2, %xmm1
1445 movaps %xmm1, -0x10(%rdi)
1446
1447 movaps -0x2f(%rsi), %xmm3
1448 palignr $15, %xmm3, %xmm2
1449 movaps %xmm2, -0x20(%rdi)
1450
1451 movaps -0x3f(%rsi), %xmm4
1452 palignr $15, %xmm4, %xmm3
1453 movaps %xmm3, -0x30(%rdi)
1454
1455 movaps -0x4f(%rsi), %xmm5
1456 palignr $15, %xmm5, %xmm4
1457 movaps %xmm4, -0x40(%rdi)
1458
1459 movaps -0x5f(%rsi), %xmm6
1460 palignr $15, %xmm6, %xmm5
1461 movaps %xmm5, -0x50(%rdi)
1462
1463 movaps -0x6f(%rsi), %xmm7
1464 palignr $15, %xmm7, %xmm6
1465 movaps %xmm6, -0x60(%rdi)
1466
1467 movaps -0x7f(%rsi), %xmm8
1468 palignr $15, %xmm8, %xmm7
1469 movaps %xmm7, -0x70(%rdi)
1470
1471 movaps -0x8f(%rsi), %xmm9
1472 palignr $15, %xmm9, %xmm8
1473 movaps %xmm8, -0x80(%rdi)
1474
/* The two lea instructions leave the flags alone, so the jae tests the
   carry of this subtraction.  */
1475 sub $0x80, %rdx
1476 lea -0x80(%rdi), %rdi
1477 lea -0x80(%rsi), %rsi
1478 jae L(shl_15_bwd)
/* Loop exit: issue the pending 16-byte store of %xmm0 at (%r8), add the
   0x80 back so %rdx is the 0..0x7f bytes still to copy, move both
   pointers down to the start of that remainder and dispatch on %rdx
   through the backward jump table.  */
1479 movdqu %xmm0, (%r8)
1480 add $0x80, %rdx
1481 sub %rdx, %rdi
1482 sub %rdx, %rsi
1483 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1484
1485 .p2align 4
/* Forward copy path that picks between a loop of non-temporal stores
   (movntdq) and a loop of ordinary aligned stores (movdqa), based on the
   byte count in %rdx and half of the shared cache size.
   %xmm0 and %r8 hold a pending 16-byte store prepared before entry (not
   visible here); it is issued below.  All destination stores are
   aligned ones, so %rdi is expected to be 16-byte aligned on entry
   -- presumably arranged by the caller.  */
1486L(gobble_mem_fwd):
/* Copy the first 16 bytes.  The source is read before the pending
   (%r8) store is issued.  */
1487 movdqu (%rsi), %xmm1
1488 movdqu %xmm0, (%r8)
1489 movdqa %xmm1, (%rdi)
1490 sub $16, %rdx
1491 add $16, %rsi
1492 add $16, %rdi
1493
/* %rcx = half the shared cache size: a build-time constant when
   SHARED_CACHE_SIZE_HALF is defined, otherwise read from
   __x86_shared_cache_size_half.  */
1494#ifdef SHARED_CACHE_SIZE_HALF
1495 mov $SHARED_CACHE_SIZE_HALF, %RCX_LP
1496#else
1497 mov __x86_shared_cache_size_half(%rip), %RCX_LP
1498#endif
1499#ifdef USE_AS_MEMMOVE
/* %r9 = %rsi - %rdi.  If that distance is at least the byte count the
   normal selection below is used; otherwise, if the distance does not
   exceed %rcx, go straight to the ordinary-store loop.  */
1500 mov %rsi, %r9
1501 sub %rdi, %r9
1502 cmp %rdx, %r9
1503 jae L(memmove_is_memcpy_fwd)
1504 cmp %rcx, %r9
1505 jbe L(ll_cache_copy_fwd_start)
1506L(memmove_is_memcpy_fwd):
1507#endif
/* %rcx = min (%rcx, %rdx): the part reserved for the ordinary-store
   loop.  */
1508 cmp %rcx, %rdx
1509 ja L(bigger_in_fwd)
1510 mov %rdx, %rcx
1511L(bigger_in_fwd):
/* %rdx = bytes beyond that part.  With at most 0x1000 of them,
   everything is copied by the ordinary-store loop.  */
1512 sub %rcx, %rdx
1513 cmp $0x1000, %rdx
1514 jbe L(ll_cache_copy_fwd)
1515
/* If the excess is at most 8 * %rcx, copy in two steps: %rdx bytes with
   non-temporal stores, then %rcx bytes with ordinary stores.  Otherwise
   fold %rcx back into %rdx and clear it, so the non-temporal loop
   handles the whole count.  */
1516 mov %rcx, %r9
1517 shl $3, %r9
1518 cmp %r9, %rdx
1519 jbe L(2steps_copy_fwd)
1520 add %rcx, %rdx
1521 xor %rcx, %rcx
1522L(2steps_copy_fwd):
/* Bias %rdx by -0x80: the loop subtracts a further 0x80 ahead of every
   0x80-byte chunk and repeats while that subtraction does not borrow.  */
1523 sub $0x80, %rdx
1524L(gobble_mem_fwd_loop):
/* The flags set here are tested by the jae at the end of the loop; the
   prefetches, moves, fence and lea in between do not modify them.  */
1525 sub $0x80, %rdx
1526 prefetcht0 0x200(%rsi)
1527 prefetcht0 0x300(%rsi)
1528 movdqu (%rsi), %xmm0
1529 movdqu 0x10(%rsi), %xmm1
1530 movdqu 0x20(%rsi), %xmm2
1531 movdqu 0x30(%rsi), %xmm3
1532 movdqu 0x40(%rsi), %xmm4
1533 movdqu 0x50(%rsi), %xmm5
1534 movdqu 0x60(%rsi), %xmm6
1535 movdqu 0x70(%rsi), %xmm7
/* All eight loads are fenced off from the non-temporal stores below.  */
1536 lfence
1537 movntdq %xmm0, (%rdi)
1538 movntdq %xmm1, 0x10(%rdi)
1539 movntdq %xmm2, 0x20(%rdi)
1540 movntdq %xmm3, 0x30(%rdi)
1541 movntdq %xmm4, 0x40(%rdi)
1542 movntdq %xmm5, 0x50(%rdi)
1543 movntdq %xmm6, 0x60(%rdi)
1544 movntdq %xmm7, 0x70(%rdi)
1545 lea 0x80(%rsi), %rsi
1546 lea 0x80(%rdi), %rdi
1547 jae L(gobble_mem_fwd_loop)
/* Store fence after the non-temporal stores.  */
1548 sfence
/* Skip the ordinary-store loop when fewer than 0x80 bytes were reserved
   for it; otherwise remove the bias before adding that part back.  */
1549 cmp $0x80, %rcx
1550 jb L(gobble_mem_fwd_end)
1551 add $0x80, %rdx
1552L(ll_cache_copy_fwd):
1553 add %rcx, %rdx
1554L(ll_cache_copy_fwd_start):
/* Same -0x80 bias as above.  The loop body runs before its subtraction
   is tested, so at least one 0x80-byte chunk is always copied
   -- NOTE(review): callers presumably guarantee that many bytes remain;
   confirm.  */
1555 sub $0x80, %rdx
1556L(gobble_ll_loop_fwd):
1557 prefetchnta 0x1c0(%rsi)
1558 prefetchnta 0x280(%rsi)
1559 prefetchnta 0x1c0(%rdi)
1560 prefetchnta 0x280(%rdi)
1561 sub $0x80, %rdx
1562 movdqu (%rsi), %xmm0
1563 movdqu 0x10(%rsi), %xmm1
1564 movdqu 0x20(%rsi), %xmm2
1565 movdqu 0x30(%rsi), %xmm3
1566 movdqu 0x40(%rsi), %xmm4
1567 movdqu 0x50(%rsi), %xmm5
1568 movdqu 0x60(%rsi), %xmm6
1569 movdqu 0x70(%rsi), %xmm7
1570 movdqa %xmm0, (%rdi)
1571 movdqa %xmm1, 0x10(%rdi)
1572 movdqa %xmm2, 0x20(%rdi)
1573 movdqa %xmm3, 0x30(%rdi)
1574 movdqa %xmm4, 0x40(%rdi)
1575 movdqa %xmm5, 0x50(%rdi)
1576 movdqa %xmm6, 0x60(%rdi)
1577 movdqa %xmm7, 0x70(%rdi)
1578 lea 0x80(%rsi), %rsi
1579 lea 0x80(%rdi), %rdi
1580 jae L(gobble_ll_loop_fwd)
1581L(gobble_mem_fwd_end):
/* Remove the bias so %rdx is the number of bytes still to copy, move
   both pointers to the end of the data and dispatch on %rdx through the
   forward jump table.  */
1582 add $0x80, %rdx
1583 add %rdx, %rsi
1584 add %rdx, %rdi
1585 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1586
1587 .p2align 4
/* Backward copy path that picks between a loop of non-temporal stores
   (movntdq) and a loop of ordinary aligned stores (movdqa), based on the
   byte count in %rdx and half of the shared cache size.  On entry %rsi
   and %rdi point at the start of the source and destination; both are
   moved to the end first and the copy then proceeds towards lower
   addresses.  */
1588L(gobble_mem_bwd):
1589 add %rdx, %rsi
1590 add %rdx, %rdi
1591
/* Keep the last 16 source bytes in %xmm0 and their destination address
   in %r8; they are stored at L(gobble_mem_bwd_end).  Then round %rdi
   down to a 16-byte boundary and reduce %rsi and %rdx by the same
   number of bytes (%r9), so the loops can use aligned stores.  */
1592 movdqu -16(%rsi), %xmm0
1593 lea -16(%rdi), %r8
1594 mov %rdi, %r9
1595 and $-16, %rdi
1596 sub %rdi, %r9
1597 sub %r9, %rsi
1598 sub %r9, %rdx
1599
1600
/* %rcx = half the shared cache size: a build-time constant when
   SHARED_CACHE_SIZE_HALF is defined, otherwise read from
   __x86_shared_cache_size_half.  */
1601#ifdef SHARED_CACHE_SIZE_HALF
1602 mov $SHARED_CACHE_SIZE_HALF, %RCX_LP
1603#else
1604 mov __x86_shared_cache_size_half(%rip), %RCX_LP
1605#endif
1606#ifdef USE_AS_MEMMOVE
/* %r9 = %rdi - %rsi.  If that distance is at least the byte count the
   normal selection below is used; otherwise, if the distance does not
   exceed %rcx, go straight to the ordinary-store loop.  */
1607 mov %rdi, %r9
1608 sub %rsi, %r9
1609 cmp %rdx, %r9
1610 jae L(memmove_is_memcpy_bwd)
1611 cmp %rcx, %r9
1612 jbe L(ll_cache_copy_bwd_start)
1613L(memmove_is_memcpy_bwd):
1614#endif
/* %rcx = min (%rcx, %rdx): the part reserved for the ordinary-store
   loop.  */
1615 cmp %rcx, %rdx
1616 ja L(bigger)
1617 mov %rdx, %rcx
1618L(bigger):
/* %rdx = bytes beyond that part.  With at most 0x1000 of them,
   everything is copied by the ordinary-store loop.  */
1619 sub %rcx, %rdx
1620 cmp $0x1000, %rdx
1621 jbe L(ll_cache_copy)
1622
/* If the excess is at most 8 * %rcx, copy in two steps: %rdx bytes with
   non-temporal stores, then %rcx bytes with ordinary stores.  Otherwise
   fold %rcx back into %rdx and clear it, so the non-temporal loop
   handles the whole count.  */
1623 mov %rcx, %r9
1624 shl $3, %r9
1625 cmp %r9, %rdx
1626 jbe L(2steps_copy)
1627 add %rcx, %rdx
1628 xor %rcx, %rcx
1629L(2steps_copy):
/* Bias %rdx by -0x80: the loop subtracts a further 0x80 ahead of every
   0x80-byte chunk and repeats while that subtraction does not borrow.  */
1630 sub $0x80, %rdx
1631L(gobble_mem_bwd_loop):
/* The flags set here are tested by the jae at the end of the loop; the
   prefetches, moves, fence and lea in between do not modify them.
   The loop uses %xmm1-%xmm8 and leaves the saved tail in %xmm0 alone.  */
1632 sub $0x80, %rdx
1633 prefetcht0 -0x200(%rsi)
1634 prefetcht0 -0x300(%rsi)
1635 movdqu -0x10(%rsi), %xmm1
1636 movdqu -0x20(%rsi), %xmm2
1637 movdqu -0x30(%rsi), %xmm3
1638 movdqu -0x40(%rsi), %xmm4
1639 movdqu -0x50(%rsi), %xmm5
1640 movdqu -0x60(%rsi), %xmm6
1641 movdqu -0x70(%rsi), %xmm7
1642 movdqu -0x80(%rsi), %xmm8
/* All eight loads are fenced off from the non-temporal stores below.  */
1643 lfence
1644 movntdq %xmm1, -0x10(%rdi)
1645 movntdq %xmm2, -0x20(%rdi)
1646 movntdq %xmm3, -0x30(%rdi)
1647 movntdq %xmm4, -0x40(%rdi)
1648 movntdq %xmm5, -0x50(%rdi)
1649 movntdq %xmm6, -0x60(%rdi)
1650 movntdq %xmm7, -0x70(%rdi)
1651 movntdq %xmm8, -0x80(%rdi)
1652 lea -0x80(%rsi), %rsi
1653 lea -0x80(%rdi), %rdi
1654 jae L(gobble_mem_bwd_loop)
/* Store fence after the non-temporal stores.  */
1655 sfence
/* Skip the ordinary-store loop when fewer than 0x80 bytes were reserved
   for it; otherwise remove the bias before adding that part back.  */
1656 cmp $0x80, %rcx
1657 jb L(gobble_mem_bwd_end)
1658 add $0x80, %rdx
1659L(ll_cache_copy):
1660 add %rcx, %rdx
1661L(ll_cache_copy_bwd_start):
/* Same -0x80 bias as above.  The loop body runs before its subtraction
   is tested, so at least one 0x80-byte chunk is always copied
   -- NOTE(review): callers presumably guarantee that many bytes remain;
   confirm.  */
1662 sub $0x80, %rdx
1663L(gobble_ll_loop):
1664 prefetchnta -0x1c0(%rsi)
1665 prefetchnta -0x280(%rsi)
1666 prefetchnta -0x1c0(%rdi)
1667 prefetchnta -0x280(%rdi)
1668 sub $0x80, %rdx
1669 movdqu -0x10(%rsi), %xmm1
1670 movdqu -0x20(%rsi), %xmm2
1671 movdqu -0x30(%rsi), %xmm3
1672 movdqu -0x40(%rsi), %xmm4
1673 movdqu -0x50(%rsi), %xmm5
1674 movdqu -0x60(%rsi), %xmm6
1675 movdqu -0x70(%rsi), %xmm7
1676 movdqu -0x80(%rsi), %xmm8
1677 movdqa %xmm1, -0x10(%rdi)
1678 movdqa %xmm2, -0x20(%rdi)
1679 movdqa %xmm3, -0x30(%rdi)
1680 movdqa %xmm4, -0x40(%rdi)
1681 movdqa %xmm5, -0x50(%rdi)
1682 movdqa %xmm6, -0x60(%rdi)
1683 movdqa %xmm7, -0x70(%rdi)
1684 movdqa %xmm8, -0x80(%rdi)
1685 lea -0x80(%rsi), %rsi
1686 lea -0x80(%rdi), %rdi
1687 jae L(gobble_ll_loop)
1688L(gobble_mem_bwd_end):
/* Write the saved last 16 bytes, remove the bias so %rdx is the number
   of bytes still to copy, move both pointers down to the start of that
   remainder and dispatch on %rdx through the backward jump table.  */
1689 movdqu %xmm0, (%r8)
1690 add $0x80, %rdx
1691 sub %rdx, %rsi
1692 sub %rdx, %rdi
1693 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1694
1695 .p2align 4
/* Forward tail copy for sizes 128, 112, 96, 80, 64, 48, 32, 16 and 0.
   L(fwd_write_Nbytes) copies the N bytes that end at %rsi to the N bytes
   that end at %rdi: each stage moves the 16 bytes starting N bytes
   before the end with unaligned lddqu/movdqu and falls through to the
   stage for N - 16, down to the bare ret at L(fwd_write_0bytes).
   Presumably reached through L(table_144_bytes_fwd), which is not
   visible here.  */
1696L(fwd_write_128bytes):
1697 lddqu -128(%rsi), %xmm0
1698 movdqu %xmm0, -128(%rdi)
1699L(fwd_write_112bytes):
1700 lddqu -112(%rsi), %xmm0
1701 movdqu %xmm0, -112(%rdi)
1702L(fwd_write_96bytes):
1703 lddqu -96(%rsi), %xmm0
1704 movdqu %xmm0, -96(%rdi)
1705L(fwd_write_80bytes):
1706 lddqu -80(%rsi), %xmm0
1707 movdqu %xmm0, -80(%rdi)
1708L(fwd_write_64bytes):
1709 lddqu -64(%rsi), %xmm0
1710 movdqu %xmm0, -64(%rdi)
1711L(fwd_write_48bytes):
1712 lddqu -48(%rsi), %xmm0
1713 movdqu %xmm0, -48(%rdi)
1714L(fwd_write_32bytes):
1715 lddqu -32(%rsi), %xmm0
1716 movdqu %xmm0, -32(%rdi)
1717L(fwd_write_16bytes):
1718 lddqu -16(%rsi), %xmm0
1719 movdqu %xmm0, -16(%rdi)
1720L(fwd_write_0bytes):
1721 ret
1722
1723
1724 .p2align 4
/* Forward tail copy for sizes 143, 127, 111, 95, 79, 63, 47 and 31.
   L(fwd_write_Nbytes) copies the N bytes that end at %rsi to the N bytes
   that end at %rdi: each stage moves the 16 bytes starting N bytes
   before the end with unaligned lddqu/movdqu and falls through to the
   stage for N - 16.  The final stage (31) loads two 16-byte chunks that
   overlap by 1 byte and only then stores both.  Presumably reached
   through L(table_144_bytes_fwd), which is not visible here.  */
1725L(fwd_write_143bytes):
1726 lddqu -143(%rsi), %xmm0
1727 movdqu %xmm0, -143(%rdi)
1728L(fwd_write_127bytes):
1729 lddqu -127(%rsi), %xmm0
1730 movdqu %xmm0, -127(%rdi)
1731L(fwd_write_111bytes):
1732 lddqu -111(%rsi), %xmm0
1733 movdqu %xmm0, -111(%rdi)
1734L(fwd_write_95bytes):
1735 lddqu -95(%rsi), %xmm0
1736 movdqu %xmm0, -95(%rdi)
1737L(fwd_write_79bytes):
1738 lddqu -79(%rsi), %xmm0
1739 movdqu %xmm0, -79(%rdi)
1740L(fwd_write_63bytes):
1741 lddqu -63(%rsi), %xmm0
1742 movdqu %xmm0, -63(%rdi)
1743L(fwd_write_47bytes):
1744 lddqu -47(%rsi), %xmm0
1745 movdqu %xmm0, -47(%rdi)
1746L(fwd_write_31bytes):
1747 lddqu -31(%rsi), %xmm0
1748 lddqu -16(%rsi), %xmm1
1749 movdqu %xmm0, -31(%rdi)
1750 movdqu %xmm1, -16(%rdi)
1751 ret
1752
1753 .p2align 4
/* Copy the 15 bytes that end at %rsi to the 15 bytes that end at %rdi
   with two 8-byte moves that overlap by 1 byte.  Both loads are issued
   before either store; %rdx and %rcx are used as scratch.  */
1754L(fwd_write_15bytes):
1755 mov -15(%rsi), %rdx
1756 mov -8(%rsi), %rcx
1757 mov %rdx, -15(%rdi)
1758 mov %rcx, -8(%rdi)
1759 ret
1760
1761 .p2align 4
/* Forward tail copy for sizes 142, 126, 110, 94, 78, 62, 46 and 30.
   L(fwd_write_Nbytes) copies the N bytes that end at %rsi to the N bytes
   that end at %rdi: each stage moves the 16 bytes starting N bytes
   before the end with unaligned lddqu/movdqu and falls through to the
   stage for N - 16.  The final stage (30) loads two 16-byte chunks that
   overlap by 2 bytes and only then stores both.  Presumably reached
   through L(table_144_bytes_fwd), which is not visible here.  */
1762L(fwd_write_142bytes):
1763 lddqu -142(%rsi), %xmm0
1764 movdqu %xmm0, -142(%rdi)
1765L(fwd_write_126bytes):
1766 lddqu -126(%rsi), %xmm0
1767 movdqu %xmm0, -126(%rdi)
1768L(fwd_write_110bytes):
1769 lddqu -110(%rsi), %xmm0
1770 movdqu %xmm0, -110(%rdi)
1771L(fwd_write_94bytes):
1772 lddqu -94(%rsi), %xmm0
1773 movdqu %xmm0, -94(%rdi)
1774L(fwd_write_78bytes):
1775 lddqu -78(%rsi), %xmm0
1776 movdqu %xmm0, -78(%rdi)
1777L(fwd_write_62bytes):
1778 lddqu -62(%rsi), %xmm0
1779 movdqu %xmm0, -62(%rdi)
1780L(fwd_write_46bytes):
1781 lddqu -46(%rsi), %xmm0
1782 movdqu %xmm0, -46(%rdi)
1783L(fwd_write_30bytes):
1784 lddqu -30(%rsi), %xmm0
1785 lddqu -16(%rsi), %xmm1
1786 movdqu %xmm0, -30(%rdi)
1787 movdqu %xmm1, -16(%rdi)
1788 ret
1789
1790 .p2align 4
/* Copy the 14 bytes that end at %rsi to the 14 bytes that end at %rdi
   with two 8-byte moves that overlap by 2 bytes.  Both loads are issued
   before either store; %rdx and %rcx are used as scratch.  */
1791L(fwd_write_14bytes):
1792 mov -14(%rsi), %rdx
1793 mov -8(%rsi), %rcx
1794 mov %rdx, -14(%rdi)
1795 mov %rcx, -8(%rdi)
1796 ret
1797
1798 .p2align 4
/* Forward tail copy for sizes 141, 125, 109, 93, 77, 61, 45 and 29.
   L(fwd_write_Nbytes) copies the N bytes that end at %rsi to the N bytes
   that end at %rdi: each stage moves the 16 bytes starting N bytes
   before the end with unaligned lddqu/movdqu and falls through to the
   stage for N - 16.  The final stage (29) loads two 16-byte chunks that
   overlap by 3 bytes and only then stores both.  Presumably reached
   through L(table_144_bytes_fwd), which is not visible here.  */
1799L(fwd_write_141bytes):
1800 lddqu -141(%rsi), %xmm0
1801 movdqu %xmm0, -141(%rdi)
1802L(fwd_write_125bytes):
1803 lddqu -125(%rsi), %xmm0
1804 movdqu %xmm0, -125(%rdi)
1805L(fwd_write_109bytes):
1806 lddqu -109(%rsi), %xmm0
1807 movdqu %xmm0, -109(%rdi)
1808L(fwd_write_93bytes):
1809 lddqu -93(%rsi), %xmm0
1810 movdqu %xmm0, -93(%rdi)
1811L(fwd_write_77bytes):
1812 lddqu -77(%rsi), %xmm0
1813 movdqu %xmm0, -77(%rdi)
1814L(fwd_write_61bytes):
1815 lddqu -61(%rsi), %xmm0
1816 movdqu %xmm0, -61(%rdi)
1817L(fwd_write_45bytes):
1818 lddqu -45(%rsi), %xmm0
1819 movdqu %xmm0, -45(%rdi)
1820L(fwd_write_29bytes):
1821 lddqu -29(%rsi), %xmm0
1822 lddqu -16(%rsi), %xmm1
1823 movdqu %xmm0, -29(%rdi)
1824 movdqu %xmm1, -16(%rdi)
1825 ret
1826
1827 .p2align 4
/* Copy the 13 bytes that end at %rsi to the 13 bytes that end at %rdi
   with two 8-byte moves that overlap by 3 bytes.  Both loads are issued
   before either store; %rdx and %rcx are used as scratch.  */
1828L(fwd_write_13bytes):
1829 mov -13(%rsi), %rdx
1830 mov -8(%rsi), %rcx
1831 mov %rdx, -13(%rdi)
1832 mov %rcx, -8(%rdi)
1833 ret
1834
1835 .p2align 4
/* Forward tail copy for sizes 140, 124, 108, 92, 76, 60, 44 and 28.
   L(fwd_write_Nbytes) copies the N bytes that end at %rsi to the N bytes
   that end at %rdi: each stage moves the 16 bytes starting N bytes
   before the end with unaligned lddqu/movdqu and falls through to the
   stage for N - 16.  The final stage (28) loads two 16-byte chunks that
   overlap by 4 bytes and only then stores both.  Presumably reached
   through L(table_144_bytes_fwd), which is not visible here.  */
1836L(fwd_write_140bytes):
1837 lddqu -140(%rsi), %xmm0
1838 movdqu %xmm0, -140(%rdi)
1839L(fwd_write_124bytes):
1840 lddqu -124(%rsi), %xmm0
1841 movdqu %xmm0, -124(%rdi)
1842L(fwd_write_108bytes):
1843 lddqu -108(%rsi), %xmm0
1844 movdqu %xmm0, -108(%rdi)
1845L(fwd_write_92bytes):
1846 lddqu -92(%rsi), %xmm0
1847 movdqu %xmm0, -92(%rdi)
1848L(fwd_write_76bytes):
1849 lddqu -76(%rsi), %xmm0
1850 movdqu %xmm0, -76(%rdi)
1851L(fwd_write_60bytes):
1852 lddqu -60(%rsi), %xmm0
1853 movdqu %xmm0, -60(%rdi)
1854L(fwd_write_44bytes):
1855 lddqu -44(%rsi), %xmm0
1856 movdqu %xmm0, -44(%rdi)
1857L(fwd_write_28bytes):
1858 lddqu -28(%rsi), %xmm0
1859 lddqu -16(%rsi), %xmm1
1860 movdqu %xmm0, -28(%rdi)
1861 movdqu %xmm1, -16(%rdi)
1862 ret
1863
1864 .p2align 4
/* Copy the 12 bytes that end at %rsi to the 12 bytes that end at %rdi
   with one 8-byte move followed by one adjacent 4-byte move.  Both
   loads are issued before either store; %rdx and %ecx are scratch.  */
1865L(fwd_write_12bytes):
1866 mov -12(%rsi), %rdx
1867 mov -4(%rsi), %ecx
1868 mov %rdx, -12(%rdi)
1869 mov %ecx, -4(%rdi)
1870 ret
1871
1872 .p2align 4
/* Forward tail copy for sizes 139, 123, 107, 91, 75, 59, 43 and 27.
   L(fwd_write_Nbytes) copies the N bytes that end at %rsi to the N bytes
   that end at %rdi: each stage moves the 16 bytes starting N bytes
   before the end with unaligned lddqu/movdqu and falls through to the
   stage for N - 16.  The final stage (27) loads two 16-byte chunks that
   overlap by 5 bytes and only then stores both.  Presumably reached
   through L(table_144_bytes_fwd), which is not visible here.  */
1873L(fwd_write_139bytes):
1874 lddqu -139(%rsi), %xmm0
1875 movdqu %xmm0, -139(%rdi)
1876L(fwd_write_123bytes):
1877 lddqu -123(%rsi), %xmm0
1878 movdqu %xmm0, -123(%rdi)
1879L(fwd_write_107bytes):
1880 lddqu -107(%rsi), %xmm0
1881 movdqu %xmm0, -107(%rdi)
1882L(fwd_write_91bytes):
1883 lddqu -91(%rsi), %xmm0
1884 movdqu %xmm0, -91(%rdi)
1885L(fwd_write_75bytes):
1886 lddqu -75(%rsi), %xmm0
1887 movdqu %xmm0, -75(%rdi)
1888L(fwd_write_59bytes):
1889 lddqu -59(%rsi), %xmm0
1890 movdqu %xmm0, -59(%rdi)
1891L(fwd_write_43bytes):
1892 lddqu -43(%rsi), %xmm0
1893 movdqu %xmm0, -43(%rdi)
1894L(fwd_write_27bytes):
1895 lddqu -27(%rsi), %xmm0
1896 lddqu -16(%rsi), %xmm1
1897 movdqu %xmm0, -27(%rdi)
1898 movdqu %xmm1, -16(%rdi)
1899 ret
1900
1901 .p2align 4
/* Copy the 11 bytes that end at %rsi to the 11 bytes that end at %rdi
   with an 8-byte move and a 4-byte move that overlap by 1 byte.  Both
   loads are issued before either store; %rdx and %ecx are scratch.  */
1902L(fwd_write_11bytes):
1903 mov -11(%rsi), %rdx
1904 mov -4(%rsi), %ecx
1905 mov %rdx, -11(%rdi)
1906 mov %ecx, -4(%rdi)
1907 ret
1908
1909 .p2align 4
/* Forward tail copy for sizes 138, 122, 106, 90, 74, 58, 42 and 26.
   L(fwd_write_Nbytes) copies the N bytes that end at %rsi to the N bytes
   that end at %rdi: each stage moves the 16 bytes starting N bytes
   before the end with unaligned lddqu/movdqu and falls through to the
   stage for N - 16.  The final stage (26) loads two 16-byte chunks that
   overlap by 6 bytes and only then stores both.  Presumably reached
   through L(table_144_bytes_fwd), which is not visible here.  */
1910L(fwd_write_138bytes):
1911 lddqu -138(%rsi), %xmm0
1912 movdqu %xmm0, -138(%rdi)
1913L(fwd_write_122bytes):
1914 lddqu -122(%rsi), %xmm0
1915 movdqu %xmm0, -122(%rdi)
1916L(fwd_write_106bytes):
1917 lddqu -106(%rsi), %xmm0
1918 movdqu %xmm0, -106(%rdi)
1919L(fwd_write_90bytes):
1920 lddqu -90(%rsi), %xmm0
1921 movdqu %xmm0, -90(%rdi)
1922L(fwd_write_74bytes):
1923 lddqu -74(%rsi), %xmm0
1924 movdqu %xmm0, -74(%rdi)
1925L(fwd_write_58bytes):
1926 lddqu -58(%rsi), %xmm0
1927 movdqu %xmm0, -58(%rdi)
1928L(fwd_write_42bytes):
1929 lddqu -42(%rsi), %xmm0
1930 movdqu %xmm0, -42(%rdi)
1931L(fwd_write_26bytes):
1932 lddqu -26(%rsi), %xmm0
1933 lddqu -16(%rsi), %xmm1
1934 movdqu %xmm0, -26(%rdi)
1935 movdqu %xmm1, -16(%rdi)
1936 ret
1937
1938 .p2align 4
/* Copy the 10 bytes that end at %rsi to the 10 bytes that end at %rdi
   with an 8-byte move and a 4-byte move that overlap by 2 bytes.  Both
   loads are issued before either store; %rdx and %ecx are scratch.  */
1939L(fwd_write_10bytes):
1940 mov -10(%rsi), %rdx
1941 mov -4(%rsi), %ecx
1942 mov %rdx, -10(%rdi)
1943 mov %ecx, -4(%rdi)
1944 ret
1945
1946 .p2align 4
/* Forward tail copy for sizes 137, 121, 105, 89, 73, 57, 41 and 25.
   L(fwd_write_Nbytes) copies the N bytes that end at %rsi to the N bytes
   that end at %rdi: each stage moves the 16 bytes starting N bytes
   before the end with unaligned lddqu/movdqu and falls through to the
   stage for N - 16.  The final stage (25) loads two 16-byte chunks that
   overlap by 7 bytes and only then stores both.  Presumably reached
   through L(table_144_bytes_fwd), which is not visible here.  */
1947L(fwd_write_137bytes):
1948 lddqu -137(%rsi), %xmm0
1949 movdqu %xmm0, -137(%rdi)
1950L(fwd_write_121bytes):
1951 lddqu -121(%rsi), %xmm0
1952 movdqu %xmm0, -121(%rdi)
1953L(fwd_write_105bytes):
1954 lddqu -105(%rsi), %xmm0
1955 movdqu %xmm0, -105(%rdi)
1956L(fwd_write_89bytes):
1957 lddqu -89(%rsi), %xmm0
1958 movdqu %xmm0, -89(%rdi)
1959L(fwd_write_73bytes):
1960 lddqu -73(%rsi), %xmm0
1961 movdqu %xmm0, -73(%rdi)
1962L(fwd_write_57bytes):
1963 lddqu -57(%rsi), %xmm0
1964 movdqu %xmm0, -57(%rdi)
1965L(fwd_write_41bytes):
1966 lddqu -41(%rsi), %xmm0
1967 movdqu %xmm0, -41(%rdi)
1968L(fwd_write_25bytes):
1969 lddqu -25(%rsi), %xmm0
1970 lddqu -16(%rsi), %xmm1
1971 movdqu %xmm0, -25(%rdi)
1972 movdqu %xmm1, -16(%rdi)
1973 ret
1974
1975 .p2align 4
/* Copy the 9 bytes that end at %rsi to the 9 bytes that end at %rdi
   with an 8-byte move and a 4-byte move that overlap by 3 bytes.  Both
   loads are issued before either store; %rdx and %ecx are scratch.  */
1976L(fwd_write_9bytes):
1977 mov -9(%rsi), %rdx
1978 mov -4(%rsi), %ecx
1979 mov %rdx, -9(%rdi)
1980 mov %ecx, -4(%rdi)
1981 ret
1982
1983 .p2align 4
/* Forward tail copy for sizes 136, 120, 104, 88, 72, 56, 40 and 24.
   L(fwd_write_Nbytes) copies the N bytes that end at %rsi to the N bytes
   that end at %rdi: each stage moves the 16 bytes starting N bytes
   before the end with unaligned lddqu/movdqu and falls through to the
   stage for N - 16.  The final stage (24) loads two 16-byte chunks that
   overlap by 8 bytes and only then stores both.  Presumably reached
   through L(table_144_bytes_fwd), which is not visible here.  */
1984L(fwd_write_136bytes):
1985 lddqu -136(%rsi), %xmm0
1986 movdqu %xmm0, -136(%rdi)
1987L(fwd_write_120bytes):
1988 lddqu -120(%rsi), %xmm0
1989 movdqu %xmm0, -120(%rdi)
1990L(fwd_write_104bytes):
1991 lddqu -104(%rsi), %xmm0
1992 movdqu %xmm0, -104(%rdi)
1993L(fwd_write_88bytes):
1994 lddqu -88(%rsi), %xmm0
1995 movdqu %xmm0, -88(%rdi)
1996L(fwd_write_72bytes):
1997 lddqu -72(%rsi), %xmm0
1998 movdqu %xmm0, -72(%rdi)
1999L(fwd_write_56bytes):
2000 lddqu -56(%rsi), %xmm0
2001 movdqu %xmm0, -56(%rdi)
2002L(fwd_write_40bytes):
2003 lddqu -40(%rsi), %xmm0
2004 movdqu %xmm0, -40(%rdi)
2005L(fwd_write_24bytes):
2006 lddqu -24(%rsi), %xmm0
2007 lddqu -16(%rsi), %xmm1
2008 movdqu %xmm0, -24(%rdi)
2009 movdqu %xmm1, -16(%rdi)
2010 ret
2011
2012 .p2align 4
/* Copy the 8 bytes that end at %rsi to the 8 bytes that end at %rdi
   with a single 8-byte move through %rdx.  */
2013L(fwd_write_8bytes):
2014 mov -8(%rsi), %rdx
2015 mov %rdx, -8(%rdi)
2016 ret
2017
2018 .p2align 4
/* Forward tail copy for sizes 135, 119, 103, 87, 71, 55, 39 and 23.
   L(fwd_write_Nbytes) copies the N bytes that end at %rsi to the N bytes
   that end at %rdi: each stage moves the 16 bytes starting N bytes
   before the end with unaligned lddqu/movdqu and falls through to the
   stage for N - 16.  The final stage (23) loads two 16-byte chunks that
   overlap by 9 bytes and only then stores both.  Presumably reached
   through L(table_144_bytes_fwd), which is not visible here.  */
2019L(fwd_write_135bytes):
2020 lddqu -135(%rsi), %xmm0
2021 movdqu %xmm0, -135(%rdi)
2022L(fwd_write_119bytes):
2023 lddqu -119(%rsi), %xmm0
2024 movdqu %xmm0, -119(%rdi)
2025L(fwd_write_103bytes):
2026 lddqu -103(%rsi), %xmm0
2027 movdqu %xmm0, -103(%rdi)
2028L(fwd_write_87bytes):
2029 lddqu -87(%rsi), %xmm0
2030 movdqu %xmm0, -87(%rdi)
2031L(fwd_write_71bytes):
2032 lddqu -71(%rsi), %xmm0
2033 movdqu %xmm0, -71(%rdi)
2034L(fwd_write_55bytes):
2035 lddqu -55(%rsi), %xmm0
2036 movdqu %xmm0, -55(%rdi)
2037L(fwd_write_39bytes):
2038 lddqu -39(%rsi), %xmm0
2039 movdqu %xmm0, -39(%rdi)
2040L(fwd_write_23bytes):
2041 lddqu -23(%rsi), %xmm0
2042 lddqu -16(%rsi), %xmm1
2043 movdqu %xmm0, -23(%rdi)
2044 movdqu %xmm1, -16(%rdi)
2045 ret
2046
2047 .p2align 4
/* Copy the 7 bytes that end at %rsi to the 7 bytes that end at %rdi
   with two 4-byte moves that overlap by 1 byte.  Both loads are issued
   before either store; %edx and %ecx are used as scratch.  */
2048L(fwd_write_7bytes):
2049 mov -7(%rsi), %edx
2050 mov -4(%rsi), %ecx
2051 mov %edx, -7(%rdi)
2052 mov %ecx, -4(%rdi)
2053 ret
2054
2055 .p2align 4
/* Forward tail copy for sizes 134, 118, 102, 86, 70, 54, 38 and 22.
   L(fwd_write_Nbytes) copies the N bytes that end at %rsi to the N bytes
   that end at %rdi: each stage moves the 16 bytes starting N bytes
   before the end with unaligned lddqu/movdqu and falls through to the
   stage for N - 16.  The final stage (22) loads two 16-byte chunks that
   overlap by 10 bytes and only then stores both.  Presumably reached
   through L(table_144_bytes_fwd), which is not visible here.  */
2056L(fwd_write_134bytes):
2057 lddqu -134(%rsi), %xmm0
2058 movdqu %xmm0, -134(%rdi)
2059L(fwd_write_118bytes):
2060 lddqu -118(%rsi), %xmm0
2061 movdqu %xmm0, -118(%rdi)
2062L(fwd_write_102bytes):
2063 lddqu -102(%rsi), %xmm0
2064 movdqu %xmm0, -102(%rdi)
2065L(fwd_write_86bytes):
2066 lddqu -86(%rsi), %xmm0
2067 movdqu %xmm0, -86(%rdi)
2068L(fwd_write_70bytes):
2069 lddqu -70(%rsi), %xmm0
2070 movdqu %xmm0, -70(%rdi)
2071L(fwd_write_54bytes):
2072 lddqu -54(%rsi), %xmm0
2073 movdqu %xmm0, -54(%rdi)
2074L(fwd_write_38bytes):
2075 lddqu -38(%rsi), %xmm0
2076 movdqu %xmm0, -38(%rdi)
2077L(fwd_write_22bytes):
2078 lddqu -22(%rsi), %xmm0
2079 lddqu -16(%rsi), %xmm1
2080 movdqu %xmm0, -22(%rdi)
2081 movdqu %xmm1, -16(%rdi)
2082 ret
2083
2084 .p2align 4
/* Copy the 6 bytes that end at %rsi to the 6 bytes that end at %rdi
   with two 4-byte moves that overlap by 2 bytes.  Both loads are issued
   before either store; %edx and %ecx are used as scratch.  */
2085L(fwd_write_6bytes):
2086 mov -6(%rsi), %edx
2087 mov -4(%rsi), %ecx
2088 mov %edx, -6(%rdi)
2089 mov %ecx, -4(%rdi)
2090 ret
2091
2092 .p2align 4
/* Forward tail copy for sizes 133, 117, 101, 85, 69, 53, 37 and 21.
   L(fwd_write_Nbytes) copies the N bytes that end at %rsi to the N bytes
   that end at %rdi: each stage moves the 16 bytes starting N bytes
   before the end with unaligned lddqu/movdqu and falls through to the
   stage for N - 16.  The final stage (21) loads two 16-byte chunks that
   overlap by 11 bytes and only then stores both.  Presumably reached
   through L(table_144_bytes_fwd), which is not visible here.  */
2093L(fwd_write_133bytes):
2094 lddqu -133(%rsi), %xmm0
2095 movdqu %xmm0, -133(%rdi)
2096L(fwd_write_117bytes):
2097 lddqu -117(%rsi), %xmm0
2098 movdqu %xmm0, -117(%rdi)
2099L(fwd_write_101bytes):
2100 lddqu -101(%rsi), %xmm0
2101 movdqu %xmm0, -101(%rdi)
2102L(fwd_write_85bytes):
2103 lddqu -85(%rsi), %xmm0
2104 movdqu %xmm0, -85(%rdi)
2105L(fwd_write_69bytes):
2106 lddqu -69(%rsi), %xmm0
2107 movdqu %xmm0, -69(%rdi)
2108L(fwd_write_53bytes):
2109 lddqu -53(%rsi), %xmm0
2110 movdqu %xmm0, -53(%rdi)
2111L(fwd_write_37bytes):
2112 lddqu -37(%rsi), %xmm0
2113 movdqu %xmm0, -37(%rdi)
2114L(fwd_write_21bytes):
2115 lddqu -21(%rsi), %xmm0
2116 lddqu -16(%rsi), %xmm1
2117 movdqu %xmm0, -21(%rdi)
2118 movdqu %xmm1, -16(%rdi)
2119 ret
2120
2121 .p2align 4
/* Copy the 5 bytes that end at %rsi to the 5 bytes that end at %rdi
   with two 4-byte moves that overlap by 3 bytes.  Both loads are issued
   before either store; %edx and %ecx are used as scratch.  */
2122L(fwd_write_5bytes):
2123 mov -5(%rsi), %edx
2124 mov -4(%rsi), %ecx
2125 mov %edx, -5(%rdi)
2126 mov %ecx, -4(%rdi)
2127 ret
2128
2129 .p2align 4
/* Forward tail copy for sizes 132, 116, 100, 84, 68, 52, 36 and 20.
   L(fwd_write_Nbytes) copies the N bytes that end at %rsi to the N bytes
   that end at %rdi: each stage moves the 16 bytes starting N bytes
   before the end with unaligned lddqu/movdqu and falls through to the
   stage for N - 16.  The final stage (20) loads two 16-byte chunks that
   overlap by 12 bytes and only then stores both.  Presumably reached
   through L(table_144_bytes_fwd), which is not visible here.  */
2130L(fwd_write_132bytes):
2131 lddqu -132(%rsi), %xmm0
2132 movdqu %xmm0, -132(%rdi)
2133L(fwd_write_116bytes):
2134 lddqu -116(%rsi), %xmm0
2135 movdqu %xmm0, -116(%rdi)
2136L(fwd_write_100bytes):
2137 lddqu -100(%rsi), %xmm0
2138 movdqu %xmm0, -100(%rdi)
2139L(fwd_write_84bytes):
2140 lddqu -84(%rsi), %xmm0
2141 movdqu %xmm0, -84(%rdi)
2142L(fwd_write_68bytes):
2143 lddqu -68(%rsi), %xmm0
2144 movdqu %xmm0, -68(%rdi)
2145L(fwd_write_52bytes):
2146 lddqu -52(%rsi), %xmm0
2147 movdqu %xmm0, -52(%rdi)
2148L(fwd_write_36bytes):
2149 lddqu -36(%rsi), %xmm0
2150 movdqu %xmm0, -36(%rdi)
2151L(fwd_write_20bytes):
2152 lddqu -20(%rsi), %xmm0
2153 lddqu -16(%rsi), %xmm1
2154 movdqu %xmm0, -20(%rdi)
2155 movdqu %xmm1, -16(%rdi)
2156 ret
2157
2158 .p2align 4
/* Copy the 4 bytes that end at %rsi to the 4 bytes that end at %rdi
   with a single 4-byte move through %edx.  */
2159L(fwd_write_4bytes):
2160 mov -4(%rsi), %edx
2161 mov %edx, -4(%rdi)
2162 ret
2163
2164 .p2align 4
/* Forward tail copy for sizes 131, 115, 99, 83, 67, 51, 35 and 19.
   L(fwd_write_Nbytes) copies the N bytes that end at %rsi to the N bytes
   that end at %rdi: each stage moves the 16 bytes starting N bytes
   before the end with unaligned lddqu/movdqu and falls through to the
   stage for N - 16.  The final stage (19) loads two 16-byte chunks that
   overlap by 13 bytes and only then stores both.  Presumably reached
   through L(table_144_bytes_fwd), which is not visible here.  */
2165L(fwd_write_131bytes):
2166 lddqu -131(%rsi), %xmm0
2167 movdqu %xmm0, -131(%rdi)
2168L(fwd_write_115bytes):
2169 lddqu -115(%rsi), %xmm0
2170 movdqu %xmm0, -115(%rdi)
2171L(fwd_write_99bytes):
2172 lddqu -99(%rsi), %xmm0
2173 movdqu %xmm0, -99(%rdi)
2174L(fwd_write_83bytes):
2175 lddqu -83(%rsi), %xmm0
2176 movdqu %xmm0, -83(%rdi)
2177L(fwd_write_67bytes):
2178 lddqu -67(%rsi), %xmm0
2179 movdqu %xmm0, -67(%rdi)
2180L(fwd_write_51bytes):
2181 lddqu -51(%rsi), %xmm0
2182 movdqu %xmm0, -51(%rdi)
2183L(fwd_write_35bytes):
2184 lddqu -35(%rsi), %xmm0
2185 movdqu %xmm0, -35(%rdi)
2186L(fwd_write_19bytes):
2187 lddqu -19(%rsi), %xmm0
2188 lddqu -16(%rsi), %xmm1
2189 movdqu %xmm0, -19(%rdi)
2190 movdqu %xmm1, -16(%rdi)
2191 ret
2192
2193 .p2align 4
/* Copy the 3 bytes that end at %rsi to the 3 bytes that end at %rdi
   with two 2-byte moves that overlap by 1 byte.  Both loads are issued
   before either store; %dx and %cx are used as scratch.  */
2194L(fwd_write_3bytes):
2195 mov -3(%rsi), %dx
2196 mov -2(%rsi), %cx
2197 mov %dx, -3(%rdi)
2198 mov %cx, -2(%rdi)
2199 ret
2200
2201 .p2align 4
/* Forward tail copy for sizes 130, 114, 98, 82, 66, 50, 34 and 18.
   L(fwd_write_Nbytes) copies the N bytes that end at %rsi to the N bytes
   that end at %rdi: each stage moves the 16 bytes starting N bytes
   before the end with unaligned lddqu/movdqu and falls through to the
   stage for N - 16.  The final stage (18) loads two 16-byte chunks that
   overlap by 14 bytes and only then stores both.  Presumably reached
   through L(table_144_bytes_fwd), which is not visible here.  */
2202L(fwd_write_130bytes):
2203 lddqu -130(%rsi), %xmm0
2204 movdqu %xmm0, -130(%rdi)
2205L(fwd_write_114bytes):
2206 lddqu -114(%rsi), %xmm0
2207 movdqu %xmm0, -114(%rdi)
2208L(fwd_write_98bytes):
2209 lddqu -98(%rsi), %xmm0
2210 movdqu %xmm0, -98(%rdi)
2211L(fwd_write_82bytes):
2212 lddqu -82(%rsi), %xmm0
2213 movdqu %xmm0, -82(%rdi)
2214L(fwd_write_66bytes):
2215 lddqu -66(%rsi), %xmm0
2216 movdqu %xmm0, -66(%rdi)
2217L(fwd_write_50bytes):
2218 lddqu -50(%rsi), %xmm0
2219 movdqu %xmm0, -50(%rdi)
2220L(fwd_write_34bytes):
2221 lddqu -34(%rsi), %xmm0
2222 movdqu %xmm0, -34(%rdi)
2223L(fwd_write_18bytes):
2224 lddqu -18(%rsi), %xmm0
2225 lddqu -16(%rsi), %xmm1
2226 movdqu %xmm0, -18(%rdi)
2227 movdqu %xmm1, -16(%rdi)
2228 ret
2229
2230 .p2align 4
/* Copy the 2 bytes that end at %rsi to the 2 bytes that end at %rdi.
   The load zero-extends into %edx, so all of %edx is written rather
   than only %dx; just the low 16 bits are stored.  */
2231L(fwd_write_2bytes):
2232 movzwl -2(%rsi), %edx
2233 mov %dx, -2(%rdi)
2234 ret
2235
2236 .p2align 4
/* Forward tail copy for sizes 129, 113, 97, 81, 65, 49, 33 and 17.
   L(fwd_write_Nbytes) copies the N bytes that end at %rsi to the N bytes
   that end at %rdi: each stage moves the 16 bytes starting N bytes
   before the end with unaligned lddqu/movdqu and falls through to the
   stage for N - 16.  The final stage (17) loads two 16-byte chunks that
   overlap by 15 bytes and only then stores both.  Presumably reached
   through L(table_144_bytes_fwd), which is not visible here.  */
2237L(fwd_write_129bytes):
2238 lddqu -129(%rsi), %xmm0
2239 movdqu %xmm0, -129(%rdi)
2240L(fwd_write_113bytes):
2241 lddqu -113(%rsi), %xmm0
2242 movdqu %xmm0, -113(%rdi)
2243L(fwd_write_97bytes):
2244 lddqu -97(%rsi), %xmm0
2245 movdqu %xmm0, -97(%rdi)
2246L(fwd_write_81bytes):
2247 lddqu -81(%rsi), %xmm0
2248 movdqu %xmm0, -81(%rdi)
2249L(fwd_write_65bytes):
2250 lddqu -65(%rsi), %xmm0
2251 movdqu %xmm0, -65(%rdi)
2252L(fwd_write_49bytes):
2253 lddqu -49(%rsi), %xmm0
2254 movdqu %xmm0, -49(%rdi)
2255L(fwd_write_33bytes):
2256 lddqu -33(%rsi), %xmm0
2257 movdqu %xmm0, -33(%rdi)
2258L(fwd_write_17bytes):
2259 lddqu -17(%rsi), %xmm0
2260 lddqu -16(%rsi), %xmm1
2261 movdqu %xmm0, -17(%rdi)
2262 movdqu %xmm1, -16(%rdi)
2263 ret
2264
2265 .p2align 4
/* Copy the single byte just below %rsi to the byte just below %rdi.
   The load zero-extends into %edx, so all of %edx is written rather
   than only %dl; just the low byte is stored.  */
2266L(fwd_write_1bytes):
2267 movzbl -1(%rsi), %edx
2268 mov %dl, -1(%rdi)
2269 ret
2270
2271 .p2align 4
/* Backward tail copy for sizes 128, 112, 96, 80, 64, 48, 32, 16 and 0.
   L(bwd_write_Nbytes) copies the N bytes that start at %rsi to the N
   bytes that start at %rdi, highest addresses first: each stage moves
   the 16 bytes at offset N - 16 with unaligned lddqu/movdqu and falls
   through to the stage for N - 16, down to the bare ret at
   L(bwd_write_0bytes).  Presumably reached through
   L(table_144_bytes_bwd), which is not visible here.  */
2272L(bwd_write_128bytes):
2273 lddqu 112(%rsi), %xmm0
2274 movdqu %xmm0, 112(%rdi)
2275L(bwd_write_112bytes):
2276 lddqu 96(%rsi), %xmm0
2277 movdqu %xmm0, 96(%rdi)
2278L(bwd_write_96bytes):
2279 lddqu 80(%rsi), %xmm0
2280 movdqu %xmm0, 80(%rdi)
2281L(bwd_write_80bytes):
2282 lddqu 64(%rsi), %xmm0
2283 movdqu %xmm0, 64(%rdi)
2284L(bwd_write_64bytes):
2285 lddqu 48(%rsi), %xmm0
2286 movdqu %xmm0, 48(%rdi)
2287L(bwd_write_48bytes):
2288 lddqu 32(%rsi), %xmm0
2289 movdqu %xmm0, 32(%rdi)
2290L(bwd_write_32bytes):
2291 lddqu 16(%rsi), %xmm0
2292 movdqu %xmm0, 16(%rdi)
2293L(bwd_write_16bytes):
2294 lddqu (%rsi), %xmm0
2295 movdqu %xmm0, (%rdi)
2296L(bwd_write_0bytes):
2297 ret
2298
2299 .p2align 4
/* Backward tail copy for sizes 143, 127, 111, 95, 79, 63, 47 and 31.
   L(bwd_write_Nbytes) copies the N bytes that start at %rsi to the N
   bytes that start at %rdi, highest addresses first: each stage moves
   the 16 bytes at offset N - 16 with unaligned lddqu/movdqu and falls
   through to the stage for N - 16.  The final stage (31) loads two
   16-byte chunks that overlap by 1 byte and only then stores both.
   Presumably reached through L(table_144_bytes_bwd), which is not
   visible here.  */
2300L(bwd_write_143bytes):
2301 lddqu 127(%rsi), %xmm0
2302 movdqu %xmm0, 127(%rdi)
2303L(bwd_write_127bytes):
2304 lddqu 111(%rsi), %xmm0
2305 movdqu %xmm0, 111(%rdi)
2306L(bwd_write_111bytes):
2307 lddqu 95(%rsi), %xmm0
2308 movdqu %xmm0, 95(%rdi)
2309L(bwd_write_95bytes):
2310 lddqu 79(%rsi), %xmm0
2311 movdqu %xmm0, 79(%rdi)
2312L(bwd_write_79bytes):
2313 lddqu 63(%rsi), %xmm0
2314 movdqu %xmm0, 63(%rdi)
2315L(bwd_write_63bytes):
2316 lddqu 47(%rsi), %xmm0
2317 movdqu %xmm0, 47(%rdi)
2318L(bwd_write_47bytes):
2319 lddqu 31(%rsi), %xmm0
2320 movdqu %xmm0, 31(%rdi)
2321L(bwd_write_31bytes):
2322 lddqu 15(%rsi), %xmm0
2323 lddqu (%rsi), %xmm1
2324 movdqu %xmm0, 15(%rdi)
2325 movdqu %xmm1, (%rdi)
2326 ret
2327
2328
2329 .p2align 4
/* Copy the 15 bytes that start at %rsi to the 15 bytes that start at
   %rdi with two 8-byte moves that overlap by 1 byte.  Both loads are
   issued before either store; %rdx and %rcx are used as scratch.  */
2330L(bwd_write_15bytes):
2331 mov 7(%rsi), %rdx
2332 mov (%rsi), %rcx
2333 mov %rdx, 7(%rdi)
2334 mov %rcx, (%rdi)
2335 ret
2336
2337 .p2align 4
/* Backward tail copy for sizes 142, 126, 110, 94, 78, 62, 46 and 30.
   L(bwd_write_Nbytes) copies the N bytes that start at %rsi to the N
   bytes that start at %rdi, highest addresses first: each stage moves
   the 16 bytes at offset N - 16 with unaligned lddqu/movdqu and falls
   through to the stage for N - 16.  The final stage (30) loads two
   16-byte chunks that overlap by 2 bytes and only then stores both.
   Presumably reached through L(table_144_bytes_bwd), which is not
   visible here.  */
2338L(bwd_write_142bytes):
2339 lddqu 126(%rsi), %xmm0
2340 movdqu %xmm0, 126(%rdi)
2341L(bwd_write_126bytes):
2342 lddqu 110(%rsi), %xmm0
2343 movdqu %xmm0, 110(%rdi)
2344L(bwd_write_110bytes):
2345 lddqu 94(%rsi), %xmm0
2346 movdqu %xmm0, 94(%rdi)
2347L(bwd_write_94bytes):
2348 lddqu 78(%rsi), %xmm0
2349 movdqu %xmm0, 78(%rdi)
2350L(bwd_write_78bytes):
2351 lddqu 62(%rsi), %xmm0
2352 movdqu %xmm0, 62(%rdi)
2353L(bwd_write_62bytes):
2354 lddqu 46(%rsi), %xmm0
2355 movdqu %xmm0, 46(%rdi)
2356L(bwd_write_46bytes):
2357 lddqu 30(%rsi), %xmm0
2358 movdqu %xmm0, 30(%rdi)
2359L(bwd_write_30bytes):
2360 lddqu 14(%rsi), %xmm0
2361 lddqu (%rsi), %xmm1
2362 movdqu %xmm0, 14(%rdi)
2363 movdqu %xmm1, (%rdi)
2364 ret
2365
2366 .p2align 4
/* Copy the 14 bytes that start at %rsi to the 14 bytes that start at
   %rdi with two 8-byte moves that overlap by 2 bytes.  Both loads are
   issued before either store; %rdx and %rcx are used as scratch.  */
2367L(bwd_write_14bytes):
2368 mov 6(%rsi), %rdx
2369 mov (%rsi), %rcx
2370 mov %rdx, 6(%rdi)
2371 mov %rcx, (%rdi)
2372 ret
2373
2374 .p2align 4
/* Backward tail copy for sizes 141, 125, 109, 93, 77, 61, 45 and 29.
   L(bwd_write_Nbytes) copies the N bytes that start at %rsi to the N
   bytes that start at %rdi, highest addresses first: each stage moves
   the 16 bytes at offset N - 16 with unaligned lddqu/movdqu and falls
   through to the stage for N - 16.  The final stage (29) loads two
   16-byte chunks that overlap by 3 bytes and only then stores both.
   Presumably reached through L(table_144_bytes_bwd), which is not
   visible here.  */
2375L(bwd_write_141bytes):
2376 lddqu 125(%rsi), %xmm0
2377 movdqu %xmm0, 125(%rdi)
2378L(bwd_write_125bytes):
2379 lddqu 109(%rsi), %xmm0
2380 movdqu %xmm0, 109(%rdi)
2381L(bwd_write_109bytes):
2382 lddqu 93(%rsi), %xmm0
2383 movdqu %xmm0, 93(%rdi)
2384L(bwd_write_93bytes):
2385 lddqu 77(%rsi), %xmm0
2386 movdqu %xmm0, 77(%rdi)
2387L(bwd_write_77bytes):
2388 lddqu 61(%rsi), %xmm0
2389 movdqu %xmm0, 61(%rdi)
2390L(bwd_write_61bytes):
2391 lddqu 45(%rsi), %xmm0
2392 movdqu %xmm0, 45(%rdi)
2393L(bwd_write_45bytes):
2394 lddqu 29(%rsi), %xmm0
2395 movdqu %xmm0, 29(%rdi)
2396L(bwd_write_29bytes):
2397 lddqu 13(%rsi), %xmm0
2398 lddqu (%rsi), %xmm1
2399 movdqu %xmm0, 13(%rdi)
2400 movdqu %xmm1, (%rdi)
2401 ret
2402
2403 .p2align 4
/* Copy the 13 bytes that start at %rsi to the 13 bytes that start at
   %rdi with two 8-byte moves that overlap by 3 bytes.  Both loads are
   issued before either store; %rdx and %rcx are used as scratch.  */
2404L(bwd_write_13bytes):
2405 mov 5(%rsi), %rdx
2406 mov (%rsi), %rcx
2407 mov %rdx, 5(%rdi)
2408 mov %rcx, (%rdi)
2409 ret
2410
2411 .p2align 4
/* Backward tail copy for sizes 140, 124, 108, 92, 76, 60, 44 and 28.
   L(bwd_write_Nbytes) copies the N bytes that start at %rsi to the N
   bytes that start at %rdi, highest addresses first: each stage moves
   the 16 bytes at offset N - 16 with unaligned lddqu/movdqu and falls
   through to the stage for N - 16.  The final stage (28) loads two
   16-byte chunks that overlap by 4 bytes and only then stores both.
   Presumably reached through L(table_144_bytes_bwd), which is not
   visible here.  */
2412L(bwd_write_140bytes):
2413 lddqu 124(%rsi), %xmm0
2414 movdqu %xmm0, 124(%rdi)
2415L(bwd_write_124bytes):
2416 lddqu 108(%rsi), %xmm0
2417 movdqu %xmm0, 108(%rdi)
2418L(bwd_write_108bytes):
2419 lddqu 92(%rsi), %xmm0
2420 movdqu %xmm0, 92(%rdi)
2421L(bwd_write_92bytes):
2422 lddqu 76(%rsi), %xmm0
2423 movdqu %xmm0, 76(%rdi)
2424L(bwd_write_76bytes):
2425 lddqu 60(%rsi), %xmm0
2426 movdqu %xmm0, 60(%rdi)
2427L(bwd_write_60bytes):
2428 lddqu 44(%rsi), %xmm0
2429 movdqu %xmm0, 44(%rdi)
2430L(bwd_write_44bytes):
2431 lddqu 28(%rsi), %xmm0
2432 movdqu %xmm0, 28(%rdi)
2433L(bwd_write_28bytes):
2434 lddqu 12(%rsi), %xmm0
2435 lddqu (%rsi), %xmm1
2436 movdqu %xmm0, 12(%rdi)
2437 movdqu %xmm1, (%rdi)
2438 ret
2439
2440 .p2align 4
/* Copy the 12 bytes that start at %rsi to the 12 bytes that start at
   %rdi with two 8-byte moves that overlap by 4 bytes.  Both loads are
   issued before either store; %rdx and %rcx are used as scratch.  */
2441L(bwd_write_12bytes):
2442 mov 4(%rsi), %rdx
2443 mov (%rsi), %rcx
2444 mov %rdx, 4(%rdi)
2445 mov %rcx, (%rdi)
2446 ret
2447
2448 .p2align 4
/* Backward tail copy for sizes 139, 123, 107, 91, 75, 59, 43 and 27.
   L(bwd_write_Nbytes) copies the N bytes that start at %rsi to the N
   bytes that start at %rdi, highest addresses first: each stage moves
   the 16 bytes at offset N - 16 with unaligned lddqu/movdqu and falls
   through to the stage for N - 16.  The final stage (27) loads two
   16-byte chunks that overlap by 5 bytes and only then stores both.
   Presumably reached through L(table_144_bytes_bwd), which is not
   visible here.  */
2449L(bwd_write_139bytes):
2450 lddqu 123(%rsi), %xmm0
2451 movdqu %xmm0, 123(%rdi)
2452L(bwd_write_123bytes):
2453 lddqu 107(%rsi), %xmm0
2454 movdqu %xmm0, 107(%rdi)
2455L(bwd_write_107bytes):
2456 lddqu 91(%rsi), %xmm0
2457 movdqu %xmm0, 91(%rdi)
2458L(bwd_write_91bytes):
2459 lddqu 75(%rsi), %xmm0
2460 movdqu %xmm0, 75(%rdi)
2461L(bwd_write_75bytes):
2462 lddqu 59(%rsi), %xmm0
2463 movdqu %xmm0, 59(%rdi)
2464L(bwd_write_59bytes):
2465 lddqu 43(%rsi), %xmm0
2466 movdqu %xmm0, 43(%rdi)
2467L(bwd_write_43bytes):
2468 lddqu 27(%rsi), %xmm0
2469 movdqu %xmm0, 27(%rdi)
2470L(bwd_write_27bytes):
2471 lddqu 11(%rsi), %xmm0
2472 lddqu (%rsi), %xmm1
2473 movdqu %xmm0, 11(%rdi)
2474 movdqu %xmm1, (%rdi)
2475 ret
2476
2477 .p2align 4
/* Copy the 11 bytes that start at %rsi to the 11 bytes that start at
   %rdi with two 8-byte moves that overlap by 5 bytes.  Both loads are
   issued before either store; %rdx and %rcx are used as scratch.  */
2478L(bwd_write_11bytes):
2479 mov 3(%rsi), %rdx
2480 mov (%rsi), %rcx
2481 mov %rdx, 3(%rdi)
2482 mov %rcx, (%rdi)
2483 ret
2484
2485 .p2align 4
/* Backward tail copy for sizes 138, 122, 106, 90, 74, 58, 42 and 26.
   L(bwd_write_Nbytes) copies the N bytes that start at %rsi to the N
   bytes that start at %rdi, highest addresses first: each stage moves
   the 16 bytes at offset N - 16 with unaligned lddqu/movdqu and falls
   through to the stage for N - 16.  The final stage (26) loads two
   16-byte chunks that overlap by 6 bytes and only then stores both.
   Presumably reached through L(table_144_bytes_bwd), which is not
   visible here.  */
2486L(bwd_write_138bytes):
2487 lddqu 122(%rsi), %xmm0
2488 movdqu %xmm0, 122(%rdi)
2489L(bwd_write_122bytes):
2490 lddqu 106(%rsi), %xmm0
2491 movdqu %xmm0, 106(%rdi)
2492L(bwd_write_106bytes):
2493 lddqu 90(%rsi), %xmm0
2494 movdqu %xmm0, 90(%rdi)
2495L(bwd_write_90bytes):
2496 lddqu 74(%rsi), %xmm0
2497 movdqu %xmm0, 74(%rdi)
2498L(bwd_write_74bytes):
2499 lddqu 58(%rsi), %xmm0
2500 movdqu %xmm0, 58(%rdi)
2501L(bwd_write_58bytes):
2502 lddqu 42(%rsi), %xmm0
2503 movdqu %xmm0, 42(%rdi)
2504L(bwd_write_42bytes):
2505 lddqu 26(%rsi), %xmm0
2506 movdqu %xmm0, 26(%rdi)
2507L(bwd_write_26bytes):
2508 lddqu 10(%rsi), %xmm0
2509 lddqu (%rsi), %xmm1
2510 movdqu %xmm0, 10(%rdi)
2511 movdqu %xmm1, (%rdi)
2512 ret
2513
2514 .p2align 4
/* Copy the 10 bytes that start at %rsi to the 10 bytes that start at
   %rdi with two 8-byte moves that overlap by 6 bytes.  Both loads are
   issued before either store; %rdx and %rcx are used as scratch.  */
2515L(bwd_write_10bytes):
2516 mov 2(%rsi), %rdx
2517 mov (%rsi), %rcx
2518 mov %rdx, 2(%rdi)
2519 mov %rcx, (%rdi)
2520 ret
2521
2522 .p2align 4
/* Backward tail copy for sizes 137, 121, 105, 89, 73, 57, 41 and 25.
   L(bwd_write_Nbytes) copies the N bytes that start at %rsi to the N
   bytes that start at %rdi, highest addresses first: each stage moves
   the 16 bytes at offset N - 16 with unaligned lddqu/movdqu and falls
   through to the stage for N - 16.  The final stage (25) loads two
   16-byte chunks that overlap by 7 bytes and only then stores both.
   Presumably reached through L(table_144_bytes_bwd), which is not
   visible here.  */
2523L(bwd_write_137bytes):
2524 lddqu 121(%rsi), %xmm0
2525 movdqu %xmm0, 121(%rdi)
2526L(bwd_write_121bytes):
2527 lddqu 105(%rsi), %xmm0
2528 movdqu %xmm0, 105(%rdi)
2529L(bwd_write_105bytes):
2530 lddqu 89(%rsi), %xmm0
2531 movdqu %xmm0, 89(%rdi)
2532L(bwd_write_89bytes):
2533 lddqu 73(%rsi), %xmm0
2534 movdqu %xmm0, 73(%rdi)
2535L(bwd_write_73bytes):
2536 lddqu 57(%rsi), %xmm0
2537 movdqu %xmm0, 57(%rdi)
2538L(bwd_write_57bytes):
2539 lddqu 41(%rsi), %xmm0
2540 movdqu %xmm0, 41(%rdi)
2541L(bwd_write_41bytes):
2542 lddqu 25(%rsi), %xmm0
2543 movdqu %xmm0, 25(%rdi)
2544L(bwd_write_25bytes):
2545 lddqu 9(%rsi), %xmm0
2546 lddqu (%rsi), %xmm1
2547 movdqu %xmm0, 9(%rdi)
2548 movdqu %xmm1, (%rdi)
2549 ret
2550
2551 .p2align 4
/* Copy the 9 bytes that start at %rsi to the 9 bytes that start at
   %rdi with two 8-byte moves that overlap by 7 bytes.  Both loads are
   issued before either store; %rdx and %rcx are used as scratch.  */
2552L(bwd_write_9bytes):
2553 mov 1(%rsi), %rdx
2554 mov (%rsi), %rcx
2555 mov %rdx, 1(%rdi)
2556 mov %rcx, (%rdi)
2557 ret
2558
2559 .p2align 4
/* Backward tail copy for sizes 136, 120, 104, 88, 72, 56, 40 and 24.
   L(bwd_write_Nbytes) copies the N bytes that start at %rsi to the N
   bytes that start at %rdi, highest addresses first: each stage moves
   the 16 bytes at offset N - 16 with unaligned lddqu/movdqu and falls
   through to the stage for N - 16.  The final stage (24) loads two
   16-byte chunks that overlap by 8 bytes and only then stores both.
   Presumably reached through L(table_144_bytes_bwd), which is not
   visible here.  */
2560L(bwd_write_136bytes):
2561 lddqu 120(%rsi), %xmm0
2562 movdqu %xmm0, 120(%rdi)
2563L(bwd_write_120bytes):
2564 lddqu 104(%rsi), %xmm0
2565 movdqu %xmm0, 104(%rdi)
2566L(bwd_write_104bytes):
2567 lddqu 88(%rsi), %xmm0
2568 movdqu %xmm0, 88(%rdi)
2569L(bwd_write_88bytes):
2570 lddqu 72(%rsi), %xmm0
2571 movdqu %xmm0, 72(%rdi)
2572L(bwd_write_72bytes):
2573 lddqu 56(%rsi), %xmm0
2574 movdqu %xmm0, 56(%rdi)
2575L(bwd_write_56bytes):
2576 lddqu 40(%rsi), %xmm0
2577 movdqu %xmm0, 40(%rdi)
2578L(bwd_write_40bytes):
2579 lddqu 24(%rsi), %xmm0
2580 movdqu %xmm0, 24(%rdi)
2581L(bwd_write_24bytes):
2582 lddqu 8(%rsi), %xmm0
2583 lddqu (%rsi), %xmm1
2584 movdqu %xmm0, 8(%rdi)
2585 movdqu %xmm1, (%rdi)
2586 ret
2587
/* Tail copy of exactly 8 bytes from (%rsi) to (%rdi) with a single
   8-byte move; entry 8 of L(table_144_bytes_bwd).  Clobbers %rdx.  */
2588 .p2align 4
2589L(bwd_write_8bytes):
2590 mov (%rsi), %rdx
2591 mov %rdx, (%rdi)
2592 ret
2593
/* Fall-through chain for tails of 135, 119, 103, 87, 71, 55, 39 and
   23 bytes (the same-numbered entries of L(table_144_bytes_bwd)).
   Each label copies the highest 16 bytes of its length from (%rsi) to
   (%rdi) with an unaligned load/store pair, then falls into the label
   that is 16 bytes shorter.  Clobbers %xmm0 and %xmm1.  */
2594 .p2align 4
2595L(bwd_write_135bytes):
2596 lddqu 119(%rsi), %xmm0
2597 movdqu %xmm0, 119(%rdi)
2598L(bwd_write_119bytes):
2599 lddqu 103(%rsi), %xmm0
2600 movdqu %xmm0, 103(%rdi)
2601L(bwd_write_103bytes):
2602 lddqu 87(%rsi), %xmm0
2603 movdqu %xmm0, 87(%rdi)
2604L(bwd_write_87bytes):
2605 lddqu 71(%rsi), %xmm0
2606 movdqu %xmm0, 71(%rdi)
2607L(bwd_write_71bytes):
2608 lddqu 55(%rsi), %xmm0
2609 movdqu %xmm0, 55(%rdi)
2610L(bwd_write_55bytes):
2611 lddqu 39(%rsi), %xmm0
2612 movdqu %xmm0, 39(%rdi)
2613L(bwd_write_39bytes):
2614 lddqu 23(%rsi), %xmm0
2615 movdqu %xmm0, 23(%rdi)
 /* Last 23 bytes: two overlapping 16-byte vectors (bytes 7..22 and
    0..15), both loaded before either is stored.  */
2616L(bwd_write_23bytes):
2617 lddqu 7(%rsi), %xmm0
2618 lddqu (%rsi), %xmm1
2619 movdqu %xmm0, 7(%rdi)
2620 movdqu %xmm1, (%rdi)
2621 ret
2622
/* Tail copy of exactly 7 bytes from (%rsi) to (%rdi); entry 7 of
   L(table_144_bytes_bwd).  Two overlapping 4-byte moves cover bytes
   3..6 and 0..3, and both loads are issued before either store.
   Clobbers %rdx and %rcx (written through %edx/%ecx).  */
2623 .p2align 4
2624L(bwd_write_7bytes):
2625 mov 3(%rsi), %edx
2626 mov (%rsi), %ecx
2627 mov %edx, 3(%rdi)
2628 mov %ecx, (%rdi)
2629 ret
2630
/* Fall-through chain for tails of 134, 118, 102, 86, 70, 54, 38 and
   22 bytes (the same-numbered entries of L(table_144_bytes_bwd)).
   Each label copies the highest 16 bytes of its length from (%rsi) to
   (%rdi) with an unaligned load/store pair, then falls into the label
   that is 16 bytes shorter.  Clobbers %xmm0 and %xmm1.  */
2631 .p2align 4
2632L(bwd_write_134bytes):
2633 lddqu 118(%rsi), %xmm0
2634 movdqu %xmm0, 118(%rdi)
2635L(bwd_write_118bytes):
2636 lddqu 102(%rsi), %xmm0
2637 movdqu %xmm0, 102(%rdi)
2638L(bwd_write_102bytes):
2639 lddqu 86(%rsi), %xmm0
2640 movdqu %xmm0, 86(%rdi)
2641L(bwd_write_86bytes):
2642 lddqu 70(%rsi), %xmm0
2643 movdqu %xmm0, 70(%rdi)
2644L(bwd_write_70bytes):
2645 lddqu 54(%rsi), %xmm0
2646 movdqu %xmm0, 54(%rdi)
2647L(bwd_write_54bytes):
2648 lddqu 38(%rsi), %xmm0
2649 movdqu %xmm0, 38(%rdi)
2650L(bwd_write_38bytes):
2651 lddqu 22(%rsi), %xmm0
2652 movdqu %xmm0, 22(%rdi)
 /* Last 22 bytes: two overlapping 16-byte vectors (bytes 6..21 and
    0..15), both loaded before either is stored.  */
2653L(bwd_write_22bytes):
2654 lddqu 6(%rsi), %xmm0
2655 lddqu (%rsi), %xmm1
2656 movdqu %xmm0, 6(%rdi)
2657 movdqu %xmm1, (%rdi)
2658 ret
2659
/* Tail copy of exactly 6 bytes from (%rsi) to (%rdi); entry 6 of
   L(table_144_bytes_bwd).  Two overlapping 4-byte moves cover bytes
   2..5 and 0..3, and both loads are issued before either store.
   Clobbers %rdx and %rcx (written through %edx/%ecx).  */
2660 .p2align 4
2661L(bwd_write_6bytes):
2662 mov 2(%rsi), %edx
2663 mov (%rsi), %ecx
2664 mov %edx, 2(%rdi)
2665 mov %ecx, (%rdi)
2666 ret
2667
/* Fall-through chain for tails of 133, 117, 101, 85, 69, 53, 37 and
   21 bytes (the same-numbered entries of L(table_144_bytes_bwd)).
   Each label copies the highest 16 bytes of its length from (%rsi) to
   (%rdi) with an unaligned load/store pair, then falls into the label
   that is 16 bytes shorter.  Clobbers %xmm0 and %xmm1.  */
2668 .p2align 4
2669L(bwd_write_133bytes):
2670 lddqu 117(%rsi), %xmm0
2671 movdqu %xmm0, 117(%rdi)
2672L(bwd_write_117bytes):
2673 lddqu 101(%rsi), %xmm0
2674 movdqu %xmm0, 101(%rdi)
2675L(bwd_write_101bytes):
2676 lddqu 85(%rsi), %xmm0
2677 movdqu %xmm0, 85(%rdi)
2678L(bwd_write_85bytes):
2679 lddqu 69(%rsi), %xmm0
2680 movdqu %xmm0, 69(%rdi)
2681L(bwd_write_69bytes):
2682 lddqu 53(%rsi), %xmm0
2683 movdqu %xmm0, 53(%rdi)
2684L(bwd_write_53bytes):
2685 lddqu 37(%rsi), %xmm0
2686 movdqu %xmm0, 37(%rdi)
2687L(bwd_write_37bytes):
2688 lddqu 21(%rsi), %xmm0
2689 movdqu %xmm0, 21(%rdi)
 /* Last 21 bytes: two overlapping 16-byte vectors (bytes 5..20 and
    0..15), both loaded before either is stored.  */
2690L(bwd_write_21bytes):
2691 lddqu 5(%rsi), %xmm0
2692 lddqu (%rsi), %xmm1
2693 movdqu %xmm0, 5(%rdi)
2694 movdqu %xmm1, (%rdi)
2695 ret
2696
/* Tail copy of exactly 5 bytes from (%rsi) to (%rdi); entry 5 of
   L(table_144_bytes_bwd).  Two overlapping 4-byte moves cover bytes
   1..4 and 0..3, and both loads are issued before either store.
   Clobbers %rdx and %rcx (written through %edx/%ecx).  */
2697 .p2align 4
2698L(bwd_write_5bytes):
2699 mov 1(%rsi), %edx
2700 mov (%rsi), %ecx
2701 mov %edx, 1(%rdi)
2702 mov %ecx, (%rdi)
2703 ret
2704
/* Fall-through chain for tails of 132, 116, 100, 84, 68, 52, 36 and
   20 bytes (the same-numbered entries of L(table_144_bytes_bwd)).
   Each label copies the highest 16 bytes of its length from (%rsi) to
   (%rdi) with an unaligned load/store pair, then falls into the label
   that is 16 bytes shorter.  Clobbers %xmm0 and %xmm1.  */
2705 .p2align 4
2706L(bwd_write_132bytes):
2707 lddqu 116(%rsi), %xmm0
2708 movdqu %xmm0, 116(%rdi)
2709L(bwd_write_116bytes):
2710 lddqu 100(%rsi), %xmm0
2711 movdqu %xmm0, 100(%rdi)
2712L(bwd_write_100bytes):
2713 lddqu 84(%rsi), %xmm0
2714 movdqu %xmm0, 84(%rdi)
2715L(bwd_write_84bytes):
2716 lddqu 68(%rsi), %xmm0
2717 movdqu %xmm0, 68(%rdi)
2718L(bwd_write_68bytes):
2719 lddqu 52(%rsi), %xmm0
2720 movdqu %xmm0, 52(%rdi)
2721L(bwd_write_52bytes):
2722 lddqu 36(%rsi), %xmm0
2723 movdqu %xmm0, 36(%rdi)
2724L(bwd_write_36bytes):
2725 lddqu 20(%rsi), %xmm0
2726 movdqu %xmm0, 20(%rdi)
 /* Last 20 bytes: two overlapping 16-byte vectors (bytes 4..19 and
    0..15), both loaded before either is stored.  */
2727L(bwd_write_20bytes):
2728 lddqu 4(%rsi), %xmm0
2729 lddqu (%rsi), %xmm1
2730 movdqu %xmm0, 4(%rdi)
2731 movdqu %xmm1, (%rdi)
2732 ret
2733
/* Tail copy of exactly 4 bytes from (%rsi) to (%rdi) with a single
   4-byte move; entry 4 of L(table_144_bytes_bwd).  Clobbers %rdx
   (written through %edx).  */
2734 .p2align 4
2735L(bwd_write_4bytes):
2736 mov (%rsi), %edx
2737 mov %edx, (%rdi)
2738 ret
2739
/* Fall-through chain for tails of 131, 115, 99, 83, 67, 51, 35 and
   19 bytes (the same-numbered entries of L(table_144_bytes_bwd)).
   Each label copies the highest 16 bytes of its length from (%rsi) to
   (%rdi) with an unaligned load/store pair, then falls into the label
   that is 16 bytes shorter.  Clobbers %xmm0 and %xmm1.  */
2740 .p2align 4
2741L(bwd_write_131bytes):
2742 lddqu 115(%rsi), %xmm0
2743 movdqu %xmm0, 115(%rdi)
2744L(bwd_write_115bytes):
2745 lddqu 99(%rsi), %xmm0
2746 movdqu %xmm0, 99(%rdi)
2747L(bwd_write_99bytes):
2748 lddqu 83(%rsi), %xmm0
2749 movdqu %xmm0, 83(%rdi)
2750L(bwd_write_83bytes):
2751 lddqu 67(%rsi), %xmm0
2752 movdqu %xmm0, 67(%rdi)
2753L(bwd_write_67bytes):
2754 lddqu 51(%rsi), %xmm0
2755 movdqu %xmm0, 51(%rdi)
2756L(bwd_write_51bytes):
2757 lddqu 35(%rsi), %xmm0
2758 movdqu %xmm0, 35(%rdi)
2759L(bwd_write_35bytes):
2760 lddqu 19(%rsi), %xmm0
2761 movdqu %xmm0, 19(%rdi)
 /* Last 19 bytes: two overlapping 16-byte vectors (bytes 3..18 and
    0..15), both loaded before either is stored.  */
2762L(bwd_write_19bytes):
2763 lddqu 3(%rsi), %xmm0
2764 lddqu (%rsi), %xmm1
2765 movdqu %xmm0, 3(%rdi)
2766 movdqu %xmm1, (%rdi)
2767 ret
2768
/* Tail copy of exactly 3 bytes from (%rsi) to (%rdi); entry 3 of
   L(table_144_bytes_bwd).  Two overlapping 2-byte moves cover bytes
   1..2 and 0..1, and both loads are issued before either store.
   Overwrites %dx and %cx (the low 16 bits of %rdx and %rcx).  */
2769 .p2align 4
2770L(bwd_write_3bytes):
2771 mov 1(%rsi), %dx
2772 mov (%rsi), %cx
2773 mov %dx, 1(%rdi)
2774 mov %cx, (%rdi)
2775 ret
2776
/* Fall-through chain for tails of 130, 114, 98, 82, 66, 50, 34 and
   18 bytes (the same-numbered entries of L(table_144_bytes_bwd)).
   Each label copies the highest 16 bytes of its length from (%rsi) to
   (%rdi) with an unaligned load/store pair, then falls into the label
   that is 16 bytes shorter.  Clobbers %xmm0 and %xmm1.  */
2777 .p2align 4
2778L(bwd_write_130bytes):
2779 lddqu 114(%rsi), %xmm0
2780 movdqu %xmm0, 114(%rdi)
2781L(bwd_write_114bytes):
2782 lddqu 98(%rsi), %xmm0
2783 movdqu %xmm0, 98(%rdi)
2784L(bwd_write_98bytes):
2785 lddqu 82(%rsi), %xmm0
2786 movdqu %xmm0, 82(%rdi)
2787L(bwd_write_82bytes):
2788 lddqu 66(%rsi), %xmm0
2789 movdqu %xmm0, 66(%rdi)
2790L(bwd_write_66bytes):
2791 lddqu 50(%rsi), %xmm0
2792 movdqu %xmm0, 50(%rdi)
2793L(bwd_write_50bytes):
2794 lddqu 34(%rsi), %xmm0
2795 movdqu %xmm0, 34(%rdi)
2796L(bwd_write_34bytes):
2797 lddqu 18(%rsi), %xmm0
2798 movdqu %xmm0, 18(%rdi)
 /* Last 18 bytes: two overlapping 16-byte vectors (bytes 2..17 and
    0..15), both loaded before either is stored.  */
2799L(bwd_write_18bytes):
2800 lddqu 2(%rsi), %xmm0
2801 lddqu (%rsi), %xmm1
2802 movdqu %xmm0, 2(%rdi)
2803 movdqu %xmm1, (%rdi)
2804 ret
2805
/* Tail copy of exactly 2 bytes from (%rsi) to (%rdi); entry 2 of
   L(table_144_bytes_bwd).  The 16-bit load is zero-extended into %edx
   and the low word %dx is stored.  Clobbers %rdx.  */
2806 .p2align 4
2807L(bwd_write_2bytes):
2808 movzwl (%rsi), %edx
2809 mov %dx, (%rdi)
2810 ret
2811
/* Fall-through chain for tails of 129, 113, 97, 81, 65, 49, 33 and
   17 bytes (the same-numbered entries of L(table_144_bytes_bwd)).
   Each label copies the highest 16 bytes of its length from (%rsi) to
   (%rdi) with an unaligned load/store pair, then falls into the label
   that is 16 bytes shorter.  Clobbers %xmm0 and %xmm1.  */
2812 .p2align 4
2813L(bwd_write_129bytes):
2814 lddqu 113(%rsi), %xmm0
2815 movdqu %xmm0, 113(%rdi)
2816L(bwd_write_113bytes):
2817 lddqu 97(%rsi), %xmm0
2818 movdqu %xmm0, 97(%rdi)
2819L(bwd_write_97bytes):
2820 lddqu 81(%rsi), %xmm0
2821 movdqu %xmm0, 81(%rdi)
2822L(bwd_write_81bytes):
2823 lddqu 65(%rsi), %xmm0
2824 movdqu %xmm0, 65(%rdi)
2825L(bwd_write_65bytes):
2826 lddqu 49(%rsi), %xmm0
2827 movdqu %xmm0, 49(%rdi)
2828L(bwd_write_49bytes):
2829 lddqu 33(%rsi), %xmm0
2830 movdqu %xmm0, 33(%rdi)
2831L(bwd_write_33bytes):
2832 lddqu 17(%rsi), %xmm0
2833 movdqu %xmm0, 17(%rdi)
 /* Last 17 bytes: two overlapping 16-byte vectors (bytes 1..16 and
    0..15), both loaded before either is stored.  */
2834L(bwd_write_17bytes):
2835 lddqu 1(%rsi), %xmm0
2836 lddqu (%rsi), %xmm1
2837 movdqu %xmm0, 1(%rdi)
2838 movdqu %xmm1, (%rdi)
2839 ret
2840
/* Tail copy of exactly 1 byte from (%rsi) to (%rdi); entry 1 of
   L(table_144_bytes_bwd).  The byte is zero-extended into %edx and
   the low byte %dl is stored.  Clobbers %rdx.  */
2841 .p2align 4
2842L(bwd_write_1bytes):
2843 movzbl (%rsi), %edx
2844 mov %dl, (%rdi)
2845 ret
2846
2847END (MEMCPY)
2848
/* Jump table for the L(bwd_write_Nbytes) tail copies, N = 0..143.
   Entry N is the 32-bit offset of L(bwd_write_Nbytes) from the start
   of this table (JMPTBL (I, B) expands to I - B), which is the
   relative form BRANCH_TO_JMPTBL_ENTRY sign-extends and adds back to
   the table address.  The table lives in the read-only
   .rodata.ssse3 section and is 8-byte aligned.
   NOTE(review): presumably indexed by the number of bytes left to
   copy; the dispatch site is outside this part of the file -- confirm.  */
2849 .section .rodata.ssse3,"a",@progbits
2850 .p2align 3
2851L(table_144_bytes_bwd):
2852 .int JMPTBL (L(bwd_write_0bytes), L(table_144_bytes_bwd))
2853 .int JMPTBL (L(bwd_write_1bytes), L(table_144_bytes_bwd))
2854 .int JMPTBL (L(bwd_write_2bytes), L(table_144_bytes_bwd))
2855 .int JMPTBL (L(bwd_write_3bytes), L(table_144_bytes_bwd))
2856 .int JMPTBL (L(bwd_write_4bytes), L(table_144_bytes_bwd))
2857 .int JMPTBL (L(bwd_write_5bytes), L(table_144_bytes_bwd))
2858 .int JMPTBL (L(bwd_write_6bytes), L(table_144_bytes_bwd))
2859 .int JMPTBL (L(bwd_write_7bytes), L(table_144_bytes_bwd))
2860 .int JMPTBL (L(bwd_write_8bytes), L(table_144_bytes_bwd))
2861 .int JMPTBL (L(bwd_write_9bytes), L(table_144_bytes_bwd))
2862 .int JMPTBL (L(bwd_write_10bytes), L(table_144_bytes_bwd))
2863 .int JMPTBL (L(bwd_write_11bytes), L(table_144_bytes_bwd))
2864 .int JMPTBL (L(bwd_write_12bytes), L(table_144_bytes_bwd))
2865 .int JMPTBL (L(bwd_write_13bytes), L(table_144_bytes_bwd))
2866 .int JMPTBL (L(bwd_write_14bytes), L(table_144_bytes_bwd))
2867 .int JMPTBL (L(bwd_write_15bytes), L(table_144_bytes_bwd))
2868 .int JMPTBL (L(bwd_write_16bytes), L(table_144_bytes_bwd))
2869 .int JMPTBL (L(bwd_write_17bytes), L(table_144_bytes_bwd))
2870 .int JMPTBL (L(bwd_write_18bytes), L(table_144_bytes_bwd))
2871 .int JMPTBL (L(bwd_write_19bytes), L(table_144_bytes_bwd))
2872 .int JMPTBL (L(bwd_write_20bytes), L(table_144_bytes_bwd))
2873 .int JMPTBL (L(bwd_write_21bytes), L(table_144_bytes_bwd))
2874 .int JMPTBL (L(bwd_write_22bytes), L(table_144_bytes_bwd))
2875 .int JMPTBL (L(bwd_write_23bytes), L(table_144_bytes_bwd))
2876 .int JMPTBL (L(bwd_write_24bytes), L(table_144_bytes_bwd))
2877 .int JMPTBL (L(bwd_write_25bytes), L(table_144_bytes_bwd))
2878 .int JMPTBL (L(bwd_write_26bytes), L(table_144_bytes_bwd))
2879 .int JMPTBL (L(bwd_write_27bytes), L(table_144_bytes_bwd))
2880 .int JMPTBL (L(bwd_write_28bytes), L(table_144_bytes_bwd))
2881 .int JMPTBL (L(bwd_write_29bytes), L(table_144_bytes_bwd))
2882 .int JMPTBL (L(bwd_write_30bytes), L(table_144_bytes_bwd))
2883 .int JMPTBL (L(bwd_write_31bytes), L(table_144_bytes_bwd))
2884 .int JMPTBL (L(bwd_write_32bytes), L(table_144_bytes_bwd))
2885 .int JMPTBL (L(bwd_write_33bytes), L(table_144_bytes_bwd))
2886 .int JMPTBL (L(bwd_write_34bytes), L(table_144_bytes_bwd))
2887 .int JMPTBL (L(bwd_write_35bytes), L(table_144_bytes_bwd))
2888 .int JMPTBL (L(bwd_write_36bytes), L(table_144_bytes_bwd))
2889 .int JMPTBL (L(bwd_write_37bytes), L(table_144_bytes_bwd))
2890 .int JMPTBL (L(bwd_write_38bytes), L(table_144_bytes_bwd))
2891 .int JMPTBL (L(bwd_write_39bytes), L(table_144_bytes_bwd))
2892 .int JMPTBL (L(bwd_write_40bytes), L(table_144_bytes_bwd))
2893 .int JMPTBL (L(bwd_write_41bytes), L(table_144_bytes_bwd))
2894 .int JMPTBL (L(bwd_write_42bytes), L(table_144_bytes_bwd))
2895 .int JMPTBL (L(bwd_write_43bytes), L(table_144_bytes_bwd))
2896 .int JMPTBL (L(bwd_write_44bytes), L(table_144_bytes_bwd))
2897 .int JMPTBL (L(bwd_write_45bytes), L(table_144_bytes_bwd))
2898 .int JMPTBL (L(bwd_write_46bytes), L(table_144_bytes_bwd))
2899 .int JMPTBL (L(bwd_write_47bytes), L(table_144_bytes_bwd))
2900 .int JMPTBL (L(bwd_write_48bytes), L(table_144_bytes_bwd))
2901 .int JMPTBL (L(bwd_write_49bytes), L(table_144_bytes_bwd))
2902 .int JMPTBL (L(bwd_write_50bytes), L(table_144_bytes_bwd))
2903 .int JMPTBL (L(bwd_write_51bytes), L(table_144_bytes_bwd))
2904 .int JMPTBL (L(bwd_write_52bytes), L(table_144_bytes_bwd))
2905 .int JMPTBL (L(bwd_write_53bytes), L(table_144_bytes_bwd))
2906 .int JMPTBL (L(bwd_write_54bytes), L(table_144_bytes_bwd))
2907 .int JMPTBL (L(bwd_write_55bytes), L(table_144_bytes_bwd))
2908 .int JMPTBL (L(bwd_write_56bytes), L(table_144_bytes_bwd))
2909 .int JMPTBL (L(bwd_write_57bytes), L(table_144_bytes_bwd))
2910 .int JMPTBL (L(bwd_write_58bytes), L(table_144_bytes_bwd))
2911 .int JMPTBL (L(bwd_write_59bytes), L(table_144_bytes_bwd))
2912 .int JMPTBL (L(bwd_write_60bytes), L(table_144_bytes_bwd))
2913 .int JMPTBL (L(bwd_write_61bytes), L(table_144_bytes_bwd))
2914 .int JMPTBL (L(bwd_write_62bytes), L(table_144_bytes_bwd))
2915 .int JMPTBL (L(bwd_write_63bytes), L(table_144_bytes_bwd))
2916 .int JMPTBL (L(bwd_write_64bytes), L(table_144_bytes_bwd))
2917 .int JMPTBL (L(bwd_write_65bytes), L(table_144_bytes_bwd))
2918 .int JMPTBL (L(bwd_write_66bytes), L(table_144_bytes_bwd))
2919 .int JMPTBL (L(bwd_write_67bytes), L(table_144_bytes_bwd))
2920 .int JMPTBL (L(bwd_write_68bytes), L(table_144_bytes_bwd))
2921 .int JMPTBL (L(bwd_write_69bytes), L(table_144_bytes_bwd))
2922 .int JMPTBL (L(bwd_write_70bytes), L(table_144_bytes_bwd))
2923 .int JMPTBL (L(bwd_write_71bytes), L(table_144_bytes_bwd))
2924 .int JMPTBL (L(bwd_write_72bytes), L(table_144_bytes_bwd))
2925 .int JMPTBL (L(bwd_write_73bytes), L(table_144_bytes_bwd))
2926 .int JMPTBL (L(bwd_write_74bytes), L(table_144_bytes_bwd))
2927 .int JMPTBL (L(bwd_write_75bytes), L(table_144_bytes_bwd))
2928 .int JMPTBL (L(bwd_write_76bytes), L(table_144_bytes_bwd))
2929 .int JMPTBL (L(bwd_write_77bytes), L(table_144_bytes_bwd))
2930 .int JMPTBL (L(bwd_write_78bytes), L(table_144_bytes_bwd))
2931 .int JMPTBL (L(bwd_write_79bytes), L(table_144_bytes_bwd))
2932 .int JMPTBL (L(bwd_write_80bytes), L(table_144_bytes_bwd))
2933 .int JMPTBL (L(bwd_write_81bytes), L(table_144_bytes_bwd))
2934 .int JMPTBL (L(bwd_write_82bytes), L(table_144_bytes_bwd))
2935 .int JMPTBL (L(bwd_write_83bytes), L(table_144_bytes_bwd))
2936 .int JMPTBL (L(bwd_write_84bytes), L(table_144_bytes_bwd))
2937 .int JMPTBL (L(bwd_write_85bytes), L(table_144_bytes_bwd))
2938 .int JMPTBL (L(bwd_write_86bytes), L(table_144_bytes_bwd))
2939 .int JMPTBL (L(bwd_write_87bytes), L(table_144_bytes_bwd))
2940 .int JMPTBL (L(bwd_write_88bytes), L(table_144_bytes_bwd))
2941 .int JMPTBL (L(bwd_write_89bytes), L(table_144_bytes_bwd))
2942 .int JMPTBL (L(bwd_write_90bytes), L(table_144_bytes_bwd))
2943 .int JMPTBL (L(bwd_write_91bytes), L(table_144_bytes_bwd))
2944 .int JMPTBL (L(bwd_write_92bytes), L(table_144_bytes_bwd))
2945 .int JMPTBL (L(bwd_write_93bytes), L(table_144_bytes_bwd))
2946 .int JMPTBL (L(bwd_write_94bytes), L(table_144_bytes_bwd))
2947 .int JMPTBL (L(bwd_write_95bytes), L(table_144_bytes_bwd))
2948 .int JMPTBL (L(bwd_write_96bytes), L(table_144_bytes_bwd))
2949 .int JMPTBL (L(bwd_write_97bytes), L(table_144_bytes_bwd))
2950 .int JMPTBL (L(bwd_write_98bytes), L(table_144_bytes_bwd))
2951 .int JMPTBL (L(bwd_write_99bytes), L(table_144_bytes_bwd))
2952 .int JMPTBL (L(bwd_write_100bytes), L(table_144_bytes_bwd))
2953 .int JMPTBL (L(bwd_write_101bytes), L(table_144_bytes_bwd))
2954 .int JMPTBL (L(bwd_write_102bytes), L(table_144_bytes_bwd))
2955 .int JMPTBL (L(bwd_write_103bytes), L(table_144_bytes_bwd))
2956 .int JMPTBL (L(bwd_write_104bytes), L(table_144_bytes_bwd))
2957 .int JMPTBL (L(bwd_write_105bytes), L(table_144_bytes_bwd))
2958 .int JMPTBL (L(bwd_write_106bytes), L(table_144_bytes_bwd))
2959 .int JMPTBL (L(bwd_write_107bytes), L(table_144_bytes_bwd))
2960 .int JMPTBL (L(bwd_write_108bytes), L(table_144_bytes_bwd))
2961 .int JMPTBL (L(bwd_write_109bytes), L(table_144_bytes_bwd))
2962 .int JMPTBL (L(bwd_write_110bytes), L(table_144_bytes_bwd))
2963 .int JMPTBL (L(bwd_write_111bytes), L(table_144_bytes_bwd))
2964 .int JMPTBL (L(bwd_write_112bytes), L(table_144_bytes_bwd))
2965 .int JMPTBL (L(bwd_write_113bytes), L(table_144_bytes_bwd))
2966 .int JMPTBL (L(bwd_write_114bytes), L(table_144_bytes_bwd))
2967 .int JMPTBL (L(bwd_write_115bytes), L(table_144_bytes_bwd))
2968 .int JMPTBL (L(bwd_write_116bytes), L(table_144_bytes_bwd))
2969 .int JMPTBL (L(bwd_write_117bytes), L(table_144_bytes_bwd))
2970 .int JMPTBL (L(bwd_write_118bytes), L(table_144_bytes_bwd))
2971 .int JMPTBL (L(bwd_write_119bytes), L(table_144_bytes_bwd))
2972 .int JMPTBL (L(bwd_write_120bytes), L(table_144_bytes_bwd))
2973 .int JMPTBL (L(bwd_write_121bytes), L(table_144_bytes_bwd))
2974 .int JMPTBL (L(bwd_write_122bytes), L(table_144_bytes_bwd))
2975 .int JMPTBL (L(bwd_write_123bytes), L(table_144_bytes_bwd))
2976 .int JMPTBL (L(bwd_write_124bytes), L(table_144_bytes_bwd))
2977 .int JMPTBL (L(bwd_write_125bytes), L(table_144_bytes_bwd))
2978 .int JMPTBL (L(bwd_write_126bytes), L(table_144_bytes_bwd))
2979 .int JMPTBL (L(bwd_write_127bytes), L(table_144_bytes_bwd))
2980 .int JMPTBL (L(bwd_write_128bytes), L(table_144_bytes_bwd))
2981 .int JMPTBL (L(bwd_write_129bytes), L(table_144_bytes_bwd))
2982 .int JMPTBL (L(bwd_write_130bytes), L(table_144_bytes_bwd))
2983 .int JMPTBL (L(bwd_write_131bytes), L(table_144_bytes_bwd))
2984 .int JMPTBL (L(bwd_write_132bytes), L(table_144_bytes_bwd))
2985 .int JMPTBL (L(bwd_write_133bytes), L(table_144_bytes_bwd))
2986 .int JMPTBL (L(bwd_write_134bytes), L(table_144_bytes_bwd))
2987 .int JMPTBL (L(bwd_write_135bytes), L(table_144_bytes_bwd))
2988 .int JMPTBL (L(bwd_write_136bytes), L(table_144_bytes_bwd))
2989 .int JMPTBL (L(bwd_write_137bytes), L(table_144_bytes_bwd))
2990 .int JMPTBL (L(bwd_write_138bytes), L(table_144_bytes_bwd))
2991 .int JMPTBL (L(bwd_write_139bytes), L(table_144_bytes_bwd))
2992 .int JMPTBL (L(bwd_write_140bytes), L(table_144_bytes_bwd))
2993 .int JMPTBL (L(bwd_write_141bytes), L(table_144_bytes_bwd))
2994 .int JMPTBL (L(bwd_write_142bytes), L(table_144_bytes_bwd))
2995 .int JMPTBL (L(bwd_write_143bytes), L(table_144_bytes_bwd))
2996
/* Jump table for the L(fwd_write_Nbytes) tail copies, N = 0..143.
   Entry N is the 32-bit offset of L(fwd_write_Nbytes) from the start
   of this table (JMPTBL (I, B) expands to I - B), which is the
   relative form BRANCH_TO_JMPTBL_ENTRY sign-extends and adds back to
   the table address.  The table is 8-byte aligned.
   NOTE(review): presumably indexed by the number of bytes left to
   copy; the dispatch site is outside this part of the file -- confirm.  */
2997 .p2align 3
2998L(table_144_bytes_fwd):
2999 .int JMPTBL (L(fwd_write_0bytes), L(table_144_bytes_fwd))
3000 .int JMPTBL (L(fwd_write_1bytes), L(table_144_bytes_fwd))
3001 .int JMPTBL (L(fwd_write_2bytes), L(table_144_bytes_fwd))
3002 .int JMPTBL (L(fwd_write_3bytes), L(table_144_bytes_fwd))
3003 .int JMPTBL (L(fwd_write_4bytes), L(table_144_bytes_fwd))
3004 .int JMPTBL (L(fwd_write_5bytes), L(table_144_bytes_fwd))
3005 .int JMPTBL (L(fwd_write_6bytes), L(table_144_bytes_fwd))
3006 .int JMPTBL (L(fwd_write_7bytes), L(table_144_bytes_fwd))
3007 .int JMPTBL (L(fwd_write_8bytes), L(table_144_bytes_fwd))
3008 .int JMPTBL (L(fwd_write_9bytes), L(table_144_bytes_fwd))
3009 .int JMPTBL (L(fwd_write_10bytes), L(table_144_bytes_fwd))
3010 .int JMPTBL (L(fwd_write_11bytes), L(table_144_bytes_fwd))
3011 .int JMPTBL (L(fwd_write_12bytes), L(table_144_bytes_fwd))
3012 .int JMPTBL (L(fwd_write_13bytes), L(table_144_bytes_fwd))
3013 .int JMPTBL (L(fwd_write_14bytes), L(table_144_bytes_fwd))
3014 .int JMPTBL (L(fwd_write_15bytes), L(table_144_bytes_fwd))
3015 .int JMPTBL (L(fwd_write_16bytes), L(table_144_bytes_fwd))
3016 .int JMPTBL (L(fwd_write_17bytes), L(table_144_bytes_fwd))
3017 .int JMPTBL (L(fwd_write_18bytes), L(table_144_bytes_fwd))
3018 .int JMPTBL (L(fwd_write_19bytes), L(table_144_bytes_fwd))
3019 .int JMPTBL (L(fwd_write_20bytes), L(table_144_bytes_fwd))
3020 .int JMPTBL (L(fwd_write_21bytes), L(table_144_bytes_fwd))
3021 .int JMPTBL (L(fwd_write_22bytes), L(table_144_bytes_fwd))
3022 .int JMPTBL (L(fwd_write_23bytes), L(table_144_bytes_fwd))
3023 .int JMPTBL (L(fwd_write_24bytes), L(table_144_bytes_fwd))
3024 .int JMPTBL (L(fwd_write_25bytes), L(table_144_bytes_fwd))
3025 .int JMPTBL (L(fwd_write_26bytes), L(table_144_bytes_fwd))
3026 .int JMPTBL (L(fwd_write_27bytes), L(table_144_bytes_fwd))
3027 .int JMPTBL (L(fwd_write_28bytes), L(table_144_bytes_fwd))
3028 .int JMPTBL (L(fwd_write_29bytes), L(table_144_bytes_fwd))
3029 .int JMPTBL (L(fwd_write_30bytes), L(table_144_bytes_fwd))
3030 .int JMPTBL (L(fwd_write_31bytes), L(table_144_bytes_fwd))
3031 .int JMPTBL (L(fwd_write_32bytes), L(table_144_bytes_fwd))
3032 .int JMPTBL (L(fwd_write_33bytes), L(table_144_bytes_fwd))
3033 .int JMPTBL (L(fwd_write_34bytes), L(table_144_bytes_fwd))
3034 .int JMPTBL (L(fwd_write_35bytes), L(table_144_bytes_fwd))
3035 .int JMPTBL (L(fwd_write_36bytes), L(table_144_bytes_fwd))
3036 .int JMPTBL (L(fwd_write_37bytes), L(table_144_bytes_fwd))
3037 .int JMPTBL (L(fwd_write_38bytes), L(table_144_bytes_fwd))
3038 .int JMPTBL (L(fwd_write_39bytes), L(table_144_bytes_fwd))
3039 .int JMPTBL (L(fwd_write_40bytes), L(table_144_bytes_fwd))
3040 .int JMPTBL (L(fwd_write_41bytes), L(table_144_bytes_fwd))
3041 .int JMPTBL (L(fwd_write_42bytes), L(table_144_bytes_fwd))
3042 .int JMPTBL (L(fwd_write_43bytes), L(table_144_bytes_fwd))
3043 .int JMPTBL (L(fwd_write_44bytes), L(table_144_bytes_fwd))
3044 .int JMPTBL (L(fwd_write_45bytes), L(table_144_bytes_fwd))
3045 .int JMPTBL (L(fwd_write_46bytes), L(table_144_bytes_fwd))
3046 .int JMPTBL (L(fwd_write_47bytes), L(table_144_bytes_fwd))
3047 .int JMPTBL (L(fwd_write_48bytes), L(table_144_bytes_fwd))
3048 .int JMPTBL (L(fwd_write_49bytes), L(table_144_bytes_fwd))
3049 .int JMPTBL (L(fwd_write_50bytes), L(table_144_bytes_fwd))
3050 .int JMPTBL (L(fwd_write_51bytes), L(table_144_bytes_fwd))
3051 .int JMPTBL (L(fwd_write_52bytes), L(table_144_bytes_fwd))
3052 .int JMPTBL (L(fwd_write_53bytes), L(table_144_bytes_fwd))
3053 .int JMPTBL (L(fwd_write_54bytes), L(table_144_bytes_fwd))
3054 .int JMPTBL (L(fwd_write_55bytes), L(table_144_bytes_fwd))
3055 .int JMPTBL (L(fwd_write_56bytes), L(table_144_bytes_fwd))
3056 .int JMPTBL (L(fwd_write_57bytes), L(table_144_bytes_fwd))
3057 .int JMPTBL (L(fwd_write_58bytes), L(table_144_bytes_fwd))
3058 .int JMPTBL (L(fwd_write_59bytes), L(table_144_bytes_fwd))
3059 .int JMPTBL (L(fwd_write_60bytes), L(table_144_bytes_fwd))
3060 .int JMPTBL (L(fwd_write_61bytes), L(table_144_bytes_fwd))
3061 .int JMPTBL (L(fwd_write_62bytes), L(table_144_bytes_fwd))
3062 .int JMPTBL (L(fwd_write_63bytes), L(table_144_bytes_fwd))
3063 .int JMPTBL (L(fwd_write_64bytes), L(table_144_bytes_fwd))
3064 .int JMPTBL (L(fwd_write_65bytes), L(table_144_bytes_fwd))
3065 .int JMPTBL (L(fwd_write_66bytes), L(table_144_bytes_fwd))
3066 .int JMPTBL (L(fwd_write_67bytes), L(table_144_bytes_fwd))
3067 .int JMPTBL (L(fwd_write_68bytes), L(table_144_bytes_fwd))
3068 .int JMPTBL (L(fwd_write_69bytes), L(table_144_bytes_fwd))
3069 .int JMPTBL (L(fwd_write_70bytes), L(table_144_bytes_fwd))
3070 .int JMPTBL (L(fwd_write_71bytes), L(table_144_bytes_fwd))
3071 .int JMPTBL (L(fwd_write_72bytes), L(table_144_bytes_fwd))
3072 .int JMPTBL (L(fwd_write_73bytes), L(table_144_bytes_fwd))
3073 .int JMPTBL (L(fwd_write_74bytes), L(table_144_bytes_fwd))
3074 .int JMPTBL (L(fwd_write_75bytes), L(table_144_bytes_fwd))
3075 .int JMPTBL (L(fwd_write_76bytes), L(table_144_bytes_fwd))
3076 .int JMPTBL (L(fwd_write_77bytes), L(table_144_bytes_fwd))
3077 .int JMPTBL (L(fwd_write_78bytes), L(table_144_bytes_fwd))
3078 .int JMPTBL (L(fwd_write_79bytes), L(table_144_bytes_fwd))
3079 .int JMPTBL (L(fwd_write_80bytes), L(table_144_bytes_fwd))
3080 .int JMPTBL (L(fwd_write_81bytes), L(table_144_bytes_fwd))
3081 .int JMPTBL (L(fwd_write_82bytes), L(table_144_bytes_fwd))
3082 .int JMPTBL (L(fwd_write_83bytes), L(table_144_bytes_fwd))
3083 .int JMPTBL (L(fwd_write_84bytes), L(table_144_bytes_fwd))
3084 .int JMPTBL (L(fwd_write_85bytes), L(table_144_bytes_fwd))
3085 .int JMPTBL (L(fwd_write_86bytes), L(table_144_bytes_fwd))
3086 .int JMPTBL (L(fwd_write_87bytes), L(table_144_bytes_fwd))
3087 .int JMPTBL (L(fwd_write_88bytes), L(table_144_bytes_fwd))
3088 .int JMPTBL (L(fwd_write_89bytes), L(table_144_bytes_fwd))
3089 .int JMPTBL (L(fwd_write_90bytes), L(table_144_bytes_fwd))
3090 .int JMPTBL (L(fwd_write_91bytes), L(table_144_bytes_fwd))
3091 .int JMPTBL (L(fwd_write_92bytes), L(table_144_bytes_fwd))
3092 .int JMPTBL (L(fwd_write_93bytes), L(table_144_bytes_fwd))
3093 .int JMPTBL (L(fwd_write_94bytes), L(table_144_bytes_fwd))
3094 .int JMPTBL (L(fwd_write_95bytes), L(table_144_bytes_fwd))
3095 .int JMPTBL (L(fwd_write_96bytes), L(table_144_bytes_fwd))
3096 .int JMPTBL (L(fwd_write_97bytes), L(table_144_bytes_fwd))
3097 .int JMPTBL (L(fwd_write_98bytes), L(table_144_bytes_fwd))
3098 .int JMPTBL (L(fwd_write_99bytes), L(table_144_bytes_fwd))
3099 .int JMPTBL (L(fwd_write_100bytes), L(table_144_bytes_fwd))
3100 .int JMPTBL (L(fwd_write_101bytes), L(table_144_bytes_fwd))
3101 .int JMPTBL (L(fwd_write_102bytes), L(table_144_bytes_fwd))
3102 .int JMPTBL (L(fwd_write_103bytes), L(table_144_bytes_fwd))
3103 .int JMPTBL (L(fwd_write_104bytes), L(table_144_bytes_fwd))
3104 .int JMPTBL (L(fwd_write_105bytes), L(table_144_bytes_fwd))
3105 .int JMPTBL (L(fwd_write_106bytes), L(table_144_bytes_fwd))
3106 .int JMPTBL (L(fwd_write_107bytes), L(table_144_bytes_fwd))
3107 .int JMPTBL (L(fwd_write_108bytes), L(table_144_bytes_fwd))
3108 .int JMPTBL (L(fwd_write_109bytes), L(table_144_bytes_fwd))
3109 .int JMPTBL (L(fwd_write_110bytes), L(table_144_bytes_fwd))
3110 .int JMPTBL (L(fwd_write_111bytes), L(table_144_bytes_fwd))
3111 .int JMPTBL (L(fwd_write_112bytes), L(table_144_bytes_fwd))
3112 .int JMPTBL (L(fwd_write_113bytes), L(table_144_bytes_fwd))
3113 .int JMPTBL (L(fwd_write_114bytes), L(table_144_bytes_fwd))
3114 .int JMPTBL (L(fwd_write_115bytes), L(table_144_bytes_fwd))
3115 .int JMPTBL (L(fwd_write_116bytes), L(table_144_bytes_fwd))
3116 .int JMPTBL (L(fwd_write_117bytes), L(table_144_bytes_fwd))
3117 .int JMPTBL (L(fwd_write_118bytes), L(table_144_bytes_fwd))
3118 .int JMPTBL (L(fwd_write_119bytes), L(table_144_bytes_fwd))
3119 .int JMPTBL (L(fwd_write_120bytes), L(table_144_bytes_fwd))
3120 .int JMPTBL (L(fwd_write_121bytes), L(table_144_bytes_fwd))
3121 .int JMPTBL (L(fwd_write_122bytes), L(table_144_bytes_fwd))
3122 .int JMPTBL (L(fwd_write_123bytes), L(table_144_bytes_fwd))
3123 .int JMPTBL (L(fwd_write_124bytes), L(table_144_bytes_fwd))
3124 .int JMPTBL (L(fwd_write_125bytes), L(table_144_bytes_fwd))
3125 .int JMPTBL (L(fwd_write_126bytes), L(table_144_bytes_fwd))
3126 .int JMPTBL (L(fwd_write_127bytes), L(table_144_bytes_fwd))
3127 .int JMPTBL (L(fwd_write_128bytes), L(table_144_bytes_fwd))
3128 .int JMPTBL (L(fwd_write_129bytes), L(table_144_bytes_fwd))
3129 .int JMPTBL (L(fwd_write_130bytes), L(table_144_bytes_fwd))
3130 .int JMPTBL (L(fwd_write_131bytes), L(table_144_bytes_fwd))
3131 .int JMPTBL (L(fwd_write_132bytes), L(table_144_bytes_fwd))
3132 .int JMPTBL (L(fwd_write_133bytes), L(table_144_bytes_fwd))
3133 .int JMPTBL (L(fwd_write_134bytes), L(table_144_bytes_fwd))
3134 .int JMPTBL (L(fwd_write_135bytes), L(table_144_bytes_fwd))
3135 .int JMPTBL (L(fwd_write_136bytes), L(table_144_bytes_fwd))
3136 .int JMPTBL (L(fwd_write_137bytes), L(table_144_bytes_fwd))
3137 .int JMPTBL (L(fwd_write_138bytes), L(table_144_bytes_fwd))
3138 .int JMPTBL (L(fwd_write_139bytes), L(table_144_bytes_fwd))
3139 .int JMPTBL (L(fwd_write_140bytes), L(table_144_bytes_fwd))
3140 .int JMPTBL (L(fwd_write_141bytes), L(table_144_bytes_fwd))
3141 .int JMPTBL (L(fwd_write_142bytes), L(table_144_bytes_fwd))
3142 .int JMPTBL (L(fwd_write_143bytes), L(table_144_bytes_fwd))
3143
/* Jump table with 16 entries, L(shl_0) .. L(shl_15).  Entry K is the
   32-bit offset of L(shl_K) from the start of this table (JMPTBL
   (I, B) expands to I - B), the relative form consumed by
   BRANCH_TO_JMPTBL_ENTRY.  The table is 8-byte aligned.
   NOTE(review): presumably indexed by a 0..15 byte misalignment in
   the forward copy path; the dispatch site is outside this part of
   the file -- confirm.  */
3144 .p2align 3
3145L(shl_table_fwd):
3146 .int JMPTBL (L(shl_0), L(shl_table_fwd))
3147 .int JMPTBL (L(shl_1), L(shl_table_fwd))
3148 .int JMPTBL (L(shl_2), L(shl_table_fwd))
3149 .int JMPTBL (L(shl_3), L(shl_table_fwd))
3150 .int JMPTBL (L(shl_4), L(shl_table_fwd))
3151 .int JMPTBL (L(shl_5), L(shl_table_fwd))
3152 .int JMPTBL (L(shl_6), L(shl_table_fwd))
3153 .int JMPTBL (L(shl_7), L(shl_table_fwd))
3154 .int JMPTBL (L(shl_8), L(shl_table_fwd))
3155 .int JMPTBL (L(shl_9), L(shl_table_fwd))
3156 .int JMPTBL (L(shl_10), L(shl_table_fwd))
3157 .int JMPTBL (L(shl_11), L(shl_table_fwd))
3158 .int JMPTBL (L(shl_12), L(shl_table_fwd))
3159 .int JMPTBL (L(shl_13), L(shl_table_fwd))
3160 .int JMPTBL (L(shl_14), L(shl_table_fwd))
3161 .int JMPTBL (L(shl_15), L(shl_table_fwd))
3162
/* Jump table with 16 entries, L(shl_0_bwd) .. L(shl_15_bwd).  Entry K
   is the 32-bit offset of L(shl_K_bwd) from the start of this table
   (JMPTBL (I, B) expands to I - B), the relative form consumed by
   BRANCH_TO_JMPTBL_ENTRY.  The table is 8-byte aligned.
   NOTE(review): presumably indexed by a 0..15 byte misalignment in
   the backward copy path; the dispatch site is outside this part of
   the file -- confirm.  */
3163 .p2align 3
3164L(shl_table_bwd):
3165 .int JMPTBL (L(shl_0_bwd), L(shl_table_bwd))
3166 .int JMPTBL (L(shl_1_bwd), L(shl_table_bwd))
3167 .int JMPTBL (L(shl_2_bwd), L(shl_table_bwd))
3168 .int JMPTBL (L(shl_3_bwd), L(shl_table_bwd))
3169 .int JMPTBL (L(shl_4_bwd), L(shl_table_bwd))
3170 .int JMPTBL (L(shl_5_bwd), L(shl_table_bwd))
3171 .int JMPTBL (L(shl_6_bwd), L(shl_table_bwd))
3172 .int JMPTBL (L(shl_7_bwd), L(shl_table_bwd))
3173 .int JMPTBL (L(shl_8_bwd), L(shl_table_bwd))
3174 .int JMPTBL (L(shl_9_bwd), L(shl_table_bwd))
3175 .int JMPTBL (L(shl_10_bwd), L(shl_table_bwd))
3176 .int JMPTBL (L(shl_11_bwd), L(shl_table_bwd))
3177 .int JMPTBL (L(shl_12_bwd), L(shl_table_bwd))
3178 .int JMPTBL (L(shl_13_bwd), L(shl_table_bwd))
3179 .int JMPTBL (L(shl_14_bwd), L(shl_table_bwd))
3180 .int JMPTBL (L(shl_15_bwd), L(shl_table_bwd))
3181
3182#endif
3183