1/* memcpy with SSSE3 and REP string
2 Copyright (C) 2010-2017 Free Software Foundation, Inc.
3 Contributed by Intel Corporation.
4 This file is part of the GNU C Library.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
19
20#include <sysdep.h>
21
22#if IS_IN (libc) \
23 && (defined SHARED \
24 || defined USE_AS_MEMMOVE \
25 || !defined USE_MULTIARCH)
26
27#include "asm-syntax.h"
28
/* Default symbol names for the four entry points.  A file that includes
   this one may define MEMCPY beforehand, in which case none of these
   defaults are installed.  */
#if !defined MEMCPY
# define MEMCPY		__memcpy_ssse3_back
# define MEMCPY_CHK	__memcpy_chk_ssse3_back
# define MEMPCPY	__mempcpy_ssse3_back
# define MEMPCPY_CHK	__mempcpy_chk_ssse3_back
#endif
35
/* Expands to the difference I - B, i.e. label I expressed relative to
   base B.  BRANCH_TO_JMPTBL_ENTRY below adds the base back.  */
#define JMPTBL(I, B)	I - B

/* Branch to an entry in a jump table.  TABLE is a jump table holding
   32-bit relative offsets.  INDEX is a 64-bit register that contains the
   index into the jump table; SCALE is the scale applied to INDEX.
   Clobbers %r11 (table base) and INDEX (becomes the branch target).
   No flags are written.  The trailing ud2 is never reached by normal
   control flow.  */
#define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE)		\
	leaq	TABLE(%rip), %r11;				\
	movslq	(%r11, INDEX, SCALE), INDEX;			\
	leaq	(%r11, INDEX, 1), INDEX;			\
	jmp	*INDEX;						\
	ud2
47
48 .section .text.ssse3,"ax",@progbits
49#if !defined USE_AS_MEMPCPY && !defined USE_AS_MEMMOVE
/* Checked mempcpy entry.  The third argument (%rdx) is compared, unsigned,
   with the fourth (%rcx); if %rcx is the smaller one control goes to
   __chk_fail.  Otherwise execution falls through into the entry point
   that follows END, so the two must stay adjacent.  */
ENTRY (MEMPCPY_CHK)
	cmp	%RCX_LP, %RDX_LP
	ja	HIDDEN_JUMPTARGET (__chk_fail)
END (MEMPCPY_CHK)
54
/* mempcpy entry point.
   In:   %rdi = destination, %rsi = source, %rdx = length.
   Out:  %rax = destination + length (computed here, before the copy).
   The copy itself is done by the shared code at L(start).  */
ENTRY (MEMPCPY)
	mov	%RDI_LP, %RAX_LP
	add	%RDX_LP, %RAX_LP
#ifdef __ILP32__
	/* Clear the upper 32 bits of the length.  L(start) compares the
	   full 64-bit %rdx, and jumping there bypasses the clearing that
	   the MEMCPY entry performs, so it has to be repeated here.  */
	mov	%edx, %edx
#endif
	jmp	L(start)
END (MEMPCPY)
60#endif
61
62#if !defined USE_AS_BCOPY
/* Checked entry.  Branches to __chk_fail when the fourth argument (%rcx)
   is below the third (%rdx) as unsigned values; otherwise falls through
   into the entry point placed right after END.  */
ENTRY (MEMCPY_CHK)
	cmp	%RCX_LP, %RDX_LP
	ja	HIDDEN_JUMPTARGET (__chk_fail)
END (MEMCPY_CHK)
67#endif
68
/* Main entry point.
   In:   %rdi = destination, %rsi = source, %rdx = length.
   Out:  %rax = destination, or destination + length when built with
	 USE_AS_MEMPCPY.
   With USE_AS_MEMMOVE the copy direction is chosen from the pointer
   order; lengths below 144 are dispatched through a jump table and
   larger ones go to the bulk paths.  */
ENTRY (MEMCPY)
	mov	%RDI_LP, %RAX_LP
#if defined USE_AS_MEMPCPY
	add	%RDX_LP, %RAX_LP
#endif

#if defined __ILP32__
	/* Zero-extend the 32-bit length into the full %rdx.  */
	movl	%edx, %edx
#endif

#if defined USE_AS_MEMMOVE
	cmpq	%rsi, %rdi
	je	L(bwd_write_0bytes)	/* dst == src */
	jb	L(copy_forward)		/* dst below src */
	/* dst above src: copy from the end.  */
	cmpq	$144, %rdx
	jae	L(copy_backward)
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
L(copy_forward):
#endif
L(start):
	cmpq	$144, %rdx
	jae	L(144bytesormore)
92
/* Short copy: reached by fall-through from L(start) with %rdx < 144
   (the label name mentions 32 bytes, the bound checked above is 144).
   Without USE_AS_MEMMOVE the low address bytes of source and destination
   are compared and the backward table is used when %sil <= %dil
   (unsigned).  For the forward table both pointers are first advanced to
   the end of the buffers.  */
L(fwd_write_less32bytes):
#if !defined USE_AS_MEMMOVE
	cmpb	%dil, %sil
	jbe	L(bk_write)
#endif
	addq	%rdx, %rsi
	addq	%rdx, %rdi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
#if !defined USE_AS_MEMMOVE
L(bk_write):
	/* Pointers are left at the start of the buffers here.  */
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
#endif
106
/* Forward copy of 144 bytes or more.  The first 16 source bytes are kept
   in %xmm0 and the original destination in %r8 (they are stored after the
   bulk loop finishes).  %rdi is rounded up to the next 16-byte boundary;
   %rsi is advanced and %rdx reduced by the same 1..16 bytes.  The low
   four bits of the adjusted %rsi then select the loop variant.  */
	.p2align 4
L(144bytesormore):

#if !defined USE_AS_MEMMOVE
	/* NOTE(review): this is a signed compare (jle) of the low address
	   bytes, while the short-copy path uses the unsigned jbe for the
	   same operands -- confirm that the difference is intended.  */
	cmpb	%dil, %sil
	jle	L(copy_backward)
#endif
	movdqu	(%rsi), %xmm0
	movq	%rdi, %r8
	andq	$-16, %rdi
	addq	$16, %rdi
	movq	%rdi, %r9
	subq	%r8, %r9		/* r9 = aligned dst - original dst  */
	subq	%r9, %rdx
	addq	%r9, %rsi
	movq	%rsi, %r9
	andq	$15, %r9		/* r9 = source misalignment  */
	jz	L(shl_0)
#ifdef DATA_CACHE_SIZE
	mov	$DATA_CACHE_SIZE, %RCX_LP
#else
	mov	__x86_data_cache_size(%rip), %RCX_LP
#endif
	/* Lengths at or above the data cache size take a separate path.  */
	cmpq	%rcx, %rdx
	jae	L(gobble_mem_fwd)
	leaq	L(shl_table_fwd)(%rip), %r11
	subq	$128, %rdx
	movslq	(%r11, %r9, 4), %r9
	addq	%r11, %r9
	jmp	*%r9
	ud2
138
/* Backward copy of 144 bytes or more.  Lengths above twice the data cache
   size go to L(gobble_mem_bwd).  Otherwise both pointers are moved to the
   end of the buffers, the last 16 source bytes are kept in %xmm0 with
   their destination address in %r8, and %rdi is rounded down to a 16-byte
   boundary (%rsi and %rdx are reduced by the same amount).  The low four
   bits of the adjusted %rsi select the loop variant.  */
	.p2align 4
L(copy_backward):
#ifdef DATA_CACHE_SIZE
	mov	$DATA_CACHE_SIZE, %RCX_LP
#else
	mov	__x86_data_cache_size(%rip), %RCX_LP
#endif
	shlq	$1, %rcx
	cmpq	%rcx, %rdx
	ja	L(gobble_mem_bwd)

	addq	%rdx, %rdi
	addq	%rdx, %rsi
	movdqu	-16(%rsi), %xmm0
	leaq	-16(%rdi), %r8
	movq	%rdi, %r9
	andq	$15, %r9
	xorq	%r9, %rdi		/* clears the low four bits of rdi  */
	subq	%r9, %rsi
	subq	%r9, %rdx
	movq	%rsi, %r9
	andq	$15, %r9		/* r9 = source misalignment  */
	jz	L(shl_0_bwd)
	leaq	L(shl_table_bwd)(%rip), %r11
	subq	$128, %rdx
	movslq	(%r11, %r9, 4), %r9
	addq	%r11, %r9
	jmp	*%r9
	ud2
168
/* Forward path taken when the adjusted source is 16-byte aligned as well.
   If rdx + (rdx >> 8) reaches half the data cache size the copy goes to
   L(gobble_mem_fwd); otherwise 128 bytes are moved per pass with aligned
   loads and stores.  The loop branch uses CF from the subq (the two leaq
   do not write flags).  Afterwards the 16 bytes saved in %xmm0 are
   stored at %r8 and the remainder is handled via the forward table.  */
	.p2align 4
L(shl_0):

	movq	%rdx, %r9
	shrq	$8, %r9
	addq	%rdx, %r9
#ifdef DATA_CACHE_SIZE
	cmp	$DATA_CACHE_SIZE_HALF, %R9_LP
#else
	cmp	__x86_data_cache_size_half(%rip), %R9_LP
#endif
	jae	L(gobble_mem_fwd)
	subq	$128, %rdx
	.p2align 4
L(shl_0_loop):
	movdqa	(%rsi), %xmm1
	movdqa	%xmm1, (%rdi)
	movaps	16(%rsi), %xmm2
	movaps	%xmm2, 16(%rdi)
	movaps	32(%rsi), %xmm3
	movaps	%xmm3, 32(%rdi)
	movaps	48(%rsi), %xmm4
	movaps	%xmm4, 48(%rdi)
	movaps	64(%rsi), %xmm1
	movaps	%xmm1, 64(%rdi)
	movaps	80(%rsi), %xmm2
	movaps	%xmm2, 80(%rdi)
	movaps	96(%rsi), %xmm3
	movaps	%xmm3, 96(%rdi)
	movaps	112(%rsi), %xmm4
	movaps	%xmm4, 112(%rdi)
	subq	$128, %rdx
	leaq	128(%rsi), %rsi
	leaq	128(%rdi), %rdi
	jae	L(shl_0_loop)
	movdqu	%xmm0, (%r8)
	addq	$128, %rdx
	addq	%rdx, %rsi
	addq	%rdx, %rdi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
209
/* Backward path for a 16-byte aligned source end: 128 bytes per pass are
   moved from just below %rsi to just below %rdi with aligned accesses,
   highest chunk first.  CF from the subq is still valid at the jae since
   leaq leaves flags alone.  Afterwards the 16 bytes saved in %xmm0 are
   stored at %r8, the pointers are moved back to the buffer start and the
   remainder is handled via the backward table.  */
	.p2align 4
L(shl_0_bwd):
	subq	$128, %rdx
L(copy_backward_loop):
	movaps	-16(%rsi), %xmm1
	movaps	%xmm1, -16(%rdi)
	movaps	-32(%rsi), %xmm2
	movaps	%xmm2, -32(%rdi)
	movaps	-48(%rsi), %xmm3
	movaps	%xmm3, -48(%rdi)
	movaps	-64(%rsi), %xmm4
	movaps	%xmm4, -64(%rdi)
	movaps	-80(%rsi), %xmm5
	movaps	%xmm5, -80(%rdi)
	movaps	-96(%rsi), %xmm5
	movaps	%xmm5, -96(%rdi)
	movaps	-112(%rsi), %xmm5
	movaps	%xmm5, -112(%rdi)
	movaps	-128(%rsi), %xmm5
	movaps	%xmm5, -128(%rdi)
	subq	$128, %rdx
	leaq	-128(%rdi), %rdi
	leaq	-128(%rsi), %rsi
	jae	L(copy_backward_loop)

	movdqu	%xmm0, (%r8)
	addq	$128, %rdx
	subq	%rdx, %rdi
	subq	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
240
/* Forward loop, 128 bytes per pass, for a source 1 byte above a 16-byte
   boundary (the aligned loads at -1(%rsi) rely on that; presumably
   selected through L(shl_table_fwd)).  Nine aligned chunks are loaded,
   then each neighbouring pair is merged with palignr $1 into one aligned
   store.  CF from the leading subq is still live at the jae: only moves,
   palignr and lea sit in between.  */
	.p2align 4
L(shl_1):
	subq	$128, %rdx
	movaps	-1(%rsi), %xmm1
	movaps	15(%rsi), %xmm2
	movaps	31(%rsi), %xmm3
	movaps	47(%rsi), %xmm4
	movaps	63(%rsi), %xmm5
	movaps	79(%rsi), %xmm6
	movaps	95(%rsi), %xmm7
	movaps	111(%rsi), %xmm8
	movaps	127(%rsi), %xmm9
	leaq	128(%rsi), %rsi
	/* Highest chunk first, so each source register is still intact.  */
	palignr	$1, %xmm8, %xmm9
	movaps	%xmm9, 112(%rdi)
	palignr	$1, %xmm7, %xmm8
	movaps	%xmm8, 96(%rdi)
	palignr	$1, %xmm6, %xmm7
	movaps	%xmm7, 80(%rdi)
	palignr	$1, %xmm5, %xmm6
	movaps	%xmm6, 64(%rdi)
	palignr	$1, %xmm4, %xmm5
	movaps	%xmm5, 48(%rdi)
	palignr	$1, %xmm3, %xmm4
	movaps	%xmm4, 32(%rdi)
	palignr	$1, %xmm2, %xmm3
	movaps	%xmm3, 16(%rdi)
	palignr	$1, %xmm1, %xmm2
	movaps	%xmm2, (%rdi)
	leaq	128(%rdi), %rdi
	jae	L(shl_1)
	/* Store the 16 bytes held in %xmm0 at %r8 (both set up before the
	   dispatch), undo the bias on %rdx and finish via the table.  */
	movdqu	%xmm0, (%r8)
	addq	$128, %rdx
	addq	%rdx, %rdi
	addq	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
277
/* Backward loop, 128 bytes per pass, for a source end 1 byte above a
   16-byte boundary (required by the aligned loads at -1(%rsi);
   presumably selected through L(shl_table_bwd)).  Each step loads the
   next lower aligned chunk, merges it with the previous one using
   palignr $1 and stores 16 aligned bytes below %rdi.  The jae consumes
   CF from the subq; the two leaq in between do not write flags.  */
	.p2align 4
L(shl_1_bwd):
	movaps	-1(%rsi), %xmm1
	movaps	-17(%rsi), %xmm2
	palignr	$1, %xmm2, %xmm1
	movaps	%xmm1, -16(%rdi)
	movaps	-33(%rsi), %xmm3
	palignr	$1, %xmm3, %xmm2
	movaps	%xmm2, -32(%rdi)
	movaps	-49(%rsi), %xmm4
	palignr	$1, %xmm4, %xmm3
	movaps	%xmm3, -48(%rdi)
	movaps	-65(%rsi), %xmm5
	palignr	$1, %xmm5, %xmm4
	movaps	%xmm4, -64(%rdi)
	movaps	-81(%rsi), %xmm6
	palignr	$1, %xmm6, %xmm5
	movaps	%xmm5, -80(%rdi)
	movaps	-97(%rsi), %xmm7
	palignr	$1, %xmm7, %xmm6
	movaps	%xmm6, -96(%rdi)
	movaps	-113(%rsi), %xmm8
	palignr	$1, %xmm8, %xmm7
	movaps	%xmm7, -112(%rdi)
	movaps	-129(%rsi), %xmm9
	palignr	$1, %xmm9, %xmm8
	movaps	%xmm8, -128(%rdi)

	subq	$128, %rdx
	leaq	-128(%rdi), %rdi
	leaq	-128(%rsi), %rsi
	jae	L(shl_1_bwd)
	/* Store the 16 bytes held in %xmm0 at %r8 (both set up before the
	   dispatch), then step the pointers back by the remaining count.  */
	movdqu	%xmm0, (%r8)
	addq	$128, %rdx
	subq	%rdx, %rdi
	subq	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
323
/* Forward loop, 128 bytes per pass, for a source 2 bytes above a 16-byte
   boundary (the aligned loads at -2(%rsi) rely on that; presumably
   selected through L(shl_table_fwd)).  Nine aligned chunks are loaded,
   then each neighbouring pair is merged with palignr $2 into one aligned
   store.  CF from the leading subq is still live at the jae: only moves,
   palignr and lea sit in between.  */
	.p2align 4
L(shl_2):
	subq	$128, %rdx
	movaps	-2(%rsi), %xmm1
	movaps	14(%rsi), %xmm2
	movaps	30(%rsi), %xmm3
	movaps	46(%rsi), %xmm4
	movaps	62(%rsi), %xmm5
	movaps	78(%rsi), %xmm6
	movaps	94(%rsi), %xmm7
	movaps	110(%rsi), %xmm8
	movaps	126(%rsi), %xmm9
	leaq	128(%rsi), %rsi
	/* Highest chunk first, so each source register is still intact.  */
	palignr	$2, %xmm8, %xmm9
	movaps	%xmm9, 112(%rdi)
	palignr	$2, %xmm7, %xmm8
	movaps	%xmm8, 96(%rdi)
	palignr	$2, %xmm6, %xmm7
	movaps	%xmm7, 80(%rdi)
	palignr	$2, %xmm5, %xmm6
	movaps	%xmm6, 64(%rdi)
	palignr	$2, %xmm4, %xmm5
	movaps	%xmm5, 48(%rdi)
	palignr	$2, %xmm3, %xmm4
	movaps	%xmm4, 32(%rdi)
	palignr	$2, %xmm2, %xmm3
	movaps	%xmm3, 16(%rdi)
	palignr	$2, %xmm1, %xmm2
	movaps	%xmm2, (%rdi)
	leaq	128(%rdi), %rdi
	jae	L(shl_2)
	/* Store the 16 bytes held in %xmm0 at %r8 (both set up before the
	   dispatch), undo the bias on %rdx and finish via the table.  */
	movdqu	%xmm0, (%r8)
	addq	$128, %rdx
	addq	%rdx, %rdi
	addq	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
360
/* Backward loop, 128 bytes per pass, for a source end 2 bytes above a
   16-byte boundary (required by the aligned loads at -2(%rsi);
   presumably selected through L(shl_table_bwd)).  Each step loads the
   next lower aligned chunk, merges it with the previous one using
   palignr $2 and stores 16 aligned bytes below %rdi.  The jae consumes
   CF from the subq; the two leaq in between do not write flags.  */
	.p2align 4
L(shl_2_bwd):
	movaps	-2(%rsi), %xmm1
	movaps	-18(%rsi), %xmm2
	palignr	$2, %xmm2, %xmm1
	movaps	%xmm1, -16(%rdi)
	movaps	-34(%rsi), %xmm3
	palignr	$2, %xmm3, %xmm2
	movaps	%xmm2, -32(%rdi)
	movaps	-50(%rsi), %xmm4
	palignr	$2, %xmm4, %xmm3
	movaps	%xmm3, -48(%rdi)
	movaps	-66(%rsi), %xmm5
	palignr	$2, %xmm5, %xmm4
	movaps	%xmm4, -64(%rdi)
	movaps	-82(%rsi), %xmm6
	palignr	$2, %xmm6, %xmm5
	movaps	%xmm5, -80(%rdi)
	movaps	-98(%rsi), %xmm7
	palignr	$2, %xmm7, %xmm6
	movaps	%xmm6, -96(%rdi)
	movaps	-114(%rsi), %xmm8
	palignr	$2, %xmm8, %xmm7
	movaps	%xmm7, -112(%rdi)
	movaps	-130(%rsi), %xmm9
	palignr	$2, %xmm9, %xmm8
	movaps	%xmm8, -128(%rdi)

	subq	$128, %rdx
	leaq	-128(%rdi), %rdi
	leaq	-128(%rsi), %rsi
	jae	L(shl_2_bwd)
	/* Store the 16 bytes held in %xmm0 at %r8 (both set up before the
	   dispatch), then step the pointers back by the remaining count.  */
	movdqu	%xmm0, (%r8)
	addq	$128, %rdx
	subq	%rdx, %rdi
	subq	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
406
/* Forward loop, 128 bytes per pass, for a source 3 bytes above a 16-byte
   boundary (the aligned loads at -3(%rsi) rely on that; presumably
   selected through L(shl_table_fwd)).  Nine aligned chunks are loaded,
   then each neighbouring pair is merged with palignr $3 into one aligned
   store.  CF from the leading subq is still live at the jae: only moves,
   palignr and lea sit in between.  */
	.p2align 4
L(shl_3):
	subq	$128, %rdx
	movaps	-3(%rsi), %xmm1
	movaps	13(%rsi), %xmm2
	movaps	29(%rsi), %xmm3
	movaps	45(%rsi), %xmm4
	movaps	61(%rsi), %xmm5
	movaps	77(%rsi), %xmm6
	movaps	93(%rsi), %xmm7
	movaps	109(%rsi), %xmm8
	movaps	125(%rsi), %xmm9
	leaq	128(%rsi), %rsi
	/* Highest chunk first, so each source register is still intact.  */
	palignr	$3, %xmm8, %xmm9
	movaps	%xmm9, 112(%rdi)
	palignr	$3, %xmm7, %xmm8
	movaps	%xmm8, 96(%rdi)
	palignr	$3, %xmm6, %xmm7
	movaps	%xmm7, 80(%rdi)
	palignr	$3, %xmm5, %xmm6
	movaps	%xmm6, 64(%rdi)
	palignr	$3, %xmm4, %xmm5
	movaps	%xmm5, 48(%rdi)
	palignr	$3, %xmm3, %xmm4
	movaps	%xmm4, 32(%rdi)
	palignr	$3, %xmm2, %xmm3
	movaps	%xmm3, 16(%rdi)
	palignr	$3, %xmm1, %xmm2
	movaps	%xmm2, (%rdi)
	leaq	128(%rdi), %rdi
	jae	L(shl_3)
	/* Store the 16 bytes held in %xmm0 at %r8 (both set up before the
	   dispatch), undo the bias on %rdx and finish via the table.  */
	movdqu	%xmm0, (%r8)
	addq	$128, %rdx
	addq	%rdx, %rdi
	addq	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
443
/* Backward loop, 128 bytes per pass, for a source end 3 bytes above a
   16-byte boundary (required by the aligned loads at -3(%rsi);
   presumably selected through L(shl_table_bwd)).  Each step loads the
   next lower aligned chunk, merges it with the previous one using
   palignr $3 and stores 16 aligned bytes below %rdi.  The jae consumes
   CF from the subq; the two leaq in between do not write flags.  */
	.p2align 4
L(shl_3_bwd):
	movaps	-3(%rsi), %xmm1
	movaps	-19(%rsi), %xmm2
	palignr	$3, %xmm2, %xmm1
	movaps	%xmm1, -16(%rdi)
	movaps	-35(%rsi), %xmm3
	palignr	$3, %xmm3, %xmm2
	movaps	%xmm2, -32(%rdi)
	movaps	-51(%rsi), %xmm4
	palignr	$3, %xmm4, %xmm3
	movaps	%xmm3, -48(%rdi)
	movaps	-67(%rsi), %xmm5
	palignr	$3, %xmm5, %xmm4
	movaps	%xmm4, -64(%rdi)
	movaps	-83(%rsi), %xmm6
	palignr	$3, %xmm6, %xmm5
	movaps	%xmm5, -80(%rdi)
	movaps	-99(%rsi), %xmm7
	palignr	$3, %xmm7, %xmm6
	movaps	%xmm6, -96(%rdi)
	movaps	-115(%rsi), %xmm8
	palignr	$3, %xmm8, %xmm7
	movaps	%xmm7, -112(%rdi)
	movaps	-131(%rsi), %xmm9
	palignr	$3, %xmm9, %xmm8
	movaps	%xmm8, -128(%rdi)

	subq	$128, %rdx
	leaq	-128(%rdi), %rdi
	leaq	-128(%rsi), %rsi
	jae	L(shl_3_bwd)
	/* Store the 16 bytes held in %xmm0 at %r8 (both set up before the
	   dispatch), then step the pointers back by the remaining count.  */
	movdqu	%xmm0, (%r8)
	addq	$128, %rdx
	subq	%rdx, %rdi
	subq	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
489
/* Forward loop, 128 bytes per pass, for a source 4 bytes above a 16-byte
   boundary (the aligned loads at -4(%rsi) rely on that; presumably
   selected through L(shl_table_fwd)).  Nine aligned chunks are loaded,
   then each neighbouring pair is merged with palignr $4 into one aligned
   store.  CF from the leading subq is still live at the jae: only moves,
   palignr and lea sit in between.  */
	.p2align 4
L(shl_4):
	subq	$128, %rdx
	movaps	-4(%rsi), %xmm1
	movaps	12(%rsi), %xmm2
	movaps	28(%rsi), %xmm3
	movaps	44(%rsi), %xmm4
	movaps	60(%rsi), %xmm5
	movaps	76(%rsi), %xmm6
	movaps	92(%rsi), %xmm7
	movaps	108(%rsi), %xmm8
	movaps	124(%rsi), %xmm9
	leaq	128(%rsi), %rsi
	/* Highest chunk first, so each source register is still intact.  */
	palignr	$4, %xmm8, %xmm9
	movaps	%xmm9, 112(%rdi)
	palignr	$4, %xmm7, %xmm8
	movaps	%xmm8, 96(%rdi)
	palignr	$4, %xmm6, %xmm7
	movaps	%xmm7, 80(%rdi)
	palignr	$4, %xmm5, %xmm6
	movaps	%xmm6, 64(%rdi)
	palignr	$4, %xmm4, %xmm5
	movaps	%xmm5, 48(%rdi)
	palignr	$4, %xmm3, %xmm4
	movaps	%xmm4, 32(%rdi)
	palignr	$4, %xmm2, %xmm3
	movaps	%xmm3, 16(%rdi)
	palignr	$4, %xmm1, %xmm2
	movaps	%xmm2, (%rdi)
	leaq	128(%rdi), %rdi
	jae	L(shl_4)
	/* Store the 16 bytes held in %xmm0 at %r8 (both set up before the
	   dispatch), undo the bias on %rdx and finish via the table.  */
	movdqu	%xmm0, (%r8)
	addq	$128, %rdx
	addq	%rdx, %rdi
	addq	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
526
/* Backward loop, 128 bytes per pass, for a source end 4 bytes above a
   16-byte boundary (required by the aligned loads at -4(%rsi);
   presumably selected through L(shl_table_bwd)).  Each step loads the
   next lower aligned chunk, merges it with the previous one using
   palignr $4 and stores 16 aligned bytes below %rdi.  The jae consumes
   CF from the subq; the two leaq in between do not write flags.  */
	.p2align 4
L(shl_4_bwd):
	movaps	-4(%rsi), %xmm1
	movaps	-20(%rsi), %xmm2
	palignr	$4, %xmm2, %xmm1
	movaps	%xmm1, -16(%rdi)
	movaps	-36(%rsi), %xmm3
	palignr	$4, %xmm3, %xmm2
	movaps	%xmm2, -32(%rdi)
	movaps	-52(%rsi), %xmm4
	palignr	$4, %xmm4, %xmm3
	movaps	%xmm3, -48(%rdi)
	movaps	-68(%rsi), %xmm5
	palignr	$4, %xmm5, %xmm4
	movaps	%xmm4, -64(%rdi)
	movaps	-84(%rsi), %xmm6
	palignr	$4, %xmm6, %xmm5
	movaps	%xmm5, -80(%rdi)
	movaps	-100(%rsi), %xmm7
	palignr	$4, %xmm7, %xmm6
	movaps	%xmm6, -96(%rdi)
	movaps	-116(%rsi), %xmm8
	palignr	$4, %xmm8, %xmm7
	movaps	%xmm7, -112(%rdi)
	movaps	-132(%rsi), %xmm9
	palignr	$4, %xmm9, %xmm8
	movaps	%xmm8, -128(%rdi)

	subq	$128, %rdx
	leaq	-128(%rdi), %rdi
	leaq	-128(%rsi), %rsi
	jae	L(shl_4_bwd)
	/* Store the 16 bytes held in %xmm0 at %r8 (both set up before the
	   dispatch), then step the pointers back by the remaining count.  */
	movdqu	%xmm0, (%r8)
	addq	$128, %rdx
	subq	%rdx, %rdi
	subq	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
572
/* Forward loop, 128 bytes per pass, for a source 5 bytes above a 16-byte
   boundary (the aligned loads at -5(%rsi) rely on that; presumably
   selected through L(shl_table_fwd)).  Nine aligned chunks are loaded,
   then each neighbouring pair is merged with palignr $5 into one aligned
   store.  CF from the leading subq is still live at the jae: only moves,
   palignr and lea sit in between.  */
	.p2align 4
L(shl_5):
	subq	$128, %rdx
	movaps	-5(%rsi), %xmm1
	movaps	11(%rsi), %xmm2
	movaps	27(%rsi), %xmm3
	movaps	43(%rsi), %xmm4
	movaps	59(%rsi), %xmm5
	movaps	75(%rsi), %xmm6
	movaps	91(%rsi), %xmm7
	movaps	107(%rsi), %xmm8
	movaps	123(%rsi), %xmm9
	leaq	128(%rsi), %rsi
	/* Highest chunk first, so each source register is still intact.  */
	palignr	$5, %xmm8, %xmm9
	movaps	%xmm9, 112(%rdi)
	palignr	$5, %xmm7, %xmm8
	movaps	%xmm8, 96(%rdi)
	palignr	$5, %xmm6, %xmm7
	movaps	%xmm7, 80(%rdi)
	palignr	$5, %xmm5, %xmm6
	movaps	%xmm6, 64(%rdi)
	palignr	$5, %xmm4, %xmm5
	movaps	%xmm5, 48(%rdi)
	palignr	$5, %xmm3, %xmm4
	movaps	%xmm4, 32(%rdi)
	palignr	$5, %xmm2, %xmm3
	movaps	%xmm3, 16(%rdi)
	palignr	$5, %xmm1, %xmm2
	movaps	%xmm2, (%rdi)
	leaq	128(%rdi), %rdi
	jae	L(shl_5)
	/* Store the 16 bytes held in %xmm0 at %r8 (both set up before the
	   dispatch), undo the bias on %rdx and finish via the table.  */
	movdqu	%xmm0, (%r8)
	addq	$128, %rdx
	addq	%rdx, %rdi
	addq	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
609
/* Backward loop, 128 bytes per pass, for a source end 5 bytes above a
   16-byte boundary (required by the aligned loads at -5(%rsi);
   presumably selected through L(shl_table_bwd)).  Each step loads the
   next lower aligned chunk, merges it with the previous one using
   palignr $5 and stores 16 aligned bytes below %rdi.  The jae consumes
   CF from the subq; the two leaq in between do not write flags.  */
	.p2align 4
L(shl_5_bwd):
	movaps	-5(%rsi), %xmm1
	movaps	-21(%rsi), %xmm2
	palignr	$5, %xmm2, %xmm1
	movaps	%xmm1, -16(%rdi)
	movaps	-37(%rsi), %xmm3
	palignr	$5, %xmm3, %xmm2
	movaps	%xmm2, -32(%rdi)
	movaps	-53(%rsi), %xmm4
	palignr	$5, %xmm4, %xmm3
	movaps	%xmm3, -48(%rdi)
	movaps	-69(%rsi), %xmm5
	palignr	$5, %xmm5, %xmm4
	movaps	%xmm4, -64(%rdi)
	movaps	-85(%rsi), %xmm6
	palignr	$5, %xmm6, %xmm5
	movaps	%xmm5, -80(%rdi)
	movaps	-101(%rsi), %xmm7
	palignr	$5, %xmm7, %xmm6
	movaps	%xmm6, -96(%rdi)
	movaps	-117(%rsi), %xmm8
	palignr	$5, %xmm8, %xmm7
	movaps	%xmm7, -112(%rdi)
	movaps	-133(%rsi), %xmm9
	palignr	$5, %xmm9, %xmm8
	movaps	%xmm8, -128(%rdi)

	subq	$128, %rdx
	leaq	-128(%rdi), %rdi
	leaq	-128(%rsi), %rsi
	jae	L(shl_5_bwd)
	/* Store the 16 bytes held in %xmm0 at %r8 (both set up before the
	   dispatch), then step the pointers back by the remaining count.  */
	movdqu	%xmm0, (%r8)
	addq	$128, %rdx
	subq	%rdx, %rdi
	subq	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
655
/* Forward loop, 128 bytes per pass, for a source 6 bytes above a 16-byte
   boundary (the aligned loads at -6(%rsi) rely on that; presumably
   selected through L(shl_table_fwd)).  Nine aligned chunks are loaded,
   then each neighbouring pair is merged with palignr $6 into one aligned
   store.  CF from the leading subq is still live at the jae: only moves,
   palignr and lea sit in between.  */
	.p2align 4
L(shl_6):
	subq	$128, %rdx
	movaps	-6(%rsi), %xmm1
	movaps	10(%rsi), %xmm2
	movaps	26(%rsi), %xmm3
	movaps	42(%rsi), %xmm4
	movaps	58(%rsi), %xmm5
	movaps	74(%rsi), %xmm6
	movaps	90(%rsi), %xmm7
	movaps	106(%rsi), %xmm8
	movaps	122(%rsi), %xmm9
	leaq	128(%rsi), %rsi
	/* Highest chunk first, so each source register is still intact.  */
	palignr	$6, %xmm8, %xmm9
	movaps	%xmm9, 112(%rdi)
	palignr	$6, %xmm7, %xmm8
	movaps	%xmm8, 96(%rdi)
	palignr	$6, %xmm6, %xmm7
	movaps	%xmm7, 80(%rdi)
	palignr	$6, %xmm5, %xmm6
	movaps	%xmm6, 64(%rdi)
	palignr	$6, %xmm4, %xmm5
	movaps	%xmm5, 48(%rdi)
	palignr	$6, %xmm3, %xmm4
	movaps	%xmm4, 32(%rdi)
	palignr	$6, %xmm2, %xmm3
	movaps	%xmm3, 16(%rdi)
	palignr	$6, %xmm1, %xmm2
	movaps	%xmm2, (%rdi)
	leaq	128(%rdi), %rdi
	jae	L(shl_6)
	/* Store the 16 bytes held in %xmm0 at %r8 (both set up before the
	   dispatch), undo the bias on %rdx and finish via the table.  */
	movdqu	%xmm0, (%r8)
	addq	$128, %rdx
	addq	%rdx, %rdi
	addq	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
692
/* Backward loop, 128 bytes per pass, for a source end 6 bytes above a
   16-byte boundary (required by the aligned loads at -6(%rsi);
   presumably selected through L(shl_table_bwd)).  Each step loads the
   next lower aligned chunk, merges it with the previous one using
   palignr $6 and stores 16 aligned bytes below %rdi.  The jae consumes
   CF from the subq; the two leaq in between do not write flags.  */
	.p2align 4
L(shl_6_bwd):
	movaps	-6(%rsi), %xmm1
	movaps	-22(%rsi), %xmm2
	palignr	$6, %xmm2, %xmm1
	movaps	%xmm1, -16(%rdi)
	movaps	-38(%rsi), %xmm3
	palignr	$6, %xmm3, %xmm2
	movaps	%xmm2, -32(%rdi)
	movaps	-54(%rsi), %xmm4
	palignr	$6, %xmm4, %xmm3
	movaps	%xmm3, -48(%rdi)
	movaps	-70(%rsi), %xmm5
	palignr	$6, %xmm5, %xmm4
	movaps	%xmm4, -64(%rdi)
	movaps	-86(%rsi), %xmm6
	palignr	$6, %xmm6, %xmm5
	movaps	%xmm5, -80(%rdi)
	movaps	-102(%rsi), %xmm7
	palignr	$6, %xmm7, %xmm6
	movaps	%xmm6, -96(%rdi)
	movaps	-118(%rsi), %xmm8
	palignr	$6, %xmm8, %xmm7
	movaps	%xmm7, -112(%rdi)
	movaps	-134(%rsi), %xmm9
	palignr	$6, %xmm9, %xmm8
	movaps	%xmm8, -128(%rdi)

	subq	$128, %rdx
	leaq	-128(%rdi), %rdi
	leaq	-128(%rsi), %rsi
	jae	L(shl_6_bwd)
	/* Store the 16 bytes held in %xmm0 at %r8 (both set up before the
	   dispatch), then step the pointers back by the remaining count.  */
	movdqu	%xmm0, (%r8)
	addq	$128, %rdx
	subq	%rdx, %rdi
	subq	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
738
/* Forward loop, 128 bytes per pass, for a source 7 bytes above a 16-byte
   boundary (the aligned loads at -7(%rsi) rely on that; presumably
   selected through L(shl_table_fwd)).  Nine aligned chunks are loaded,
   then each neighbouring pair is merged with palignr $7 into one aligned
   store.  CF from the leading subq is still live at the jae: only moves,
   palignr and lea sit in between.  */
	.p2align 4
L(shl_7):
	subq	$128, %rdx
	movaps	-7(%rsi), %xmm1
	movaps	9(%rsi), %xmm2
	movaps	25(%rsi), %xmm3
	movaps	41(%rsi), %xmm4
	movaps	57(%rsi), %xmm5
	movaps	73(%rsi), %xmm6
	movaps	89(%rsi), %xmm7
	movaps	105(%rsi), %xmm8
	movaps	121(%rsi), %xmm9
	leaq	128(%rsi), %rsi
	/* Highest chunk first, so each source register is still intact.  */
	palignr	$7, %xmm8, %xmm9
	movaps	%xmm9, 112(%rdi)
	palignr	$7, %xmm7, %xmm8
	movaps	%xmm8, 96(%rdi)
	palignr	$7, %xmm6, %xmm7
	movaps	%xmm7, 80(%rdi)
	palignr	$7, %xmm5, %xmm6
	movaps	%xmm6, 64(%rdi)
	palignr	$7, %xmm4, %xmm5
	movaps	%xmm5, 48(%rdi)
	palignr	$7, %xmm3, %xmm4
	movaps	%xmm4, 32(%rdi)
	palignr	$7, %xmm2, %xmm3
	movaps	%xmm3, 16(%rdi)
	palignr	$7, %xmm1, %xmm2
	movaps	%xmm2, (%rdi)
	leaq	128(%rdi), %rdi
	jae	L(shl_7)
	/* Store the 16 bytes held in %xmm0 at %r8 (both set up before the
	   dispatch), undo the bias on %rdx and finish via the table.  */
	movdqu	%xmm0, (%r8)
	addq	$128, %rdx
	addq	%rdx, %rdi
	addq	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
775
/* Backward loop, 128 bytes per pass, for a source end 7 bytes above a
   16-byte boundary (required by the aligned loads at -7(%rsi);
   presumably selected through L(shl_table_bwd)).  Each step loads the
   next lower aligned chunk, merges it with the previous one using
   palignr $7 and stores 16 aligned bytes below %rdi.  The jae consumes
   CF from the subq; the two leaq in between do not write flags.  */
	.p2align 4
L(shl_7_bwd):
	movaps	-7(%rsi), %xmm1
	movaps	-23(%rsi), %xmm2
	palignr	$7, %xmm2, %xmm1
	movaps	%xmm1, -16(%rdi)
	movaps	-39(%rsi), %xmm3
	palignr	$7, %xmm3, %xmm2
	movaps	%xmm2, -32(%rdi)
	movaps	-55(%rsi), %xmm4
	palignr	$7, %xmm4, %xmm3
	movaps	%xmm3, -48(%rdi)
	movaps	-71(%rsi), %xmm5
	palignr	$7, %xmm5, %xmm4
	movaps	%xmm4, -64(%rdi)
	movaps	-87(%rsi), %xmm6
	palignr	$7, %xmm6, %xmm5
	movaps	%xmm5, -80(%rdi)
	movaps	-103(%rsi), %xmm7
	palignr	$7, %xmm7, %xmm6
	movaps	%xmm6, -96(%rdi)
	movaps	-119(%rsi), %xmm8
	palignr	$7, %xmm8, %xmm7
	movaps	%xmm7, -112(%rdi)
	movaps	-135(%rsi), %xmm9
	palignr	$7, %xmm9, %xmm8
	movaps	%xmm8, -128(%rdi)

	subq	$128, %rdx
	leaq	-128(%rdi), %rdi
	leaq	-128(%rsi), %rsi
	jae	L(shl_7_bwd)
	/* Store the 16 bytes held in %xmm0 at %r8 (both set up before the
	   dispatch), then step the pointers back by the remaining count.  */
	movdqu	%xmm0, (%r8)
	addq	$128, %rdx
	subq	%rdx, %rdi
	subq	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
821
/* Forward loop, 128 bytes per pass, for a source 8 bytes above a 16-byte
   boundary (the aligned loads at -8(%rsi) rely on that; presumably
   selected through L(shl_table_fwd)).  Nine aligned chunks are loaded,
   then each neighbouring pair is merged with palignr $8 into one aligned
   store.  CF from the leading subq is still live at the jae: only moves,
   palignr and lea sit in between.  */
	.p2align 4
L(shl_8):
	subq	$128, %rdx
	movaps	-8(%rsi), %xmm1
	movaps	8(%rsi), %xmm2
	movaps	24(%rsi), %xmm3
	movaps	40(%rsi), %xmm4
	movaps	56(%rsi), %xmm5
	movaps	72(%rsi), %xmm6
	movaps	88(%rsi), %xmm7
	movaps	104(%rsi), %xmm8
	movaps	120(%rsi), %xmm9
	leaq	128(%rsi), %rsi
	/* Highest chunk first, so each source register is still intact.  */
	palignr	$8, %xmm8, %xmm9
	movaps	%xmm9, 112(%rdi)
	palignr	$8, %xmm7, %xmm8
	movaps	%xmm8, 96(%rdi)
	palignr	$8, %xmm6, %xmm7
	movaps	%xmm7, 80(%rdi)
	palignr	$8, %xmm5, %xmm6
	movaps	%xmm6, 64(%rdi)
	palignr	$8, %xmm4, %xmm5
	movaps	%xmm5, 48(%rdi)
	palignr	$8, %xmm3, %xmm4
	movaps	%xmm4, 32(%rdi)
	palignr	$8, %xmm2, %xmm3
	movaps	%xmm3, 16(%rdi)
	palignr	$8, %xmm1, %xmm2
	movaps	%xmm2, (%rdi)
	leaq	128(%rdi), %rdi
	jae	L(shl_8)
	/* Store the 16 bytes held in %xmm0 at %r8 (both set up before the
	   dispatch), undo the bias on %rdx and finish via the table.  */
	movdqu	%xmm0, (%r8)
	addq	$128, %rdx
	addq	%rdx, %rdi
	addq	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
858
/* Backward loop, 128 bytes per pass, for a source end 8 bytes above a
   16-byte boundary (required by the aligned loads at -8(%rsi);
   presumably selected through L(shl_table_bwd)).  Each step loads the
   next lower aligned chunk, merges it with the previous one using
   palignr $8 and stores 16 aligned bytes below %rdi.  The jae consumes
   CF from the subq; the two leaq in between do not write flags.  */
	.p2align 4
L(shl_8_bwd):
	movaps	-8(%rsi), %xmm1
	movaps	-24(%rsi), %xmm2
	palignr	$8, %xmm2, %xmm1
	movaps	%xmm1, -16(%rdi)
	movaps	-40(%rsi), %xmm3
	palignr	$8, %xmm3, %xmm2
	movaps	%xmm2, -32(%rdi)
	movaps	-56(%rsi), %xmm4
	palignr	$8, %xmm4, %xmm3
	movaps	%xmm3, -48(%rdi)
	movaps	-72(%rsi), %xmm5
	palignr	$8, %xmm5, %xmm4
	movaps	%xmm4, -64(%rdi)
	movaps	-88(%rsi), %xmm6
	palignr	$8, %xmm6, %xmm5
	movaps	%xmm5, -80(%rdi)
	movaps	-104(%rsi), %xmm7
	palignr	$8, %xmm7, %xmm6
	movaps	%xmm6, -96(%rdi)
	movaps	-120(%rsi), %xmm8
	palignr	$8, %xmm8, %xmm7
	movaps	%xmm7, -112(%rdi)
	movaps	-136(%rsi), %xmm9
	palignr	$8, %xmm9, %xmm8
	movaps	%xmm8, -128(%rdi)

	subq	$128, %rdx
	leaq	-128(%rdi), %rdi
	leaq	-128(%rsi), %rsi
	jae	L(shl_8_bwd)
	/* This variant carries an extra label on its exit sequence; no
	   reference to it is visible in this part of the file.  */
L(shl_8_end_bwd):
	/* Store the 16 bytes held in %xmm0 at %r8 (both set up before the
	   dispatch), then step the pointers back by the remaining count.  */
	movdqu	%xmm0, (%r8)
	addq	$128, %rdx
	subq	%rdx, %rdi
	subq	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
905
/* Forward loop, 128 bytes per pass, for a source 9 bytes above a 16-byte
   boundary (the aligned loads at -9(%rsi) rely on that; presumably
   selected through L(shl_table_fwd)).  Nine aligned chunks are loaded,
   then each neighbouring pair is merged with palignr $9 into one aligned
   store.  CF from the leading subq is still live at the jae: only moves,
   palignr and lea sit in between.  */
	.p2align 4
L(shl_9):
	subq	$128, %rdx
	movaps	-9(%rsi), %xmm1
	movaps	7(%rsi), %xmm2
	movaps	23(%rsi), %xmm3
	movaps	39(%rsi), %xmm4
	movaps	55(%rsi), %xmm5
	movaps	71(%rsi), %xmm6
	movaps	87(%rsi), %xmm7
	movaps	103(%rsi), %xmm8
	movaps	119(%rsi), %xmm9
	leaq	128(%rsi), %rsi
	/* Highest chunk first, so each source register is still intact.  */
	palignr	$9, %xmm8, %xmm9
	movaps	%xmm9, 112(%rdi)
	palignr	$9, %xmm7, %xmm8
	movaps	%xmm8, 96(%rdi)
	palignr	$9, %xmm6, %xmm7
	movaps	%xmm7, 80(%rdi)
	palignr	$9, %xmm5, %xmm6
	movaps	%xmm6, 64(%rdi)
	palignr	$9, %xmm4, %xmm5
	movaps	%xmm5, 48(%rdi)
	palignr	$9, %xmm3, %xmm4
	movaps	%xmm4, 32(%rdi)
	palignr	$9, %xmm2, %xmm3
	movaps	%xmm3, 16(%rdi)
	palignr	$9, %xmm1, %xmm2
	movaps	%xmm2, (%rdi)
	leaq	128(%rdi), %rdi
	jae	L(shl_9)
	/* Store the 16 bytes held in %xmm0 at %r8 (both set up before the
	   dispatch), undo the bias on %rdx and finish via the table.  */
	movdqu	%xmm0, (%r8)
	addq	$128, %rdx
	addq	%rdx, %rdi
	addq	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
942
/* Backward loop, 128 bytes per pass, for a source end 9 bytes above a
   16-byte boundary (required by the aligned loads at -9(%rsi);
   presumably selected through L(shl_table_bwd)).  Each step loads the
   next lower aligned chunk, merges it with the previous one using
   palignr $9 and stores 16 aligned bytes below %rdi.  The jae consumes
   CF from the subq; the two leaq in between do not write flags.  */
	.p2align 4
L(shl_9_bwd):
	movaps	-9(%rsi), %xmm1
	movaps	-25(%rsi), %xmm2
	palignr	$9, %xmm2, %xmm1
	movaps	%xmm1, -16(%rdi)
	movaps	-41(%rsi), %xmm3
	palignr	$9, %xmm3, %xmm2
	movaps	%xmm2, -32(%rdi)
	movaps	-57(%rsi), %xmm4
	palignr	$9, %xmm4, %xmm3
	movaps	%xmm3, -48(%rdi)
	movaps	-73(%rsi), %xmm5
	palignr	$9, %xmm5, %xmm4
	movaps	%xmm4, -64(%rdi)
	movaps	-89(%rsi), %xmm6
	palignr	$9, %xmm6, %xmm5
	movaps	%xmm5, -80(%rdi)
	movaps	-105(%rsi), %xmm7
	palignr	$9, %xmm7, %xmm6
	movaps	%xmm6, -96(%rdi)
	movaps	-121(%rsi), %xmm8
	palignr	$9, %xmm8, %xmm7
	movaps	%xmm7, -112(%rdi)
	movaps	-137(%rsi), %xmm9
	palignr	$9, %xmm9, %xmm8
	movaps	%xmm8, -128(%rdi)

	subq	$128, %rdx
	leaq	-128(%rdi), %rdi
	leaq	-128(%rsi), %rsi
	jae	L(shl_9_bwd)
	/* Store the 16 bytes held in %xmm0 at %r8 (both set up before the
	   dispatch), then step the pointers back by the remaining count.  */
	movdqu	%xmm0, (%r8)
	addq	$128, %rdx
	subq	%rdx, %rdi
	subq	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
988
/* Forward loop, 128 bytes per pass, for a source 10 bytes above a 16-byte
   boundary (the aligned loads at -10(%rsi) rely on that; presumably
   selected through L(shl_table_fwd)).  Nine aligned chunks are loaded,
   then each neighbouring pair is merged with palignr $10 into one aligned
   store.  CF from the leading subq is still live at the jae: only moves,
   palignr and lea sit in between.  */
	.p2align 4
L(shl_10):
	subq	$128, %rdx
	movaps	-10(%rsi), %xmm1
	movaps	6(%rsi), %xmm2
	movaps	22(%rsi), %xmm3
	movaps	38(%rsi), %xmm4
	movaps	54(%rsi), %xmm5
	movaps	70(%rsi), %xmm6
	movaps	86(%rsi), %xmm7
	movaps	102(%rsi), %xmm8
	movaps	118(%rsi), %xmm9
	leaq	128(%rsi), %rsi
	/* Highest chunk first, so each source register is still intact.  */
	palignr	$10, %xmm8, %xmm9
	movaps	%xmm9, 112(%rdi)
	palignr	$10, %xmm7, %xmm8
	movaps	%xmm8, 96(%rdi)
	palignr	$10, %xmm6, %xmm7
	movaps	%xmm7, 80(%rdi)
	palignr	$10, %xmm5, %xmm6
	movaps	%xmm6, 64(%rdi)
	palignr	$10, %xmm4, %xmm5
	movaps	%xmm5, 48(%rdi)
	palignr	$10, %xmm3, %xmm4
	movaps	%xmm4, 32(%rdi)
	palignr	$10, %xmm2, %xmm3
	movaps	%xmm3, 16(%rdi)
	palignr	$10, %xmm1, %xmm2
	movaps	%xmm2, (%rdi)
	leaq	128(%rdi), %rdi
	jae	L(shl_10)
	/* Store the 16 bytes held in %xmm0 at %r8 (both set up before the
	   dispatch), undo the bias on %rdx and finish via the table.  */
	movdqu	%xmm0, (%r8)
	addq	$128, %rdx
	addq	%rdx, %rdi
	addq	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1025
/* Backward loop, 128 bytes per pass, for a source end 10 bytes above a
   16-byte boundary (required by the aligned loads at -10(%rsi);
   presumably selected through L(shl_table_bwd)).  Each step loads the
   next lower aligned chunk, merges it with the previous one using
   palignr $10 and stores 16 aligned bytes below %rdi.  The jae consumes
   CF from the subq; the two leaq in between do not write flags.  */
	.p2align 4
L(shl_10_bwd):
	movaps	-10(%rsi), %xmm1
	movaps	-26(%rsi), %xmm2
	palignr	$10, %xmm2, %xmm1
	movaps	%xmm1, -16(%rdi)
	movaps	-42(%rsi), %xmm3
	palignr	$10, %xmm3, %xmm2
	movaps	%xmm2, -32(%rdi)
	movaps	-58(%rsi), %xmm4
	palignr	$10, %xmm4, %xmm3
	movaps	%xmm3, -48(%rdi)
	movaps	-74(%rsi), %xmm5
	palignr	$10, %xmm5, %xmm4
	movaps	%xmm4, -64(%rdi)
	movaps	-90(%rsi), %xmm6
	palignr	$10, %xmm6, %xmm5
	movaps	%xmm5, -80(%rdi)
	movaps	-106(%rsi), %xmm7
	palignr	$10, %xmm7, %xmm6
	movaps	%xmm6, -96(%rdi)
	movaps	-122(%rsi), %xmm8
	palignr	$10, %xmm8, %xmm7
	movaps	%xmm7, -112(%rdi)
	movaps	-138(%rsi), %xmm9
	palignr	$10, %xmm9, %xmm8
	movaps	%xmm8, -128(%rdi)

	subq	$128, %rdx
	leaq	-128(%rdi), %rdi
	leaq	-128(%rsi), %rsi
	jae	L(shl_10_bwd)
	/* Store the 16 bytes held in %xmm0 at %r8 (both set up before the
	   dispatch), then step the pointers back by the remaining count.  */
	movdqu	%xmm0, (%r8)
	addq	$128, %rdx
	subq	%rdx, %rdi
	subq	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1071
/* Forward loop, 128 bytes per pass, for a source 11 bytes above a 16-byte
   boundary (the aligned loads at -11(%rsi) rely on that; presumably
   selected through L(shl_table_fwd)).  Nine aligned chunks are loaded,
   then each neighbouring pair is merged with palignr $11 into one aligned
   store.  CF from the leading subq is still live at the jae: only moves,
   palignr and lea sit in between.  */
	.p2align 4
L(shl_11):
	subq	$128, %rdx
	movaps	-11(%rsi), %xmm1
	movaps	5(%rsi), %xmm2
	movaps	21(%rsi), %xmm3
	movaps	37(%rsi), %xmm4
	movaps	53(%rsi), %xmm5
	movaps	69(%rsi), %xmm6
	movaps	85(%rsi), %xmm7
	movaps	101(%rsi), %xmm8
	movaps	117(%rsi), %xmm9
	leaq	128(%rsi), %rsi
	/* Highest chunk first, so each source register is still intact.  */
	palignr	$11, %xmm8, %xmm9
	movaps	%xmm9, 112(%rdi)
	palignr	$11, %xmm7, %xmm8
	movaps	%xmm8, 96(%rdi)
	palignr	$11, %xmm6, %xmm7
	movaps	%xmm7, 80(%rdi)
	palignr	$11, %xmm5, %xmm6
	movaps	%xmm6, 64(%rdi)
	palignr	$11, %xmm4, %xmm5
	movaps	%xmm5, 48(%rdi)
	palignr	$11, %xmm3, %xmm4
	movaps	%xmm4, 32(%rdi)
	palignr	$11, %xmm2, %xmm3
	movaps	%xmm3, 16(%rdi)
	palignr	$11, %xmm1, %xmm2
	movaps	%xmm2, (%rdi)
	leaq	128(%rdi), %rdi
	jae	L(shl_11)
	/* Store the 16 bytes held in %xmm0 at %r8 (both set up before the
	   dispatch), undo the bias on %rdx and finish via the table.  */
	movdqu	%xmm0, (%r8)
	addq	$128, %rdx
	addq	%rdx, %rdi
	addq	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1108
/* Backward loop, 128 bytes per pass, for a source end 11 bytes above a
   16-byte boundary (required by the aligned loads at -11(%rsi);
   presumably selected through L(shl_table_bwd)).  Each step loads the
   next lower aligned chunk, merges it with the previous one using
   palignr $11 and stores 16 aligned bytes below %rdi.  The jae consumes
   CF from the subq; the two leaq in between do not write flags.  */
	.p2align 4
L(shl_11_bwd):
	movaps	-11(%rsi), %xmm1
	movaps	-27(%rsi), %xmm2
	palignr	$11, %xmm2, %xmm1
	movaps	%xmm1, -16(%rdi)
	movaps	-43(%rsi), %xmm3
	palignr	$11, %xmm3, %xmm2
	movaps	%xmm2, -32(%rdi)
	movaps	-59(%rsi), %xmm4
	palignr	$11, %xmm4, %xmm3
	movaps	%xmm3, -48(%rdi)
	movaps	-75(%rsi), %xmm5
	palignr	$11, %xmm5, %xmm4
	movaps	%xmm4, -64(%rdi)
	movaps	-91(%rsi), %xmm6
	palignr	$11, %xmm6, %xmm5
	movaps	%xmm5, -80(%rdi)
	movaps	-107(%rsi), %xmm7
	palignr	$11, %xmm7, %xmm6
	movaps	%xmm6, -96(%rdi)
	movaps	-123(%rsi), %xmm8
	palignr	$11, %xmm8, %xmm7
	movaps	%xmm7, -112(%rdi)
	movaps	-139(%rsi), %xmm9
	palignr	$11, %xmm9, %xmm8
	movaps	%xmm8, -128(%rdi)

	subq	$128, %rdx
	leaq	-128(%rdi), %rdi
	leaq	-128(%rsi), %rsi
	jae	L(shl_11_bwd)
	/* Store the 16 bytes held in %xmm0 at %r8 (both set up before the
	   dispatch), then step the pointers back by the remaining count.  */
	movdqu	%xmm0, (%r8)
	addq	$128, %rdx
	subq	%rdx, %rdi
	subq	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1154
/* Forward loop, 128 bytes per pass, for a source 12 bytes above a 16-byte
   boundary (the aligned loads at -12(%rsi) rely on that; presumably
   selected through L(shl_table_fwd)).  Nine aligned chunks are loaded,
   then each neighbouring pair is merged with palignr $12 into one aligned
   store.  CF from the leading subq is still live at the jae: only moves,
   palignr and lea sit in between.  */
	.p2align 4
L(shl_12):
	subq	$128, %rdx
	/* The first load is spelled movdqa here; like movaps it is an
	   aligned 16-byte load.  */
	movdqa	-12(%rsi), %xmm1
	movaps	4(%rsi), %xmm2
	movaps	20(%rsi), %xmm3
	movaps	36(%rsi), %xmm4
	movaps	52(%rsi), %xmm5
	movaps	68(%rsi), %xmm6
	movaps	84(%rsi), %xmm7
	movaps	100(%rsi), %xmm8
	movaps	116(%rsi), %xmm9
	leaq	128(%rsi), %rsi
	/* Highest chunk first, so each source register is still intact.  */
	palignr	$12, %xmm8, %xmm9
	movaps	%xmm9, 112(%rdi)
	palignr	$12, %xmm7, %xmm8
	movaps	%xmm8, 96(%rdi)
	palignr	$12, %xmm6, %xmm7
	movaps	%xmm7, 80(%rdi)
	palignr	$12, %xmm5, %xmm6
	movaps	%xmm6, 64(%rdi)
	palignr	$12, %xmm4, %xmm5
	movaps	%xmm5, 48(%rdi)
	palignr	$12, %xmm3, %xmm4
	movaps	%xmm4, 32(%rdi)
	palignr	$12, %xmm2, %xmm3
	movaps	%xmm3, 16(%rdi)
	palignr	$12, %xmm1, %xmm2
	movaps	%xmm2, (%rdi)

	leaq	128(%rdi), %rdi
	jae	L(shl_12)
	/* Store the 16 bytes held in %xmm0 at %r8 (both set up before the
	   dispatch), undo the bias on %rdx and finish via the table.  */
	movdqu	%xmm0, (%r8)
	addq	$128, %rdx
	addq	%rdx, %rdi
	addq	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1192
/* Backward loop, 128 bytes per pass, for a source end 12 bytes above a
   16-byte boundary (required by the aligned loads at -12(%rsi);
   presumably selected through L(shl_table_bwd)).  Each step loads the
   next lower aligned chunk, merges it with the previous one using
   palignr $12 and stores 16 aligned bytes below %rdi.  The jae consumes
   CF from the subq; the two leaq in between do not write flags.  */
	.p2align 4
L(shl_12_bwd):
	movaps	-12(%rsi), %xmm1
	movaps	-28(%rsi), %xmm2
	palignr	$12, %xmm2, %xmm1
	movaps	%xmm1, -16(%rdi)
	movaps	-44(%rsi), %xmm3
	palignr	$12, %xmm3, %xmm2
	movaps	%xmm2, -32(%rdi)
	movaps	-60(%rsi), %xmm4
	palignr	$12, %xmm4, %xmm3
	movaps	%xmm3, -48(%rdi)
	movaps	-76(%rsi), %xmm5
	palignr	$12, %xmm5, %xmm4
	movaps	%xmm4, -64(%rdi)
	movaps	-92(%rsi), %xmm6
	palignr	$12, %xmm6, %xmm5
	movaps	%xmm5, -80(%rdi)
	movaps	-108(%rsi), %xmm7
	palignr	$12, %xmm7, %xmm6
	movaps	%xmm6, -96(%rdi)
	movaps	-124(%rsi), %xmm8
	palignr	$12, %xmm8, %xmm7
	movaps	%xmm7, -112(%rdi)
	movaps	-140(%rsi), %xmm9
	palignr	$12, %xmm9, %xmm8
	movaps	%xmm8, -128(%rdi)

	subq	$128, %rdx
	leaq	-128(%rdi), %rdi
	leaq	-128(%rsi), %rsi
	jae	L(shl_12_bwd)
	/* Store the 16 bytes held in %xmm0 at %r8 (both set up before the
	   dispatch), then step the pointers back by the remaining count.  */
	movdqu	%xmm0, (%r8)
	addq	$128, %rdx
	subq	%rdx, %rdi
	subq	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1238
/* Forward loop, 128 bytes per pass, for a source 13 bytes above a 16-byte
   boundary (the aligned loads at -13(%rsi) rely on that; presumably
   selected through L(shl_table_fwd)).  Nine aligned chunks are loaded,
   then each neighbouring pair is merged with palignr $13 into one aligned
   store.  CF from the leading subq is still live at the jae: only moves,
   palignr and lea sit in between.  */
	.p2align 4
L(shl_13):
	subq	$128, %rdx
	movaps	-13(%rsi), %xmm1
	movaps	3(%rsi), %xmm2
	movaps	19(%rsi), %xmm3
	movaps	35(%rsi), %xmm4
	movaps	51(%rsi), %xmm5
	movaps	67(%rsi), %xmm6
	movaps	83(%rsi), %xmm7
	movaps	99(%rsi), %xmm8
	movaps	115(%rsi), %xmm9
	leaq	128(%rsi), %rsi
	/* Highest chunk first, so each source register is still intact.  */
	palignr	$13, %xmm8, %xmm9
	movaps	%xmm9, 112(%rdi)
	palignr	$13, %xmm7, %xmm8
	movaps	%xmm8, 96(%rdi)
	palignr	$13, %xmm6, %xmm7
	movaps	%xmm7, 80(%rdi)
	palignr	$13, %xmm5, %xmm6
	movaps	%xmm6, 64(%rdi)
	palignr	$13, %xmm4, %xmm5
	movaps	%xmm5, 48(%rdi)
	palignr	$13, %xmm3, %xmm4
	movaps	%xmm4, 32(%rdi)
	palignr	$13, %xmm2, %xmm3
	movaps	%xmm3, 16(%rdi)
	palignr	$13, %xmm1, %xmm2
	movaps	%xmm2, (%rdi)
	leaq	128(%rdi), %rdi
	jae	L(shl_13)
	/* Store the 16 bytes held in %xmm0 at %r8 (both set up before the
	   dispatch), undo the bias on %rdx and finish via the table.  */
	movdqu	%xmm0, (%r8)
	addq	$128, %rdx
	addq	%rdx, %rdi
	addq	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1275
/* Backward loop, 128 bytes per pass, for a source end 13 bytes above a
   16-byte boundary (required by the aligned loads at -13(%rsi);
   presumably selected through L(shl_table_bwd)).  Each step loads the
   next lower aligned chunk, merges it with the previous one using
   palignr $13 and stores 16 aligned bytes below %rdi.  The jae consumes
   CF from the subq; the two leaq in between do not write flags.  */
	.p2align 4
L(shl_13_bwd):
	movaps	-13(%rsi), %xmm1
	movaps	-29(%rsi), %xmm2
	palignr	$13, %xmm2, %xmm1
	movaps	%xmm1, -16(%rdi)
	movaps	-45(%rsi), %xmm3
	palignr	$13, %xmm3, %xmm2
	movaps	%xmm2, -32(%rdi)
	movaps	-61(%rsi), %xmm4
	palignr	$13, %xmm4, %xmm3
	movaps	%xmm3, -48(%rdi)
	movaps	-77(%rsi), %xmm5
	palignr	$13, %xmm5, %xmm4
	movaps	%xmm4, -64(%rdi)
	movaps	-93(%rsi), %xmm6
	palignr	$13, %xmm6, %xmm5
	movaps	%xmm5, -80(%rdi)
	movaps	-109(%rsi), %xmm7
	palignr	$13, %xmm7, %xmm6
	movaps	%xmm6, -96(%rdi)
	movaps	-125(%rsi), %xmm8
	palignr	$13, %xmm8, %xmm7
	movaps	%xmm7, -112(%rdi)
	movaps	-141(%rsi), %xmm9
	palignr	$13, %xmm9, %xmm8
	movaps	%xmm8, -128(%rdi)

	subq	$128, %rdx
	leaq	-128(%rdi), %rdi
	leaq	-128(%rsi), %rsi
	jae	L(shl_13_bwd)
	/* Store the 16 bytes held in %xmm0 at %r8 (both set up before the
	   dispatch), then step the pointers back by the remaining count.  */
	movdqu	%xmm0, (%r8)
	addq	$128, %rdx
	subq	%rdx, %rdi
	subq	%rdx, %rsi
	BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1321
1322 .p2align 4
/* Forward 128-byte copy loop for a source that sits 14 bytes past a
   16-byte boundary: every load is an aligned movaps at rsi-14+16*k and
   every store an aligned movaps at rdi.  Nine source vectors are read
   per pass and each adjacent pair is merged with palignr $14 into one
   destination vector.  rdx is the running byte counter.  */
1323L(shl_14):
 /* Sets CF for the jae at the bottom; the movaps, lea and palignr in
    between do not modify the flags.  */
1324 sub $0x80, %rdx
1325 movaps -0x0e(%rsi), %xmm1
1326 movaps 0x02(%rsi), %xmm2
1327 movaps 0x12(%rsi), %xmm3
1328 movaps 0x22(%rsi), %xmm4
1329 movaps 0x32(%rsi), %xmm5
1330 movaps 0x42(%rsi), %xmm6
1331 movaps 0x52(%rsi), %xmm7
1332 movaps 0x62(%rsi), %xmm8
1333 movaps 0x72(%rsi), %xmm9
1334 lea 0x80(%rsi), %rsi
 /* Merge from the highest pair down: palignr overwrites its second
    operand, so each lower vector must still be intact when it is read
    as the first operand of the next merge.  */
1335 palignr $14, %xmm8, %xmm9
1336 movaps %xmm9, 0x70(%rdi)
1337 palignr $14, %xmm7, %xmm8
1338 movaps %xmm8, 0x60(%rdi)
1339 palignr $14, %xmm6, %xmm7
1340 movaps %xmm7, 0x50(%rdi)
1341 palignr $14, %xmm5, %xmm6
1342 movaps %xmm6, 0x40(%rdi)
1343 palignr $14, %xmm4, %xmm5
1344 movaps %xmm5, 0x30(%rdi)
1345 palignr $14, %xmm3, %xmm4
1346 movaps %xmm4, 0x20(%rdi)
1347 palignr $14, %xmm2, %xmm3
1348 movaps %xmm3, 0x10(%rdi)
1349 palignr $14, %xmm1, %xmm2
1350 movaps %xmm2, (%rdi)
1351 lea 0x80(%rdi), %rdi
1352 jae L(shl_14)
 /* Loop finished: store xmm0 at r8 (both are prepared before this
    loop, outside this view), add the 0x80 back to rdx, advance both
    pointers by rdx and dispatch on rdx through the forward tail
    table.  */
1353 movdqu %xmm0, (%r8)
1354 add $0x80, %rdx
1355 add %rdx, %rdi
1356 add %rdx, %rsi
1357 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1358
1359 .p2align 4
/* Backward 128-byte copy loop for a source that sits 14 bytes past a
   16-byte boundary.  Each pass reads nine aligned vectors below rsi
   (movaps at rsi-14-16*k), merges neighbouring pairs with palignr $14
   and writes the 0x80 bytes immediately below rdi with aligned
   stores.  rdx is the running byte counter.  */
1360L(shl_14_bwd):
1361 movaps -0x0e(%rsi), %xmm1
1362
1363 movaps -0x1e(%rsi), %xmm2
1364 palignr $14, %xmm2, %xmm1
1365 movaps %xmm1, -0x10(%rdi)
1366
1367 movaps -0x2e(%rsi), %xmm3
1368 palignr $14, %xmm3, %xmm2
1369 movaps %xmm2, -0x20(%rdi)
1370
1371 movaps -0x3e(%rsi), %xmm4
1372 palignr $14, %xmm4, %xmm3
1373 movaps %xmm3, -0x30(%rdi)
1374
1375 movaps -0x4e(%rsi), %xmm5
1376 palignr $14, %xmm5, %xmm4
1377 movaps %xmm4, -0x40(%rdi)
1378
1379 movaps -0x5e(%rsi), %xmm6
1380 palignr $14, %xmm6, %xmm5
1381 movaps %xmm5, -0x50(%rdi)
1382
1383 movaps -0x6e(%rsi), %xmm7
1384 palignr $14, %xmm7, %xmm6
1385 movaps %xmm6, -0x60(%rdi)
1386
1387 movaps -0x7e(%rsi), %xmm8
1388 palignr $14, %xmm8, %xmm7
1389 movaps %xmm7, -0x70(%rdi)
1390
1391 movaps -0x8e(%rsi), %xmm9
1392 palignr $14, %xmm9, %xmm8
1393 movaps %xmm8, -0x80(%rdi)
1394
 /* The two lea instructions leave CF from this sub untouched for the
    jae below.  */
1395 sub $0x80, %rdx
1396 lea -0x80(%rdi), %rdi
1397 lea -0x80(%rsi), %rsi
1398 jae L(shl_14_bwd)
 /* Loop finished: store xmm0 at r8 (both are prepared before this
    loop, outside this view), add the 0x80 back to rdx, move both
    pointers down by rdx and dispatch on rdx through the backward tail
    table.  */
1399 movdqu %xmm0, (%r8)
1400 add $0x80, %rdx
1401 sub %rdx, %rdi
1402 sub %rdx, %rsi
1403 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1404
1405 .p2align 4
/* Forward 128-byte copy loop for a source that sits 15 bytes past a
   16-byte boundary: every load is an aligned movaps at rsi-15+16*k and
   every store an aligned movaps at rdi.  Nine source vectors are read
   per pass and each adjacent pair is merged with palignr $15 into one
   destination vector.  rdx is the running byte counter.  */
1406L(shl_15):
 /* Sets CF for the jae at the bottom; the movaps, lea and palignr in
    between do not modify the flags.  */
1407 sub $0x80, %rdx
1408 movaps -0x0f(%rsi), %xmm1
1409 movaps 0x01(%rsi), %xmm2
1410 movaps 0x11(%rsi), %xmm3
1411 movaps 0x21(%rsi), %xmm4
1412 movaps 0x31(%rsi), %xmm5
1413 movaps 0x41(%rsi), %xmm6
1414 movaps 0x51(%rsi), %xmm7
1415 movaps 0x61(%rsi), %xmm8
1416 movaps 0x71(%rsi), %xmm9
1417 lea 0x80(%rsi), %rsi
 /* Merge from the highest pair down: palignr overwrites its second
    operand, so each lower vector must still be intact when it is read
    as the first operand of the next merge.  */
1418 palignr $15, %xmm8, %xmm9
1419 movaps %xmm9, 0x70(%rdi)
1420 palignr $15, %xmm7, %xmm8
1421 movaps %xmm8, 0x60(%rdi)
1422 palignr $15, %xmm6, %xmm7
1423 movaps %xmm7, 0x50(%rdi)
1424 palignr $15, %xmm5, %xmm6
1425 movaps %xmm6, 0x40(%rdi)
1426 palignr $15, %xmm4, %xmm5
1427 movaps %xmm5, 0x30(%rdi)
1428 palignr $15, %xmm3, %xmm4
1429 movaps %xmm4, 0x20(%rdi)
1430 palignr $15, %xmm2, %xmm3
1431 movaps %xmm3, 0x10(%rdi)
1432 palignr $15, %xmm1, %xmm2
1433 movaps %xmm2, (%rdi)
1434 lea 0x80(%rdi), %rdi
1435 jae L(shl_15)
 /* Loop finished: store xmm0 at r8 (both are prepared before this
    loop, outside this view), add the 0x80 back to rdx, advance both
    pointers by rdx and dispatch on rdx through the forward tail
    table.  */
1436 movdqu %xmm0, (%r8)
1437 add $0x80, %rdx
1438 add %rdx, %rdi
1439 add %rdx, %rsi
1440 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1441
1442 .p2align 4
/* Backward 128-byte copy loop for a source that sits 15 bytes past a
   16-byte boundary.  Each pass reads nine aligned vectors below rsi
   (movaps at rsi-15-16*k), merges neighbouring pairs with palignr $15
   and writes the 0x80 bytes immediately below rdi with aligned
   stores.  rdx is the running byte counter.  */
1443L(shl_15_bwd):
1444 movaps -0x0f(%rsi), %xmm1
1445
1446 movaps -0x1f(%rsi), %xmm2
1447 palignr $15, %xmm2, %xmm1
1448 movaps %xmm1, -0x10(%rdi)
1449
1450 movaps -0x2f(%rsi), %xmm3
1451 palignr $15, %xmm3, %xmm2
1452 movaps %xmm2, -0x20(%rdi)
1453
1454 movaps -0x3f(%rsi), %xmm4
1455 palignr $15, %xmm4, %xmm3
1456 movaps %xmm3, -0x30(%rdi)
1457
1458 movaps -0x4f(%rsi), %xmm5
1459 palignr $15, %xmm5, %xmm4
1460 movaps %xmm4, -0x40(%rdi)
1461
1462 movaps -0x5f(%rsi), %xmm6
1463 palignr $15, %xmm6, %xmm5
1464 movaps %xmm5, -0x50(%rdi)
1465
1466 movaps -0x6f(%rsi), %xmm7
1467 palignr $15, %xmm7, %xmm6
1468 movaps %xmm6, -0x60(%rdi)
1469
1470 movaps -0x7f(%rsi), %xmm8
1471 palignr $15, %xmm8, %xmm7
1472 movaps %xmm7, -0x70(%rdi)
1473
1474 movaps -0x8f(%rsi), %xmm9
1475 palignr $15, %xmm9, %xmm8
1476 movaps %xmm8, -0x80(%rdi)
1477
 /* The two lea instructions leave CF from this sub untouched for the
    jae below.  */
1478 sub $0x80, %rdx
1479 lea -0x80(%rdi), %rdi
1480 lea -0x80(%rsi), %rsi
1481 jae L(shl_15_bwd)
 /* Loop finished: store xmm0 at r8 (both are prepared before this
    loop, outside this view), add the 0x80 back to rdx, move both
    pointers down by rdx and dispatch on rdx through the backward tail
    table.  */
1482 movdqu %xmm0, (%r8)
1483 add $0x80, %rdx
1484 sub %rdx, %rdi
1485 sub %rdx, %rsi
1486 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1487
1488 .p2align 4
/* Forward bulk copy.  rsi/rdi are the current source/destination and
   rdx the byte count; xmm0 and r8 hold a pending 16-byte vector and
   the address it is stored to (both prepared before this label,
   outside this view).  rdi must be 16-byte aligned, since all stores
   to it are movdqa or movntdq.  The work is split between a
   non-temporal loop and a regular cached loop, sized against rcx
   (half of the shared cache size), and the final bytes are handed to
   the forward tail jump table.  */
1489L(gobble_mem_fwd):
 /* Copy the first 16 bytes and store the pending vector xmm0 at r8.  */
1490 movdqu (%rsi), %xmm1
1491 movdqu %xmm0, (%r8)
1492 movdqa %xmm1, (%rdi)
1493 sub $16, %rdx
1494 add $16, %rsi
1495 add $16, %rdi
1496
 /* rcx = SHARED_CACHE_SIZE_HALF when that is defined at build time,
    otherwise the run-time value of __x86_shared_cache_size_half.  */
1497#ifdef SHARED_CACHE_SIZE_HALF
1498 mov $SHARED_CACHE_SIZE_HALF, %RCX_LP
1499#else
1500 mov __x86_shared_cache_size_half(%rip), %RCX_LP
1501#endif
1502#ifdef USE_AS_MEMMOVE
 /* memmove build only: r9 = rsi - rdi.  If that distance is at least
    rdx, continue as for memcpy.  Otherwise, if it is no larger than
    rcx, go straight to the cached loop with the whole count and skip
    the non-temporal loop.  */
1503 mov %rsi, %r9
1504 sub %rdi, %r9
1505 cmp %rdx, %r9
1506 jae L(memmove_is_memcpy_fwd)
1507 cmp %rcx, %r9
1508 jbe L(ll_cache_copy_fwd_start)
1509L(memmove_is_memcpy_fwd):
1510#endif
 /* rcx = min (rdx, rcx) is the share kept for the cached loop; rdx
    becomes the share for the non-temporal loop.  */
1511 cmp %rcx, %rdx
1512 ja L(bigger_in_fwd)
1513 mov %rdx, %rcx
1514L(bigger_in_fwd):
1515 sub %rcx, %rdx
 /* A non-temporal share of at most 0x1000 bytes is not worth it: copy
    everything with the cached loop.  */
1516 cmp $0x1000, %rdx
1517 jbe L(ll_cache_copy_fwd)
1518
 /* If the non-temporal share exceeds 8 * rcx, give it the whole count
    and set rcx to 0; otherwise run both loops in turn.  */
1519 mov %rcx, %r9
1520 shl $3, %r9
1521 cmp %r9, %rdx
1522 jbe L(2steps_copy_fwd)
1523 add %rcx, %rdx
1524 xor %rcx, %rcx
1525L(2steps_copy_fwd):
 /* Bias rdx by one extra 0x80: together with the sub at the loop head
    the loop then exits with rdx = bytes left - 0x80.  */
1526 sub $0x80, %rdx
1527L(gobble_mem_fwd_loop):
 /* CF from this sub drives the jae at the bottom; nothing in between
    modifies the flags.  */
1528 sub $0x80, %rdx
1529 prefetcht0 0x200(%rsi)
1530 prefetcht0 0x300(%rsi)
1531 movdqu (%rsi), %xmm0
1532 movdqu 0x10(%rsi), %xmm1
1533 movdqu 0x20(%rsi), %xmm2
1534 movdqu 0x30(%rsi), %xmm3
1535 movdqu 0x40(%rsi), %xmm4
1536 movdqu 0x50(%rsi), %xmm5
1537 movdqu 0x60(%rsi), %xmm6
1538 movdqu 0x70(%rsi), %xmm7
 /* All eight loads are fenced before the non-temporal stores.  */
1539 lfence
1540 movntdq %xmm0, (%rdi)
1541 movntdq %xmm1, 0x10(%rdi)
1542 movntdq %xmm2, 0x20(%rdi)
1543 movntdq %xmm3, 0x30(%rdi)
1544 movntdq %xmm4, 0x40(%rdi)
1545 movntdq %xmm5, 0x50(%rdi)
1546 movntdq %xmm6, 0x60(%rdi)
1547 movntdq %xmm7, 0x70(%rdi)
1548 lea 0x80(%rsi), %rsi
1549 lea 0x80(%rdi), %rdi
1550 jae L(gobble_mem_fwd_loop)
 /* Fence the non-temporal stores before any further regular stores.  */
1551 sfence
 /* With less than 0x80 bytes in the cached share there is nothing for
    the cached loop to do.  */
1552 cmp $0x80, %rcx
1553 jb L(gobble_mem_fwd_end)
 /* Undo the bias, add the cached share, then bias again for the
    cached loop.  */
1554 add $0x80, %rdx
1555L(ll_cache_copy_fwd):
1556 add %rcx, %rdx
1557L(ll_cache_copy_fwd_start):
1558 sub $0x80, %rdx
1559L(gobble_ll_loop_fwd):
 /* Cached loop: prefetch ahead on both source and destination, then
    copy 0x80 bytes with unaligned loads and aligned stores.  */
1560 prefetchnta 0x1c0(%rsi)
1561 prefetchnta 0x280(%rsi)
1562 prefetchnta 0x1c0(%rdi)
1563 prefetchnta 0x280(%rdi)
1564 sub $0x80, %rdx
1565 movdqu (%rsi), %xmm0
1566 movdqu 0x10(%rsi), %xmm1
1567 movdqu 0x20(%rsi), %xmm2
1568 movdqu 0x30(%rsi), %xmm3
1569 movdqu 0x40(%rsi), %xmm4
1570 movdqu 0x50(%rsi), %xmm5
1571 movdqu 0x60(%rsi), %xmm6
1572 movdqu 0x70(%rsi), %xmm7
1573 movdqa %xmm0, (%rdi)
1574 movdqa %xmm1, 0x10(%rdi)
1575 movdqa %xmm2, 0x20(%rdi)
1576 movdqa %xmm3, 0x30(%rdi)
1577 movdqa %xmm4, 0x40(%rdi)
1578 movdqa %xmm5, 0x50(%rdi)
1579 movdqa %xmm6, 0x60(%rdi)
1580 movdqa %xmm7, 0x70(%rdi)
1581 lea 0x80(%rsi), %rsi
1582 lea 0x80(%rdi), %rdi
1583 jae L(gobble_ll_loop_fwd)
1584L(gobble_mem_fwd_end):
 /* Remove the bias so rdx is the leftover count, advance both pointers
    by it and dispatch on rdx through the forward tail table.  */
1585 add $0x80, %rdx
1586 add %rdx, %rsi
1587 add %rdx, %rdi
1588 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_fwd), %rdx, 4)
1589
1590 .p2align 4
/* Backward bulk copy.  rsi/rdi are source/destination and rdx the byte
   count.  The work is split between a non-temporal loop and a regular
   cached loop, sized against rcx (half of the shared cache size); both
   loops use xmm1-xmm8 only, so xmm0 survives until it is stored at
   L(gobble_mem_bwd_end).  The final bytes are handed to the backward
   tail jump table.  */
1591L(gobble_mem_bwd):
 /* Advance both pointers by rdx (to the end of the data, assuming they
    enter pointing at its start).  */
1592 add %rdx, %rsi
1593 add %rdx, %rdi
1594
 /* Keep the 16 source bytes just below rsi in xmm0 and their
    destination address in r8.  Then round rdi down to a 16-byte
    boundary and reduce rsi and rdx by the number of bytes trimmed.  */
1595 movdqu -16(%rsi), %xmm0
1596 lea -16(%rdi), %r8
1597 mov %rdi, %r9
1598 and $-16, %rdi
1599 sub %rdi, %r9
1600 sub %r9, %rsi
1601 sub %r9, %rdx
1602
1603
 /* rcx = SHARED_CACHE_SIZE_HALF when that is defined at build time,
    otherwise the run-time value of __x86_shared_cache_size_half.  */
1604#ifdef SHARED_CACHE_SIZE_HALF
1605 mov $SHARED_CACHE_SIZE_HALF, %RCX_LP
1606#else
1607 mov __x86_shared_cache_size_half(%rip), %RCX_LP
1608#endif
1609#ifdef USE_AS_MEMMOVE
 /* memmove build only: r9 = rdi - rsi.  If that distance is at least
    rdx, continue as for memcpy.  Otherwise, if it is no larger than
    rcx, go straight to the cached loop with the whole count and skip
    the non-temporal loop.  */
1610 mov %rdi, %r9
1611 sub %rsi, %r9
1612 cmp %rdx, %r9
1613 jae L(memmove_is_memcpy_bwd)
1614 cmp %rcx, %r9
1615 jbe L(ll_cache_copy_bwd_start)
1616L(memmove_is_memcpy_bwd):
1617#endif
 /* rcx = min (rdx, rcx) is the share kept for the cached loop; rdx
    becomes the share for the non-temporal loop.  */
1618 cmp %rcx, %rdx
1619 ja L(bigger)
1620 mov %rdx, %rcx
1621L(bigger):
1622 sub %rcx, %rdx
 /* A non-temporal share of at most 0x1000 bytes is not worth it: copy
    everything with the cached loop.  */
1623 cmp $0x1000, %rdx
1624 jbe L(ll_cache_copy)
1625
 /* If the non-temporal share exceeds 8 * rcx, give it the whole count
    and set rcx to 0; otherwise run both loops in turn.  */
1626 mov %rcx, %r9
1627 shl $3, %r9
1628 cmp %r9, %rdx
1629 jbe L(2steps_copy)
1630 add %rcx, %rdx
1631 xor %rcx, %rcx
1632L(2steps_copy):
 /* Bias rdx by one extra 0x80: together with the sub at the loop head
    the loop then exits with rdx = bytes left - 0x80.  */
1633 sub $0x80, %rdx
1634L(gobble_mem_bwd_loop):
 /* CF from this sub drives the jae at the bottom; nothing in between
    modifies the flags.  */
1635 sub $0x80, %rdx
1636 prefetcht0 -0x200(%rsi)
1637 prefetcht0 -0x300(%rsi)
1638 movdqu -0x10(%rsi), %xmm1
1639 movdqu -0x20(%rsi), %xmm2
1640 movdqu -0x30(%rsi), %xmm3
1641 movdqu -0x40(%rsi), %xmm4
1642 movdqu -0x50(%rsi), %xmm5
1643 movdqu -0x60(%rsi), %xmm6
1644 movdqu -0x70(%rsi), %xmm7
1645 movdqu -0x80(%rsi), %xmm8
 /* All eight loads are fenced before the non-temporal stores.  */
1646 lfence
1647 movntdq %xmm1, -0x10(%rdi)
1648 movntdq %xmm2, -0x20(%rdi)
1649 movntdq %xmm3, -0x30(%rdi)
1650 movntdq %xmm4, -0x40(%rdi)
1651 movntdq %xmm5, -0x50(%rdi)
1652 movntdq %xmm6, -0x60(%rdi)
1653 movntdq %xmm7, -0x70(%rdi)
1654 movntdq %xmm8, -0x80(%rdi)
1655 lea -0x80(%rsi), %rsi
1656 lea -0x80(%rdi), %rdi
1657 jae L(gobble_mem_bwd_loop)
 /* Fence the non-temporal stores before any further regular stores.  */
1658 sfence
 /* With less than 0x80 bytes in the cached share there is nothing for
    the cached loop to do.  */
1659 cmp $0x80, %rcx
1660 jb L(gobble_mem_bwd_end)
 /* Undo the bias, add the cached share, then bias again for the
    cached loop.  */
1661 add $0x80, %rdx
1662L(ll_cache_copy):
1663 add %rcx, %rdx
1664L(ll_cache_copy_bwd_start):
1665 sub $0x80, %rdx
1666L(gobble_ll_loop):
 /* Cached loop: prefetch below both source and destination, then copy
    0x80 bytes with unaligned loads and aligned stores.  */
1667 prefetchnta -0x1c0(%rsi)
1668 prefetchnta -0x280(%rsi)
1669 prefetchnta -0x1c0(%rdi)
1670 prefetchnta -0x280(%rdi)
1671 sub $0x80, %rdx
1672 movdqu -0x10(%rsi), %xmm1
1673 movdqu -0x20(%rsi), %xmm2
1674 movdqu -0x30(%rsi), %xmm3
1675 movdqu -0x40(%rsi), %xmm4
1676 movdqu -0x50(%rsi), %xmm5
1677 movdqu -0x60(%rsi), %xmm6
1678 movdqu -0x70(%rsi), %xmm7
1679 movdqu -0x80(%rsi), %xmm8
1680 movdqa %xmm1, -0x10(%rdi)
1681 movdqa %xmm2, -0x20(%rdi)
1682 movdqa %xmm3, -0x30(%rdi)
1683 movdqa %xmm4, -0x40(%rdi)
1684 movdqa %xmm5, -0x50(%rdi)
1685 movdqa %xmm6, -0x60(%rdi)
1686 movdqa %xmm7, -0x70(%rdi)
1687 movdqa %xmm8, -0x80(%rdi)
1688 lea -0x80(%rsi), %rsi
1689 lea -0x80(%rdi), %rdi
1690 jae L(gobble_ll_loop)
1691L(gobble_mem_bwd_end):
 /* Write the 16 bytes saved in xmm0 at entry, remove the bias so rdx
    is the leftover count, move both pointers down by it and dispatch
    on rdx through the backward tail table.  */
1692 movdqu %xmm0, (%r8)
1693 add $0x80, %rdx
1694 sub %rdx, %rsi
1695 sub %rdx, %rdi
1696 BRANCH_TO_JMPTBL_ENTRY (L(table_144_bytes_bwd), %rdx, 4)
1697
1698 .p2align 4
/* Forward tail for 128, 112, ..., 16, 0 remaining bytes.  rsi and rdi
   point just past the end of the data.  Each label copies the 16 bytes
   that begin N bytes before the end and falls through to the next
   label; L(fwd_write_0bytes) just returns.  Presumably these labels
   are the targets of L(table_144_bytes_fwd), which is defined outside
   this view.  */
1699L(fwd_write_128bytes):
1700 lddqu -128(%rsi), %xmm0
1701 movdqu %xmm0, -128(%rdi)
1702L(fwd_write_112bytes):
1703 lddqu -112(%rsi), %xmm0
1704 movdqu %xmm0, -112(%rdi)
1705L(fwd_write_96bytes):
1706 lddqu -96(%rsi), %xmm0
1707 movdqu %xmm0, -96(%rdi)
1708L(fwd_write_80bytes):
1709 lddqu -80(%rsi), %xmm0
1710 movdqu %xmm0, -80(%rdi)
1711L(fwd_write_64bytes):
1712 lddqu -64(%rsi), %xmm0
1713 movdqu %xmm0, -64(%rdi)
1714L(fwd_write_48bytes):
1715 lddqu -48(%rsi), %xmm0
1716 movdqu %xmm0, -48(%rdi)
1717L(fwd_write_32bytes):
1718 lddqu -32(%rsi), %xmm0
1719 movdqu %xmm0, -32(%rdi)
1720L(fwd_write_16bytes):
1721 lddqu -16(%rsi), %xmm0
1722 movdqu %xmm0, -16(%rdi)
1723L(fwd_write_0bytes):
1724 ret
1725
1726
1727 .p2align 4
/* Forward tail for 143, 127, ..., 31 remaining bytes.  rsi and rdi
   point just past the end of the data.  Each label copies the 16 bytes
   that begin N bytes before the end and falls through to the next.
   The last label covers its 31 bytes with two 16-byte pieces that
   overlap by one byte, loading both before storing either.  */
1728L(fwd_write_143bytes):
1729 lddqu -143(%rsi), %xmm0
1730 movdqu %xmm0, -143(%rdi)
1731L(fwd_write_127bytes):
1732 lddqu -127(%rsi), %xmm0
1733 movdqu %xmm0, -127(%rdi)
1734L(fwd_write_111bytes):
1735 lddqu -111(%rsi), %xmm0
1736 movdqu %xmm0, -111(%rdi)
1737L(fwd_write_95bytes):
1738 lddqu -95(%rsi), %xmm0
1739 movdqu %xmm0, -95(%rdi)
1740L(fwd_write_79bytes):
1741 lddqu -79(%rsi), %xmm0
1742 movdqu %xmm0, -79(%rdi)
1743L(fwd_write_63bytes):
1744 lddqu -63(%rsi), %xmm0
1745 movdqu %xmm0, -63(%rdi)
1746L(fwd_write_47bytes):
1747 lddqu -47(%rsi), %xmm0
1748 movdqu %xmm0, -47(%rdi)
1749L(fwd_write_31bytes):
1750 lddqu -31(%rsi), %xmm0
1751 lddqu -16(%rsi), %xmm1
1752 movdqu %xmm0, -31(%rdi)
1753 movdqu %xmm1, -16(%rdi)
1754 ret
1755
1756 .p2align 4
/* Forward tail for 15 remaining bytes (rsi/rdi point just past the
   end): two 8-byte moves at -15 and -8 that overlap by one byte, both
   loaded before either is stored.  rdx and rcx are scratch.  */
1757L(fwd_write_15bytes):
1758 mov -15(%rsi), %rdx
1759 mov -8(%rsi), %rcx
1760 mov %rdx, -15(%rdi)
1761 mov %rcx, -8(%rdi)
1762 ret
1763
1764 .p2align 4
/* Forward tail for 142, 126, ..., 30 remaining bytes.  rsi and rdi
   point just past the end of the data.  Each label copies the 16 bytes
   that begin N bytes before the end and falls through to the next.
   The last label covers its 30 bytes with two overlapping 16-byte
   pieces, loading both before storing either.  */
1765L(fwd_write_142bytes):
1766 lddqu -142(%rsi), %xmm0
1767 movdqu %xmm0, -142(%rdi)
1768L(fwd_write_126bytes):
1769 lddqu -126(%rsi), %xmm0
1770 movdqu %xmm0, -126(%rdi)
1771L(fwd_write_110bytes):
1772 lddqu -110(%rsi), %xmm0
1773 movdqu %xmm0, -110(%rdi)
1774L(fwd_write_94bytes):
1775 lddqu -94(%rsi), %xmm0
1776 movdqu %xmm0, -94(%rdi)
1777L(fwd_write_78bytes):
1778 lddqu -78(%rsi), %xmm0
1779 movdqu %xmm0, -78(%rdi)
1780L(fwd_write_62bytes):
1781 lddqu -62(%rsi), %xmm0
1782 movdqu %xmm0, -62(%rdi)
1783L(fwd_write_46bytes):
1784 lddqu -46(%rsi), %xmm0
1785 movdqu %xmm0, -46(%rdi)
1786L(fwd_write_30bytes):
1787 lddqu -30(%rsi), %xmm0
1788 lddqu -16(%rsi), %xmm1
1789 movdqu %xmm0, -30(%rdi)
1790 movdqu %xmm1, -16(%rdi)
1791 ret
1792
1793 .p2align 4
/* Forward tail for 14 remaining bytes (rsi/rdi point just past the
   end): two overlapping 8-byte moves at -14 and -8, both loaded before
   either is stored.  rdx and rcx are scratch.  */
1794L(fwd_write_14bytes):
1795 mov -14(%rsi), %rdx
1796 mov -8(%rsi), %rcx
1797 mov %rdx, -14(%rdi)
1798 mov %rcx, -8(%rdi)
1799 ret
1800
1801 .p2align 4
/* Forward tail for 141, 125, ..., 29 remaining bytes.  rsi and rdi
   point just past the end of the data.  Each label copies the 16 bytes
   that begin N bytes before the end and falls through to the next.
   The last label covers its 29 bytes with two overlapping 16-byte
   pieces, loading both before storing either.  */
1802L(fwd_write_141bytes):
1803 lddqu -141(%rsi), %xmm0
1804 movdqu %xmm0, -141(%rdi)
1805L(fwd_write_125bytes):
1806 lddqu -125(%rsi), %xmm0
1807 movdqu %xmm0, -125(%rdi)
1808L(fwd_write_109bytes):
1809 lddqu -109(%rsi), %xmm0
1810 movdqu %xmm0, -109(%rdi)
1811L(fwd_write_93bytes):
1812 lddqu -93(%rsi), %xmm0
1813 movdqu %xmm0, -93(%rdi)
1814L(fwd_write_77bytes):
1815 lddqu -77(%rsi), %xmm0
1816 movdqu %xmm0, -77(%rdi)
1817L(fwd_write_61bytes):
1818 lddqu -61(%rsi), %xmm0
1819 movdqu %xmm0, -61(%rdi)
1820L(fwd_write_45bytes):
1821 lddqu -45(%rsi), %xmm0
1822 movdqu %xmm0, -45(%rdi)
1823L(fwd_write_29bytes):
1824 lddqu -29(%rsi), %xmm0
1825 lddqu -16(%rsi), %xmm1
1826 movdqu %xmm0, -29(%rdi)
1827 movdqu %xmm1, -16(%rdi)
1828 ret
1829
1830 .p2align 4
/* Forward tail for 13 remaining bytes (rsi/rdi point just past the
   end): two overlapping 8-byte moves at -13 and -8, both loaded before
   either is stored.  rdx and rcx are scratch.  */
1831L(fwd_write_13bytes):
1832 mov -13(%rsi), %rdx
1833 mov -8(%rsi), %rcx
1834 mov %rdx, -13(%rdi)
1835 mov %rcx, -8(%rdi)
1836 ret
1837
1838 .p2align 4
/* Forward tail for 140, 124, ..., 28 remaining bytes.  rsi and rdi
   point just past the end of the data.  Each label copies the 16 bytes
   that begin N bytes before the end and falls through to the next.
   The last label covers its 28 bytes with two overlapping 16-byte
   pieces, loading both before storing either.  */
1839L(fwd_write_140bytes):
1840 lddqu -140(%rsi), %xmm0
1841 movdqu %xmm0, -140(%rdi)
1842L(fwd_write_124bytes):
1843 lddqu -124(%rsi), %xmm0
1844 movdqu %xmm0, -124(%rdi)
1845L(fwd_write_108bytes):
1846 lddqu -108(%rsi), %xmm0
1847 movdqu %xmm0, -108(%rdi)
1848L(fwd_write_92bytes):
1849 lddqu -92(%rsi), %xmm0
1850 movdqu %xmm0, -92(%rdi)
1851L(fwd_write_76bytes):
1852 lddqu -76(%rsi), %xmm0
1853 movdqu %xmm0, -76(%rdi)
1854L(fwd_write_60bytes):
1855 lddqu -60(%rsi), %xmm0
1856 movdqu %xmm0, -60(%rdi)
1857L(fwd_write_44bytes):
1858 lddqu -44(%rsi), %xmm0
1859 movdqu %xmm0, -44(%rdi)
1860L(fwd_write_28bytes):
1861 lddqu -28(%rsi), %xmm0
1862 lddqu -16(%rsi), %xmm1
1863 movdqu %xmm0, -28(%rdi)
1864 movdqu %xmm1, -16(%rdi)
1865 ret
1866
1867 .p2align 4
/* Forward tail for 12 remaining bytes (rsi/rdi point just past the
   end): an 8-byte move at -12 and a 4-byte move at -4 that together
   cover the 12 bytes, both loaded before either is stored.  rdx and
   ecx are scratch.  */
1868L(fwd_write_12bytes):
1869 mov -12(%rsi), %rdx
1870 mov -4(%rsi), %ecx
1871 mov %rdx, -12(%rdi)
1872 mov %ecx, -4(%rdi)
1873 ret
1874
1875 .p2align 4
/* Forward tail for 139, 123, ..., 27 remaining bytes.  rsi and rdi
   point just past the end of the data.  Each label copies the 16 bytes
   that begin N bytes before the end and falls through to the next.
   The last label covers its 27 bytes with two overlapping 16-byte
   pieces, loading both before storing either.  */
1876L(fwd_write_139bytes):
1877 lddqu -139(%rsi), %xmm0
1878 movdqu %xmm0, -139(%rdi)
1879L(fwd_write_123bytes):
1880 lddqu -123(%rsi), %xmm0
1881 movdqu %xmm0, -123(%rdi)
1882L(fwd_write_107bytes):
1883 lddqu -107(%rsi), %xmm0
1884 movdqu %xmm0, -107(%rdi)
1885L(fwd_write_91bytes):
1886 lddqu -91(%rsi), %xmm0
1887 movdqu %xmm0, -91(%rdi)
1888L(fwd_write_75bytes):
1889 lddqu -75(%rsi), %xmm0
1890 movdqu %xmm0, -75(%rdi)
1891L(fwd_write_59bytes):
1892 lddqu -59(%rsi), %xmm0
1893 movdqu %xmm0, -59(%rdi)
1894L(fwd_write_43bytes):
1895 lddqu -43(%rsi), %xmm0
1896 movdqu %xmm0, -43(%rdi)
1897L(fwd_write_27bytes):
1898 lddqu -27(%rsi), %xmm0
1899 lddqu -16(%rsi), %xmm1
1900 movdqu %xmm0, -27(%rdi)
1901 movdqu %xmm1, -16(%rdi)
1902 ret
1903
1904 .p2align 4
/* Forward tail for 11 remaining bytes (rsi/rdi point just past the
   end): an 8-byte move at -11 and a 4-byte move at -4 that overlap by
   one byte, both loaded before either is stored.  rdx and ecx are
   scratch.  */
1905L(fwd_write_11bytes):
1906 mov -11(%rsi), %rdx
1907 mov -4(%rsi), %ecx
1908 mov %rdx, -11(%rdi)
1909 mov %ecx, -4(%rdi)
1910 ret
1911
1912 .p2align 4
/* Forward tail for 138, 122, ..., 26 remaining bytes.  rsi and rdi
   point just past the end of the data.  Each label copies the 16 bytes
   that begin N bytes before the end and falls through to the next.
   The last label covers its 26 bytes with two overlapping 16-byte
   pieces, loading both before storing either.  */
1913L(fwd_write_138bytes):
1914 lddqu -138(%rsi), %xmm0
1915 movdqu %xmm0, -138(%rdi)
1916L(fwd_write_122bytes):
1917 lddqu -122(%rsi), %xmm0
1918 movdqu %xmm0, -122(%rdi)
1919L(fwd_write_106bytes):
1920 lddqu -106(%rsi), %xmm0
1921 movdqu %xmm0, -106(%rdi)
1922L(fwd_write_90bytes):
1923 lddqu -90(%rsi), %xmm0
1924 movdqu %xmm0, -90(%rdi)
1925L(fwd_write_74bytes):
1926 lddqu -74(%rsi), %xmm0
1927 movdqu %xmm0, -74(%rdi)
1928L(fwd_write_58bytes):
1929 lddqu -58(%rsi), %xmm0
1930 movdqu %xmm0, -58(%rdi)
1931L(fwd_write_42bytes):
1932 lddqu -42(%rsi), %xmm0
1933 movdqu %xmm0, -42(%rdi)
1934L(fwd_write_26bytes):
1935 lddqu -26(%rsi), %xmm0
1936 lddqu -16(%rsi), %xmm1
1937 movdqu %xmm0, -26(%rdi)
1938 movdqu %xmm1, -16(%rdi)
1939 ret
1940
1941 .p2align 4
/* Forward tail for 10 remaining bytes (rsi/rdi point just past the
   end): an 8-byte move at -10 and a 4-byte move at -4 that overlap,
   both loaded before either is stored.  rdx and ecx are scratch.  */
1942L(fwd_write_10bytes):
1943 mov -10(%rsi), %rdx
1944 mov -4(%rsi), %ecx
1945 mov %rdx, -10(%rdi)
1946 mov %ecx, -4(%rdi)
1947 ret
1948
1949 .p2align 4
/* Forward tail for 137, 121, ..., 25 remaining bytes.  rsi and rdi
   point just past the end of the data.  Each label copies the 16 bytes
   that begin N bytes before the end and falls through to the next.
   The last label covers its 25 bytes with two overlapping 16-byte
   pieces, loading both before storing either.  */
1950L(fwd_write_137bytes):
1951 lddqu -137(%rsi), %xmm0
1952 movdqu %xmm0, -137(%rdi)
1953L(fwd_write_121bytes):
1954 lddqu -121(%rsi), %xmm0
1955 movdqu %xmm0, -121(%rdi)
1956L(fwd_write_105bytes):
1957 lddqu -105(%rsi), %xmm0
1958 movdqu %xmm0, -105(%rdi)
1959L(fwd_write_89bytes):
1960 lddqu -89(%rsi), %xmm0
1961 movdqu %xmm0, -89(%rdi)
1962L(fwd_write_73bytes):
1963 lddqu -73(%rsi), %xmm0
1964 movdqu %xmm0, -73(%rdi)
1965L(fwd_write_57bytes):
1966 lddqu -57(%rsi), %xmm0
1967 movdqu %xmm0, -57(%rdi)
1968L(fwd_write_41bytes):
1969 lddqu -41(%rsi), %xmm0
1970 movdqu %xmm0, -41(%rdi)
1971L(fwd_write_25bytes):
1972 lddqu -25(%rsi), %xmm0
1973 lddqu -16(%rsi), %xmm1
1974 movdqu %xmm0, -25(%rdi)
1975 movdqu %xmm1, -16(%rdi)
1976 ret
1977
1978 .p2align 4
/* Forward tail for 9 remaining bytes (rsi/rdi point just past the
   end): an 8-byte move at -9 and a 4-byte move at -4 that overlap,
   both loaded before either is stored.  rdx and ecx are scratch.  */
1979L(fwd_write_9bytes):
1980 mov -9(%rsi), %rdx
1981 mov -4(%rsi), %ecx
1982 mov %rdx, -9(%rdi)
1983 mov %ecx, -4(%rdi)
1984 ret
1985
1986 .p2align 4
/* Forward tail for 136, 120, ..., 24 remaining bytes.  rsi and rdi
   point just past the end of the data.  Each label copies the 16 bytes
   that begin N bytes before the end and falls through to the next.
   The last label covers its 24 bytes with two overlapping 16-byte
   pieces, loading both before storing either.  */
1987L(fwd_write_136bytes):
1988 lddqu -136(%rsi), %xmm0
1989 movdqu %xmm0, -136(%rdi)
1990L(fwd_write_120bytes):
1991 lddqu -120(%rsi), %xmm0
1992 movdqu %xmm0, -120(%rdi)
1993L(fwd_write_104bytes):
1994 lddqu -104(%rsi), %xmm0
1995 movdqu %xmm0, -104(%rdi)
1996L(fwd_write_88bytes):
1997 lddqu -88(%rsi), %xmm0
1998 movdqu %xmm0, -88(%rdi)
1999L(fwd_write_72bytes):
2000 lddqu -72(%rsi), %xmm0
2001 movdqu %xmm0, -72(%rdi)
2002L(fwd_write_56bytes):
2003 lddqu -56(%rsi), %xmm0
2004 movdqu %xmm0, -56(%rdi)
2005L(fwd_write_40bytes):
2006 lddqu -40(%rsi), %xmm0
2007 movdqu %xmm0, -40(%rdi)
2008L(fwd_write_24bytes):
2009 lddqu -24(%rsi), %xmm0
2010 lddqu -16(%rsi), %xmm1
2011 movdqu %xmm0, -24(%rdi)
2012 movdqu %xmm1, -16(%rdi)
2013 ret
2014
2015 .p2align 4
/* Forward tail for 8 remaining bytes (rsi/rdi point just past the
   end): one 8-byte move through rdx.  */
2016L(fwd_write_8bytes):
2017 mov -8(%rsi), %rdx
2018 mov %rdx, -8(%rdi)
2019 ret
2020
2021 .p2align 4
/* Forward tail for 135, 119, ..., 23 remaining bytes.  rsi and rdi
   point just past the end of the data.  Each label copies the 16 bytes
   that begin N bytes before the end and falls through to the next.
   The last label covers its 23 bytes with two overlapping 16-byte
   pieces, loading both before storing either.  */
2022L(fwd_write_135bytes):
2023 lddqu -135(%rsi), %xmm0
2024 movdqu %xmm0, -135(%rdi)
2025L(fwd_write_119bytes):
2026 lddqu -119(%rsi), %xmm0
2027 movdqu %xmm0, -119(%rdi)
2028L(fwd_write_103bytes):
2029 lddqu -103(%rsi), %xmm0
2030 movdqu %xmm0, -103(%rdi)
2031L(fwd_write_87bytes):
2032 lddqu -87(%rsi), %xmm0
2033 movdqu %xmm0, -87(%rdi)
2034L(fwd_write_71bytes):
2035 lddqu -71(%rsi), %xmm0
2036 movdqu %xmm0, -71(%rdi)
2037L(fwd_write_55bytes):
2038 lddqu -55(%rsi), %xmm0
2039 movdqu %xmm0, -55(%rdi)
2040L(fwd_write_39bytes):
2041 lddqu -39(%rsi), %xmm0
2042 movdqu %xmm0, -39(%rdi)
2043L(fwd_write_23bytes):
2044 lddqu -23(%rsi), %xmm0
2045 lddqu -16(%rsi), %xmm1
2046 movdqu %xmm0, -23(%rdi)
2047 movdqu %xmm1, -16(%rdi)
2048 ret
2049
2050 .p2align 4
/* Forward tail for 7 remaining bytes (rsi/rdi point just past the
   end): two 4-byte moves at -7 and -4 that overlap by one byte, both
   loaded before either is stored.  edx and ecx are scratch.  */
2051L(fwd_write_7bytes):
2052 mov -7(%rsi), %edx
2053 mov -4(%rsi), %ecx
2054 mov %edx, -7(%rdi)
2055 mov %ecx, -4(%rdi)
2056 ret
2057
2058 .p2align 4
/* Forward tail for 134, 118, ..., 22 remaining bytes.  rsi and rdi
   point just past the end of the data.  Each label copies the 16 bytes
   that begin N bytes before the end and falls through to the next.
   The last label covers its 22 bytes with two overlapping 16-byte
   pieces, loading both before storing either.  */
2059L(fwd_write_134bytes):
2060 lddqu -134(%rsi), %xmm0
2061 movdqu %xmm0, -134(%rdi)
2062L(fwd_write_118bytes):
2063 lddqu -118(%rsi), %xmm0
2064 movdqu %xmm0, -118(%rdi)
2065L(fwd_write_102bytes):
2066 lddqu -102(%rsi), %xmm0
2067 movdqu %xmm0, -102(%rdi)
2068L(fwd_write_86bytes):
2069 lddqu -86(%rsi), %xmm0
2070 movdqu %xmm0, -86(%rdi)
2071L(fwd_write_70bytes):
2072 lddqu -70(%rsi), %xmm0
2073 movdqu %xmm0, -70(%rdi)
2074L(fwd_write_54bytes):
2075 lddqu -54(%rsi), %xmm0
2076 movdqu %xmm0, -54(%rdi)
2077L(fwd_write_38bytes):
2078 lddqu -38(%rsi), %xmm0
2079 movdqu %xmm0, -38(%rdi)
2080L(fwd_write_22bytes):
2081 lddqu -22(%rsi), %xmm0
2082 lddqu -16(%rsi), %xmm1
2083 movdqu %xmm0, -22(%rdi)
2084 movdqu %xmm1, -16(%rdi)
2085 ret
2086
2087 .p2align 4
/* Forward tail for 6 remaining bytes (rsi/rdi point just past the
   end): two overlapping 4-byte moves at -6 and -4, both loaded before
   either is stored.  edx and ecx are scratch.  */
2088L(fwd_write_6bytes):
2089 mov -6(%rsi), %edx
2090 mov -4(%rsi), %ecx
2091 mov %edx, -6(%rdi)
2092 mov %ecx, -4(%rdi)
2093 ret
2094
2095 .p2align 4
/* Forward tail for 133, 117, ..., 21 remaining bytes.  rsi and rdi
   point just past the end of the data.  Each label copies the 16 bytes
   that begin N bytes before the end and falls through to the next.
   The last label covers its 21 bytes with two overlapping 16-byte
   pieces, loading both before storing either.  */
2096L(fwd_write_133bytes):
2097 lddqu -133(%rsi), %xmm0
2098 movdqu %xmm0, -133(%rdi)
2099L(fwd_write_117bytes):
2100 lddqu -117(%rsi), %xmm0
2101 movdqu %xmm0, -117(%rdi)
2102L(fwd_write_101bytes):
2103 lddqu -101(%rsi), %xmm0
2104 movdqu %xmm0, -101(%rdi)
2105L(fwd_write_85bytes):
2106 lddqu -85(%rsi), %xmm0
2107 movdqu %xmm0, -85(%rdi)
2108L(fwd_write_69bytes):
2109 lddqu -69(%rsi), %xmm0
2110 movdqu %xmm0, -69(%rdi)
2111L(fwd_write_53bytes):
2112 lddqu -53(%rsi), %xmm0
2113 movdqu %xmm0, -53(%rdi)
2114L(fwd_write_37bytes):
2115 lddqu -37(%rsi), %xmm0
2116 movdqu %xmm0, -37(%rdi)
2117L(fwd_write_21bytes):
2118 lddqu -21(%rsi), %xmm0
2119 lddqu -16(%rsi), %xmm1
2120 movdqu %xmm0, -21(%rdi)
2121 movdqu %xmm1, -16(%rdi)
2122 ret
2123
2124 .p2align 4
/* Forward tail for 5 remaining bytes (rsi/rdi point just past the
   end): two overlapping 4-byte moves at -5 and -4, both loaded before
   either is stored.  edx and ecx are scratch.  */
2125L(fwd_write_5bytes):
2126 mov -5(%rsi), %edx
2127 mov -4(%rsi), %ecx
2128 mov %edx, -5(%rdi)
2129 mov %ecx, -4(%rdi)
2130 ret
2131
2132 .p2align 4
/* Forward tail for 132, 116, ..., 20 remaining bytes.  rsi and rdi
   point just past the end of the data.  Each label copies the 16 bytes
   that begin N bytes before the end and falls through to the next.
   The last label covers its 20 bytes with two overlapping 16-byte
   pieces, loading both before storing either.  */
2133L(fwd_write_132bytes):
2134 lddqu -132(%rsi), %xmm0
2135 movdqu %xmm0, -132(%rdi)
2136L(fwd_write_116bytes):
2137 lddqu -116(%rsi), %xmm0
2138 movdqu %xmm0, -116(%rdi)
2139L(fwd_write_100bytes):
2140 lddqu -100(%rsi), %xmm0
2141 movdqu %xmm0, -100(%rdi)
2142L(fwd_write_84bytes):
2143 lddqu -84(%rsi), %xmm0
2144 movdqu %xmm0, -84(%rdi)
2145L(fwd_write_68bytes):
2146 lddqu -68(%rsi), %xmm0
2147 movdqu %xmm0, -68(%rdi)
2148L(fwd_write_52bytes):
2149 lddqu -52(%rsi), %xmm0
2150 movdqu %xmm0, -52(%rdi)
2151L(fwd_write_36bytes):
2152 lddqu -36(%rsi), %xmm0
2153 movdqu %xmm0, -36(%rdi)
2154L(fwd_write_20bytes):
2155 lddqu -20(%rsi), %xmm0
2156 lddqu -16(%rsi), %xmm1
2157 movdqu %xmm0, -20(%rdi)
2158 movdqu %xmm1, -16(%rdi)
2159 ret
2160
2161 .p2align 4
/* Forward tail for 4 remaining bytes (rsi/rdi point just past the
   end): one 4-byte move through edx.  */
2162L(fwd_write_4bytes):
2163 mov -4(%rsi), %edx
2164 mov %edx, -4(%rdi)
2165 ret
2166
2167 .p2align 4
/* Forward tail for 131, 115, ..., 19 remaining bytes.  rsi and rdi
   point just past the end of the data.  Each label copies the 16 bytes
   that begin N bytes before the end and falls through to the next.
   The last label covers its 19 bytes with two overlapping 16-byte
   pieces, loading both before storing either.  */
2168L(fwd_write_131bytes):
2169 lddqu -131(%rsi), %xmm0
2170 movdqu %xmm0, -131(%rdi)
2171L(fwd_write_115bytes):
2172 lddqu -115(%rsi), %xmm0
2173 movdqu %xmm0, -115(%rdi)
2174L(fwd_write_99bytes):
2175 lddqu -99(%rsi), %xmm0
2176 movdqu %xmm0, -99(%rdi)
2177L(fwd_write_83bytes):
2178 lddqu -83(%rsi), %xmm0
2179 movdqu %xmm0, -83(%rdi)
2180L(fwd_write_67bytes):
2181 lddqu -67(%rsi), %xmm0
2182 movdqu %xmm0, -67(%rdi)
2183L(fwd_write_51bytes):
2184 lddqu -51(%rsi), %xmm0
2185 movdqu %xmm0, -51(%rdi)
2186L(fwd_write_35bytes):
2187 lddqu -35(%rsi), %xmm0
2188 movdqu %xmm0, -35(%rdi)
2189L(fwd_write_19bytes):
2190 lddqu -19(%rsi), %xmm0
2191 lddqu -16(%rsi), %xmm1
2192 movdqu %xmm0, -19(%rdi)
2193 movdqu %xmm1, -16(%rdi)
2194 ret
2195
2196 .p2align 4
/* Forward tail for 3 remaining bytes (rsi/rdi point just past the
   end): two 2-byte moves at -3 and -2 that overlap by one byte, both
   loaded before either is stored.  dx and cx are scratch.  */
2197L(fwd_write_3bytes):
2198 mov -3(%rsi), %dx
2199 mov -2(%rsi), %cx
2200 mov %dx, -3(%rdi)
2201 mov %cx, -2(%rdi)
2202 ret
2203
2204 .p2align 4
/* Forward tail for 130, 114, ..., 18 remaining bytes.  rsi and rdi
   point just past the end of the data.  Each label copies the 16 bytes
   that begin N bytes before the end and falls through to the next.
   The last label covers its 18 bytes with two overlapping 16-byte
   pieces, loading both before storing either.  */
2205L(fwd_write_130bytes):
2206 lddqu -130(%rsi), %xmm0
2207 movdqu %xmm0, -130(%rdi)
2208L(fwd_write_114bytes):
2209 lddqu -114(%rsi), %xmm0
2210 movdqu %xmm0, -114(%rdi)
2211L(fwd_write_98bytes):
2212 lddqu -98(%rsi), %xmm0
2213 movdqu %xmm0, -98(%rdi)
2214L(fwd_write_82bytes):
2215 lddqu -82(%rsi), %xmm0
2216 movdqu %xmm0, -82(%rdi)
2217L(fwd_write_66bytes):
2218 lddqu -66(%rsi), %xmm0
2219 movdqu %xmm0, -66(%rdi)
2220L(fwd_write_50bytes):
2221 lddqu -50(%rsi), %xmm0
2222 movdqu %xmm0, -50(%rdi)
2223L(fwd_write_34bytes):
2224 lddqu -34(%rsi), %xmm0
2225 movdqu %xmm0, -34(%rdi)
2226L(fwd_write_18bytes):
2227 lddqu -18(%rsi), %xmm0
2228 lddqu -16(%rsi), %xmm1
2229 movdqu %xmm0, -18(%rdi)
2230 movdqu %xmm1, -16(%rdi)
2231 ret
2232
2233 .p2align 4
/* Forward tail for 2 remaining bytes (rsi/rdi point just past the
   end): a zero-extending 16-bit load into edx followed by a 16-bit
   store.  */
2234L(fwd_write_2bytes):
2235 movzwl -2(%rsi), %edx
2236 mov %dx, -2(%rdi)
2237 ret
2238
2239 .p2align 4
/* Forward tail for 129, 113, ..., 17 remaining bytes.  rsi and rdi
   point just past the end of the data.  Each label copies the 16 bytes
   that begin N bytes before the end and falls through to the next.
   The last label covers its 17 bytes with two overlapping 16-byte
   pieces, loading both before storing either.  */
2240L(fwd_write_129bytes):
2241 lddqu -129(%rsi), %xmm0
2242 movdqu %xmm0, -129(%rdi)
2243L(fwd_write_113bytes):
2244 lddqu -113(%rsi), %xmm0
2245 movdqu %xmm0, -113(%rdi)
2246L(fwd_write_97bytes):
2247 lddqu -97(%rsi), %xmm0
2248 movdqu %xmm0, -97(%rdi)
2249L(fwd_write_81bytes):
2250 lddqu -81(%rsi), %xmm0
2251 movdqu %xmm0, -81(%rdi)
2252L(fwd_write_65bytes):
2253 lddqu -65(%rsi), %xmm0
2254 movdqu %xmm0, -65(%rdi)
2255L(fwd_write_49bytes):
2256 lddqu -49(%rsi), %xmm0
2257 movdqu %xmm0, -49(%rdi)
2258L(fwd_write_33bytes):
2259 lddqu -33(%rsi), %xmm0
2260 movdqu %xmm0, -33(%rdi)
2261L(fwd_write_17bytes):
2262 lddqu -17(%rsi), %xmm0
2263 lddqu -16(%rsi), %xmm1
2264 movdqu %xmm0, -17(%rdi)
2265 movdqu %xmm1, -16(%rdi)
2266 ret
2267
2268 .p2align 4
/* Forward tail for 1 remaining byte (rsi/rdi point just past the
   end): a zero-extending byte load into edx followed by a byte
   store.  */
2269L(fwd_write_1bytes):
2270 movzbl -1(%rsi), %edx
2271 mov %dl, -1(%rdi)
2272 ret
2273
2274 .p2align 4
/* Backward tail for 128, 112, ..., 16, 0 remaining bytes.  rsi and rdi
   point at the start of the data.  Each label copies the topmost 16 of
   its N bytes and falls through to the next label, so the copy runs
   from the highest offset down; L(bwd_write_0bytes) just returns.
   Presumably these labels are the targets of L(table_144_bytes_bwd),
   which is defined outside this view.  */
2275L(bwd_write_128bytes):
2276 lddqu 112(%rsi), %xmm0
2277 movdqu %xmm0, 112(%rdi)
2278L(bwd_write_112bytes):
2279 lddqu 96(%rsi), %xmm0
2280 movdqu %xmm0, 96(%rdi)
2281L(bwd_write_96bytes):
2282 lddqu 80(%rsi), %xmm0
2283 movdqu %xmm0, 80(%rdi)
2284L(bwd_write_80bytes):
2285 lddqu 64(%rsi), %xmm0
2286 movdqu %xmm0, 64(%rdi)
2287L(bwd_write_64bytes):
2288 lddqu 48(%rsi), %xmm0
2289 movdqu %xmm0, 48(%rdi)
2290L(bwd_write_48bytes):
2291 lddqu 32(%rsi), %xmm0
2292 movdqu %xmm0, 32(%rdi)
2293L(bwd_write_32bytes):
2294 lddqu 16(%rsi), %xmm0
2295 movdqu %xmm0, 16(%rdi)
2296L(bwd_write_16bytes):
2297 lddqu (%rsi), %xmm0
2298 movdqu %xmm0, (%rdi)
2299L(bwd_write_0bytes):
2300 ret
2301
2302 .p2align 4
/* Backward tail for 143, 127, ..., 31 remaining bytes.  rsi and rdi
   point at the start of the data.  Each label copies the topmost 16 of
   its N bytes and falls through to the next.  The last label covers
   its 31 bytes with two 16-byte pieces that overlap by one byte,
   loading both before storing either.  */
2303L(bwd_write_143bytes):
2304 lddqu 127(%rsi), %xmm0
2305 movdqu %xmm0, 127(%rdi)
2306L(bwd_write_127bytes):
2307 lddqu 111(%rsi), %xmm0
2308 movdqu %xmm0, 111(%rdi)
2309L(bwd_write_111bytes):
2310 lddqu 95(%rsi), %xmm0
2311 movdqu %xmm0, 95(%rdi)
2312L(bwd_write_95bytes):
2313 lddqu 79(%rsi), %xmm0
2314 movdqu %xmm0, 79(%rdi)
2315L(bwd_write_79bytes):
2316 lddqu 63(%rsi), %xmm0
2317 movdqu %xmm0, 63(%rdi)
2318L(bwd_write_63bytes):
2319 lddqu 47(%rsi), %xmm0
2320 movdqu %xmm0, 47(%rdi)
2321L(bwd_write_47bytes):
2322 lddqu 31(%rsi), %xmm0
2323 movdqu %xmm0, 31(%rdi)
2324L(bwd_write_31bytes):
2325 lddqu 15(%rsi), %xmm0
2326 lddqu (%rsi), %xmm1
2327 movdqu %xmm0, 15(%rdi)
2328 movdqu %xmm1, (%rdi)
2329 ret
2330
2331
2332 .p2align 4
/* Backward tail for 15 remaining bytes (rsi/rdi point at the start):
   two 8-byte moves at 7 and 0 that overlap by one byte, both loaded
   before either is stored.  rdx and rcx are scratch.  */
2333L(bwd_write_15bytes):
2334 mov 7(%rsi), %rdx
2335 mov (%rsi), %rcx
2336 mov %rdx, 7(%rdi)
2337 mov %rcx, (%rdi)
2338 ret
2339
2340 .p2align 4
/* Backward tail for 142, 126, ..., 30 remaining bytes.  rsi and rdi
   point at the start of the data.  Each label copies the topmost 16 of
   its N bytes and falls through to the next.  The last label covers
   its 30 bytes with two overlapping 16-byte pieces, loading both
   before storing either.  */
2341L(bwd_write_142bytes):
2342 lddqu 126(%rsi), %xmm0
2343 movdqu %xmm0, 126(%rdi)
2344L(bwd_write_126bytes):
2345 lddqu 110(%rsi), %xmm0
2346 movdqu %xmm0, 110(%rdi)
2347L(bwd_write_110bytes):
2348 lddqu 94(%rsi), %xmm0
2349 movdqu %xmm0, 94(%rdi)
2350L(bwd_write_94bytes):
2351 lddqu 78(%rsi), %xmm0
2352 movdqu %xmm0, 78(%rdi)
2353L(bwd_write_78bytes):
2354 lddqu 62(%rsi), %xmm0
2355 movdqu %xmm0, 62(%rdi)
2356L(bwd_write_62bytes):
2357 lddqu 46(%rsi), %xmm0
2358 movdqu %xmm0, 46(%rdi)
2359L(bwd_write_46bytes):
2360 lddqu 30(%rsi), %xmm0
2361 movdqu %xmm0, 30(%rdi)
2362L(bwd_write_30bytes):
2363 lddqu 14(%rsi), %xmm0
2364 lddqu (%rsi), %xmm1
2365 movdqu %xmm0, 14(%rdi)
2366 movdqu %xmm1, (%rdi)
2367 ret
2368
2369 .p2align 4
/* Backward tail for 14 remaining bytes (rsi/rdi point at the start):
   two overlapping 8-byte moves at 6 and 0, both loaded before either
   is stored.  rdx and rcx are scratch.  */
2370L(bwd_write_14bytes):
2371 mov 6(%rsi), %rdx
2372 mov (%rsi), %rcx
2373 mov %rdx, 6(%rdi)
2374 mov %rcx, (%rdi)
2375 ret
2376
2377 .p2align 4
/* Backward tail for 141, 125, ..., 29 remaining bytes.  rsi and rdi
   point at the start of the data.  Each label copies the topmost 16 of
   its N bytes and falls through to the next.  The last label covers
   its 29 bytes with two overlapping 16-byte pieces, loading both
   before storing either.  */
2378L(bwd_write_141bytes):
2379 lddqu 125(%rsi), %xmm0
2380 movdqu %xmm0, 125(%rdi)
2381L(bwd_write_125bytes):
2382 lddqu 109(%rsi), %xmm0
2383 movdqu %xmm0, 109(%rdi)
2384L(bwd_write_109bytes):
2385 lddqu 93(%rsi), %xmm0
2386 movdqu %xmm0, 93(%rdi)
2387L(bwd_write_93bytes):
2388 lddqu 77(%rsi), %xmm0
2389 movdqu %xmm0, 77(%rdi)
2390L(bwd_write_77bytes):
2391 lddqu 61(%rsi), %xmm0
2392 movdqu %xmm0, 61(%rdi)
2393L(bwd_write_61bytes):
2394 lddqu 45(%rsi), %xmm0
2395 movdqu %xmm0, 45(%rdi)
2396L(bwd_write_45bytes):
2397 lddqu 29(%rsi), %xmm0
2398 movdqu %xmm0, 29(%rdi)
2399L(bwd_write_29bytes):
2400 lddqu 13(%rsi), %xmm0
2401 lddqu (%rsi), %xmm1
2402 movdqu %xmm0, 13(%rdi)
2403 movdqu %xmm1, (%rdi)
2404 ret
2405
2406 .p2align 4
/* Backward tail for 13 remaining bytes (rsi/rdi point at the start):
   two overlapping 8-byte moves at 5 and 0, both loaded before either
   is stored.  rdx and rcx are scratch.  */
2407L(bwd_write_13bytes):
2408 mov 5(%rsi), %rdx
2409 mov (%rsi), %rcx
2410 mov %rdx, 5(%rdi)
2411 mov %rcx, (%rdi)
2412 ret
2413
2414 .p2align 4
/* Backward tail for 140, 124, ..., 28 remaining bytes.  rsi and rdi
   point at the start of the data.  Each label copies the topmost 16 of
   its N bytes and falls through to the next.  The last label covers
   its 28 bytes with two overlapping 16-byte pieces, loading both
   before storing either.  */
2415L(bwd_write_140bytes):
2416 lddqu 124(%rsi), %xmm0
2417 movdqu %xmm0, 124(%rdi)
2418L(bwd_write_124bytes):
2419 lddqu 108(%rsi), %xmm0
2420 movdqu %xmm0, 108(%rdi)
2421L(bwd_write_108bytes):
2422 lddqu 92(%rsi), %xmm0
2423 movdqu %xmm0, 92(%rdi)
2424L(bwd_write_92bytes):
2425 lddqu 76(%rsi), %xmm0
2426 movdqu %xmm0, 76(%rdi)
2427L(bwd_write_76bytes):
2428 lddqu 60(%rsi), %xmm0
2429 movdqu %xmm0, 60(%rdi)
2430L(bwd_write_60bytes):
2431 lddqu 44(%rsi), %xmm0
2432 movdqu %xmm0, 44(%rdi)
2433L(bwd_write_44bytes):
2434 lddqu 28(%rsi), %xmm0
2435 movdqu %xmm0, 28(%rdi)
2436L(bwd_write_28bytes):
2437 lddqu 12(%rsi), %xmm0
2438 lddqu (%rsi), %xmm1
2439 movdqu %xmm0, 12(%rdi)
2440 movdqu %xmm1, (%rdi)
2441 ret
2442
2443 .p2align 4
/* Backward tail for 12 remaining bytes (rsi/rdi point at the start):
   two overlapping 8-byte moves at 4 and 0, both loaded before either
   is stored.  rdx and rcx are scratch.  */
2444L(bwd_write_12bytes):
2445 mov 4(%rsi), %rdx
2446 mov (%rsi), %rcx
2447 mov %rdx, 4(%rdi)
2448 mov %rcx, (%rdi)
2449 ret
2450
2451 .p2align 4
/* Backward tail for 139, 123, ..., 27 remaining bytes.  rsi and rdi
   point at the start of the data.  Each label copies the topmost 16 of
   its N bytes and falls through to the next.  The last label covers
   its 27 bytes with two overlapping 16-byte pieces, loading both
   before storing either.  */
2452L(bwd_write_139bytes):
2453 lddqu 123(%rsi), %xmm0
2454 movdqu %xmm0, 123(%rdi)
2455L(bwd_write_123bytes):
2456 lddqu 107(%rsi), %xmm0
2457 movdqu %xmm0, 107(%rdi)
2458L(bwd_write_107bytes):
2459 lddqu 91(%rsi), %xmm0
2460 movdqu %xmm0, 91(%rdi)
2461L(bwd_write_91bytes):
2462 lddqu 75(%rsi), %xmm0
2463 movdqu %xmm0, 75(%rdi)
2464L(bwd_write_75bytes):
2465 lddqu 59(%rsi), %xmm0
2466 movdqu %xmm0, 59(%rdi)
2467L(bwd_write_59bytes):
2468 lddqu 43(%rsi), %xmm0
2469 movdqu %xmm0, 43(%rdi)
2470L(bwd_write_43bytes):
2471 lddqu 27(%rsi), %xmm0
2472 movdqu %xmm0, 27(%rdi)
2473L(bwd_write_27bytes):
2474 lddqu 11(%rsi), %xmm0
2475 lddqu (%rsi), %xmm1
2476 movdqu %xmm0, 11(%rdi)
2477 movdqu %xmm1, (%rdi)
2478 ret
2479
2480 .p2align 4
/* Backward tail for 11 remaining bytes (rsi/rdi point at the start):
   two overlapping 8-byte moves at 3 and 0, both loaded before either
   is stored.  rdx and rcx are scratch.  */
2481L(bwd_write_11bytes):
2482 mov 3(%rsi), %rdx
2483 mov (%rsi), %rcx
2484 mov %rdx, 3(%rdi)
2485 mov %rcx, (%rdi)
2486 ret
2487
2488 .p2align 4
/* Backward tail for 138, 122, ..., 26 remaining bytes.  rsi and rdi
   point at the start of the data.  Each label copies the topmost 16 of
   its N bytes and falls through to the next.  The last label covers
   its 26 bytes with two overlapping 16-byte pieces, loading both
   before storing either.  */
2489L(bwd_write_138bytes):
2490 lddqu 122(%rsi), %xmm0
2491 movdqu %xmm0, 122(%rdi)
2492L(bwd_write_122bytes):
2493 lddqu 106(%rsi), %xmm0
2494 movdqu %xmm0, 106(%rdi)
2495L(bwd_write_106bytes):
2496 lddqu 90(%rsi), %xmm0
2497 movdqu %xmm0, 90(%rdi)
2498L(bwd_write_90bytes):
2499 lddqu 74(%rsi), %xmm0
2500 movdqu %xmm0, 74(%rdi)
2501L(bwd_write_74bytes):
2502 lddqu 58(%rsi), %xmm0
2503 movdqu %xmm0, 58(%rdi)
2504L(bwd_write_58bytes):
2505 lddqu 42(%rsi), %xmm0
2506 movdqu %xmm0, 42(%rdi)
2507L(bwd_write_42bytes):
2508 lddqu 26(%rsi), %xmm0
2509 movdqu %xmm0, 26(%rdi)
2510L(bwd_write_26bytes):
2511 lddqu 10(%rsi), %xmm0
2512 lddqu (%rsi), %xmm1
2513 movdqu %xmm0, 10(%rdi)
2514 movdqu %xmm1, (%rdi)
2515 ret
2516
2517 .p2align 4
/* Backward tail for 10 remaining bytes (rsi/rdi point at the start):
   two overlapping 8-byte moves at 2 and 0, both loaded before either
   is stored.  rdx and rcx are scratch.  */
2518L(bwd_write_10bytes):
2519 mov 2(%rsi), %rdx
2520 mov (%rsi), %rcx
2521 mov %rdx, 2(%rdi)
2522 mov %rcx, (%rdi)
2523 ret
2524
2525 .p2align 4
/* Backward tail for 137, 121, ..., 25 remaining bytes.  rsi and rdi
   point at the start of the data.  Each label copies the topmost 16 of
   its N bytes and falls through to the next.  The last label covers
   its 25 bytes with two overlapping 16-byte pieces, loading both
   before storing either.  */
2526L(bwd_write_137bytes):
2527 lddqu 121(%rsi), %xmm0
2528 movdqu %xmm0, 121(%rdi)
2529L(bwd_write_121bytes):
2530 lddqu 105(%rsi), %xmm0
2531 movdqu %xmm0, 105(%rdi)
2532L(bwd_write_105bytes):
2533 lddqu 89(%rsi), %xmm0
2534 movdqu %xmm0, 89(%rdi)
2535L(bwd_write_89bytes):
2536 lddqu 73(%rsi), %xmm0
2537 movdqu %xmm0, 73(%rdi)
2538L(bwd_write_73bytes):
2539 lddqu 57(%rsi), %xmm0
2540 movdqu %xmm0, 57(%rdi)
2541L(bwd_write_57bytes):
2542 lddqu 41(%rsi), %xmm0
2543 movdqu %xmm0, 41(%rdi)
2544L(bwd_write_41bytes):
2545 lddqu 25(%rsi), %xmm0
2546 movdqu %xmm0, 25(%rdi)
2547L(bwd_write_25bytes):
2548 lddqu 9(%rsi), %xmm0
2549 lddqu (%rsi), %xmm1
2550 movdqu %xmm0, 9(%rdi)
2551 movdqu %xmm1, (%rdi)
2552 ret
2553
2554 .p2align 4
/* Backward tail for 9 remaining bytes (rsi/rdi point at the start):
   two overlapping 8-byte moves at 1 and 0, both loaded before either
   is stored.  rdx and rcx are scratch.  */
2555L(bwd_write_9bytes):
2556 mov 1(%rsi), %rdx
2557 mov (%rsi), %rcx
2558 mov %rdx, 1(%rdi)
2559 mov %rcx, (%rdi)
2560 ret
2561
2562 .p2align 4
/* Backward tail for 136, 120, ..., 24 remaining bytes.  rsi and rdi
   point at the start of the data.  Each label copies the topmost 16 of
   its N bytes and falls through to the next.  The last label covers
   its 24 bytes with two overlapping 16-byte pieces, loading both
   before storing either.  */
2563L(bwd_write_136bytes):
2564 lddqu 120(%rsi), %xmm0
2565 movdqu %xmm0, 120(%rdi)
2566L(bwd_write_120bytes):
2567 lddqu 104(%rsi), %xmm0
2568 movdqu %xmm0, 104(%rdi)
2569L(bwd_write_104bytes):
2570 lddqu 88(%rsi), %xmm0
2571 movdqu %xmm0, 88(%rdi)
2572L(bwd_write_88bytes):
2573 lddqu 72(%rsi), %xmm0
2574 movdqu %xmm0, 72(%rdi)
2575L(bwd_write_72bytes):
2576 lddqu 56(%rsi), %xmm0
2577 movdqu %xmm0, 56(%rdi)
2578L(bwd_write_56bytes):
2579 lddqu 40(%rsi), %xmm0
2580 movdqu %xmm0, 40(%rdi)
2581L(bwd_write_40bytes):
2582 lddqu 24(%rsi), %xmm0
2583 movdqu %xmm0, 24(%rdi)
2584L(bwd_write_24bytes):
2585 lddqu 8(%rsi), %xmm0
2586 lddqu (%rsi), %xmm1
2587 movdqu %xmm0, 8(%rdi)
2588 movdqu %xmm1, (%rdi)
2589 ret
2590
/* Copy exactly 8 bytes from (%rsi) to (%rdi) with a single 64-bit
   load/store pair.  Clobbers %rdx.  */
2591 .p2align 4
2592L(bwd_write_8bytes):
2593 mov (%rsi), %rdx
2594 mov %rdx, (%rdi)
2595 ret
2596
/* Tail copy of 135 bytes from (%rsi) to (%rdi).  Every label in this
   chain is the entry point for the byte count in its name: it moves
   the top 16 bytes of that span with one unaligned load/store pair and
   then falls through to the entry for 16 fewer bytes.  Clobbers %xmm0,
   plus %xmm1 in the final step.  */
2597 .p2align 4
2598L(bwd_write_135bytes):
2599 lddqu 119(%rsi), %xmm0
2600 movdqu %xmm0, 119(%rdi)
2601L(bwd_write_119bytes):
2602 lddqu 103(%rsi), %xmm0
2603 movdqu %xmm0, 103(%rdi)
2604L(bwd_write_103bytes):
2605 lddqu 87(%rsi), %xmm0
2606 movdqu %xmm0, 87(%rdi)
2607L(bwd_write_87bytes):
2608 lddqu 71(%rsi), %xmm0
2609 movdqu %xmm0, 71(%rdi)
2610L(bwd_write_71bytes):
2611 lddqu 55(%rsi), %xmm0
2612 movdqu %xmm0, 55(%rdi)
2613L(bwd_write_55bytes):
2614 lddqu 39(%rsi), %xmm0
2615 movdqu %xmm0, 39(%rdi)
2616L(bwd_write_39bytes):
2617 lddqu 23(%rsi), %xmm0
2618 movdqu %xmm0, 23(%rdi)
/* Last 23 bytes: the 16-byte chunks at offsets 7 and 0 overlap in
   bytes 7..15; both loads are issued before either store.  */
2619L(bwd_write_23bytes):
2620 lddqu 7(%rsi), %xmm0
2621 lddqu (%rsi), %xmm1
2622 movdqu %xmm0, 7(%rdi)
2623 movdqu %xmm1, (%rdi)
2624 ret
2625
/* Copy exactly 7 bytes from (%rsi) to (%rdi) as two overlapping 4-byte
   moves covering bytes 3..6 and 0..3.  Both loads come before either
   store in program order.  Clobbers %edx and %ecx.  */
2626 .p2align 4
2627L(bwd_write_7bytes):
2628 mov 3(%rsi), %edx
2629 mov (%rsi), %ecx
2630 mov %edx, 3(%rdi)
2631 mov %ecx, (%rdi)
2632 ret
2633
/* Tail copy of 134 bytes from (%rsi) to (%rdi).  Every label in this
   chain is the entry point for the byte count in its name: it moves
   the top 16 bytes of that span with one unaligned load/store pair and
   then falls through to the entry for 16 fewer bytes.  Clobbers %xmm0,
   plus %xmm1 in the final step.  */
2634 .p2align 4
2635L(bwd_write_134bytes):
2636 lddqu 118(%rsi), %xmm0
2637 movdqu %xmm0, 118(%rdi)
2638L(bwd_write_118bytes):
2639 lddqu 102(%rsi), %xmm0
2640 movdqu %xmm0, 102(%rdi)
2641L(bwd_write_102bytes):
2642 lddqu 86(%rsi), %xmm0
2643 movdqu %xmm0, 86(%rdi)
2644L(bwd_write_86bytes):
2645 lddqu 70(%rsi), %xmm0
2646 movdqu %xmm0, 70(%rdi)
2647L(bwd_write_70bytes):
2648 lddqu 54(%rsi), %xmm0
2649 movdqu %xmm0, 54(%rdi)
2650L(bwd_write_54bytes):
2651 lddqu 38(%rsi), %xmm0
2652 movdqu %xmm0, 38(%rdi)
2653L(bwd_write_38bytes):
2654 lddqu 22(%rsi), %xmm0
2655 movdqu %xmm0, 22(%rdi)
/* Last 22 bytes: the 16-byte chunks at offsets 6 and 0 overlap in
   bytes 6..15; both loads are issued before either store.  */
2656L(bwd_write_22bytes):
2657 lddqu 6(%rsi), %xmm0
2658 lddqu (%rsi), %xmm1
2659 movdqu %xmm0, 6(%rdi)
2660 movdqu %xmm1, (%rdi)
2661 ret
2662
/* Copy exactly 6 bytes from (%rsi) to (%rdi) as two overlapping 4-byte
   moves covering bytes 2..5 and 0..3.  Both loads come before either
   store in program order.  Clobbers %edx and %ecx.  */
2663 .p2align 4
2664L(bwd_write_6bytes):
2665 mov 2(%rsi), %edx
2666 mov (%rsi), %ecx
2667 mov %edx, 2(%rdi)
2668 mov %ecx, (%rdi)
2669 ret
2670
/* Tail copy of 133 bytes from (%rsi) to (%rdi).  Every label in this
   chain is the entry point for the byte count in its name: it moves
   the top 16 bytes of that span with one unaligned load/store pair and
   then falls through to the entry for 16 fewer bytes.  Clobbers %xmm0,
   plus %xmm1 in the final step.  */
2671 .p2align 4
2672L(bwd_write_133bytes):
2673 lddqu 117(%rsi), %xmm0
2674 movdqu %xmm0, 117(%rdi)
2675L(bwd_write_117bytes):
2676 lddqu 101(%rsi), %xmm0
2677 movdqu %xmm0, 101(%rdi)
2678L(bwd_write_101bytes):
2679 lddqu 85(%rsi), %xmm0
2680 movdqu %xmm0, 85(%rdi)
2681L(bwd_write_85bytes):
2682 lddqu 69(%rsi), %xmm0
2683 movdqu %xmm0, 69(%rdi)
2684L(bwd_write_69bytes):
2685 lddqu 53(%rsi), %xmm0
2686 movdqu %xmm0, 53(%rdi)
2687L(bwd_write_53bytes):
2688 lddqu 37(%rsi), %xmm0
2689 movdqu %xmm0, 37(%rdi)
2690L(bwd_write_37bytes):
2691 lddqu 21(%rsi), %xmm0
2692 movdqu %xmm0, 21(%rdi)
/* Last 21 bytes: the 16-byte chunks at offsets 5 and 0 overlap in
   bytes 5..15; both loads are issued before either store.  */
2693L(bwd_write_21bytes):
2694 lddqu 5(%rsi), %xmm0
2695 lddqu (%rsi), %xmm1
2696 movdqu %xmm0, 5(%rdi)
2697 movdqu %xmm1, (%rdi)
2698 ret
2699
/* Copy exactly 5 bytes from (%rsi) to (%rdi) as two overlapping 4-byte
   moves covering bytes 1..4 and 0..3.  Both loads come before either
   store in program order.  Clobbers %edx and %ecx.  */
2700 .p2align 4
2701L(bwd_write_5bytes):
2702 mov 1(%rsi), %edx
2703 mov (%rsi), %ecx
2704 mov %edx, 1(%rdi)
2705 mov %ecx, (%rdi)
2706 ret
2707
/* Tail copy of 132 bytes from (%rsi) to (%rdi).  Every label in this
   chain is the entry point for the byte count in its name: it moves
   the top 16 bytes of that span with one unaligned load/store pair and
   then falls through to the entry for 16 fewer bytes.  Clobbers %xmm0,
   plus %xmm1 in the final step.  */
2708 .p2align 4
2709L(bwd_write_132bytes):
2710 lddqu 116(%rsi), %xmm0
2711 movdqu %xmm0, 116(%rdi)
2712L(bwd_write_116bytes):
2713 lddqu 100(%rsi), %xmm0
2714 movdqu %xmm0, 100(%rdi)
2715L(bwd_write_100bytes):
2716 lddqu 84(%rsi), %xmm0
2717 movdqu %xmm0, 84(%rdi)
2718L(bwd_write_84bytes):
2719 lddqu 68(%rsi), %xmm0
2720 movdqu %xmm0, 68(%rdi)
2721L(bwd_write_68bytes):
2722 lddqu 52(%rsi), %xmm0
2723 movdqu %xmm0, 52(%rdi)
2724L(bwd_write_52bytes):
2725 lddqu 36(%rsi), %xmm0
2726 movdqu %xmm0, 36(%rdi)
2727L(bwd_write_36bytes):
2728 lddqu 20(%rsi), %xmm0
2729 movdqu %xmm0, 20(%rdi)
/* Last 20 bytes: the 16-byte chunks at offsets 4 and 0 overlap in
   bytes 4..15; both loads are issued before either store.  */
2730L(bwd_write_20bytes):
2731 lddqu 4(%rsi), %xmm0
2732 lddqu (%rsi), %xmm1
2733 movdqu %xmm0, 4(%rdi)
2734 movdqu %xmm1, (%rdi)
2735 ret
2736
/* Copy exactly 4 bytes from (%rsi) to (%rdi) with a single 32-bit
   load/store pair.  Clobbers %edx.  */
2737 .p2align 4
2738L(bwd_write_4bytes):
2739 mov (%rsi), %edx
2740 mov %edx, (%rdi)
2741 ret
2742
/* Tail copy of 131 bytes from (%rsi) to (%rdi).  Every label in this
   chain is the entry point for the byte count in its name: it moves
   the top 16 bytes of that span with one unaligned load/store pair and
   then falls through to the entry for 16 fewer bytes.  Clobbers %xmm0,
   plus %xmm1 in the final step.  */
2743 .p2align 4
2744L(bwd_write_131bytes):
2745 lddqu 115(%rsi), %xmm0
2746 movdqu %xmm0, 115(%rdi)
2747L(bwd_write_115bytes):
2748 lddqu 99(%rsi), %xmm0
2749 movdqu %xmm0, 99(%rdi)
2750L(bwd_write_99bytes):
2751 lddqu 83(%rsi), %xmm0
2752 movdqu %xmm0, 83(%rdi)
2753L(bwd_write_83bytes):
2754 lddqu 67(%rsi), %xmm0
2755 movdqu %xmm0, 67(%rdi)
2756L(bwd_write_67bytes):
2757 lddqu 51(%rsi), %xmm0
2758 movdqu %xmm0, 51(%rdi)
2759L(bwd_write_51bytes):
2760 lddqu 35(%rsi), %xmm0
2761 movdqu %xmm0, 35(%rdi)
2762L(bwd_write_35bytes):
2763 lddqu 19(%rsi), %xmm0
2764 movdqu %xmm0, 19(%rdi)
/* Last 19 bytes: the 16-byte chunks at offsets 3 and 0 overlap in
   bytes 3..15; both loads are issued before either store.  */
2765L(bwd_write_19bytes):
2766 lddqu 3(%rsi), %xmm0
2767 lddqu (%rsi), %xmm1
2768 movdqu %xmm0, 3(%rdi)
2769 movdqu %xmm1, (%rdi)
2770 ret
2771
/* Copy exactly 3 bytes from (%rsi) to (%rdi) as two overlapping 2-byte
   moves covering bytes 1..2 and 0..1.  Both loads come before either
   store in program order.  Writes the 16-bit registers %dx and %cx.  */
2772 .p2align 4
2773L(bwd_write_3bytes):
2774 mov 1(%rsi), %dx
2775 mov (%rsi), %cx
2776 mov %dx, 1(%rdi)
2777 mov %cx, (%rdi)
2778 ret
2779
/* Tail copy of 130 bytes from (%rsi) to (%rdi).  Every label in this
   chain is the entry point for the byte count in its name: it moves
   the top 16 bytes of that span with one unaligned load/store pair and
   then falls through to the entry for 16 fewer bytes.  Clobbers %xmm0,
   plus %xmm1 in the final step.  */
2780 .p2align 4
2781L(bwd_write_130bytes):
2782 lddqu 114(%rsi), %xmm0
2783 movdqu %xmm0, 114(%rdi)
2784L(bwd_write_114bytes):
2785 lddqu 98(%rsi), %xmm0
2786 movdqu %xmm0, 98(%rdi)
2787L(bwd_write_98bytes):
2788 lddqu 82(%rsi), %xmm0
2789 movdqu %xmm0, 82(%rdi)
2790L(bwd_write_82bytes):
2791 lddqu 66(%rsi), %xmm0
2792 movdqu %xmm0, 66(%rdi)
2793L(bwd_write_66bytes):
2794 lddqu 50(%rsi), %xmm0
2795 movdqu %xmm0, 50(%rdi)
2796L(bwd_write_50bytes):
2797 lddqu 34(%rsi), %xmm0
2798 movdqu %xmm0, 34(%rdi)
2799L(bwd_write_34bytes):
2800 lddqu 18(%rsi), %xmm0
2801 movdqu %xmm0, 18(%rdi)
/* Last 18 bytes: the 16-byte chunks at offsets 2 and 0 overlap in
   bytes 2..15; both loads are issued before either store.  */
2802L(bwd_write_18bytes):
2803 lddqu 2(%rsi), %xmm0
2804 lddqu (%rsi), %xmm1
2805 movdqu %xmm0, 2(%rdi)
2806 movdqu %xmm1, (%rdi)
2807 ret
2808
/* Copy exactly 2 bytes from (%rsi) to (%rdi).  The 16-bit source value
   is loaded zero-extended into %edx and its low half (%dx) is stored.
   Clobbers %edx.  */
2809 .p2align 4
2810L(bwd_write_2bytes):
2811 movzwl (%rsi), %edx
2812 mov %dx, (%rdi)
2813 ret
2814
/* Tail copy of 129 bytes from (%rsi) to (%rdi).  Every label in this
   chain is the entry point for the byte count in its name: it moves
   the top 16 bytes of that span with one unaligned load/store pair and
   then falls through to the entry for 16 fewer bytes.  Clobbers %xmm0,
   plus %xmm1 in the final step.  */
2815 .p2align 4
2816L(bwd_write_129bytes):
2817 lddqu 113(%rsi), %xmm0
2818 movdqu %xmm0, 113(%rdi)
2819L(bwd_write_113bytes):
2820 lddqu 97(%rsi), %xmm0
2821 movdqu %xmm0, 97(%rdi)
2822L(bwd_write_97bytes):
2823 lddqu 81(%rsi), %xmm0
2824 movdqu %xmm0, 81(%rdi)
2825L(bwd_write_81bytes):
2826 lddqu 65(%rsi), %xmm0
2827 movdqu %xmm0, 65(%rdi)
2828L(bwd_write_65bytes):
2829 lddqu 49(%rsi), %xmm0
2830 movdqu %xmm0, 49(%rdi)
2831L(bwd_write_49bytes):
2832 lddqu 33(%rsi), %xmm0
2833 movdqu %xmm0, 33(%rdi)
2834L(bwd_write_33bytes):
2835 lddqu 17(%rsi), %xmm0
2836 movdqu %xmm0, 17(%rdi)
/* Last 17 bytes: the 16-byte chunks at offsets 1 and 0 overlap in
   bytes 1..15; both loads are issued before either store.  */
2837L(bwd_write_17bytes):
2838 lddqu 1(%rsi), %xmm0
2839 lddqu (%rsi), %xmm1
2840 movdqu %xmm0, 1(%rdi)
2841 movdqu %xmm1, (%rdi)
2842 ret
2843
/* Copy exactly 1 byte from (%rsi) to (%rdi).  The source byte is loaded
   zero-extended into %edx and its low byte (%dl) is stored.  Clobbers
   %edx.  */
2844 .p2align 4
2845L(bwd_write_1bytes):
2846 movzbl (%rsi), %edx
2847 mov %dl, (%rdi)
2848 ret
2849
2850END (MEMCPY)
2851
2852 .section .rodata.ssse3,"a",@progbits
2853 .p2align 3
/* Jump table for the L(bwd_write_Nbytes) tail-copy entries, placed in
   .rodata.ssse3 and 8-byte aligned.  Entry N (N = 0..143) is a 32-bit
   value produced by JMPTBL: the address of L(bwd_write_Nbytes) minus
   the address of this table.  NOTE(review): the dispatch sites are not
   in this part of the file; presumably the table is consumed through
   BRANCH_TO_JMPTBL_ENTRY, which adds the loaded offset back to the
   table address before jumping.  */
2854L(table_144_bytes_bwd):
2855 .int JMPTBL (L(bwd_write_0bytes), L(table_144_bytes_bwd))
2856 .int JMPTBL (L(bwd_write_1bytes), L(table_144_bytes_bwd))
2857 .int JMPTBL (L(bwd_write_2bytes), L(table_144_bytes_bwd))
2858 .int JMPTBL (L(bwd_write_3bytes), L(table_144_bytes_bwd))
2859 .int JMPTBL (L(bwd_write_4bytes), L(table_144_bytes_bwd))
2860 .int JMPTBL (L(bwd_write_5bytes), L(table_144_bytes_bwd))
2861 .int JMPTBL (L(bwd_write_6bytes), L(table_144_bytes_bwd))
2862 .int JMPTBL (L(bwd_write_7bytes), L(table_144_bytes_bwd))
2863 .int JMPTBL (L(bwd_write_8bytes), L(table_144_bytes_bwd))
2864 .int JMPTBL (L(bwd_write_9bytes), L(table_144_bytes_bwd))
2865 .int JMPTBL (L(bwd_write_10bytes), L(table_144_bytes_bwd))
2866 .int JMPTBL (L(bwd_write_11bytes), L(table_144_bytes_bwd))
2867 .int JMPTBL (L(bwd_write_12bytes), L(table_144_bytes_bwd))
2868 .int JMPTBL (L(bwd_write_13bytes), L(table_144_bytes_bwd))
2869 .int JMPTBL (L(bwd_write_14bytes), L(table_144_bytes_bwd))
2870 .int JMPTBL (L(bwd_write_15bytes), L(table_144_bytes_bwd))
2871 .int JMPTBL (L(bwd_write_16bytes), L(table_144_bytes_bwd))
2872 .int JMPTBL (L(bwd_write_17bytes), L(table_144_bytes_bwd))
2873 .int JMPTBL (L(bwd_write_18bytes), L(table_144_bytes_bwd))
2874 .int JMPTBL (L(bwd_write_19bytes), L(table_144_bytes_bwd))
2875 .int JMPTBL (L(bwd_write_20bytes), L(table_144_bytes_bwd))
2876 .int JMPTBL (L(bwd_write_21bytes), L(table_144_bytes_bwd))
2877 .int JMPTBL (L(bwd_write_22bytes), L(table_144_bytes_bwd))
2878 .int JMPTBL (L(bwd_write_23bytes), L(table_144_bytes_bwd))
2879 .int JMPTBL (L(bwd_write_24bytes), L(table_144_bytes_bwd))
2880 .int JMPTBL (L(bwd_write_25bytes), L(table_144_bytes_bwd))
2881 .int JMPTBL (L(bwd_write_26bytes), L(table_144_bytes_bwd))
2882 .int JMPTBL (L(bwd_write_27bytes), L(table_144_bytes_bwd))
2883 .int JMPTBL (L(bwd_write_28bytes), L(table_144_bytes_bwd))
2884 .int JMPTBL (L(bwd_write_29bytes), L(table_144_bytes_bwd))
2885 .int JMPTBL (L(bwd_write_30bytes), L(table_144_bytes_bwd))
2886 .int JMPTBL (L(bwd_write_31bytes), L(table_144_bytes_bwd))
2887 .int JMPTBL (L(bwd_write_32bytes), L(table_144_bytes_bwd))
2888 .int JMPTBL (L(bwd_write_33bytes), L(table_144_bytes_bwd))
2889 .int JMPTBL (L(bwd_write_34bytes), L(table_144_bytes_bwd))
2890 .int JMPTBL (L(bwd_write_35bytes), L(table_144_bytes_bwd))
2891 .int JMPTBL (L(bwd_write_36bytes), L(table_144_bytes_bwd))
2892 .int JMPTBL (L(bwd_write_37bytes), L(table_144_bytes_bwd))
2893 .int JMPTBL (L(bwd_write_38bytes), L(table_144_bytes_bwd))
2894 .int JMPTBL (L(bwd_write_39bytes), L(table_144_bytes_bwd))
2895 .int JMPTBL (L(bwd_write_40bytes), L(table_144_bytes_bwd))
2896 .int JMPTBL (L(bwd_write_41bytes), L(table_144_bytes_bwd))
2897 .int JMPTBL (L(bwd_write_42bytes), L(table_144_bytes_bwd))
2898 .int JMPTBL (L(bwd_write_43bytes), L(table_144_bytes_bwd))
2899 .int JMPTBL (L(bwd_write_44bytes), L(table_144_bytes_bwd))
2900 .int JMPTBL (L(bwd_write_45bytes), L(table_144_bytes_bwd))
2901 .int JMPTBL (L(bwd_write_46bytes), L(table_144_bytes_bwd))
2902 .int JMPTBL (L(bwd_write_47bytes), L(table_144_bytes_bwd))
2903 .int JMPTBL (L(bwd_write_48bytes), L(table_144_bytes_bwd))
2904 .int JMPTBL (L(bwd_write_49bytes), L(table_144_bytes_bwd))
2905 .int JMPTBL (L(bwd_write_50bytes), L(table_144_bytes_bwd))
2906 .int JMPTBL (L(bwd_write_51bytes), L(table_144_bytes_bwd))
2907 .int JMPTBL (L(bwd_write_52bytes), L(table_144_bytes_bwd))
2908 .int JMPTBL (L(bwd_write_53bytes), L(table_144_bytes_bwd))
2909 .int JMPTBL (L(bwd_write_54bytes), L(table_144_bytes_bwd))
2910 .int JMPTBL (L(bwd_write_55bytes), L(table_144_bytes_bwd))
2911 .int JMPTBL (L(bwd_write_56bytes), L(table_144_bytes_bwd))
2912 .int JMPTBL (L(bwd_write_57bytes), L(table_144_bytes_bwd))
2913 .int JMPTBL (L(bwd_write_58bytes), L(table_144_bytes_bwd))
2914 .int JMPTBL (L(bwd_write_59bytes), L(table_144_bytes_bwd))
2915 .int JMPTBL (L(bwd_write_60bytes), L(table_144_bytes_bwd))
2916 .int JMPTBL (L(bwd_write_61bytes), L(table_144_bytes_bwd))
2917 .int JMPTBL (L(bwd_write_62bytes), L(table_144_bytes_bwd))
2918 .int JMPTBL (L(bwd_write_63bytes), L(table_144_bytes_bwd))
2919 .int JMPTBL (L(bwd_write_64bytes), L(table_144_bytes_bwd))
2920 .int JMPTBL (L(bwd_write_65bytes), L(table_144_bytes_bwd))
2921 .int JMPTBL (L(bwd_write_66bytes), L(table_144_bytes_bwd))
2922 .int JMPTBL (L(bwd_write_67bytes), L(table_144_bytes_bwd))
2923 .int JMPTBL (L(bwd_write_68bytes), L(table_144_bytes_bwd))
2924 .int JMPTBL (L(bwd_write_69bytes), L(table_144_bytes_bwd))
2925 .int JMPTBL (L(bwd_write_70bytes), L(table_144_bytes_bwd))
2926 .int JMPTBL (L(bwd_write_71bytes), L(table_144_bytes_bwd))
2927 .int JMPTBL (L(bwd_write_72bytes), L(table_144_bytes_bwd))
2928 .int JMPTBL (L(bwd_write_73bytes), L(table_144_bytes_bwd))
2929 .int JMPTBL (L(bwd_write_74bytes), L(table_144_bytes_bwd))
2930 .int JMPTBL (L(bwd_write_75bytes), L(table_144_bytes_bwd))
2931 .int JMPTBL (L(bwd_write_76bytes), L(table_144_bytes_bwd))
2932 .int JMPTBL (L(bwd_write_77bytes), L(table_144_bytes_bwd))
2933 .int JMPTBL (L(bwd_write_78bytes), L(table_144_bytes_bwd))
2934 .int JMPTBL (L(bwd_write_79bytes), L(table_144_bytes_bwd))
2935 .int JMPTBL (L(bwd_write_80bytes), L(table_144_bytes_bwd))
2936 .int JMPTBL (L(bwd_write_81bytes), L(table_144_bytes_bwd))
2937 .int JMPTBL (L(bwd_write_82bytes), L(table_144_bytes_bwd))
2938 .int JMPTBL (L(bwd_write_83bytes), L(table_144_bytes_bwd))
2939 .int JMPTBL (L(bwd_write_84bytes), L(table_144_bytes_bwd))
2940 .int JMPTBL (L(bwd_write_85bytes), L(table_144_bytes_bwd))
2941 .int JMPTBL (L(bwd_write_86bytes), L(table_144_bytes_bwd))
2942 .int JMPTBL (L(bwd_write_87bytes), L(table_144_bytes_bwd))
2943 .int JMPTBL (L(bwd_write_88bytes), L(table_144_bytes_bwd))
2944 .int JMPTBL (L(bwd_write_89bytes), L(table_144_bytes_bwd))
2945 .int JMPTBL (L(bwd_write_90bytes), L(table_144_bytes_bwd))
2946 .int JMPTBL (L(bwd_write_91bytes), L(table_144_bytes_bwd))
2947 .int JMPTBL (L(bwd_write_92bytes), L(table_144_bytes_bwd))
2948 .int JMPTBL (L(bwd_write_93bytes), L(table_144_bytes_bwd))
2949 .int JMPTBL (L(bwd_write_94bytes), L(table_144_bytes_bwd))
2950 .int JMPTBL (L(bwd_write_95bytes), L(table_144_bytes_bwd))
2951 .int JMPTBL (L(bwd_write_96bytes), L(table_144_bytes_bwd))
2952 .int JMPTBL (L(bwd_write_97bytes), L(table_144_bytes_bwd))
2953 .int JMPTBL (L(bwd_write_98bytes), L(table_144_bytes_bwd))
2954 .int JMPTBL (L(bwd_write_99bytes), L(table_144_bytes_bwd))
2955 .int JMPTBL (L(bwd_write_100bytes), L(table_144_bytes_bwd))
2956 .int JMPTBL (L(bwd_write_101bytes), L(table_144_bytes_bwd))
2957 .int JMPTBL (L(bwd_write_102bytes), L(table_144_bytes_bwd))
2958 .int JMPTBL (L(bwd_write_103bytes), L(table_144_bytes_bwd))
2959 .int JMPTBL (L(bwd_write_104bytes), L(table_144_bytes_bwd))
2960 .int JMPTBL (L(bwd_write_105bytes), L(table_144_bytes_bwd))
2961 .int JMPTBL (L(bwd_write_106bytes), L(table_144_bytes_bwd))
2962 .int JMPTBL (L(bwd_write_107bytes), L(table_144_bytes_bwd))
2963 .int JMPTBL (L(bwd_write_108bytes), L(table_144_bytes_bwd))
2964 .int JMPTBL (L(bwd_write_109bytes), L(table_144_bytes_bwd))
2965 .int JMPTBL (L(bwd_write_110bytes), L(table_144_bytes_bwd))
2966 .int JMPTBL (L(bwd_write_111bytes), L(table_144_bytes_bwd))
2967 .int JMPTBL (L(bwd_write_112bytes), L(table_144_bytes_bwd))
2968 .int JMPTBL (L(bwd_write_113bytes), L(table_144_bytes_bwd))
2969 .int JMPTBL (L(bwd_write_114bytes), L(table_144_bytes_bwd))
2970 .int JMPTBL (L(bwd_write_115bytes), L(table_144_bytes_bwd))
2971 .int JMPTBL (L(bwd_write_116bytes), L(table_144_bytes_bwd))
2972 .int JMPTBL (L(bwd_write_117bytes), L(table_144_bytes_bwd))
2973 .int JMPTBL (L(bwd_write_118bytes), L(table_144_bytes_bwd))
2974 .int JMPTBL (L(bwd_write_119bytes), L(table_144_bytes_bwd))
2975 .int JMPTBL (L(bwd_write_120bytes), L(table_144_bytes_bwd))
2976 .int JMPTBL (L(bwd_write_121bytes), L(table_144_bytes_bwd))
2977 .int JMPTBL (L(bwd_write_122bytes), L(table_144_bytes_bwd))
2978 .int JMPTBL (L(bwd_write_123bytes), L(table_144_bytes_bwd))
2979 .int JMPTBL (L(bwd_write_124bytes), L(table_144_bytes_bwd))
2980 .int JMPTBL (L(bwd_write_125bytes), L(table_144_bytes_bwd))
2981 .int JMPTBL (L(bwd_write_126bytes), L(table_144_bytes_bwd))
2982 .int JMPTBL (L(bwd_write_127bytes), L(table_144_bytes_bwd))
2983 .int JMPTBL (L(bwd_write_128bytes), L(table_144_bytes_bwd))
2984 .int JMPTBL (L(bwd_write_129bytes), L(table_144_bytes_bwd))
2985 .int JMPTBL (L(bwd_write_130bytes), L(table_144_bytes_bwd))
2986 .int JMPTBL (L(bwd_write_131bytes), L(table_144_bytes_bwd))
2987 .int JMPTBL (L(bwd_write_132bytes), L(table_144_bytes_bwd))
2988 .int JMPTBL (L(bwd_write_133bytes), L(table_144_bytes_bwd))
2989 .int JMPTBL (L(bwd_write_134bytes), L(table_144_bytes_bwd))
2990 .int JMPTBL (L(bwd_write_135bytes), L(table_144_bytes_bwd))
2991 .int JMPTBL (L(bwd_write_136bytes), L(table_144_bytes_bwd))
2992 .int JMPTBL (L(bwd_write_137bytes), L(table_144_bytes_bwd))
2993 .int JMPTBL (L(bwd_write_138bytes), L(table_144_bytes_bwd))
2994 .int JMPTBL (L(bwd_write_139bytes), L(table_144_bytes_bwd))
2995 .int JMPTBL (L(bwd_write_140bytes), L(table_144_bytes_bwd))
2996 .int JMPTBL (L(bwd_write_141bytes), L(table_144_bytes_bwd))
2997 .int JMPTBL (L(bwd_write_142bytes), L(table_144_bytes_bwd))
2998 .int JMPTBL (L(bwd_write_143bytes), L(table_144_bytes_bwd))
2999
3000 .p2align 3
/* Jump table for the L(fwd_write_Nbytes) entries, 8-byte aligned.
   Entry N (N = 0..143) is a 32-bit value produced by JMPTBL: the
   address of L(fwd_write_Nbytes) minus the address of this table.
   NOTE(review): neither the targets nor the dispatch sites are in
   this part of the file; presumably the table is consumed through
   BRANCH_TO_JMPTBL_ENTRY.  */
3001L(table_144_bytes_fwd):
3002 .int JMPTBL (L(fwd_write_0bytes), L(table_144_bytes_fwd))
3003 .int JMPTBL (L(fwd_write_1bytes), L(table_144_bytes_fwd))
3004 .int JMPTBL (L(fwd_write_2bytes), L(table_144_bytes_fwd))
3005 .int JMPTBL (L(fwd_write_3bytes), L(table_144_bytes_fwd))
3006 .int JMPTBL (L(fwd_write_4bytes), L(table_144_bytes_fwd))
3007 .int JMPTBL (L(fwd_write_5bytes), L(table_144_bytes_fwd))
3008 .int JMPTBL (L(fwd_write_6bytes), L(table_144_bytes_fwd))
3009 .int JMPTBL (L(fwd_write_7bytes), L(table_144_bytes_fwd))
3010 .int JMPTBL (L(fwd_write_8bytes), L(table_144_bytes_fwd))
3011 .int JMPTBL (L(fwd_write_9bytes), L(table_144_bytes_fwd))
3012 .int JMPTBL (L(fwd_write_10bytes), L(table_144_bytes_fwd))
3013 .int JMPTBL (L(fwd_write_11bytes), L(table_144_bytes_fwd))
3014 .int JMPTBL (L(fwd_write_12bytes), L(table_144_bytes_fwd))
3015 .int JMPTBL (L(fwd_write_13bytes), L(table_144_bytes_fwd))
3016 .int JMPTBL (L(fwd_write_14bytes), L(table_144_bytes_fwd))
3017 .int JMPTBL (L(fwd_write_15bytes), L(table_144_bytes_fwd))
3018 .int JMPTBL (L(fwd_write_16bytes), L(table_144_bytes_fwd))
3019 .int JMPTBL (L(fwd_write_17bytes), L(table_144_bytes_fwd))
3020 .int JMPTBL (L(fwd_write_18bytes), L(table_144_bytes_fwd))
3021 .int JMPTBL (L(fwd_write_19bytes), L(table_144_bytes_fwd))
3022 .int JMPTBL (L(fwd_write_20bytes), L(table_144_bytes_fwd))
3023 .int JMPTBL (L(fwd_write_21bytes), L(table_144_bytes_fwd))
3024 .int JMPTBL (L(fwd_write_22bytes), L(table_144_bytes_fwd))
3025 .int JMPTBL (L(fwd_write_23bytes), L(table_144_bytes_fwd))
3026 .int JMPTBL (L(fwd_write_24bytes), L(table_144_bytes_fwd))
3027 .int JMPTBL (L(fwd_write_25bytes), L(table_144_bytes_fwd))
3028 .int JMPTBL (L(fwd_write_26bytes), L(table_144_bytes_fwd))
3029 .int JMPTBL (L(fwd_write_27bytes), L(table_144_bytes_fwd))
3030 .int JMPTBL (L(fwd_write_28bytes), L(table_144_bytes_fwd))
3031 .int JMPTBL (L(fwd_write_29bytes), L(table_144_bytes_fwd))
3032 .int JMPTBL (L(fwd_write_30bytes), L(table_144_bytes_fwd))
3033 .int JMPTBL (L(fwd_write_31bytes), L(table_144_bytes_fwd))
3034 .int JMPTBL (L(fwd_write_32bytes), L(table_144_bytes_fwd))
3035 .int JMPTBL (L(fwd_write_33bytes), L(table_144_bytes_fwd))
3036 .int JMPTBL (L(fwd_write_34bytes), L(table_144_bytes_fwd))
3037 .int JMPTBL (L(fwd_write_35bytes), L(table_144_bytes_fwd))
3038 .int JMPTBL (L(fwd_write_36bytes), L(table_144_bytes_fwd))
3039 .int JMPTBL (L(fwd_write_37bytes), L(table_144_bytes_fwd))
3040 .int JMPTBL (L(fwd_write_38bytes), L(table_144_bytes_fwd))
3041 .int JMPTBL (L(fwd_write_39bytes), L(table_144_bytes_fwd))
3042 .int JMPTBL (L(fwd_write_40bytes), L(table_144_bytes_fwd))
3043 .int JMPTBL (L(fwd_write_41bytes), L(table_144_bytes_fwd))
3044 .int JMPTBL (L(fwd_write_42bytes), L(table_144_bytes_fwd))
3045 .int JMPTBL (L(fwd_write_43bytes), L(table_144_bytes_fwd))
3046 .int JMPTBL (L(fwd_write_44bytes), L(table_144_bytes_fwd))
3047 .int JMPTBL (L(fwd_write_45bytes), L(table_144_bytes_fwd))
3048 .int JMPTBL (L(fwd_write_46bytes), L(table_144_bytes_fwd))
3049 .int JMPTBL (L(fwd_write_47bytes), L(table_144_bytes_fwd))
3050 .int JMPTBL (L(fwd_write_48bytes), L(table_144_bytes_fwd))
3051 .int JMPTBL (L(fwd_write_49bytes), L(table_144_bytes_fwd))
3052 .int JMPTBL (L(fwd_write_50bytes), L(table_144_bytes_fwd))
3053 .int JMPTBL (L(fwd_write_51bytes), L(table_144_bytes_fwd))
3054 .int JMPTBL (L(fwd_write_52bytes), L(table_144_bytes_fwd))
3055 .int JMPTBL (L(fwd_write_53bytes), L(table_144_bytes_fwd))
3056 .int JMPTBL (L(fwd_write_54bytes), L(table_144_bytes_fwd))
3057 .int JMPTBL (L(fwd_write_55bytes), L(table_144_bytes_fwd))
3058 .int JMPTBL (L(fwd_write_56bytes), L(table_144_bytes_fwd))
3059 .int JMPTBL (L(fwd_write_57bytes), L(table_144_bytes_fwd))
3060 .int JMPTBL (L(fwd_write_58bytes), L(table_144_bytes_fwd))
3061 .int JMPTBL (L(fwd_write_59bytes), L(table_144_bytes_fwd))
3062 .int JMPTBL (L(fwd_write_60bytes), L(table_144_bytes_fwd))
3063 .int JMPTBL (L(fwd_write_61bytes), L(table_144_bytes_fwd))
3064 .int JMPTBL (L(fwd_write_62bytes), L(table_144_bytes_fwd))
3065 .int JMPTBL (L(fwd_write_63bytes), L(table_144_bytes_fwd))
3066 .int JMPTBL (L(fwd_write_64bytes), L(table_144_bytes_fwd))
3067 .int JMPTBL (L(fwd_write_65bytes), L(table_144_bytes_fwd))
3068 .int JMPTBL (L(fwd_write_66bytes), L(table_144_bytes_fwd))
3069 .int JMPTBL (L(fwd_write_67bytes), L(table_144_bytes_fwd))
3070 .int JMPTBL (L(fwd_write_68bytes), L(table_144_bytes_fwd))
3071 .int JMPTBL (L(fwd_write_69bytes), L(table_144_bytes_fwd))
3072 .int JMPTBL (L(fwd_write_70bytes), L(table_144_bytes_fwd))
3073 .int JMPTBL (L(fwd_write_71bytes), L(table_144_bytes_fwd))
3074 .int JMPTBL (L(fwd_write_72bytes), L(table_144_bytes_fwd))
3075 .int JMPTBL (L(fwd_write_73bytes), L(table_144_bytes_fwd))
3076 .int JMPTBL (L(fwd_write_74bytes), L(table_144_bytes_fwd))
3077 .int JMPTBL (L(fwd_write_75bytes), L(table_144_bytes_fwd))
3078 .int JMPTBL (L(fwd_write_76bytes), L(table_144_bytes_fwd))
3079 .int JMPTBL (L(fwd_write_77bytes), L(table_144_bytes_fwd))
3080 .int JMPTBL (L(fwd_write_78bytes), L(table_144_bytes_fwd))
3081 .int JMPTBL (L(fwd_write_79bytes), L(table_144_bytes_fwd))
3082 .int JMPTBL (L(fwd_write_80bytes), L(table_144_bytes_fwd))
3083 .int JMPTBL (L(fwd_write_81bytes), L(table_144_bytes_fwd))
3084 .int JMPTBL (L(fwd_write_82bytes), L(table_144_bytes_fwd))
3085 .int JMPTBL (L(fwd_write_83bytes), L(table_144_bytes_fwd))
3086 .int JMPTBL (L(fwd_write_84bytes), L(table_144_bytes_fwd))
3087 .int JMPTBL (L(fwd_write_85bytes), L(table_144_bytes_fwd))
3088 .int JMPTBL (L(fwd_write_86bytes), L(table_144_bytes_fwd))
3089 .int JMPTBL (L(fwd_write_87bytes), L(table_144_bytes_fwd))
3090 .int JMPTBL (L(fwd_write_88bytes), L(table_144_bytes_fwd))
3091 .int JMPTBL (L(fwd_write_89bytes), L(table_144_bytes_fwd))
3092 .int JMPTBL (L(fwd_write_90bytes), L(table_144_bytes_fwd))
3093 .int JMPTBL (L(fwd_write_91bytes), L(table_144_bytes_fwd))
3094 .int JMPTBL (L(fwd_write_92bytes), L(table_144_bytes_fwd))
3095 .int JMPTBL (L(fwd_write_93bytes), L(table_144_bytes_fwd))
3096 .int JMPTBL (L(fwd_write_94bytes), L(table_144_bytes_fwd))
3097 .int JMPTBL (L(fwd_write_95bytes), L(table_144_bytes_fwd))
3098 .int JMPTBL (L(fwd_write_96bytes), L(table_144_bytes_fwd))
3099 .int JMPTBL (L(fwd_write_97bytes), L(table_144_bytes_fwd))
3100 .int JMPTBL (L(fwd_write_98bytes), L(table_144_bytes_fwd))
3101 .int JMPTBL (L(fwd_write_99bytes), L(table_144_bytes_fwd))
3102 .int JMPTBL (L(fwd_write_100bytes), L(table_144_bytes_fwd))
3103 .int JMPTBL (L(fwd_write_101bytes), L(table_144_bytes_fwd))
3104 .int JMPTBL (L(fwd_write_102bytes), L(table_144_bytes_fwd))
3105 .int JMPTBL (L(fwd_write_103bytes), L(table_144_bytes_fwd))
3106 .int JMPTBL (L(fwd_write_104bytes), L(table_144_bytes_fwd))
3107 .int JMPTBL (L(fwd_write_105bytes), L(table_144_bytes_fwd))
3108 .int JMPTBL (L(fwd_write_106bytes), L(table_144_bytes_fwd))
3109 .int JMPTBL (L(fwd_write_107bytes), L(table_144_bytes_fwd))
3110 .int JMPTBL (L(fwd_write_108bytes), L(table_144_bytes_fwd))
3111 .int JMPTBL (L(fwd_write_109bytes), L(table_144_bytes_fwd))
3112 .int JMPTBL (L(fwd_write_110bytes), L(table_144_bytes_fwd))
3113 .int JMPTBL (L(fwd_write_111bytes), L(table_144_bytes_fwd))
3114 .int JMPTBL (L(fwd_write_112bytes), L(table_144_bytes_fwd))
3115 .int JMPTBL (L(fwd_write_113bytes), L(table_144_bytes_fwd))
3116 .int JMPTBL (L(fwd_write_114bytes), L(table_144_bytes_fwd))
3117 .int JMPTBL (L(fwd_write_115bytes), L(table_144_bytes_fwd))
3118 .int JMPTBL (L(fwd_write_116bytes), L(table_144_bytes_fwd))
3119 .int JMPTBL (L(fwd_write_117bytes), L(table_144_bytes_fwd))
3120 .int JMPTBL (L(fwd_write_118bytes), L(table_144_bytes_fwd))
3121 .int JMPTBL (L(fwd_write_119bytes), L(table_144_bytes_fwd))
3122 .int JMPTBL (L(fwd_write_120bytes), L(table_144_bytes_fwd))
3123 .int JMPTBL (L(fwd_write_121bytes), L(table_144_bytes_fwd))
3124 .int JMPTBL (L(fwd_write_122bytes), L(table_144_bytes_fwd))
3125 .int JMPTBL (L(fwd_write_123bytes), L(table_144_bytes_fwd))
3126 .int JMPTBL (L(fwd_write_124bytes), L(table_144_bytes_fwd))
3127 .int JMPTBL (L(fwd_write_125bytes), L(table_144_bytes_fwd))
3128 .int JMPTBL (L(fwd_write_126bytes), L(table_144_bytes_fwd))
3129 .int JMPTBL (L(fwd_write_127bytes), L(table_144_bytes_fwd))
3130 .int JMPTBL (L(fwd_write_128bytes), L(table_144_bytes_fwd))
3131 .int JMPTBL (L(fwd_write_129bytes), L(table_144_bytes_fwd))
3132 .int JMPTBL (L(fwd_write_130bytes), L(table_144_bytes_fwd))
3133 .int JMPTBL (L(fwd_write_131bytes), L(table_144_bytes_fwd))
3134 .int JMPTBL (L(fwd_write_132bytes), L(table_144_bytes_fwd))
3135 .int JMPTBL (L(fwd_write_133bytes), L(table_144_bytes_fwd))
3136 .int JMPTBL (L(fwd_write_134bytes), L(table_144_bytes_fwd))
3137 .int JMPTBL (L(fwd_write_135bytes), L(table_144_bytes_fwd))
3138 .int JMPTBL (L(fwd_write_136bytes), L(table_144_bytes_fwd))
3139 .int JMPTBL (L(fwd_write_137bytes), L(table_144_bytes_fwd))
3140 .int JMPTBL (L(fwd_write_138bytes), L(table_144_bytes_fwd))
3141 .int JMPTBL (L(fwd_write_139bytes), L(table_144_bytes_fwd))
3142 .int JMPTBL (L(fwd_write_140bytes), L(table_144_bytes_fwd))
3143 .int JMPTBL (L(fwd_write_141bytes), L(table_144_bytes_fwd))
3144 .int JMPTBL (L(fwd_write_142bytes), L(table_144_bytes_fwd))
3145 .int JMPTBL (L(fwd_write_143bytes), L(table_144_bytes_fwd))
3146
3147 .p2align 3
/* Jump table for the L(shl_N) entries, 8-byte aligned.  Entry N
   (N = 0..15) is a 32-bit value produced by JMPTBL: the address of
   L(shl_N) minus the address of this table.  NOTE(review): the targets
   and the dispatch site are not in this part of the file; the index is
   presumably a 0..15 byte misalignment -- confirm at the caller.  */
3148L(shl_table_fwd):
3149 .int JMPTBL (L(shl_0), L(shl_table_fwd))
3150 .int JMPTBL (L(shl_1), L(shl_table_fwd))
3151 .int JMPTBL (L(shl_2), L(shl_table_fwd))
3152 .int JMPTBL (L(shl_3), L(shl_table_fwd))
3153 .int JMPTBL (L(shl_4), L(shl_table_fwd))
3154 .int JMPTBL (L(shl_5), L(shl_table_fwd))
3155 .int JMPTBL (L(shl_6), L(shl_table_fwd))
3156 .int JMPTBL (L(shl_7), L(shl_table_fwd))
3157 .int JMPTBL (L(shl_8), L(shl_table_fwd))
3158 .int JMPTBL (L(shl_9), L(shl_table_fwd))
3159 .int JMPTBL (L(shl_10), L(shl_table_fwd))
3160 .int JMPTBL (L(shl_11), L(shl_table_fwd))
3161 .int JMPTBL (L(shl_12), L(shl_table_fwd))
3162 .int JMPTBL (L(shl_13), L(shl_table_fwd))
3163 .int JMPTBL (L(shl_14), L(shl_table_fwd))
3164 .int JMPTBL (L(shl_15), L(shl_table_fwd))
3165
3166 .p2align 3
/* Jump table for the L(shl_N_bwd) entries, 8-byte aligned.  Entry N
   (N = 0..15) is a 32-bit value produced by JMPTBL: the address of
   L(shl_N_bwd) minus the address of this table.  NOTE(review): the
   targets and the dispatch site are not in this part of the file; the
   index is presumably a 0..15 byte misalignment -- confirm at the
   caller.  */
3167L(shl_table_bwd):
3168 .int JMPTBL (L(shl_0_bwd), L(shl_table_bwd))
3169 .int JMPTBL (L(shl_1_bwd), L(shl_table_bwd))
3170 .int JMPTBL (L(shl_2_bwd), L(shl_table_bwd))
3171 .int JMPTBL (L(shl_3_bwd), L(shl_table_bwd))
3172 .int JMPTBL (L(shl_4_bwd), L(shl_table_bwd))
3173 .int JMPTBL (L(shl_5_bwd), L(shl_table_bwd))
3174 .int JMPTBL (L(shl_6_bwd), L(shl_table_bwd))
3175 .int JMPTBL (L(shl_7_bwd), L(shl_table_bwd))
3176 .int JMPTBL (L(shl_8_bwd), L(shl_table_bwd))
3177 .int JMPTBL (L(shl_9_bwd), L(shl_table_bwd))
3178 .int JMPTBL (L(shl_10_bwd), L(shl_table_bwd))
3179 .int JMPTBL (L(shl_11_bwd), L(shl_table_bwd))
3180 .int JMPTBL (L(shl_12_bwd), L(shl_table_bwd))
3181 .int JMPTBL (L(shl_13_bwd), L(shl_table_bwd))
3182 .int JMPTBL (L(shl_14_bwd), L(shl_table_bwd))
3183 .int JMPTBL (L(shl_15_bwd), L(shl_table_bwd))
3184
3185#endif
3186