1/* x86-64 __mpn_lshift --
2 Copyright (C) 2007-2020 Free Software Foundation, Inc.
3 This file is part of the GNU MP Library.
4
5 The GNU MP Library is free software; you can redistribute it and/or modify
6 it under the terms of the GNU Lesser General Public License as published by
7 the Free Software Foundation; either version 2.1 of the License, or (at your
8 option) any later version.
9
10 The GNU MP Library is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
13 License for more details.
14
15 You should have received a copy of the GNU Lesser General Public License
16 along with the GNU MP Library; see the file COPYING.LIB. If not,
17 see <https://www.gnu.org/licenses/>. */
18
19#include "sysdep.h"
20#include "asm-syntax.h"
21
/* Register aliases used below.  These are the first four integer
   argument registers of the System V AMD64 calling convention.  */
#define rp	%rdi		/* destination limb pointer */
#define up	%rsi		/* source limb pointer */
#define n	%rdx		/* number of 64-bit limbs */
#define cnt	%cl		/* shift count, in bits */

	.text
/* __mpn_lshift (rp, up, n, cnt)

   Shift the n-limb operand at up left by cnt bits and store the n
   result limbs at rp.  Limbs are read and written from the highest
   address towards the lowest.  On return %rax holds the bits that
   were shifted out of the most significant limb.

   Clobbers: %rax, %rdx, %rsi, %rdi, %r8-%r11 and the flags.  No stack
   is used.

   NOTE(review): n is expected to be at least 1.  With n == 0 the
   "n mod 4 == 0" path is taken and memory below both operands is
   accessed.  The valid range of cnt is presumably 1..63, as for GMP's
   mpn_lshift -- confirm against the callers.

   The main loop handles four limbs per iteration and is software
   pipelined across %r8-%r11.  The code in front of it picks the loop
   entry point that matches n mod 4 and preloads the registers that
   stage expects.  */
ENTRY (__mpn_lshift)
	/* Point rp and up at the last (most significant) limb.  */
	leaq	-8(rp,n,8), rp
	leaq	-8(up,n,8), up

	/* Dispatch on n mod 4; only the low two bits of n are needed,
	   so the 32-bit view of the count is sufficient.  */
	movl	%edx, %eax
	andl	$3, %eax
	jne	L(rem_nonzero)

L(rem0):	/* n = 4, 8, 12, ... */
	movq	(up), %r10
	movq	-8(up), %r11
	xorl	%eax, %eax
	shldq	cnt, %r10, %rax		/* %rax = bits leaving the top limb */
	movq	-16(up), %r8
	leaq	24(rp), rp		/* bias rp for the enter0 store offset */
	subq	$4, n
	jmp	L(enter0)

L(rem_nonzero):	/* n mod 4 is 1, 2 or 3 */
	cmpl	$2, %eax
	jae	L(rem2or3)

L(rem1):	/* n = 1, 5, 9, ... */
	movq	(up), %r9
	xorl	%eax, %eax
	shldq	cnt, %r9, %rax		/* %rax = bits leaving the top limb */
	subq	$2, n
	jb	L(single)		/* borrow: n was exactly 1 */
	movq	-8(up), %r10
	movq	-16(up), %r11
	leaq	-8(up), up
	leaq	16(rp), rp		/* bias rp for the enter1 store offset */
	jmp	L(enter1)

L(single):	/* n == 1: just shift the only limb */
	shlq	cnt, %r9
	movq	%r9, (rp)
	ret

L(rem2or3):	/* flags still come from the compare against 2 */
	jne	L(rem3)

L(rem2):	/* n = 2, 6, 10, ... */
	movq	(up), %r8
	movq	-8(up), %r9
	xorl	%eax, %eax
	shldq	cnt, %r8, %rax		/* %rax = bits leaving the top limb */
	subq	$3, n
	jb	L(pair)			/* borrow: n was exactly 2 */
	movq	-16(up), %r10
	leaq	-16(up), up
	leaq	8(rp), rp		/* bias rp for the enter2 store offset */
	jmp	L(enter2)

L(pair):	/* n == 2: finish both limbs here */
	shldq	cnt, %r9, %r8
	movq	%r8, (rp)
	shlq	cnt, %r9
	movq	%r9, -8(rp)
	ret

	.p2align 4		/* performance critical! */
L(rem3):	/* n = 3, 7, 11, ... */
	movq	(up), %r11
	movq	-8(up), %r8
	xorl	%eax, %eax
	shldq	cnt, %r11, %rax		/* %rax = bits leaving the top limb */
	movq	-16(up), %r9
	leaq	-24(up), up
	subq	$4, n
	jb	L(tail)			/* borrow: n was exactly 3 */

	/* Four-limb unrolled loop.  Each stage merges two neighbouring
	   limbs with shld, loads the limb needed three stages later and
	   stores the finished result limb.  */
	.p2align 4
L(unrolled):
	shldq	cnt, %r8, %r11
	movq	(up), %r10
	movq	%r11, (rp)
L(enter2):	/* entry point for n mod 4 == 2 */
	shldq	cnt, %r9, %r8
	movq	-8(up), %r11
	movq	%r8, -8(rp)
L(enter1):	/* entry point for n mod 4 == 1 */
	shldq	cnt, %r10, %r9
	movq	-16(up), %r8
	movq	%r9, -16(rp)
L(enter0):	/* entry point for n mod 4 == 0 */
	shldq	cnt, %r11, %r10
	movq	-24(up), %r9
	movq	%r10, -24(rp)
	addq	$-32, up
	leaq	-32(rp), rp
	subq	$4, n
	jnc	L(unrolled)		/* no borrow: at least four limbs left */

	/* Wind down: three limbs are still in flight in %r11, %r8, %r9.
	   The lowest limb has no neighbour below it, so it gets a plain
	   shift that brings in zero bits.  */
L(tail):
	shldq	cnt, %r8, %r11
	movq	%r11, (rp)
	shldq	cnt, %r9, %r8
	movq	%r8, -8(rp)
	shlq	cnt, %r9
	movq	%r9, -16(rp)
	ret
END (__mpn_lshift)
117