1/* x86-64 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
2 the result to a second limb vector.
3 Copyright (C) 2003-2016 Free Software Foundation, Inc.
4 This file is part of the GNU MP Library.
5
6 The GNU MP Library is free software; you can redistribute it and/or modify
7 it under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or (at your
9 option) any later version.
10
11 The GNU MP Library is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
13 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
14 License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with the GNU MP Library; see the file COPYING.LIB. If not,
18 see <http://www.gnu.org/licenses/>. */
19
20#include "sysdep.h"
21#include "asm-syntax.h"
22
/* Register aliases for the four arguments.  The function body uses rp
   and up through these names, but refers to %rdx and %rcx directly
   rather than through n and v0.  */
23#define rp %rdi
24#define up %rsi
25#define n %rdx
26#define v0 %rcx
27
/* Defaults that apply when nothing has defined func beforehand: the
   entry point is named __mpn_addmul_1 and ADDSUB expands to add.
   NOTE(review): the guard suggests another file may define func and
   ADDSUB and then include this one to build a variant -- confirm
   against the including file.  */
28#ifndef func
29# define func __mpn_addmul_1
30# define ADDSUB add
31#endif
32
33 .text
/* func (__mpn_addmul_1 unless overridden): for each of the n limbs,
   multiply up[i] by the limb in %rcx and combine the product with
   rp[i] using ADDSUB (add by default), storing the result back to
   rp[i].  Carries are chained from one limb to the next.

   In:   rp (%rdi)  vector that is read and updated in place
         up (%rsi)  vector that is multiplied, only read
         %rdx       limb count n
         %rcx       multiplier limb
   Out:  %rax       high half of the last product plus the carry left
                    over from the chain

   Overwrites %rdi, %rsi, %rdx, %r8-%r11 and the flags.  %rbx and %rbp
   are used as well, but are pushed on entry and popped before ret.

   The first limbs are loaded before n is examined, so n must be at
   least 1.

   Register roles inside the function:
     %rbx        negative limb index, starts at -n and counts up to 0
     %r10        the rp limb currently being updated
     %r8:%r9     low:high halves of one pending product
     %r11:%rbp   low:high halves of the other pending product
     %rax:%rdx   low:high halves of the product mul has just produced
   The form lea (%reg), %dst is used throughout as a plain register
   copy.

   The loop handles two limbs per iteration, L(top) and L(mid) being
   its two halves.  The code before the loop enters at L(mid) when n is
   even (n > 2) and at L(e) when n is odd (n > 1).  n == 2 and n == 1
   skip the loop and go straight to L(n2) and L(n1).  Each half relies
   on CF surviving from ADDSUB through both adc instructions, which is
   why only lea and mov sit between them.  */
34ENTRY (func)
35 push %rbx /* saved here, restored before ret */
36 push %rbp /* saved here, restored before ret */
37 lea (%rdx), %rbx /* rbx = n, copied because mul overwrites rdx */
38 neg %rbx /* rbx = -n */
39
40 mov (up), %rax /* rax = up[0] */
41 mov (rp), %r10 /* r10 = rp[0] */
42
43 lea -16(rp,%rdx,8), rp /* rp = address of rp[n-2] */
44 lea (up,%rdx,8), up /* up = address just past up[n-1] */
45 mul %rcx /* rdx:rax = product 0 = up[0] * multiplier */
46
47 bt $0, %ebx /* CF = low bit of -n, which is the parity of n */
48 jc L(odd)
49
/* n is even: park product 0 in r11:rbp and compute product 1.  */
50 lea (%rax), %r11 /* r11 = low half of product 0 */
51 mov 8(up,%rbx,8), %rax /* rax = up[1] */
52 lea (%rdx), %rbp /* rbp = high half of product 0 */
53 mul %rcx /* rdx:rax = product 1 */
54 add $2, %rbx
55 jns L(n2) /* n == 2: only the wind-down is left */
56
57 lea (%rax), %r8 /* r8 = low half of product 1 */
58 mov (up,%rbx,8), %rax /* rax = up[2] */
59 lea (%rdx), %r9 /* r9 = high half of product 1 */
60 jmp L(mid)
61
/* n is odd: product 0 goes to r8:r9, product 1 to r11:rbp.  */
62L(odd): add $1, %rbx
63 jns L(n1) /* n == 1: product 0 is still in rdx:rax */
64
65 lea (%rax), %r8 /* r8 = low half of product 0 */
66 mov (up,%rbx,8), %rax /* rax = up[1] */
67 lea (%rdx), %r9 /* r9 = high half of product 0 */
68 mul %rcx /* rdx:rax = product 1 */
69 lea (%rax), %r11 /* r11 = low half of product 1 */
70 mov 8(up,%rbx,8), %rax /* rax = up[2] */
71 lea (%rdx), %rbp /* rbp = high half of product 1 */
72 jmp L(e)
73
74 .p2align 4
/* First half of the loop: retire the product pending in r8:r9.  */
75L(top): mul %rcx /* rdx:rax = next product */
76 ADDSUB %r8, %r10 /* combine pending low half with the rp limb, sets CF */
77 lea (%rax), %r8 /* r8 = low half of the new product */
78 mov (up,%rbx,8), %rax /* fetch the up limb for the next mul */
79 adc %r9, %r11 /* other low half += retired high half + CF */
80 mov %r10, -8(rp,%rbx,8) /* store the finished rp limb */
81 mov (rp,%rbx,8), %r10 /* load the next rp limb */
82 lea (%rdx), %r9 /* r9 = high half of the new product */
83 adc $0, %rbp /* carry out of the adc above goes into the other high half */
/* Second half of the loop: same steps with the register pairs swapped.  */
84L(mid): mul %rcx /* rdx:rax = next product */
85 ADDSUB %r11, %r10 /* combine pending low half with the rp limb, sets CF */
86 lea (%rax), %r11 /* r11 = low half of the new product */
87 mov 8(up,%rbx,8), %rax /* fetch the up limb for the next mul */
88 adc %rbp, %r8 /* other low half += retired high half + CF */
89 mov %r10, (rp,%rbx,8) /* store the finished rp limb */
90 mov 8(rp,%rbx,8), %r10 /* load the next rp limb */
91 lea (%rdx), %rbp /* rbp = high half of the new product */
92 adc $0, %r9 /* carry out of the adc above goes into the other high half */
93L(e): add $2, %rbx /* advance the index by two limbs */
94 js L(top) /* keep looping while the index is still negative */
95
/* Wind-down: the last product stays in rdx:rax while the pending ones
   are retired into the final three rp limbs.  */
96 mul %rcx /* rdx:rax = last product */
97 ADDSUB %r8, %r10
98 adc %r9, %r11
99 mov %r10, -8(rp) /* store rp[n-3] */
100 adc $0, %rbp
101L(n2): mov (rp), %r10 /* r10 = rp[n-2] */
102 ADDSUB %r11, %r10
103 adc %rbp, %rax /* low half of last product += pending high half + CF */
104 mov %r10, (rp) /* store rp[n-2] */
105 adc $0, %rdx /* carry into the high half of the last product */
106L(n1): mov 8(rp), %r10 /* r10 = rp[n-1] */
107 ADDSUB %rax, %r10
108 mov %r10, 8(rp) /* store rp[n-1] */
109 mov %ebx, %eax /* rbx is 0 here, so this zeroes rax, and mov leaves CF intact for the adc below */
110 adc %rdx, %rax /* return value = high half of last product + CF */
111 pop %rbp
112 pop %rbx
113 ret
114END (func)
115