1 | /* _memcopy.c -- subroutines for memory copy functions. |
2 | Copyright (C) 1991-2016 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | Contributed by Torbjorn Granlund (tege@sics.se). |
5 | |
6 | The GNU C Library is free software; you can redistribute it and/or |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either |
9 | version 2.1 of the License, or (at your option) any later version. |
10 | |
11 | The GNU C Library is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | Lesser General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU Lesser General Public |
17 | License along with the GNU C Library; if not, see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
20 | /* BE VERY CAREFUL IF YOU CHANGE THIS CODE...! */ |
21 | |
22 | #include <stddef.h> |
23 | #include <memcopy.h> |
24 | |
25 | /* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to |
26 | block beginning at DSTP with LEN `op_t' words (not LEN bytes!). |
27 | Both SRCP and DSTP should be aligned for memory operations on `op_t's. */ |
28 | |
29 | #ifndef WORDCOPY_FWD_ALIGNED |
30 | # define WORDCOPY_FWD_ALIGNED _wordcopy_fwd_aligned |
31 | #endif |
32 | /* The guard above keeps any earlier definition of WORDCOPY_FWD_ALIGNED, so the function name can be overridden. DSTP and SRCP are addresses carried as long int and cast to op_t * at every access; LEN counts op_t words. */ |
33 | void |
34 | WORDCOPY_FWD_ALIGNED (long int dstp, long int srcp, size_t len) |
35 | { |
36 | op_t a0, a1; /* Alternating holders: each step loads the next source word into one and stores the other. */ |
37 | /* Enter the 8-way unrolled loop at the label selected by len % 8. Each case preloads the first source word, biases srcp and dstp so the fixed indices at that label address the start of the blocks, and adjusts len to a multiple of 8. */ |
38 | switch (len % 8) |
39 | { |
40 | case 2: |
41 | a0 = ((op_t *) srcp)[0]; |
42 | srcp -= 6 * OPSIZ; |
43 | dstp -= 7 * OPSIZ; |
44 | len += 6; |
45 | goto do1; |
46 | case 3: |
47 | a1 = ((op_t *) srcp)[0]; |
48 | srcp -= 5 * OPSIZ; |
49 | dstp -= 6 * OPSIZ; |
50 | len += 5; |
51 | goto do2; |
52 | case 4: |
53 | a0 = ((op_t *) srcp)[0]; |
54 | srcp -= 4 * OPSIZ; |
55 | dstp -= 5 * OPSIZ; |
56 | len += 4; |
57 | goto do3; |
58 | case 5: |
59 | a1 = ((op_t *) srcp)[0]; |
60 | srcp -= 3 * OPSIZ; |
61 | dstp -= 4 * OPSIZ; |
62 | len += 3; |
63 | goto do4; |
64 | case 6: |
65 | a0 = ((op_t *) srcp)[0]; |
66 | srcp -= 2 * OPSIZ; |
67 | dstp -= 3 * OPSIZ; |
68 | len += 2; |
69 | goto do5; |
70 | case 7: |
71 | a1 = ((op_t *) srcp)[0]; |
72 | srcp -= 1 * OPSIZ; |
73 | dstp -= 2 * OPSIZ; |
74 | len += 1; |
75 | goto do6; |
76 | /* Cases 0 and 1 are the only ones where len may be too small for a loop pass. Their len == 0 tests are live only when OP_T_THRES <= 3 * OPSIZ. NOTE(review): presumably callers never pass such short lengths otherwise -- confirm in memcopy.h. */ |
77 | case 0: |
78 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) |
79 | return; |
80 | a0 = ((op_t *) srcp)[0]; |
81 | srcp -= 0 * OPSIZ; |
82 | dstp -= 1 * OPSIZ; |
83 | goto do7; |
84 | case 1: |
85 | a1 = ((op_t *) srcp)[0]; |
86 | srcp -=-1 * OPSIZ; |
87 | dstp -= 0 * OPSIZ; |
88 | len -= 1; |
89 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) |
90 | goto do0; |
91 | goto do8; /* No-op. */ |
92 | } |
93 | /* Each step loads the next source word and stores the word loaded by the previous step, so stores trail loads by one word. */ |
94 | do |
95 | { |
96 | do8: |
97 | a0 = ((op_t *) srcp)[0]; |
98 | ((op_t *) dstp)[0] = a1; |
99 | do7: |
100 | a1 = ((op_t *) srcp)[1]; |
101 | ((op_t *) dstp)[1] = a0; |
102 | do6: |
103 | a0 = ((op_t *) srcp)[2]; |
104 | ((op_t *) dstp)[2] = a1; |
105 | do5: |
106 | a1 = ((op_t *) srcp)[3]; |
107 | ((op_t *) dstp)[3] = a0; |
108 | do4: |
109 | a0 = ((op_t *) srcp)[4]; |
110 | ((op_t *) dstp)[4] = a1; |
111 | do3: |
112 | a1 = ((op_t *) srcp)[5]; |
113 | ((op_t *) dstp)[5] = a0; |
114 | do2: |
115 | a0 = ((op_t *) srcp)[6]; |
116 | ((op_t *) dstp)[6] = a1; |
117 | do1: |
118 | a1 = ((op_t *) srcp)[7]; |
119 | ((op_t *) dstp)[7] = a0; |
120 | /* One full pass handled 8 words; advance both addresses and count them off. */ |
121 | srcp += 8 * OPSIZ; |
122 | dstp += 8 * OPSIZ; |
123 | len -= 8; |
124 | } |
125 | while (len != 0); |
126 | |
127 | /* This is the right position for do0. Please don't move |
128 | it into the loop. */ |
129 | do0: |
130 | ((op_t *) dstp)[0] = a1; /* Store the last loaded word, which has not been written yet. */ |
131 | } |
132 | |
133 | /* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to |
134 | block beginning at DSTP with LEN `op_t' words (not LEN bytes!). |
135 | DSTP should be aligned for memory operations on `op_t's, but SRCP must |
136 | *not* be aligned. */ |
137 | |
138 | #ifndef WORDCOPY_FWD_DEST_ALIGNED |
139 | # define WORDCOPY_FWD_DEST_ALIGNED _wordcopy_fwd_dest_aligned |
140 | #endif |
141 | /* The guard above keeps any earlier definition of WORDCOPY_FWD_DEST_ALIGNED. DSTP and SRCP are addresses carried as long int and cast to op_t * at every access; LEN counts op_t words. */ |
142 | void |
143 | WORDCOPY_FWD_DEST_ALIGNED (long int dstp, long int srcp, size_t len) |
144 | { |
145 | op_t a0, a1, a2, a3; /* Rotating window of aligned source words; every destination word is built from two neighbouring ones. */ |
146 | int sh_1, sh_2; /* Shift counts handed to MERGE: 8 times the offset of srcp within an OPSIZ unit, and 8 * OPSIZ minus that. */ |
147 | |
148 | /* Calculate how to shift a word read at the memory operation |
149 | aligned srcp to make it aligned for copy. */ |
150 | |
151 | sh_1 = 8 * (srcp % OPSIZ); |
152 | sh_2 = 8 * OPSIZ - sh_1; |
153 | /* If srcp were a multiple of OPSIZ, sh_1 would be 0 and sh_2 would be 8 * OPSIZ. NOTE(review): presumably that equals the full width of op_t and is not a usable shift count in MERGE, hence the unaligned-SRCP requirement -- confirm in memcopy.h. */ |
154 | /* Make SRCP aligned by rounding it down to the beginning of the `op_t' |
155 | it points in the middle of. */ |
156 | srcp &= -OPSIZ; |
157 | /* Enter the 4-way unrolled loop at the label selected by len % 4. Each case preloads two aligned source words, biases srcp and dstp for the fixed indices at that label, and adjusts len to a multiple of 4. */ |
158 | switch (len % 4) |
159 | { |
160 | case 2: |
161 | a1 = ((op_t *) srcp)[0]; |
162 | a2 = ((op_t *) srcp)[1]; |
163 | srcp -= 1 * OPSIZ; |
164 | dstp -= 3 * OPSIZ; |
165 | len += 2; |
166 | goto do1; |
167 | case 3: |
168 | a0 = ((op_t *) srcp)[0]; |
169 | a1 = ((op_t *) srcp)[1]; |
170 | srcp -= 0 * OPSIZ; |
171 | dstp -= 2 * OPSIZ; |
172 | len += 1; |
173 | goto do2; |
174 | case 0: |
175 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) /* Live only when OP_T_THRES <= 3 * OPSIZ; NOTE(review): presumably len is never 0 otherwise -- confirm against callers. */ |
176 | return; |
177 | a3 = ((op_t *) srcp)[0]; |
178 | a0 = ((op_t *) srcp)[1]; |
179 | srcp -=-1 * OPSIZ; |
180 | dstp -= 1 * OPSIZ; |
181 | len += 0; |
182 | goto do3; |
183 | case 1: |
184 | a2 = ((op_t *) srcp)[0]; |
185 | a3 = ((op_t *) srcp)[1]; |
186 | srcp -=-2 * OPSIZ; |
187 | dstp -= 0 * OPSIZ; |
188 | len -= 1; |
189 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) |
190 | goto do0; |
191 | goto do4; /* No-op. */ |
192 | } |
193 | /* Each step loads one more aligned source word and stores the MERGE of the two words loaded before it. Over the whole call, len + 1 aligned source words are read, starting at the rounded-down srcp. */ |
194 | do |
195 | { |
196 | do4: |
197 | a0 = ((op_t *) srcp)[0]; |
198 | ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2); |
199 | do3: |
200 | a1 = ((op_t *) srcp)[1]; |
201 | ((op_t *) dstp)[1] = MERGE (a3, sh_1, a0, sh_2); |
202 | do2: |
203 | a2 = ((op_t *) srcp)[2]; |
204 | ((op_t *) dstp)[2] = MERGE (a0, sh_1, a1, sh_2); |
205 | do1: |
206 | a3 = ((op_t *) srcp)[3]; |
207 | ((op_t *) dstp)[3] = MERGE (a1, sh_1, a2, sh_2); |
208 | /* One full pass handled 4 words; advance both addresses and count them off. */ |
209 | srcp += 4 * OPSIZ; |
210 | dstp += 4 * OPSIZ; |
211 | len -= 4; |
212 | } |
213 | while (len != 0); |
214 | |
215 | /* This is the right position for do0. Please don't move |
216 | it into the loop. */ |
217 | do0: |
218 | ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2); /* Final destination word, built from the last two source words loaded. */ |
219 | } |
220 | |
221 | /* _wordcopy_bwd_aligned -- Copy block finishing right before |
222 | SRCP to block finishing right before DSTP with LEN `op_t' words |
223 | (not LEN bytes!). Both SRCP and DSTP should be aligned for memory |
224 | operations on `op_t's. */ |
225 | |
226 | #ifndef WORDCOPY_BWD_ALIGNED |
227 | # define WORDCOPY_BWD_ALIGNED _wordcopy_bwd_aligned |
228 | #endif |
229 | /* The guard above keeps any earlier definition of WORDCOPY_BWD_ALIGNED. DSTP and SRCP are the addresses just past the end of each block, carried as long int and cast to op_t * at every access; LEN counts op_t words. */ |
230 | void |
231 | WORDCOPY_BWD_ALIGNED (long int dstp, long int srcp, size_t len) |
232 | { |
233 | op_t a0, a1; /* Alternating holders: each step loads the next lower source word into one and stores the other. */ |
234 | /* Enter the 8-way unrolled loop at the label selected by len % 8. Each case moves srcp and dstp down so the fixed indices at that label address the last words of the blocks, preloads the last source word, and adjusts len to a multiple of 8. */ |
235 | switch (len % 8) |
236 | { |
237 | case 2: |
238 | srcp -= 2 * OPSIZ; |
239 | dstp -= 1 * OPSIZ; |
240 | a0 = ((op_t *) srcp)[1]; |
241 | len += 6; |
242 | goto do1; |
243 | case 3: |
244 | srcp -= 3 * OPSIZ; |
245 | dstp -= 2 * OPSIZ; |
246 | a1 = ((op_t *) srcp)[2]; |
247 | len += 5; |
248 | goto do2; |
249 | case 4: |
250 | srcp -= 4 * OPSIZ; |
251 | dstp -= 3 * OPSIZ; |
252 | a0 = ((op_t *) srcp)[3]; |
253 | len += 4; |
254 | goto do3; |
255 | case 5: |
256 | srcp -= 5 * OPSIZ; |
257 | dstp -= 4 * OPSIZ; |
258 | a1 = ((op_t *) srcp)[4]; |
259 | len += 3; |
260 | goto do4; |
261 | case 6: |
262 | srcp -= 6 * OPSIZ; |
263 | dstp -= 5 * OPSIZ; |
264 | a0 = ((op_t *) srcp)[5]; |
265 | len += 2; |
266 | goto do5; |
267 | case 7: |
268 | srcp -= 7 * OPSIZ; |
269 | dstp -= 6 * OPSIZ; |
270 | a1 = ((op_t *) srcp)[6]; |
271 | len += 1; |
272 | goto do6; |
273 | /* Cases 0 and 1 are the only ones where len may be too small for a loop pass. Their len == 0 tests are live only when OP_T_THRES <= 3 * OPSIZ. NOTE(review): presumably callers never pass such short lengths otherwise -- confirm in memcopy.h. */ |
274 | case 0: |
275 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) |
276 | return; |
277 | srcp -= 8 * OPSIZ; |
278 | dstp -= 7 * OPSIZ; |
279 | a0 = ((op_t *) srcp)[7]; |
280 | goto do7; |
281 | case 1: |
282 | srcp -= 9 * OPSIZ; |
283 | dstp -= 8 * OPSIZ; |
284 | a1 = ((op_t *) srcp)[8]; |
285 | len -= 1; |
286 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) |
287 | goto do0; |
288 | goto do8; /* No-op. */ |
289 | } |
290 | /* Indices run from 7 down to 0: each step loads the next lower source word and stores the word loaded by the previous step, so stores trail loads by one word. */ |
291 | do |
292 | { |
293 | do8: |
294 | a0 = ((op_t *) srcp)[7]; |
295 | ((op_t *) dstp)[7] = a1; |
296 | do7: |
297 | a1 = ((op_t *) srcp)[6]; |
298 | ((op_t *) dstp)[6] = a0; |
299 | do6: |
300 | a0 = ((op_t *) srcp)[5]; |
301 | ((op_t *) dstp)[5] = a1; |
302 | do5: |
303 | a1 = ((op_t *) srcp)[4]; |
304 | ((op_t *) dstp)[4] = a0; |
305 | do4: |
306 | a0 = ((op_t *) srcp)[3]; |
307 | ((op_t *) dstp)[3] = a1; |
308 | do3: |
309 | a1 = ((op_t *) srcp)[2]; |
310 | ((op_t *) dstp)[2] = a0; |
311 | do2: |
312 | a0 = ((op_t *) srcp)[1]; |
313 | ((op_t *) dstp)[1] = a1; |
314 | do1: |
315 | a1 = ((op_t *) srcp)[0]; |
316 | ((op_t *) dstp)[0] = a0; |
317 | /* One full pass handled 8 words; step both addresses down and count them off. */ |
318 | srcp -= 8 * OPSIZ; |
319 | dstp -= 8 * OPSIZ; |
320 | len -= 8; |
321 | } |
322 | while (len != 0); |
323 | |
324 | /* This is the right position for do0. Please don't move |
325 | it into the loop. */ |
326 | do0: |
327 | ((op_t *) dstp)[7] = a1; /* Store the last loaded word (the lowest one), which has not been written yet. */ |
328 | } |
329 | |
330 | /* _wordcopy_bwd_dest_aligned -- Copy block finishing right |
331 | before SRCP to block finishing right before DSTP with LEN `op_t' |
332 | words (not LEN bytes!). DSTP should be aligned for memory |
333 | operations on `op_t', but SRCP must *not* be aligned. */ |
334 | |
335 | #ifndef WORDCOPY_BWD_DEST_ALIGNED |
336 | # define WORDCOPY_BWD_DEST_ALIGNED _wordcopy_bwd_dest_aligned |
337 | #endif |
338 | /* The guard above keeps any earlier definition of WORDCOPY_BWD_DEST_ALIGNED. DSTP and SRCP are the addresses just past the end of each block, carried as long int and cast to op_t * at every access; LEN counts op_t words. */ |
339 | void |
340 | WORDCOPY_BWD_DEST_ALIGNED (long int dstp, long int srcp, size_t len) |
341 | { |
342 | op_t a0, a1, a2, a3; /* Rotating window of aligned source words; every destination word is built from two neighbouring ones. */ |
343 | int sh_1, sh_2; /* Shift counts handed to MERGE: 8 times the offset of srcp within an OPSIZ unit, and 8 * OPSIZ minus that. */ |
344 | |
345 | /* Calculate how to shift a word read at the memory operation |
346 | aligned srcp to make it aligned for copy. */ |
347 | |
348 | sh_1 = 8 * (srcp % OPSIZ); |
349 | sh_2 = 8 * OPSIZ - sh_1; |
350 | /* If srcp were a multiple of OPSIZ, sh_1 would be 0 and sh_2 would be 8 * OPSIZ. NOTE(review): presumably that equals the full width of op_t and is not a usable shift count in MERGE, hence the unaligned-SRCP requirement -- confirm in memcopy.h. */ |
351 | /* Make srcp aligned by rounding it down to the beginning of the op_t |
352 | it points in the middle of, then advance it by one op_t. */ |
353 | srcp &= -OPSIZ; |
354 | srcp += OPSIZ; /* srcp now sits just past the aligned word that holds the end of the source block. */ |
355 | /* Enter the 4-way unrolled loop at the label selected by len % 4. Each case moves srcp and dstp down for the fixed indices at that label, preloads the two highest aligned source words, and adjusts len to a multiple of 4. */ |
356 | switch (len % 4) |
357 | { |
358 | case 2: |
359 | srcp -= 3 * OPSIZ; |
360 | dstp -= 1 * OPSIZ; |
361 | a2 = ((op_t *) srcp)[2]; |
362 | a1 = ((op_t *) srcp)[1]; |
363 | len += 2; |
364 | goto do1; |
365 | case 3: |
366 | srcp -= 4 * OPSIZ; |
367 | dstp -= 2 * OPSIZ; |
368 | a3 = ((op_t *) srcp)[3]; |
369 | a2 = ((op_t *) srcp)[2]; |
370 | len += 1; |
371 | goto do2; |
372 | case 0: |
373 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) /* Live only when OP_T_THRES <= 3 * OPSIZ; NOTE(review): presumably len is never 0 otherwise -- confirm against callers. */ |
374 | return; |
375 | srcp -= 5 * OPSIZ; |
376 | dstp -= 3 * OPSIZ; |
377 | a0 = ((op_t *) srcp)[4]; |
378 | a3 = ((op_t *) srcp)[3]; |
379 | goto do3; |
380 | case 1: |
381 | srcp -= 6 * OPSIZ; |
382 | dstp -= 4 * OPSIZ; |
383 | a1 = ((op_t *) srcp)[5]; |
384 | a0 = ((op_t *) srcp)[4]; |
385 | len -= 1; |
386 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) |
387 | goto do0; |
388 | goto do4; /* No-op. */ |
389 | } |
390 | /* Indices run from 3 down to 0: each step loads the next lower aligned source word and stores the MERGE of the two words loaded before it. Over the whole call, len + 1 aligned source words are read. */ |
391 | do |
392 | { |
393 | do4: |
394 | a3 = ((op_t *) srcp)[3]; |
395 | ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2); |
396 | do3: |
397 | a2 = ((op_t *) srcp)[2]; |
398 | ((op_t *) dstp)[2] = MERGE (a3, sh_1, a0, sh_2); |
399 | do2: |
400 | a1 = ((op_t *) srcp)[1]; |
401 | ((op_t *) dstp)[1] = MERGE (a2, sh_1, a3, sh_2); |
402 | do1: |
403 | a0 = ((op_t *) srcp)[0]; |
404 | ((op_t *) dstp)[0] = MERGE (a1, sh_1, a2, sh_2); |
405 | /* One full pass handled 4 words; step both addresses down and count them off. */ |
406 | srcp -= 4 * OPSIZ; |
407 | dstp -= 4 * OPSIZ; |
408 | len -= 4; |
409 | } |
410 | while (len != 0); |
411 | |
412 | /* This is the right position for do0. Please don't move |
413 | it into the loop. */ |
414 | do0: |
415 | ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2); /* Final (lowest) destination word, built from the last two source words loaded. */ |
416 | } |
417 | |