1 | /* _memcopy.c -- subroutines for memory copy functions. |
---|---|

2 | Copyright (C) 1991-2016 Free Software Foundation, Inc. |

3 | This file is part of the GNU C Library. |

4 | Contributed by Torbjorn Granlund (tege@sics.se). |

5 | |

6 | The GNU C Library is free software; you can redistribute it and/or |

7 | modify it under the terms of the GNU Lesser General Public |

8 | License as published by the Free Software Foundation; either |

9 | version 2.1 of the License, or (at your option) any later version. |

10 | |

11 | The GNU C Library is distributed in the hope that it will be useful, |

12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |

13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |

14 | Lesser General Public License for more details. |

15 | |

16 | You should have received a copy of the GNU Lesser General Public |

17 | License along with the GNU C Library; if not, see |

18 | <http://www.gnu.org/licenses/>. */ |

19 | |

20 | /* BE VERY CAREFUL IF YOU CHANGE THIS CODE...! */ |

21 | |

22 | #include <stddef.h> |

23 | #include <memcopy.h> |

24 | |

25 | /* _wordcopy_fwd_aligned -- Copy block beginning at SRCP to |

26 | block beginning at DSTP with LEN `op_t' words (not LEN bytes!). |

27 | Both SRCP and DSTP should be aligned for memory operations on `op_t's. */ |

28 | |

29 | #ifndef WORDCOPY_FWD_ALIGNED |

30 | # define WORDCOPY_FWD_ALIGNED _wordcopy_fwd_aligned |

31 | #endif |

32 | |

/* Copy LEN op_t words from SRCP to DSTP, walking upward in memory.
   DSTP and SRCP are addresses carried as integers and cast to op_t *
   at every access.  The copy is an 8-way unrolled loop in which each
   step loads the next source word before storing the previously loaded
   one, alternating between a0 and a1.  The switch on LEN % 8 loads the
   first word, biases SRCP and DSTP so that the fixed indices [0]..[7]
   in the loop body line up with the data, adjusts LEN to a multiple of
   8 and jumps to the matching entry label.  One loaded word is always
   still pending when the loop ends; the store at do0 writes it.  */
33 | void |

34 | WORDCOPY_FWD_ALIGNED (long int dstp, long int srcp, size_t len) |

35 | { |

/* The two words in flight: one just loaded, one about to be stored.  */
36 | op_t a0, a1; |

37 | |

/* Pick the loop entry point from the number of words beyond a
   multiple of 8.  */
38 | switch (len % 8) |

39 | { |

/* In cases 2..7 DSTP is moved back one word further than SRCP because
   every store in the loop uses the same index as the load that follows
   the one which fetched its value.  */
40 | case 2: |

41 | a0 = ((op_t *) srcp)[0]; |

42 | srcp -= 6 * OPSIZ; |

43 | dstp -= 7 * OPSIZ; |

44 | len += 6; |

45 | goto do1; |

46 | case 3: |

47 | a1 = ((op_t *) srcp)[0]; |

48 | srcp -= 5 * OPSIZ; |

49 | dstp -= 6 * OPSIZ; |

50 | len += 5; |

51 | goto do2; |

52 | case 4: |

53 | a0 = ((op_t *) srcp)[0]; |

54 | srcp -= 4 * OPSIZ; |

55 | dstp -= 5 * OPSIZ; |

56 | len += 4; |

57 | goto do3; |

58 | case 5: |

59 | a1 = ((op_t *) srcp)[0]; |

60 | srcp -= 3 * OPSIZ; |

61 | dstp -= 4 * OPSIZ; |

62 | len += 3; |

63 | goto do4; |

64 | case 6: |

65 | a0 = ((op_t *) srcp)[0]; |

66 | srcp -= 2 * OPSIZ; |

67 | dstp -= 3 * OPSIZ; |

68 | len += 2; |

69 | goto do5; |

70 | case 7: |

71 | a1 = ((op_t *) srcp)[0]; |

72 | srcp -= 1 * OPSIZ; |

73 | dstp -= 2 * OPSIZ; |

74 | len += 1; |

75 | goto do6; |

76 | |

/* LEN is already a multiple of 8 here, so it is left unchanged.  */
77 | case 0: |

/* The LEN == 0 test is combined with a condition on OP_T_THRES and
   OPSIZ, both defined outside this file.  NOTE(review): when that
   condition is false a LEN of 0 would enter the loop and wrap LEN on
   the first `len -= 8' -- presumably callers never pass 0 in that
   configuration; confirm against the header.  */
78 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) |

79 | return; |

80 | a0 = ((op_t *) srcp)[0]; |

81 | srcp -= 0 * OPSIZ; |

82 | dstp -= 1 * OPSIZ; |

83 | goto do7; |

84 | case 1: |

85 | a1 = ((op_t *) srcp)[0]; |

/* Subtracting -1 * OPSIZ moves SRCP forward by one word.  */
86 | srcp -=-1 * OPSIZ; |

87 | dstp -= 0 * OPSIZ; |

88 | len -= 1; |

/* With a single word to copy, only the final store at do0 is needed.  */
89 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) |

90 | goto do0; |

91 | goto do8; /* No-op. */ |

92 | } |

93 | |

/* Main loop: eight load/store pairs per iteration.  Each label doN is
   an entry point reached from the switch above.  */
94 | do |

95 | { |

96 | do8: |

97 | a0 = ((op_t *) srcp)[0]; |

98 | ((op_t *) dstp)[0] = a1; |

99 | do7: |

100 | a1 = ((op_t *) srcp)[1]; |

101 | ((op_t *) dstp)[1] = a0; |

102 | do6: |

103 | a0 = ((op_t *) srcp)[2]; |

104 | ((op_t *) dstp)[2] = a1; |

105 | do5: |

106 | a1 = ((op_t *) srcp)[3]; |

107 | ((op_t *) dstp)[3] = a0; |

108 | do4: |

109 | a0 = ((op_t *) srcp)[4]; |

110 | ((op_t *) dstp)[4] = a1; |

111 | do3: |

112 | a1 = ((op_t *) srcp)[5]; |

113 | ((op_t *) dstp)[5] = a0; |

114 | do2: |

115 | a0 = ((op_t *) srcp)[6]; |

116 | ((op_t *) dstp)[6] = a1; |

117 | do1: |

118 | a1 = ((op_t *) srcp)[7]; |

119 | ((op_t *) dstp)[7] = a0; |

120 | |

121 | srcp += 8 * OPSIZ; |

122 | dstp += 8 * OPSIZ; |

123 | len -= 8; |

124 | } |

125 | while (len != 0); |

126 | |

127 | /* This is the right position for do0. Please don't move |

128 | it into the loop. */ |

129 | do0: |

/* Store the last loaded word, which the loop has not yet written.  */
130 | ((op_t *) dstp)[0] = a1; |

131 | } |

132 | |

133 | /* _wordcopy_fwd_dest_aligned -- Copy block beginning at SRCP to |

134 | block beginning at DSTP with LEN `op_t' words (not LEN bytes!). |

135 | DSTP should be aligned for memory operations on `op_t's, but SRCP must |

136 | *not* be aligned. */ |

137 | |

138 | #ifndef WORDCOPY_FWD_DEST_ALIGNED |

139 | # define WORDCOPY_FWD_DEST_ALIGNED _wordcopy_fwd_dest_aligned |

140 | #endif |

141 | |

/* Copy LEN op_t words from the unaligned address SRCP to DSTP, walking
   upward in memory.  SRCP is rounded down to a word boundary and each
   destination word is produced by MERGE from two adjacent aligned
   source words and the two shift counts sh_1 and sh_2; the
   lower-addressed word is always passed first.  MERGE is defined
   outside this file.  The loop is unrolled 4 ways with a0..a3 acting
   as a rotating window over the source.  The switch on LEN % 4 preloads
   two words, biases SRCP and DSTP so the fixed indices [0]..[3] line
   up, adjusts LEN to a multiple of 4 and jumps to the matching entry
   label.  Tracing the entry cases shows that LEN + 1 aligned source
   words are loaded to produce LEN destination words.  */
142 | void |

143 | WORDCOPY_FWD_DEST_ALIGNED (long int dstp, long int srcp, size_t len) |

144 | { |

145 | op_t a0, a1, a2, a3; |

146 | int sh_1, sh_2; |

147 | |

148 | /* Calculate how to shift a word read at the memory operation |

149 | aligned srcp to make it aligned for copy. */ |

150 | |

/* sh_1 is the offset of SRCP inside its word, in bits (8 per byte);
   sh_2 is the rest of the word, 8 * OPSIZ - sh_1 bits.
   NOTE(review): for an aligned SRCP sh_2 would equal the full word
   width in bits -- presumably why SRCP must not be aligned; confirm
   against the definition of MERGE.  */
151 | sh_1 = 8 * (srcp % OPSIZ); |

152 | sh_2 = 8 * OPSIZ - sh_1; |

153 | |

154 | /* Make SRCP aligned by rounding it down to the beginning of the `op_t' |

155 | it points in the middle of. */ |

156 | srcp &= -OPSIZ; |

157 | |

/* Pick the loop entry point from the number of words beyond a
   multiple of 4.  Every case loads the first two aligned source
   words into the pair of variables its entry label expects.  */
158 | switch (len % 4) |

159 | { |

160 | case 2: |

161 | a1 = ((op_t *) srcp)[0]; |

162 | a2 = ((op_t *) srcp)[1]; |

163 | srcp -= 1 * OPSIZ; |

164 | dstp -= 3 * OPSIZ; |

165 | len += 2; |

166 | goto do1; |

167 | case 3: |

168 | a0 = ((op_t *) srcp)[0]; |

169 | a1 = ((op_t *) srcp)[1]; |

170 | srcp -= 0 * OPSIZ; |

171 | dstp -= 2 * OPSIZ; |

172 | len += 1; |

173 | goto do2; |

174 | case 0: |

/* The LEN == 0 test is combined with a condition on OP_T_THRES and
   OPSIZ, both defined outside this file.  NOTE(review): when that
   condition is false a LEN of 0 would enter the loop and wrap LEN --
   presumably callers never pass 0 in that configuration; confirm.  */
175 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) |

176 | return; |

177 | a3 = ((op_t *) srcp)[0]; |

178 | a0 = ((op_t *) srcp)[1]; |

/* Subtracting -1 * OPSIZ moves SRCP forward by one word.  */
179 | srcp -=-1 * OPSIZ; |

180 | dstp -= 1 * OPSIZ; |

181 | len += 0; |

182 | goto do3; |

183 | case 1: |

184 | a2 = ((op_t *) srcp)[0]; |

185 | a3 = ((op_t *) srcp)[1]; |

/* Subtracting -2 * OPSIZ moves SRCP forward by two words.  */
186 | srcp -=-2 * OPSIZ; |

187 | dstp -= 0 * OPSIZ; |

188 | len -= 1; |

/* With a single word to copy, only the final store at do0 is needed.  */
189 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) |

190 | goto do0; |

191 | goto do4; /* No-op. */ |

192 | } |

193 | |

/* Main loop: each step loads one new aligned source word and stores
   the MERGE of the two words loaded before it.  */
194 | do |

195 | { |

196 | do4: |

197 | a0 = ((op_t *) srcp)[0]; |

198 | ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2); |

199 | do3: |

200 | a1 = ((op_t *) srcp)[1]; |

201 | ((op_t *) dstp)[1] = MERGE (a3, sh_1, a0, sh_2); |

202 | do2: |

203 | a2 = ((op_t *) srcp)[2]; |

204 | ((op_t *) dstp)[2] = MERGE (a0, sh_1, a1, sh_2); |

205 | do1: |

206 | a3 = ((op_t *) srcp)[3]; |

207 | ((op_t *) dstp)[3] = MERGE (a1, sh_1, a2, sh_2); |

208 | |

209 | srcp += 4 * OPSIZ; |

210 | dstp += 4 * OPSIZ; |

211 | len -= 4; |

212 | } |

213 | while (len != 0); |

214 | |

215 | /* This is the right position for do0. Please don't move |

216 | it into the loop. */ |

217 | do0: |

/* Store the last destination word from the two words still held.  */
218 | ((op_t *) dstp)[0] = MERGE (a2, sh_1, a3, sh_2); |

219 | } |

220 | |

221 | /* _wordcopy_bwd_aligned -- Copy block finishing right before |

222 | SRCP to block finishing right before DSTP with LEN `op_t' words |

223 | (not LEN bytes!). Both SRCP and DSTP should be aligned for memory |

224 | operations on `op_t's. */ |

225 | |

226 | #ifndef WORDCOPY_BWD_ALIGNED |

227 | # define WORDCOPY_BWD_ALIGNED _wordcopy_bwd_aligned |

228 | #endif |

229 | |

/* Copy LEN op_t words downward in memory: the source block ends right
   before SRCP and the destination block ends right before DSTP.  Both
   are addresses carried as integers and cast to op_t * at every
   access.  The copy is an 8-way unrolled loop that runs from index [7]
   down to [0]; each step loads the next lower source word before
   storing the previously loaded one, alternating between a0 and a1.
   The switch on LEN % 8 moves SRCP and DSTP down so the fixed indices
   line up, loads the highest source word, adjusts LEN to a multiple of
   8 and jumps to the matching entry label.  One loaded word is always
   still pending when the loop ends; the store at do0 writes it.  */
230 | void |

231 | WORDCOPY_BWD_ALIGNED (long int dstp, long int srcp, size_t len) |

232 | { |

/* The two words in flight: one just loaded, one about to be stored.  */
233 | op_t a0, a1; |

234 | |

/* Pick the loop entry point from the number of words beyond a
   multiple of 8.  In every case SRCP is lowered one word more than
   DSTP, and the first load reads the word directly below the
   original SRCP.  */
235 | switch (len % 8) |

236 | { |

237 | case 2: |

238 | srcp -= 2 * OPSIZ; |

239 | dstp -= 1 * OPSIZ; |

240 | a0 = ((op_t *) srcp)[1]; |

241 | len += 6; |

242 | goto do1; |

243 | case 3: |

244 | srcp -= 3 * OPSIZ; |

245 | dstp -= 2 * OPSIZ; |

246 | a1 = ((op_t *) srcp)[2]; |

247 | len += 5; |

248 | goto do2; |

249 | case 4: |

250 | srcp -= 4 * OPSIZ; |

251 | dstp -= 3 * OPSIZ; |

252 | a0 = ((op_t *) srcp)[3]; |

253 | len += 4; |

254 | goto do3; |

255 | case 5: |

256 | srcp -= 5 * OPSIZ; |

257 | dstp -= 4 * OPSIZ; |

258 | a1 = ((op_t *) srcp)[4]; |

259 | len += 3; |

260 | goto do4; |

261 | case 6: |

262 | srcp -= 6 * OPSIZ; |

263 | dstp -= 5 * OPSIZ; |

264 | a0 = ((op_t *) srcp)[5]; |

265 | len += 2; |

266 | goto do5; |

267 | case 7: |

268 | srcp -= 7 * OPSIZ; |

269 | dstp -= 6 * OPSIZ; |

270 | a1 = ((op_t *) srcp)[6]; |

271 | len += 1; |

272 | goto do6; |

273 | |

/* LEN is already a multiple of 8 here, so it is left unchanged.  */
274 | case 0: |

/* The LEN == 0 test is combined with a condition on OP_T_THRES and
   OPSIZ, both defined outside this file.  NOTE(review): when that
   condition is false a LEN of 0 would enter the loop and wrap LEN on
   the first `len -= 8' -- presumably callers never pass 0 in that
   configuration; confirm against the header.  */
275 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) |

276 | return; |

277 | srcp -= 8 * OPSIZ; |

278 | dstp -= 7 * OPSIZ; |

279 | a0 = ((op_t *) srcp)[7]; |

280 | goto do7; |

281 | case 1: |

282 | srcp -= 9 * OPSIZ; |

283 | dstp -= 8 * OPSIZ; |

284 | a1 = ((op_t *) srcp)[8]; |

285 | len -= 1; |

/* With a single word to copy, only the final store at do0 is needed.  */
286 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) |

287 | goto do0; |

288 | goto do8; /* No-op. */ |

289 | } |

290 | |

/* Main loop: eight load/store pairs per iteration, highest index
   first.  Each label doN is an entry point reached from the switch
   above.  */
291 | do |

292 | { |

293 | do8: |

294 | a0 = ((op_t *) srcp)[7]; |

295 | ((op_t *) dstp)[7] = a1; |

296 | do7: |

297 | a1 = ((op_t *) srcp)[6]; |

298 | ((op_t *) dstp)[6] = a0; |

299 | do6: |

300 | a0 = ((op_t *) srcp)[5]; |

301 | ((op_t *) dstp)[5] = a1; |

302 | do5: |

303 | a1 = ((op_t *) srcp)[4]; |

304 | ((op_t *) dstp)[4] = a0; |

305 | do4: |

306 | a0 = ((op_t *) srcp)[3]; |

307 | ((op_t *) dstp)[3] = a1; |

308 | do3: |

309 | a1 = ((op_t *) srcp)[2]; |

310 | ((op_t *) dstp)[2] = a0; |

311 | do2: |

312 | a0 = ((op_t *) srcp)[1]; |

313 | ((op_t *) dstp)[1] = a1; |

314 | do1: |

315 | a1 = ((op_t *) srcp)[0]; |

316 | ((op_t *) dstp)[0] = a0; |

317 | |

318 | srcp -= 8 * OPSIZ; |

319 | dstp -= 8 * OPSIZ; |

320 | len -= 8; |

321 | } |

322 | while (len != 0); |

323 | |

324 | /* This is the right position for do0. Please don't move |

325 | it into the loop. */ |

326 | do0: |

/* Store the last loaded word; index [7] because DSTP has already been
   lowered by eight words at the end of the loop body.  */
327 | ((op_t *) dstp)[7] = a1; |

328 | } |

329 | |

330 | /* _wordcopy_bwd_dest_aligned -- Copy block finishing right |

331 | before SRCP to block finishing right before DSTP with LEN `op_t' |

332 | words (not LEN bytes!). DSTP should be aligned for memory |

333 | operations on `op_t', but SRCP must *not* be aligned. */ |

334 | |

335 | #ifndef WORDCOPY_BWD_DEST_ALIGNED |

336 | # define WORDCOPY_BWD_DEST_ALIGNED _wordcopy_bwd_dest_aligned |

337 | #endif |

338 | |

/* Copy LEN op_t words downward in memory from an unaligned source: the
   source block ends right before SRCP and the destination block ends
   right before DSTP.  SRCP is replaced by the word boundary above it,
   and each destination word is produced by MERGE from two adjacent
   aligned source words and the shift counts sh_1 and sh_2; the
   lower-addressed word is always passed first.  MERGE is defined
   outside this file.  The loop is unrolled 4 ways, running from index
   [3] down to [0], with a0..a3 acting as a rotating window over the
   source.  The switch on LEN % 4 lowers SRCP and DSTP so the fixed
   indices line up, preloads the two highest source words, adjusts LEN
   to a multiple of 4 and jumps to the matching entry label.  */
339 | void |

340 | WORDCOPY_BWD_DEST_ALIGNED (long int dstp, long int srcp, size_t len) |

341 | { |

342 | op_t a0, a1, a2, a3; |

343 | int sh_1, sh_2; |

344 | |

345 | /* Calculate how to shift a word read at the memory operation |

346 | aligned srcp to make it aligned for copy. */ |

347 | |

/* sh_1 is the offset of SRCP inside its word, in bits (8 per byte);
   sh_2 is the rest of the word, 8 * OPSIZ - sh_1 bits.
   NOTE(review): for an aligned SRCP sh_2 would equal the full word
   width in bits -- presumably why SRCP must not be aligned; confirm
   against the definition of MERGE.  */
348 | sh_1 = 8 * (srcp % OPSIZ); |

349 | sh_2 = 8 * OPSIZ - sh_1; |

350 | |

351 | /* Make srcp aligned by rounding it down to the beginning of the op_t |

352 | it points in the middle of. */ |

353 | srcp &= -OPSIZ; |

/* Then step up one word, so SRCP is the boundary just above the word
   it pointed into.  The first load in every case below therefore
   reads that word, which holds the last bytes of the source block.  */
354 | srcp += OPSIZ; |

355 | |

/* Pick the loop entry point from the number of words beyond a
   multiple of 4.  */
356 | switch (len % 4) |

357 | { |

358 | case 2: |

359 | srcp -= 3 * OPSIZ; |

360 | dstp -= 1 * OPSIZ; |

361 | a2 = ((op_t *) srcp)[2]; |

362 | a1 = ((op_t *) srcp)[1]; |

363 | len += 2; |

364 | goto do1; |

365 | case 3: |

366 | srcp -= 4 * OPSIZ; |

367 | dstp -= 2 * OPSIZ; |

368 | a3 = ((op_t *) srcp)[3]; |

369 | a2 = ((op_t *) srcp)[2]; |

370 | len += 1; |

371 | goto do2; |

372 | case 0: |

/* The LEN == 0 test is combined with a condition on OP_T_THRES and
   OPSIZ, both defined outside this file.  NOTE(review): when that
   condition is false a LEN of 0 would enter the loop and wrap LEN --
   presumably callers never pass 0 in that configuration; confirm.  */
373 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) |

374 | return; |

375 | srcp -= 5 * OPSIZ; |

376 | dstp -= 3 * OPSIZ; |

377 | a0 = ((op_t *) srcp)[4]; |

378 | a3 = ((op_t *) srcp)[3]; |

379 | goto do3; |

380 | case 1: |

381 | srcp -= 6 * OPSIZ; |

382 | dstp -= 4 * OPSIZ; |

383 | a1 = ((op_t *) srcp)[5]; |

384 | a0 = ((op_t *) srcp)[4]; |

385 | len -= 1; |

/* With a single word to copy, only the final store at do0 is needed.  */
386 | if (OP_T_THRES <= 3 * OPSIZ && len == 0) |

387 | goto do0; |

388 | goto do4; /* No-op. */ |

389 | } |

390 | |

/* Main loop: each step loads one new, lower aligned source word and
   stores the MERGE of the two words loaded before it.  */
391 | do |

392 | { |

393 | do4: |

394 | a3 = ((op_t *) srcp)[3]; |

395 | ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2); |

396 | do3: |

397 | a2 = ((op_t *) srcp)[2]; |

398 | ((op_t *) dstp)[2] = MERGE (a3, sh_1, a0, sh_2); |

399 | do2: |

400 | a1 = ((op_t *) srcp)[1]; |

401 | ((op_t *) dstp)[1] = MERGE (a2, sh_1, a3, sh_2); |

402 | do1: |

403 | a0 = ((op_t *) srcp)[0]; |

404 | ((op_t *) dstp)[0] = MERGE (a1, sh_1, a2, sh_2); |

405 | |

406 | srcp -= 4 * OPSIZ; |

407 | dstp -= 4 * OPSIZ; |

408 | len -= 4; |

409 | } |

410 | while (len != 0); |

411 | |

412 | /* This is the right position for do0. Please don't move |

413 | it into the loop. */ |

414 | do0: |

/* Store the lowest destination word from the two words still held;
   index [3] because DSTP has already been lowered by four words at
   the end of the loop body.  */
415 | ((op_t *) dstp)[3] = MERGE (a0, sh_1, a1, sh_2); |

416 | } |

417 |