1 | /* longlong.h -- definitions for mixed size 32/64 bit arithmetic. |
---|---|

2 | Copyright (C) 1991-2019 Free Software Foundation, Inc. |

3 | |

4 | This file is part of the GNU C Library. |

5 | |

6 | The GNU C Library is free software; you can redistribute it and/or |

7 | modify it under the terms of the GNU Lesser General Public |

8 | License as published by the Free Software Foundation; either |

9 | version 2.1 of the License, or (at your option) any later version. |

10 | |

11 | In addition to the permissions in the GNU Lesser General Public |

12 | License, the Free Software Foundation gives you unlimited |

13 | permission to link the compiled version of this file into |

14 | combinations with other programs, and to distribute those |

15 | combinations without any restriction coming from the use of this |

16 | file. (The Lesser General Public License restrictions do apply in |

17 | other respects; for example, they cover modification of the file, |

18 | and distribution when not linked into a combine executable.) |

19 | |

20 | The GNU C Library is distributed in the hope that it will be useful, |

21 | but WITHOUT ANY WARRANTY; without even the implied warranty of |

22 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |

23 | Lesser General Public License for more details. |

24 | |

25 | You should have received a copy of the GNU Lesser General Public |

26 | License along with the GNU C Library; if not, see |

27 | <http://www.gnu.org/licenses/>. */ |

28 | |

29 | /* You have to define the following before including this file: |

30 | |

31 | UWtype -- An unsigned type, default type for operations (typically a "word") |

32 | UHWtype -- An unsigned type, at least half the size of UWtype. |

33 | UDWtype -- An unsigned type, at least twice as large as UWtype |

34 | W_TYPE_SIZE -- size in bits of UWtype |

35 | |

36 | UQItype -- Unsigned 8 bit type. |

37 | SItype, USItype -- Signed and unsigned 32 bit types. |

38 | DItype, UDItype -- Signed and unsigned 64 bit types. |

39 | |

40 | On a 32 bit machine UWtype should typically be USItype; |

41 | on a 64 bit machine, UWtype should typically be UDItype. */ |

42 | |

43 | #define __BITS4 (W_TYPE_SIZE / 4) |

44 | #define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) |

45 | #define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) |

46 | #define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) |

47 | |

48 | #ifndef W_TYPE_SIZE |

49 | #define W_TYPE_SIZE 32 |

50 | #define UWtype USItype |

51 | #define UHWtype USItype |

52 | #define UDWtype UDItype |

53 | #endif |

54 | |

55 | /* Used in glibc only. */ |

56 | #ifndef attribute_hidden |

57 | #define attribute_hidden |

58 | #endif |

59 | |

60 | extern const UQItype __clz_tab[256] attribute_hidden; |

61 | |

62 | /* Define auxiliary asm macros. |

63 | |

64 | 1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two |

65 | UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype |

66 | word product in HIGH_PROD and LOW_PROD. |

67 | |

68 | 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a |

69 | UDWtype product. This is just a variant of umul_ppmm. |

70 | |

71 | 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator, |

72 | denominator) divides a UDWtype, composed by the UWtype integers |

73 | HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient |

74 | in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less |

75 | than DENOMINATOR for correct operation. If, in addition, the most |

76 | significant bit of DENOMINATOR must be 1, then the pre-processor symbol |

77 | UDIV_NEEDS_NORMALIZATION is defined to 1. |

78 | |

79 | 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator, |

80 | denominator). Like udiv_qrnnd but the numbers are signed. The quotient |

81 | is rounded towards 0. |

82 | |

83 | 5) count_leading_zeros(count, x) counts the number of zero-bits from the |

84 | msb to the first nonzero bit in the UWtype X. This is the number of |

85 | steps X needs to be shifted left to set the msb. Undefined for X == 0, |

86 | unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value. |

87 | |

88 | 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts |

89 | from the least significant end. |

90 | |

91 | 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1, |

92 | high_addend_2, low_addend_2) adds two two-word UWtype integers, composed by |

93 | HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2 |

94 | respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow |

95 | (i.e. carry out) is not stored anywhere, and is lost. |

96 | |

97 | 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend, |

98 | high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers, |

99 | composed by HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and |

100 | LOW_SUBTRAHEND respectively. The result is placed in HIGH_DIFFERENCE |

101 | and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere, |

102 | and is lost. |

103 | |

104 | If any of these macros are left undefined for a particular CPU, |

105 | C macros are used. */ |

106 | |

107 | /* The CPUs come in alphabetical order below. |

108 | |

109 | Please add support for more CPUs here, or improve the current support |

110 | for the CPUs below! |

111 | (E.g. WE32100, IBM360.) */ |

112 | |

113 | #if defined (__GNUC__) && !defined (NO_ASM) |

114 | |

115 | /* We sometimes need to clobber "cc" with gcc2, but that would not be |

116 | understood by gcc1. Use cpp to avoid major code duplication. */ |

117 | #if __GNUC__ < 2 |

118 | #define __CLOBBER_CC |

119 | #define __AND_CLOBBER_CC |

120 | #else /* __GNUC__ >= 2 */ |

121 | #define __CLOBBER_CC : "cc" |

122 | #define __AND_CLOBBER_CC , "cc" |

123 | #endif /* __GNUC__ < 2 */ |

124 | |

125 | #if defined (__aarch64__) |

126 | |

127 | #if W_TYPE_SIZE == 32 |

128 | #define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X)) |

129 | #define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X)) |

130 | #define COUNT_LEADING_ZEROS_0 32 |

131 | #endif /* W_TYPE_SIZE == 32 */ |

132 | |

133 | #if W_TYPE_SIZE == 64 |

134 | #define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clzll (X)) |

135 | #define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctzll (X)) |

136 | #define COUNT_LEADING_ZEROS_0 64 |

137 | #endif /* W_TYPE_SIZE == 64 */ |

138 | |

139 | #endif /* __aarch64__ */ |

140 | |

141 | #if defined (__alpha) && W_TYPE_SIZE == 64 |

142 | /* There is a bug in g++ before version 5 that |

143 | errors on __builtin_alpha_umulh. */ |

144 | #if !defined(__cplusplus) || __GNUC__ >= 5 |

145 | #define umul_ppmm(ph, pl, m0, m1) \ |

146 | do { \ |

147 | UDItype __m0 = (m0), __m1 = (m1); \ |

148 | (ph) = __builtin_alpha_umulh (__m0, __m1); \ |

149 | (pl) = __m0 * __m1; \ |

150 | } while (0) |

151 | #define UMUL_TIME 46 |

152 | #endif /* !c++ */ |

153 | #ifndef LONGLONG_STANDALONE |

154 | #define udiv_qrnnd(q, r, n1, n0, d) \ |

155 | do { UDItype __r; \ |

156 | (q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \ |

157 | (r) = __r; \ |

158 | } while (0) |

159 | extern UDItype __udiv_qrnnd (UDItype *, UDItype, UDItype, UDItype); |

160 | #define UDIV_TIME 220 |

161 | #endif /* LONGLONG_STANDALONE */ |

162 | #ifdef __alpha_cix__ |

163 | #define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzl (X)) |

164 | #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X)) |

165 | #define COUNT_LEADING_ZEROS_0 64 |

166 | #else |

167 | #define count_leading_zeros(COUNT,X) \ |

168 | do { \ |

169 | UDItype __xr = (X), __t, __a; \ |

170 | __t = __builtin_alpha_cmpbge (0, __xr); \ |

171 | __a = __clz_tab[__t ^ 0xff] - 1; \ |

172 | __t = __builtin_alpha_extbl (__xr, __a); \ |

173 | (COUNT) = 64 - (__clz_tab[__t] + __a*8); \ |

174 | } while (0) |

175 | #define count_trailing_zeros(COUNT,X) \ |

176 | do { \ |

177 | UDItype __xr = (X), __t, __a; \ |

178 | __t = __builtin_alpha_cmpbge (0, __xr); \ |

179 | __t = ~__t & -~__t; \ |

180 | __a = ((__t & 0xCC) != 0) * 2; \ |

181 | __a += ((__t & 0xF0) != 0) * 4; \ |

182 | __a += ((__t & 0xAA) != 0); \ |

183 | __t = __builtin_alpha_extbl (__xr, __a); \ |

184 | __a <<= 3; \ |

185 | __t &= -__t; \ |

186 | __a += ((__t & 0xCC) != 0) * 2; \ |

187 | __a += ((__t & 0xF0) != 0) * 4; \ |

188 | __a += ((__t & 0xAA) != 0); \ |

189 | (COUNT) = __a; \ |

190 | } while (0) |

191 | #endif /* __alpha_cix__ */ |

192 | #endif /* __alpha */ |

193 | |

194 | #if defined (__arc__) && W_TYPE_SIZE == 32 |

/* Two-word add for ARC: (SH:SL) = (AH:AL) + (BH:BL); the carry out of
   the high word is discarded.  The low words are added first with
   add.f, and adc then folds the resulting carry into the high-word
   sum.  Because add.f rewrites the status flags, the condition codes
   are declared clobbered, matching the "cc" clobber used for norm.f in
   count_leading_zeros for this CPU.  */
#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  __asm__ ("add.f %1, %4, %5\n\tadc %0, %2, %3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "%r" ((USItype) (ah)), \
             "rICal" ((USItype) (bh)), \
             "%r" ((USItype) (al)), \
             "rICal" ((USItype) (bl)) __CLOBBER_CC)

/* Two-word subtract for ARC: (SH:SL) = (AH:AL) - (BH:BL); the borrow
   out of the high word is discarded.  sub.f subtracts the low words
   and sbc applies the resulting borrow to the high-word difference.
   Because sub.f rewrites the status flags, the condition codes are
   declared clobbered, matching the "cc" clobber used for norm.f in
   count_leading_zeros for this CPU.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  __asm__ ("sub.f %1, %4, %5\n\tsbc %0, %2, %3" \
           : "=r" ((USItype) (sh)), \
             "=&r" ((USItype) (sl)) \
           : "r" ((USItype) (ah)), \
             "rICal" ((USItype) (bh)), \
             "r" ((USItype) (al)), \
             "rICal" ((USItype) (bl)) __CLOBBER_CC)

211 | |

/* Widening 32x32->64 multiply written in plain C so the compiler can
   pick the multiply instruction.  Both arguments are parenthesized
   before the casts so that compound expressions such as `a + b' are
   converted and multiplied as a whole.  */
#define __umulsidi3(u,v) ((UDItype)(USItype) (u) * (UDItype)(USItype) (v))

213 | #ifdef __ARC_NORM__ |

214 | #define count_leading_zeros(count, x) \ |

215 | do \ |

216 | { \ |

217 | SItype c_; \ |

218 | \ |

219 | __asm__ ("norm.f\t%0,%1\n\tmov.mi\t%0,-1" : "=r" (c_) : "r" (x) : "cc");\ |

220 | (count) = c_ + 1; \ |

221 | } \ |

222 | while (0) |

223 | #define COUNT_LEADING_ZEROS_0 32 |

224 | #endif /* __ARC_NORM__ */ |

225 | #endif /* __arc__ */ |

226 | |

227 | #if defined (__arm__) && (defined (__thumb2__) || !defined (__thumb__)) \ |

228 | && W_TYPE_SIZE == 32 |

229 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ |

230 | __asm__ ("adds %1, %4, %5\n\tadc %0, %2, %3" \ |

231 | : "=r" ((USItype) (sh)), \ |

232 | "=&r" ((USItype) (sl)) \ |

233 | : "%r" ((USItype) (ah)), \ |

234 | "rI" ((USItype) (bh)), \ |

235 | "%r" ((USItype) (al)), \ |

236 | "rI" ((USItype) (bl)) __CLOBBER_CC) |

237 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ |

238 | __asm__ ("subs %1, %4, %5\n\tsbc %0, %2, %3" \ |

239 | : "=r" ((USItype) (sh)), \ |

240 | "=&r" ((USItype) (sl)) \ |

241 | : "r" ((USItype) (ah)), \ |

242 | "rI" ((USItype) (bh)), \ |

243 | "r" ((USItype) (al)), \ |

244 | "rI" ((USItype) (bl)) __CLOBBER_CC) |

245 | # if defined(__ARM_ARCH_2__) || defined(__ARM_ARCH_2A__) \ |

246 | || defined(__ARM_ARCH_3__) |

247 | # define umul_ppmm(xh, xl, a, b) \ |

248 | do { \ |

249 | register USItype __t0, __t1, __t2; \ |

250 | __asm__ ("%@ Inlined umul_ppmm\n" \ |

251 | " mov %2, %5, lsr #16\n" \ |

252 | " mov %0, %6, lsr #16\n" \ |

253 | " bic %3, %5, %2, lsl #16\n" \ |

254 | " bic %4, %6, %0, lsl #16\n" \ |

255 | " mul %1, %3, %4\n" \ |

256 | " mul %4, %2, %4\n" \ |

257 | " mul %3, %0, %3\n" \ |

258 | " mul %0, %2, %0\n" \ |

259 | " adds %3, %4, %3\n" \ |

260 | " addcs %0, %0, #65536\n" \ |

261 | " adds %1, %1, %3, lsl #16\n" \ |

262 | " adc %0, %0, %3, lsr #16" \ |

263 | : "=&r" ((USItype) (xh)), \ |

264 | "=r" ((USItype) (xl)), \ |

265 | "=&r" (__t0), "=&r" (__t1), "=r" (__t2) \ |

266 | : "r" ((USItype) (a)), \ |

267 | "r" ((USItype) (b)) __CLOBBER_CC ); \ |

268 | } while (0) |

269 | # define UMUL_TIME 20 |

270 | # else |

# define umul_ppmm(xh, xl, a, b) \
  do { \
    /* Widen one operand to 64 bits and multiply in C; the compiler \
       is left to select the widening multiply (umull).  */ \
    register UDItype __prod = (UDItype) (USItype) (a); \
    __prod *= (USItype) (b); \
    /* Low word first, then the high word.  */ \
    (xl) = (USItype) __prod; \
    (xh) = (USItype) (__prod >> 32); \
  } while (0)

278 | # define UMUL_TIME 3 |

279 | # endif |

280 | # define UDIV_TIME 100 |

281 | #endif /* __arm__ */ |

282 | |

283 | #if defined(__arm__) |

284 | /* Let gcc decide how best to implement count_leading_zeros. */ |

285 | #define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X)) |

286 | #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X)) |

287 | #define COUNT_LEADING_ZEROS_0 32 |

288 | #endif |

289 | |

290 | #if defined (__AVR__) |

291 | |

292 | #if W_TYPE_SIZE == 16 |

293 | #define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X)) |

294 | #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctz (X)) |

295 | #define COUNT_LEADING_ZEROS_0 16 |

296 | #endif /* W_TYPE_SIZE == 16 */ |

297 | |

298 | #if W_TYPE_SIZE == 32 |

299 | #define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzl (X)) |

300 | #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzl (X)) |

301 | #define COUNT_LEADING_ZEROS_0 32 |

302 | #endif /* W_TYPE_SIZE == 32 */ |

303 | |

304 | #if W_TYPE_SIZE == 64 |

305 | #define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clzll (X)) |

306 | #define count_trailing_zeros(COUNT,X) ((COUNT) = __builtin_ctzll (X)) |

307 | #define COUNT_LEADING_ZEROS_0 64 |

308 | #endif /* W_TYPE_SIZE == 64 */ |

309 | |

310 | #endif /* defined (__AVR__) */ |

311 | |

312 | #if defined (__CRIS__) |

313 | |

314 | #if __CRIS_arch_version >= 3 |

315 | #define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X)) |

316 | #define COUNT_LEADING_ZEROS_0 32 |

317 | #endif /* __CRIS_arch_version >= 3 */ |

318 | |

319 | #if __CRIS_arch_version >= 8 |

320 | #define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X)) |

321 | #endif /* __CRIS_arch_version >= 8 */ |

322 | |

323 | #if __CRIS_arch_version >= 10 |

324 | #define __umulsidi3(u,v) ((UDItype)(USItype) (u) * (UDItype)(USItype) (v)) |

325 | #else |

326 | #define __umulsidi3 __umulsidi3 |

327 | extern UDItype __umulsidi3 (USItype, USItype); |

328 | #endif /* __CRIS_arch_version >= 10 */ |

329 | |

330 | #define umul_ppmm(w1, w0, u, v) \ |

331 | do { \ |

332 | UDItype __x = __umulsidi3 (u, v); \ |

333 | (w0) = (USItype) (__x); \ |

334 | (w1) = (USItype) (__x >> 32); \ |

335 | } while (0) |

336 | |

337 | /* FIXME: defining add_ssaaaa and sub_ddmmss should be advantageous for |

338 | DFmode ("double" intrinsics, avoiding two of the three insns handling |

339 | carry), but defining them as open-code C composing and doing the |

340 | operation in DImode (UDImode) shows that the DImode needs work: |

341 | register pressure from requiring neighboring registers and the |

342 | traffic to and from them come to dominate, in the 4.7 series. */ |

343 | |

344 | #endif /* defined (__CRIS__) */ |

345 | |

346 | #if defined (__hppa) && W_TYPE_SIZE == 32 |

347 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ |

348 | __asm__ ("add %4,%5,%1\n\taddc %2,%3,%0" \ |

349 | : "=r" ((USItype) (sh)), \ |

350 | "=&r" ((USItype) (sl)) \ |

351 | : "%rM" ((USItype) (ah)), \ |

352 | "rM" ((USItype) (bh)), \ |

353 | "%rM" ((USItype) (al)), \ |

354 | "rM" ((USItype) (bl))) |

355 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ |

356 | __asm__ ("sub %4,%5,%1\n\tsubb %2,%3,%0" \ |

357 | : "=r" ((USItype) (sh)), \ |

358 | "=&r" ((USItype) (sl)) \ |

359 | : "rM" ((USItype) (ah)), \ |

360 | "rM" ((USItype) (bh)), \ |

361 | "rM" ((USItype) (al)), \ |

362 | "rM" ((USItype) (bl))) |

363 | #if defined (_PA_RISC1_1) |

364 | #define umul_ppmm(w1, w0, u, v) \ |

365 | do { \ |

366 | union \ |

367 | { \ |

368 | UDItype __f; \ |

369 | struct {USItype __w1, __w0;} __w1w0; \ |

370 | } __t; \ |

371 | __asm__ ("xmpyu %1,%2,%0" \ |

372 | : "=x" (__t.__f) \ |

373 | : "x" ((USItype) (u)), \ |

374 | "x" ((USItype) (v))); \ |

375 | (w1) = __t.__w1w0.__w1; \ |

376 | (w0) = __t.__w1w0.__w0; \ |

377 | } while (0) |

378 | #define UMUL_TIME 8 |

379 | #else |

380 | #define UMUL_TIME 30 |

381 | #endif |

382 | #define UDIV_TIME 40 |

383 | #define count_leading_zeros(count, x) \ |

384 | do { \ |

385 | USItype __tmp; \ |

386 | __asm__ ( \ |

387 | "ldi 1,%0\n" \ |

388 | " extru,= %1,15,16,%%r0 ; Bits 31..16 zero?\n" \ |

389 | " extru,tr %1,15,16,%1 ; No. Shift down, skip add.\n"\ |

390 | " ldo 16(%0),%0 ; Yes. Perform add.\n" \ |

391 | " extru,= %1,23,8,%%r0 ; Bits 15..8 zero?\n" \ |

392 | " extru,tr %1,23,8,%1 ; No. Shift down, skip add.\n"\ |

393 | " ldo 8(%0),%0 ; Yes. Perform add.\n" \ |

394 | " extru,= %1,27,4,%%r0 ; Bits 7..4 zero?\n" \ |

395 | " extru,tr %1,27,4,%1 ; No. Shift down, skip add.\n"\ |

396 | " ldo 4(%0),%0 ; Yes. Perform add.\n" \ |

397 | " extru,= %1,29,2,%%r0 ; Bits 3..2 zero?\n" \ |

398 | " extru,tr %1,29,2,%1 ; No. Shift down, skip add.\n"\ |

399 | " ldo 2(%0),%0 ; Yes. Perform add.\n" \ |

400 | " extru %1,30,1,%1 ; Extract bit 1.\n" \ |

401 | " sub %0,%1,%0 ; Subtract it.\n" \ |

402 | : "=r" (count), "=r" (__tmp) : "1" (x)); \ |

403 | } while (0) |

404 | #endif |

405 | |

406 | #if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32 |

407 | #if !defined (__zarch__) |

408 | #define smul_ppmm(xh, xl, m0, m1) \ |

409 | do { \ |

410 | union {DItype __ll; \ |

411 | struct {USItype __h, __l;} __i; \ |

412 | } __x; \ |

413 | __asm__ ("lr %N0,%1\n\tmr %0,%2" \ |

414 | : "=&r" (__x.__ll) \ |

415 | : "r" (m0), "r" (m1)); \ |

416 | (xh) = __x.__i.__h; (xl) = __x.__i.__l; \ |

417 | } while (0) |

418 | #define sdiv_qrnnd(q, r, n1, n0, d) \ |

419 | do { \ |

420 | union {DItype __ll; \ |

421 | struct {USItype __h, __l;} __i; \ |

422 | } __x; \ |

423 | __x.__i.__h = n1; __x.__i.__l = n0; \ |

424 | __asm__ ("dr %0,%2" \ |

425 | : "=r" (__x.__ll) \ |

426 | : "0" (__x.__ll), "r" (d)); \ |

427 | (q) = __x.__i.__l; (r) = __x.__i.__h; \ |

428 | } while (0) |

429 | #else |

430 | #define smul_ppmm(xh, xl, m0, m1) \ |

431 | do { \ |

432 | register SItype __r0 __asm__ ("0"); \ |

433 | register SItype __r1 __asm__ ("1") = (m0); \ |

434 | \ |

435 | __asm__ ("mr\t%%r0,%3" \ |

436 | : "=r" (__r0), "=r" (__r1) \ |

437 | : "r" (__r1), "r" (m1)); \ |

438 | (xh) = __r0; (xl) = __r1; \ |

439 | } while (0) |

440 | |

441 | #define sdiv_qrnnd(q, r, n1, n0, d) \ |

442 | do { \ |

443 | register SItype __r0 __asm__ ("0") = (n1); \ |

444 | register SItype __r1 __asm__ ("1") = (n0); \ |

445 | \ |

446 | __asm__ ("dr\t%%r0,%4" \ |

447 | : "=r" (__r0), "=r" (__r1) \ |

448 | : "r" (__r0), "r" (__r1), "r" (d)); \ |

449 | (q) = __r1; (r) = __r0; \ |

450 | } while (0) |

451 | #endif /* __zarch__ */ |

452 | #endif |

453 | |

454 | #if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32 |

455 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ |

456 | __asm__ ("add{l} {%5,%1|%1,%5}\n\tadc{l} {%3,%0|%0,%3}" \ |

457 | : "=r" ((USItype) (sh)), \ |

458 | "=&r" ((USItype) (sl)) \ |

459 | : "%0" ((USItype) (ah)), \ |

460 | "g" ((USItype) (bh)), \ |

461 | "%1" ((USItype) (al)), \ |

462 | "g" ((USItype) (bl))) |

463 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ |

464 | __asm__ ("sub{l} {%5,%1|%1,%5}\n\tsbb{l} {%3,%0|%0,%3}" \ |

465 | : "=r" ((USItype) (sh)), \ |

466 | "=&r" ((USItype) (sl)) \ |

467 | : "0" ((USItype) (ah)), \ |

468 | "g" ((USItype) (bh)), \ |

469 | "1" ((USItype) (al)), \ |

470 | "g" ((USItype) (bl))) |

471 | #define umul_ppmm(w1, w0, u, v) \ |

472 | __asm__ ("mul{l} %3" \ |

473 | : "=a" ((USItype) (w0)), \ |

474 | "=d" ((USItype) (w1)) \ |

475 | : "%0" ((USItype) (u)), \ |

476 | "rm" ((USItype) (v))) |

477 | #define udiv_qrnnd(q, r, n1, n0, dv) \ |

478 | __asm__ ("div{l} %4" \ |

479 | : "=a" ((USItype) (q)), \ |

480 | "=d" ((USItype) (r)) \ |

481 | : "0" ((USItype) (n0)), \ |

482 | "1" ((USItype) (n1)), \ |

483 | "rm" ((USItype) (dv))) |

484 | #define count_leading_zeros(count, x) ((count) = __builtin_clz (x)) |

485 | #define count_trailing_zeros(count, x) ((count) = __builtin_ctz (x)) |

486 | #define UMUL_TIME 40 |

487 | #define UDIV_TIME 40 |

488 | #endif /* 80x86 */ |

489 | |

490 | #if defined (__x86_64__) && W_TYPE_SIZE == 64 |

491 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ |

492 | __asm__ ("add{q} {%5,%1|%1,%5}\n\tadc{q} {%3,%0|%0,%3}" \ |

493 | : "=r" ((UDItype) (sh)), \ |

494 | "=&r" ((UDItype) (sl)) \ |

495 | : "%0" ((UDItype) (ah)), \ |

496 | "rme" ((UDItype) (bh)), \ |

497 | "%1" ((UDItype) (al)), \ |

498 | "rme" ((UDItype) (bl))) |

499 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ |

500 | __asm__ ("sub{q} {%5,%1|%1,%5}\n\tsbb{q} {%3,%0|%0,%3}" \ |

501 | : "=r" ((UDItype) (sh)), \ |

502 | "=&r" ((UDItype) (sl)) \ |

503 | : "0" ((UDItype) (ah)), \ |

504 | "rme" ((UDItype) (bh)), \ |

505 | "1" ((UDItype) (al)), \ |

506 | "rme" ((UDItype) (bl))) |

507 | #define umul_ppmm(w1, w0, u, v) \ |

508 | __asm__ ("mul{q} %3" \ |

509 | : "=a" ((UDItype) (w0)), \ |

510 | "=d" ((UDItype) (w1)) \ |

511 | : "%0" ((UDItype) (u)), \ |

512 | "rm" ((UDItype) (v))) |

513 | #define udiv_qrnnd(q, r, n1, n0, dv) \ |

514 | __asm__ ("div{q} %4" \ |

515 | : "=a" ((UDItype) (q)), \ |

516 | "=d" ((UDItype) (r)) \ |

517 | : "0" ((UDItype) (n0)), \ |

518 | "1" ((UDItype) (n1)), \ |

519 | "rm" ((UDItype) (dv))) |

520 | #define count_leading_zeros(count, x) ((count) = __builtin_clzll (x)) |

521 | #define count_trailing_zeros(count, x) ((count) = __builtin_ctzll (x)) |

522 | #define UMUL_TIME 40 |

523 | #define UDIV_TIME 40 |

524 | #endif /* x86_64 */ |

525 | |

526 | #if defined (__i960__) && W_TYPE_SIZE == 32 |

527 | #define umul_ppmm(w1, w0, u, v) \ |

528 | ({union {UDItype __ll; \ |

529 | struct {USItype __l, __h;} __i; \ |

530 | } __xx; \ |

531 | __asm__ ("emul %2,%1,%0" \ |

532 | : "=d" (__xx.__ll) \ |

533 | : "%dI" ((USItype) (u)), \ |

534 | "dI" ((USItype) (v))); \ |

535 | (w1) = __xx.__i.__h; (w0) = __xx.__i.__l;}) |

536 | #define __umulsidi3(u, v) \ |

537 | ({UDItype __w; \ |

538 | __asm__ ("emul %2,%1,%0" \ |

539 | : "=d" (__w) \ |

540 | : "%dI" ((USItype) (u)), \ |

541 | "dI" ((USItype) (v))); \ |

542 | __w; }) |

543 | #endif /* __i960__ */ |

544 | |

545 | #if defined (__ia64) && W_TYPE_SIZE == 64 |

/* Two-word subtract: (SH:SL) = (AH:AL) - (BH:BL); the borrow out of
   the high word is discarded.

   The borrow is handled with an explicit if/else rather than by
   subtracting the value of "al < bl": this form encourages gcc
   (pre-release 3.4 at least) to emit predicated "sub r=r,r" and
   "sub r=r,r,1", giving a 2 cycle latency, whereas the arithmetic form
   materializes an actual 0 or 1 in a register, which takes an extra
   cycle.

   AL and BL are copied into locals first so that each is evaluated
   exactly once even though both the difference and the borrow test
   need them.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    UWtype __sub_al = (al); \
    UWtype __sub_bl = (bl); \
    UWtype __x = __sub_al - __sub_bl; \
    if (__sub_al < __sub_bl) \
      (sh) = (ah) - (bh) - 1; \
    else \
      (sh) = (ah) - (bh); \
    (sl) = __x; \
  } while (0)

560 | |

561 | /* Do both product parts in assembly, since that gives better code with |

562 | all gcc versions. Some callers will just use the upper part, and in |

563 | that situation we waste an instruction, but not any cycles. */ |

564 | #define umul_ppmm(ph, pl, m0, m1) \ |

565 | __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0" \ |

566 | : "=&f" (ph), "=f" (pl) \ |

567 | : "f" (m0), "f" (m1)) |

568 | #define count_leading_zeros(count, x) \ |

569 | do { \ |

570 | UWtype _x = (x), _y, _a, _c; \ |

571 | __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x)); \ |

572 | __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y)); \ |

573 | _c = (_a - 1) << 3; \ |

574 | _x >>= _c; \ |

575 | if (_x >= 1 << 4) \ |

576 | _x >>= 4, _c += 4; \ |

577 | if (_x >= 1 << 2) \ |

578 | _x >>= 2, _c += 2; \ |

579 | _c += _x >> 1; \ |

580 | (count) = W_TYPE_SIZE - 1 - _c; \ |

581 | } while (0) |

582 | /* similar to what gcc does for __builtin_ffs, but 0 based rather than 1 |

583 | based, and we don't need a special case for x==0 here */ |

584 | #define count_trailing_zeros(count, x) \ |

585 | do { \ |

586 | UWtype __ctz_x = (x); \ |

587 | __asm__ ("popcnt %0 = %1" \ |

588 | : "=r" (count) \ |

589 | : "r" ((__ctz_x-1) & ~__ctz_x)); \ |

590 | } while (0) |

591 | #define UMUL_TIME 14 |

592 | #endif |

593 | |

594 | #if defined (__M32R__) && W_TYPE_SIZE == 32 |

595 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ |

596 | /* The cmp clears the condition bit. */ \ |

597 | __asm__ ("cmp %0,%0\n\taddx %1,%5\n\taddx %0,%3" \ |

598 | : "=r" ((USItype) (sh)), \ |

599 | "=&r" ((USItype) (sl)) \ |

600 | : "0" ((USItype) (ah)), \ |

601 | "r" ((USItype) (bh)), \ |

602 | "1" ((USItype) (al)), \ |

603 | "r" ((USItype) (bl)) \ |

604 | : "cbit") |

605 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ |

606 | /* The cmp clears the condition bit. */ \ |

607 | __asm__ ("cmp %0,%0\n\tsubx %1,%5\n\tsubx %0,%3" \ |

608 | : "=r" ((USItype) (sh)), \ |

609 | "=&r" ((USItype) (sl)) \ |

610 | : "0" ((USItype) (ah)), \ |

611 | "r" ((USItype) (bh)), \ |

612 | "1" ((USItype) (al)), \ |

613 | "r" ((USItype) (bl)) \ |

614 | : "cbit") |

615 | #endif /* __M32R__ */ |

616 | |

617 | #if defined (__mc68000__) && W_TYPE_SIZE == 32 |

618 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ |

619 | __asm__ ("add%.l %5,%1\n\taddx%.l %3,%0" \ |

620 | : "=d" ((USItype) (sh)), \ |

621 | "=&d" ((USItype) (sl)) \ |

622 | : "%0" ((USItype) (ah)), \ |

623 | "d" ((USItype) (bh)), \ |

624 | "%1" ((USItype) (al)), \ |

625 | "g" ((USItype) (bl))) |

626 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ |

627 | __asm__ ("sub%.l %5,%1\n\tsubx%.l %3,%0" \ |

628 | : "=d" ((USItype) (sh)), \ |

629 | "=&d" ((USItype) (sl)) \ |

630 | : "0" ((USItype) (ah)), \ |

631 | "d" ((USItype) (bh)), \ |

632 | "1" ((USItype) (al)), \ |

633 | "g" ((USItype) (bl))) |

634 | |

635 | /* The '020, '030, '040 and CPU32 have 32x32->64 and 64/32->32q-32r; the '060 is excluded by the test below. */ |

636 | #if (defined (__mc68020__) && !defined (__mc68060__)) |

637 | #define umul_ppmm(w1, w0, u, v) \ |

638 | __asm__ ("mulu%.l %3,%1:%0" \ |

639 | : "=d" ((USItype) (w0)), \ |

640 | "=d" ((USItype) (w1)) \ |

641 | : "%0" ((USItype) (u)), \ |

642 | "dmi" ((USItype) (v))) |

643 | #define UMUL_TIME 45 |

644 | #define udiv_qrnnd(q, r, n1, n0, d) \ |

645 | __asm__ ("divu%.l %4,%1:%0" \ |

646 | : "=d" ((USItype) (q)), \ |

647 | "=d" ((USItype) (r)) \ |

648 | : "0" ((USItype) (n0)), \ |

649 | "1" ((USItype) (n1)), \ |

650 | "dmi" ((USItype) (d))) |

651 | #define UDIV_TIME 90 |

652 | #define sdiv_qrnnd(q, r, n1, n0, d) \ |

653 | __asm__ ("divs%.l %4,%1:%0" \ |

654 | : "=d" ((USItype) (q)), \ |

655 | "=d" ((USItype) (r)) \ |

656 | : "0" ((USItype) (n0)), \ |

657 | "1" ((USItype) (n1)), \ |

658 | "dmi" ((USItype) (d))) |

659 | |

660 | #elif defined (__mcoldfire__) /* not mc68020 */ |

661 | |

662 | #define umul_ppmm(xh, xl, a, b) \ |

663 | __asm__ ("| Inlined umul_ppmm\n" \ |

664 | " move%.l %2,%/d0\n" \ |

665 | " move%.l %3,%/d1\n" \ |

666 | " move%.l %/d0,%/d2\n" \ |

667 | " swap %/d0\n" \ |

668 | " move%.l %/d1,%/d3\n" \ |

669 | " swap %/d1\n" \ |

670 | " move%.w %/d2,%/d4\n" \ |

671 | " mulu %/d3,%/d4\n" \ |

672 | " mulu %/d1,%/d2\n" \ |

673 | " mulu %/d0,%/d3\n" \ |

674 | " mulu %/d0,%/d1\n" \ |

675 | " move%.l %/d4,%/d0\n" \ |

676 | " clr%.w %/d0\n" \ |

677 | " swap %/d0\n" \ |

678 | " add%.l %/d0,%/d2\n" \ |

679 | " add%.l %/d3,%/d2\n" \ |

680 | " jcc 1f\n" \ |

681 | " add%.l %#65536,%/d1\n" \ |

682 | "1: swap %/d2\n" \ |

683 | " moveq %#0,%/d0\n" \ |

684 | " move%.w %/d2,%/d0\n" \ |

685 | " move%.w %/d4,%/d2\n" \ |

686 | " move%.l %/d2,%1\n" \ |

687 | " add%.l %/d1,%/d0\n" \ |

688 | " move%.l %/d0,%0" \ |

689 | : "=g" ((USItype) (xh)), \ |

690 | "=g" ((USItype) (xl)) \ |

691 | : "g" ((USItype) (a)), \ |

692 | "g" ((USItype) (b)) \ |

693 | : "d0", "d1", "d2", "d3", "d4") |

694 | #define UMUL_TIME 100 |

695 | #define UDIV_TIME 400 |

696 | #else /* not ColdFire */ |

697 | /* %/ inserts REGISTER_PREFIX, %# inserts IMMEDIATE_PREFIX. */ |

698 | #define umul_ppmm(xh, xl, a, b) \ |

699 | __asm__ ("| Inlined umul_ppmm\n" \ |

700 | " move%.l %2,%/d0\n" \ |

701 | " move%.l %3,%/d1\n" \ |

702 | " move%.l %/d0,%/d2\n" \ |

703 | " swap %/d0\n" \ |

704 | " move%.l %/d1,%/d3\n" \ |

705 | " swap %/d1\n" \ |

706 | " move%.w %/d2,%/d4\n" \ |

707 | " mulu %/d3,%/d4\n" \ |

708 | " mulu %/d1,%/d2\n" \ |

709 | " mulu %/d0,%/d3\n" \ |

710 | " mulu %/d0,%/d1\n" \ |

711 | " move%.l %/d4,%/d0\n" \ |

712 | " eor%.w %/d0,%/d0\n" \ |

713 | " swap %/d0\n" \ |

714 | " add%.l %/d0,%/d2\n" \ |

715 | " add%.l %/d3,%/d2\n" \ |

716 | " jcc 1f\n" \ |

717 | " add%.l %#65536,%/d1\n" \ |

718 | "1: swap %/d2\n" \ |

719 | " moveq %#0,%/d0\n" \ |

720 | " move%.w %/d2,%/d0\n" \ |

721 | " move%.w %/d4,%/d2\n" \ |

722 | " move%.l %/d2,%1\n" \ |

723 | " add%.l %/d1,%/d0\n" \ |

724 | " move%.l %/d0,%0" \ |

725 | : "=g" ((USItype) (xh)), \ |

726 | "=g" ((USItype) (xl)) \ |

727 | : "g" ((USItype) (a)), \ |

728 | "g" ((USItype) (b)) \ |

729 | : "d0", "d1", "d2", "d3", "d4") |

730 | #define UMUL_TIME 100 |

731 | #define UDIV_TIME 400 |

732 | |

733 | #endif /* not mc68020 */ |

734 | |

735 | /* The '020, '030, '040 and '060 have bitfield insns. |

736 | cpu32 disguises as a 68020, but lacks them. */ |

737 | #if defined (__mc68020__) && !defined (__mcpu32__) |

738 | #define count_leading_zeros(count, x) \ |

739 | __asm__ ("bfffo %1{%b2:%b2},%0" \ |

740 | : "=d" ((USItype) (count)) \ |

741 | : "od" ((USItype) (x)), "n" (0)) |

742 | /* Some ColdFire architectures have a ff1 instruction supported via |

743 | __builtin_clz. */ |

744 | #elif defined (__mcfisaaplus__) || defined (__mcfisac__) |

745 | #define count_leading_zeros(count,x) ((count) = __builtin_clz (x)) |

746 | #define COUNT_LEADING_ZEROS_0 32 |

747 | #endif |

748 | #endif /* mc68000 */ |

749 | |

750 | #if defined (__m88000__) && W_TYPE_SIZE == 32 |

751 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ |

752 | __asm__ ("addu.co %1,%r4,%r5\n\taddu.ci %0,%r2,%r3" \ |

753 | : "=r" ((USItype) (sh)), \ |

754 | "=&r" ((USItype) (sl)) \ |

755 | : "%rJ" ((USItype) (ah)), \ |

756 | "rJ" ((USItype) (bh)), \ |

757 | "%rJ" ((USItype) (al)), \ |

758 | "rJ" ((USItype) (bl))) |

759 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ |

760 | __asm__ ("subu.co %1,%r4,%r5\n\tsubu.ci %0,%r2,%r3" \ |

761 | : "=r" ((USItype) (sh)), \ |

762 | "=&r" ((USItype) (sl)) \ |

763 | : "rJ" ((USItype) (ah)), \ |

764 | "rJ" ((USItype) (bh)), \ |

765 | "rJ" ((USItype) (al)), \ |

766 | "rJ" ((USItype) (bl))) |

767 | #define count_leading_zeros(count, x) \ |

768 | do { \ |

769 | USItype __cbtmp; \ |

770 | __asm__ ("ff1 %0,%1" \ |

771 | : "=r" (__cbtmp) \ |

772 | : "r" ((USItype) (x))); \ |

773 | (count) = __cbtmp ^ 31; \ |

774 | } while (0) |

775 | #define COUNT_LEADING_ZEROS_0 63 /* sic */ |

776 | #if defined (__mc88110__) |

777 | #define umul_ppmm(wh, wl, u, v) \ |

778 | do { \ |

779 | union {UDItype __ll; \ |

780 | struct {USItype __h, __l;} __i; \ |

781 | } __xx; \ |

782 | __asm__ ("mulu.d %0,%1,%2" \ |

783 | : "=r" (__xx.__ll) \ |

784 | : "r" ((USItype) (u)), \ |

785 | "r" ((USItype) (v))); \ |

786 | (wh) = __xx.__i.__h; \ |

787 | (wl) = __xx.__i.__l; \ |

788 | } while (0) |

789 | #define udiv_qrnnd(q, r, n1, n0, d) \ |

790 | ({union {UDItype __ll; \ |

791 | struct {USItype __h, __l;} __i; \ |

792 | } __xx; \ |

793 | USItype __q; \ |

794 | __xx.__i.__h = (n1); __xx.__i.__l = (n0); \ |

795 | __asm__ ("divu.d %0,%1,%2" \ |

796 | : "=r" (__q) \ |

797 | : "r" (__xx.__ll), \ |

798 | "r" ((USItype) (d))); \ |

799 | (r) = (n0) - __q * (d); (q) = __q; }) |

800 | #define UMUL_TIME 5 |

801 | #define UDIV_TIME 25 |

802 | #else |

803 | #define UMUL_TIME 17 |

804 | #define UDIV_TIME 150 |

805 | #endif /* __mc88110__ */ |

806 | #endif /* __m88000__ */ |

807 | |

#if defined (__mn10300__)
/* MN10300 / AM33 primitives.  Inline assembly is spelled __asm__, as
   everywhere else in this file, so the header keeps compiling when
   GCC's plain `asm' keyword is disabled (-ansi and the ISO -std=
   modes).  */
# if defined (__AM33__)
#  define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X))
/* Unsigned / signed word multiply producing a two-word result:
   W1 is the high word, W0 the low word.  */
#  define umul_ppmm(w1, w0, u, v) \
  __asm__ ("mulu %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
#  define smul_ppmm(w1, w0, u, v) \
  __asm__ ("mul %3,%2,%1,%0" : "=r"(w0), "=r"(w1) : "r"(u), "r"(v))
# else
/* Without AM33 the two-operand multiply is used: U is tied to the W0
   register and W1 is read back through the "z" constraint.  */
#  define umul_ppmm(w1, w0, u, v) \
  __asm__ ("nop; nop; mulu %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
#  define smul_ppmm(w1, w0, u, v) \
  __asm__ ("nop; nop; mul %3,%0" : "=d"(w0), "=z"(w1) : "%0"(u), "d"(v))
# endif
/* Two-word add and subtract done as a single double-word operation in
   C.  DWunion is declared outside this file; it is used here through
   its .ll, .s.low and .s.high members.  */
# define add_ssaaaa(sh, sl, ah, al, bh, bl) \
  do { \
    DWunion __s, __a, __b; \
    __a.s.low = (al); __a.s.high = (ah); \
    __b.s.low = (bl); __b.s.high = (bh); \
    __s.ll = __a.ll + __b.ll; \
    (sl) = __s.s.low; (sh) = __s.s.high; \
  } while (0)
# define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    DWunion __s, __a, __b; \
    __a.s.low = (al); __a.s.high = (ah); \
    __b.s.low = (bl); __b.s.high = (bh); \
    __s.ll = __a.ll - __b.ll; \
    (sl) = __s.s.low; (sh) = __s.s.high; \
  } while (0)
/* Two-word by one-word divide: NL is tied to the quotient register and
   NH to the register that receives the remainder.  */
# define udiv_qrnnd(q, r, nh, nl, d) \
  __asm__ ("divu %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
# define sdiv_qrnnd(q, r, nh, nl, d) \
  __asm__ ("div %2,%0" : "=D"(q), "=z"(r) : "D"(d), "0"(nl), "1"(nh))
# define UMUL_TIME 3
# define UDIV_TIME 38
#endif

844 | |

845 | #if defined (__mips__) && W_TYPE_SIZE == 32 |

846 | #define umul_ppmm(w1, w0, u, v) /* (W1,W0) = U * V as a 32x32->64 unsigned multiply done in C. */ \ |

847 | do { \ |

848 | UDItype __prod = (UDItype) (USItype) (u); \ |

849 | __prod *= (USItype) (v); \ |

850 | (w1) = (USItype) (__prod >> 32); (w0) = (USItype) __prod; \ |

851 | } while (0) |

852 | #define UMUL_TIME 10 |

853 | #define UDIV_TIME 100 |

854 | |

855 | #if (__mips == 32 || __mips == 64) && ! defined (__mips16) |

856 | #define count_leading_zeros(COUNT,X) ((COUNT) = __builtin_clz (X)) |

857 | #define COUNT_LEADING_ZEROS_0 32 |

858 | #endif |

859 | #endif /* __mips__ */ |

860 | |

861 | /* FIXME: We should test _IBMR2 here when we add assembly support for the |

862 | system vendor compilers. |

863 | FIXME: What's needed for gcc PowerPC VxWorks? __vxworks__ is not good |

864 | enough, since that hits ARM and m68k too. */ |

865 | #if (defined (_ARCH_PPC) /* AIX */ \ |

866 | || defined (__powerpc__) /* gcc */ \ |

867 | || defined (__POWERPC__) /* BEOS */ \ |

868 | || defined (__ppc__) /* Darwin */ \ |

869 | || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \ |

870 | || (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \ |

871 | && CPU_FAMILY == PPC) \ |

872 | ) && W_TYPE_SIZE == 32 |

873 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ |

874 | do { \ |

875 | if (__builtin_constant_p (bh) && (bh) == 0) \ |

876 | __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \ |

877 | : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\ |

878 | else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \ |

879 | __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \ |

880 | : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\ |

881 | else \ |

882 | __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \ |

883 | : "=r" (sh), "=&r" (sl) \ |

884 | : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \ |

885 | } while (0) |

886 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ |

887 | do { \ |

888 | if (__builtin_constant_p (ah) && (ah) == 0) \ |

889 | __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \ |

890 | : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ |

891 | else if (__builtin_constant_p (ah) && (ah) == ~(USItype) 0) \ |

892 | __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \ |

893 | : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ |

894 | else if (__builtin_constant_p (bh) && (bh) == 0) \ |

895 | __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \ |

896 | : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ |

897 | else if (__builtin_constant_p (bh) && (bh) == ~(USItype) 0) \ |

898 | __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \ |

899 | : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ |

900 | else \ |

901 | __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \ |

902 | : "=r" (sh), "=&r" (sl) \ |

903 | : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \ |

904 | } while (0) |

905 | #define count_leading_zeros(count, x) \ |

906 | __asm__ ("cntlzw %0,%1" : "=r" (count) : "r" (x)) |

907 | #define COUNT_LEADING_ZEROS_0 32 |

908 | #if defined (_ARCH_PPC) || defined (__powerpc__) || defined (__POWERPC__) \ |

909 | || defined (__ppc__) \ |

910 | || (defined (PPC) && ! defined (CPU_FAMILY)) /* gcc 2.7.x GNU&SysV */ \ |

911 | || (defined (PPC) && defined (CPU_FAMILY) /* VxWorks */ \ |

912 | && CPU_FAMILY == PPC) |

913 | #define umul_ppmm(ph, pl, m0, m1) /* (PH,PL) = M0 * M1, unsigned 32x32->64 multiply. */ \ |

914 | do { \ |

915 | USItype __m0 = (m0), __m1 = (m1); /* evaluate each argument once, converted to a 32-bit word */ \ |

916 | __asm__ ("mulhwu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); /* high word; uses the temporaries, not the raw arguments */ \ |

917 | (pl) = __m0 * __m1; /* low word: wrapping unsigned multiply */ \ |

918 | } while (0) |

919 | #define UMUL_TIME 15 |

920 | #define smul_ppmm(ph, pl, m0, m1) /* (PH,PL) = M0 * M1, signed 32x32->64 multiply. */ \ |

921 | do { \ |

922 | SItype __m0 = (m0), __m1 = (m1); /* evaluate each argument once, converted to a 32-bit word */ \ |

923 | __asm__ ("mulhw %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); /* high word; uses the temporaries, not the raw arguments */ \ |

924 | (pl) = (SItype) ((USItype) __m0 * (USItype) __m1); /* same low bits, computed unsigned to avoid signed-overflow UB */ \ |

925 | } while (0) |

926 | #define SMUL_TIME 14 |

927 | #define UDIV_TIME 120 |

928 | #endif |

929 | #endif /* 32-bit POWER architecture variants. */ |

930 | |

931 | /* We should test _IBMR2 here when we add assembly support for the system |

932 | vendor compilers. */ |

933 | #if (defined (_ARCH_PPC64) || defined (__powerpc64__)) && W_TYPE_SIZE == 64 |

934 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ |

935 | do { \ |

936 | if (__builtin_constant_p (bh) && (bh) == 0) \ |

937 | __asm__ ("add%I4c %1,%3,%4\n\taddze %0,%2" \ |

938 | : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\ |

939 | else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \ |

940 | __asm__ ("add%I4c %1,%3,%4\n\taddme %0,%2" \ |

941 | : "=r" (sh), "=&r" (sl) : "r" (ah), "%r" (al), "rI" (bl));\ |

942 | else \ |

943 | __asm__ ("add%I5c %1,%4,%5\n\tadde %0,%2,%3" \ |

944 | : "=r" (sh), "=&r" (sl) \ |

945 | : "%r" (ah), "r" (bh), "%r" (al), "rI" (bl)); \ |

946 | } while (0) |

947 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ |

948 | do { \ |

949 | if (__builtin_constant_p (ah) && (ah) == 0) \ |

950 | __asm__ ("subf%I3c %1,%4,%3\n\tsubfze %0,%2" \ |

951 | : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ |

952 | else if (__builtin_constant_p (ah) && (ah) == ~(UDItype) 0) \ |

953 | __asm__ ("subf%I3c %1,%4,%3\n\tsubfme %0,%2" \ |

954 | : "=r" (sh), "=&r" (sl) : "r" (bh), "rI" (al), "r" (bl));\ |

955 | else if (__builtin_constant_p (bh) && (bh) == 0) \ |

956 | __asm__ ("subf%I3c %1,%4,%3\n\taddme %0,%2" \ |

957 | : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ |

958 | else if (__builtin_constant_p (bh) && (bh) == ~(UDItype) 0) \ |

959 | __asm__ ("subf%I3c %1,%4,%3\n\taddze %0,%2" \ |

960 | : "=r" (sh), "=&r" (sl) : "r" (ah), "rI" (al), "r" (bl));\ |

961 | else \ |

962 | __asm__ ("subf%I4c %1,%5,%4\n\tsubfe %0,%3,%2" \ |

963 | : "=r" (sh), "=&r" (sl) \ |

964 | : "r" (ah), "r" (bh), "rI" (al), "r" (bl)); \ |

965 | } while (0) |

966 | #define count_leading_zeros(count, x) \ |

967 | __asm__ ("cntlzd %0,%1" : "=r" (count) : "r" (x)) |

968 | #define COUNT_LEADING_ZEROS_0 64 |

969 | #define umul_ppmm(ph, pl, m0, m1) /* (PH,PL) = M0 * M1, unsigned 64x64->128 multiply. */ \ |

970 | do { \ |

971 | UDItype __m0 = (m0), __m1 = (m1); /* evaluate each argument once, converted to a 64-bit word */ \ |

972 | __asm__ ("mulhdu %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); /* high word; uses the temporaries, not the raw arguments */ \ |

973 | (pl) = __m0 * __m1; /* low word: wrapping unsigned multiply */ \ |

974 | } while (0) |

975 | #define UMUL_TIME 15 |

976 | #define smul_ppmm(ph, pl, m0, m1) /* (PH,PL) = M0 * M1, signed 64x64->128 multiply. */ \ |

977 | do { \ |

978 | DItype __m0 = (m0), __m1 = (m1); /* evaluate each argument once, converted to a 64-bit word */ \ |

979 | __asm__ ("mulhd %0,%1,%2" : "=r" (ph) : "%r" (__m0), "r" (__m1)); /* high word; uses the temporaries, not the raw arguments */ \ |

980 | (pl) = (DItype) ((UDItype) __m0 * (UDItype) __m1); /* same low bits, computed unsigned to avoid signed-overflow UB */ \ |

981 | } while (0) |

982 | #define SMUL_TIME 14 /* ??? */ |

983 | #define UDIV_TIME 120 /* ??? */ |

984 | #endif /* 64-bit PowerPC. */ |

985 | |

986 | #if defined (__ibm032__) /* RT/ROMP */ && W_TYPE_SIZE == 32 |

987 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ |

988 | __asm__ ("a %1,%5\n\tae %0,%3" \ |

989 | : "=r" ((USItype) (sh)), \ |

990 | "=&r" ((USItype) (sl)) \ |

991 | : "%0" ((USItype) (ah)), \ |

992 | "r" ((USItype) (bh)), \ |

993 | "%1" ((USItype) (al)), \ |

994 | "r" ((USItype) (bl))) |

995 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ |

996 | __asm__ ("s %1,%5\n\tse %0,%3" \ |

997 | : "=r" ((USItype) (sh)), \ |

998 | "=&r" ((USItype) (sl)) \ |

999 | : "0" ((USItype) (ah)), \ |

1000 | "r" ((USItype) (bh)), \ |

1001 | "1" ((USItype) (al)), \ |

1002 | "r" ((USItype) (bl))) |

1003 | #define umul_ppmm(ph, pl, m0, m1) \ |

1004 | do { \ |

1005 | USItype __m0 = (m0), __m1 = (m1); \ |

1006 | __asm__ ( \ |

1007 | "s r2,r2\n" \ |

1008 | " mts r10,%2\n" \ |

1009 | " m r2,%3\n" \ |

1010 | " m r2,%3\n" \ |

1011 | " m r2,%3\n" \ |

1012 | " m r2,%3\n" \ |

1013 | " m r2,%3\n" \ |

1014 | " m r2,%3\n" \ |

1015 | " m r2,%3\n" \ |

1016 | " m r2,%3\n" \ |

1017 | " m r2,%3\n" \ |

1018 | " m r2,%3\n" \ |

1019 | " m r2,%3\n" \ |

1020 | " m r2,%3\n" \ |

1021 | " m r2,%3\n" \ |

1022 | " m r2,%3\n" \ |

1023 | " m r2,%3\n" \ |

1024 | " m r2,%3\n" \ |

1025 | " cas %0,r2,r0\n" \ |

1026 | " mfs r10,%1" \ |

1027 | : "=r" ((USItype) (ph)), \ |

1028 | "=r" ((USItype) (pl)) \ |

1029 | : "%r" (__m0), \ |

1030 | "r" (__m1) \ |

1031 | : "r2"); \ |

1032 | (ph) += ((((SItype) __m0 >> 31) & __m1) \ |

1033 | + (((SItype) __m1 >> 31) & __m0)); \ |

1034 | } while (0) |

1035 | #define UMUL_TIME 20 |

1036 | #define UDIV_TIME 200 |

1037 | #define count_leading_zeros(count, x) /* Store in COUNT the leading-zero count of the 32-bit X, using clz on one 16-bit half. */ \ |

1038 | do { \ |

1039 | if ((x) >= 0x10000) /* some bit above bit 15 is set: only the upper half matters */ \ |

1040 | __asm__ ("clz %0,%1" \ |

1041 | : "=r" ((USItype) (count)) \ |

1042 | : "r" ((USItype) (x) >> 16)); \ |

1043 | else \ |

1044 | { \ |

1045 | __asm__ ("clz %0,%1" \ |

1046 | : "=r" ((USItype) (count)) \ |

1047 | : "r" ((USItype) (x))); \ |

1048 | (count) += 16; /* account for the all-zero upper half */ \ |

1049 | } \ |

1050 | } while (0) /* NOTE(review): presumably clz counts within the low 16 bits only; x is expanded more than once */ |

1051 | #endif |

1052 | |

1053 | #if defined(__riscv) |

1054 | #ifdef __riscv_mul |

1055 | #define __umulsidi3(u,v) ((UDWtype)(UWtype)(u) * (UWtype)(v)) |

1056 | #define __muluw3(a, b) ((UWtype)(a) * (UWtype)(b)) |

1057 | #else |

1058 | #if __riscv_xlen == 32 |

1059 | #define MULUW3 "call __mulsi3" |

1060 | #elif __riscv_xlen == 64 |

1061 | #define MULUW3 "call __muldi3" |

1062 | #else |

1063 | #error unsupport xlen |

1064 | #endif /* __riscv_xlen */ |

1065 | /* We rely on the fact that MULUW3 doesn't clobber the t-registers. |

1066 | This allows a better register allocation result. */ |

1067 | #define __muluw3(a, b) \ |

1068 | ({ \ |

1069 | register UWtype __op0 asm ("a0") = a; \ |

1070 | register UWtype __op1 asm ("a1") = b; \ |

1071 | asm volatile (MULUW3 \ |

1072 | : "+r" (__op0), "+r" (__op1) \ |

1073 | : \ |

1074 | : "ra", "a2", "a3"); \ |

1075 | __op0; \ |

1076 | }) |

1077 | #endif /* __riscv_mul */ |

1078 | #define umul_ppmm(w1, w0, u, v) /* (W1,W0) = U * V, built from four half-word products computed with __muluw3. */ \ |

1079 | do { \ |

1080 | UWtype __x0, __x1, __x2, __x3; \ |

1081 | UHWtype __ul, __vl, __uh, __vh; \ |

1082 | \ |

1083 | __ul = __ll_lowpart (u); /* split both operands into half-words (helpers defined elsewhere in this file) */ \ |

1084 | __uh = __ll_highpart (u); \ |

1085 | __vl = __ll_lowpart (v); \ |

1086 | __vh = __ll_highpart (v); \ |

1087 | \ |

1088 | __x0 = __muluw3 (__ul, __vl); /* low * low */ \ |

1089 | __x1 = __muluw3 (__ul, __vh); /* low * high */ \ |

1090 | __x2 = __muluw3 (__uh, __vl); /* high * low */ \ |

1091 | __x3 = __muluw3 (__uh, __vh); /* high * high */ \ |

1092 | \ |

1093 | __x1 += __ll_highpart (__x0);/* this can't give carry */ \ |

1094 | __x1 += __x2; /* but this indeed can */ \ |

1095 | if (__x1 < __x2) /* did we get it? */ \ |

1096 | __x3 += __ll_B; /* yes, add it in the proper pos. */ \ |

1097 | \ |

1098 | (w1) = __x3 + __ll_highpart (__x1); \ |

1099 | (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0); \ |

1100 | } while (0) |

1101 | #endif /* __riscv */ |

1102 | |

1103 | #if defined(__sh__) && W_TYPE_SIZE == 32 |

1104 | #ifndef __sh1__ |

1105 | #define umul_ppmm(w1, w0, u, v) \ |

1106 | __asm__ ( \ |

1107 | "dmulu.l %2,%3\n\tsts%M1 macl,%1\n\tsts%M0 mach,%0" \ |

1108 | : "=r<" ((USItype)(w1)), \ |

1109 | "=r<" ((USItype)(w0)) \ |

1110 | : "r" ((USItype)(u)), \ |

1111 | "r" ((USItype)(v)) \ |

1112 | : "macl", "mach") |

1113 | #define UMUL_TIME 5 |

1114 | #endif |

1115 | |

1116 | /* This is the same algorithm as __udiv_qrnnd_c. */ |

1117 | #define UDIV_NEEDS_NORMALIZATION 1 |

1118 | |

1119 | #ifdef __FDPIC__ |

1120 | /* FDPIC needs a special version of the asm fragment to extract the |

1121 | code address from the function descriptor. __udiv_qrnnd_16 is |

1122 | assumed to be local and not to use the GOT, so loading r12 is |

1123 | not needed. */ |

1124 | #define udiv_qrnnd(q, r, n1, n0, d) \ |

1125 | do { \ |

1126 | extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \ |

1127 | __attribute__ ((visibility ("hidden"))); \ |

1128 | /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ \ |

1129 | __asm__ ( \ |

1130 | "mov%M4 %4,r5\n" \ |

1131 | " swap.w %3,r4\n" \ |

1132 | " swap.w r5,r6\n" \ |

1133 | " mov.l @%5,r2\n" \ |

1134 | " jsr @r2\n" \ |

1135 | " shll16 r6\n" \ |

1136 | " swap.w r4,r4\n" \ |

1137 | " mov.l @%5,r2\n" \ |

1138 | " jsr @r2\n" \ |

1139 | " swap.w r1,%0\n" \ |

1140 | " or r1,%0" \ |

1141 | : "=r" (q), "=&z" (r) \ |

1142 | : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \ |

1143 | : "r1", "r2", "r4", "r5", "r6", "pr", "t"); \ |

1144 | } while (0) |

1145 | #else |

1146 | #define udiv_qrnnd(q, r, n1, n0, d) \ |

1147 | do { \ |

1148 | extern UWtype __udiv_qrnnd_16 (UWtype, UWtype) \ |

1149 | __attribute__ ((visibility ("hidden"))); \ |

1150 | /* r0: rn r1: qn */ /* r0: n1 r4: n0 r5: d r6: d1 */ /* r2: __m */ \ |

1151 | __asm__ ( \ |

1152 | "mov%M4 %4,r5\n" \ |

1153 | " swap.w %3,r4\n" \ |

1154 | " swap.w r5,r6\n" \ |

1155 | " jsr @%5\n" \ |

1156 | " shll16 r6\n" \ |

1157 | " swap.w r4,r4\n" \ |

1158 | " jsr @%5\n" \ |

1159 | " swap.w r1,%0\n" \ |

1160 | " or r1,%0" \ |

1161 | : "=r" (q), "=&z" (r) \ |

1162 | : "1" (n1), "r" (n0), "rm" (d), "r" (&__udiv_qrnnd_16) \ |

1163 | : "r1", "r2", "r4", "r5", "r6", "pr", "t"); \ |

1164 | } while (0) |

1165 | #endif /* __FDPIC__ */ |

1166 | |

1167 | #define UDIV_TIME 80 |

1168 | |

1169 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) /* (SH,SL) = (AH,AL) - (BH,BL); AH and AL are tied to the output registers. */ \ |

1170 | __asm__ ("clrt;subc %5,%1; subc %4,%0" \ |

1171 | : "=r" (sh), "=r" (sl) /* NOTE(review): %1 is written before %4 is read and is not marked earlyclobber; confirm this is safe */ \ |

1172 | : "0" (ah), "1" (al), "r" (bh), "r" (bl) : "t") |

1173 | |

1174 | #endif /* __sh__ */ |

1175 | |

1176 | #if defined (__sparc__) && !defined (__arch64__) && !defined (__sparcv9) \ |

1177 | && W_TYPE_SIZE == 32 |

1178 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ |

1179 | __asm__ ("addcc %r4,%5,%1\n\taddx %r2,%3,%0" \ |

1180 | : "=r" ((USItype) (sh)), \ |

1181 | "=&r" ((USItype) (sl)) \ |

1182 | : "%rJ" ((USItype) (ah)), \ |

1183 | "rI" ((USItype) (bh)), \ |

1184 | "%rJ" ((USItype) (al)), \ |

1185 | "rI" ((USItype) (bl)) \ |

1186 | __CLOBBER_CC) |

1187 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ |

1188 | __asm__ ("subcc %r4,%5,%1\n\tsubx %r2,%3,%0" \ |

1189 | : "=r" ((USItype) (sh)), \ |

1190 | "=&r" ((USItype) (sl)) \ |

1191 | : "rJ" ((USItype) (ah)), \ |

1192 | "rI" ((USItype) (bh)), \ |

1193 | "rJ" ((USItype) (al)), \ |

1194 | "rI" ((USItype) (bl)) \ |

1195 | __CLOBBER_CC) |

1196 | #if defined (__sparc_v9__) |

1197 | #define umul_ppmm(w1, w0, u, v) \ |

1198 | do { \ |

1199 | register USItype __g1 asm ("g1"); \ |

1200 | __asm__ ("umul\t%2,%3,%1\n\t" \ |

1201 | "srlx\t%1, 32, %0" \ |

1202 | : "=r" ((USItype) (w1)), \ |

1203 | "=r" (__g1) \ |

1204 | : "r" ((USItype) (u)), \ |

1205 | "r" ((USItype) (v))); \ |

1206 | (w0) = __g1; \ |

1207 | } while (0) |

1208 | #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \ |

1209 | __asm__ ("mov\t%2,%%y\n\t" \ |

1210 | "udiv\t%3,%4,%0\n\t" \ |

1211 | "umul\t%0,%4,%1\n\t" \ |

1212 | "sub\t%3,%1,%1" \ |

1213 | : "=&r" ((USItype) (__q)), \ |

1214 | "=&r" ((USItype) (__r)) \ |

1215 | : "r" ((USItype) (__n1)), \ |

1216 | "r" ((USItype) (__n0)), \ |

1217 | "r" ((USItype) (__d))) |

1218 | #else |

1219 | #if defined (__sparc_v8__) |

1220 | #define umul_ppmm(w1, w0, u, v) \ |

1221 | __asm__ ("umul %2,%3,%1;rd %%y,%0" \ |

1222 | : "=r" ((USItype) (w1)), \ |

1223 | "=r" ((USItype) (w0)) \ |

1224 | : "r" ((USItype) (u)), \ |

1225 | "r" ((USItype) (v))) |

1226 | #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \ |

1227 | __asm__ ("mov %2,%%y;nop;nop;nop;udiv %3,%4,%0;umul %0,%4,%1;sub %3,%1,%1"\ |

1228 | : "=&r" ((USItype) (__q)), \ |

1229 | "=&r" ((USItype) (__r)) \ |

1230 | : "r" ((USItype) (__n1)), \ |

1231 | "r" ((USItype) (__n0)), \ |

1232 | "r" ((USItype) (__d))) |

1233 | #else |

1234 | #if defined (__sparclite__) |

1235 | /* This has hardware multiply but not divide. It also has two additional |

1236 | instructions scan (ffs from high bit) and divscc. */ |

1237 | #define umul_ppmm(w1, w0, u, v) \ |

1238 | __asm__ ("umul %2,%3,%1;rd %%y,%0" \ |

1239 | : "=r" ((USItype) (w1)), \ |

1240 | "=r" ((USItype) (w0)) \ |

1241 | : "r" ((USItype) (u)), \ |

1242 | "r" ((USItype) (v))) |

1243 | #define udiv_qrnnd(q, r, n1, n0, d) \ |

1244 | __asm__ ("! Inlined udiv_qrnnd\n" \ |

1245 | " wr %%g0,%2,%%y ! Not a delayed write for sparclite\n" \ |

1246 | " tst %%g0\n" \ |

1247 | " divscc %3,%4,%%g1\n" \ |

1248 | " divscc %%g1,%4,%%g1\n" \ |

1249 | " divscc %%g1,%4,%%g1\n" \ |

1250 | " divscc %%g1,%4,%%g1\n" \ |

1251 | " divscc %%g1,%4,%%g1\n" \ |

1252 | " divscc %%g1,%4,%%g1\n" \ |

1253 | " divscc %%g1,%4,%%g1\n" \ |

1254 | " divscc %%g1,%4,%%g1\n" \ |

1255 | " divscc %%g1,%4,%%g1\n" \ |

1256 | " divscc %%g1,%4,%%g1\n" \ |

1257 | " divscc %%g1,%4,%%g1\n" \ |

1258 | " divscc %%g1,%4,%%g1\n" \ |

1259 | " divscc %%g1,%4,%%g1\n" \ |

1260 | " divscc %%g1,%4,%%g1\n" \ |

1261 | " divscc %%g1,%4,%%g1\n" \ |

1262 | " divscc %%g1,%4,%%g1\n" \ |

1263 | " divscc %%g1,%4,%%g1\n" \ |

1264 | " divscc %%g1,%4,%%g1\n" \ |

1265 | " divscc %%g1,%4,%%g1\n" \ |

1266 | " divscc %%g1,%4,%%g1\n" \ |

1267 | " divscc %%g1,%4,%%g1\n" \ |

1268 | " divscc %%g1,%4,%%g1\n" \ |

1269 | " divscc %%g1,%4,%%g1\n" \ |

1270 | " divscc %%g1,%4,%%g1\n" \ |

1271 | " divscc %%g1,%4,%%g1\n" \ |

1272 | " divscc %%g1,%4,%%g1\n" \ |

1273 | " divscc %%g1,%4,%%g1\n" \ |

1274 | " divscc %%g1,%4,%%g1\n" \ |

1275 | " divscc %%g1,%4,%%g1\n" \ |

1276 | " divscc %%g1,%4,%%g1\n" \ |

1277 | " divscc %%g1,%4,%%g1\n" \ |

1278 | " divscc %%g1,%4,%0\n" \ |

1279 | " rd %%y,%1\n" \ |

1280 | " bl,a 1f\n" \ |

1281 | " add %1,%4,%1\n" \ |

1282 | "1: ! End of inline udiv_qrnnd" \ |

1283 | : "=r" ((USItype) (q)), \ |

1284 | "=r" ((USItype) (r)) \ |

1285 | : "r" ((USItype) (n1)), \ |

1286 | "r" ((USItype) (n0)), \ |

1287 | "rI" ((USItype) (d)) \ |

1288 | : "g1" __AND_CLOBBER_CC) |

1289 | #define UDIV_TIME 37 |

1290 | #define count_leading_zeros(count, x) \ |

1291 | do { \ |

1292 | __asm__ ("scan %1,1,%0" \ |

1293 | : "=r" ((USItype) (count)) \ |

1294 | : "r" ((USItype) (x))); \ |

1295 | } while (0) |

1296 | /* Early sparclites return 63 for an argument of 0, but they warn that future |

1297 | implementations might change this. Therefore, leave COUNT_LEADING_ZEROS_0 |

1298 | undefined. */ |

1299 | #else |

1300 | /* SPARC without integer multiplication and divide instructions. |

1301 | (i.e. at least Sun4/20,40,60,65,75,110,260,280,330,360,380,470,490) */ |

1302 | #define umul_ppmm(w1, w0, u, v) \ |

1303 | __asm__ ("! Inlined umul_ppmm\n" \ |

1304 | " wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n"\ |

1305 | " sra %3,31,%%o5 ! Don't move this insn\n" \ |

1306 | " and %2,%%o5,%%o5 ! Don't move this insn\n" \ |

1307 | " andcc %%g0,0,%%g1 ! Don't move this insn\n" \ |

1308 | " mulscc %%g1,%3,%%g1\n" \ |

1309 | " mulscc %%g1,%3,%%g1\n" \ |

1310 | " mulscc %%g1,%3,%%g1\n" \ |

1311 | " mulscc %%g1,%3,%%g1\n" \ |

1312 | " mulscc %%g1,%3,%%g1\n" \ |

1313 | " mulscc %%g1,%3,%%g1\n" \ |

1314 | " mulscc %%g1,%3,%%g1\n" \ |

1315 | " mulscc %%g1,%3,%%g1\n" \ |

1316 | " mulscc %%g1,%3,%%g1\n" \ |

1317 | " mulscc %%g1,%3,%%g1\n" \ |

1318 | " mulscc %%g1,%3,%%g1\n" \ |

1319 | " mulscc %%g1,%3,%%g1\n" \ |

1320 | " mulscc %%g1,%3,%%g1\n" \ |

1321 | " mulscc %%g1,%3,%%g1\n" \ |

1322 | " mulscc %%g1,%3,%%g1\n" \ |

1323 | " mulscc %%g1,%3,%%g1\n" \ |

1324 | " mulscc %%g1,%3,%%g1\n" \ |

1325 | " mulscc %%g1,%3,%%g1\n" \ |

1326 | " mulscc %%g1,%3,%%g1\n" \ |

1327 | " mulscc %%g1,%3,%%g1\n" \ |

1328 | " mulscc %%g1,%3,%%g1\n" \ |

1329 | " mulscc %%g1,%3,%%g1\n" \ |

1330 | " mulscc %%g1,%3,%%g1\n" \ |

1331 | " mulscc %%g1,%3,%%g1\n" \ |

1332 | " mulscc %%g1,%3,%%g1\n" \ |

1333 | " mulscc %%g1,%3,%%g1\n" \ |

1334 | " mulscc %%g1,%3,%%g1\n" \ |

1335 | " mulscc %%g1,%3,%%g1\n" \ |

1336 | " mulscc %%g1,%3,%%g1\n" \ |

1337 | " mulscc %%g1,%3,%%g1\n" \ |

1338 | " mulscc %%g1,%3,%%g1\n" \ |

1339 | " mulscc %%g1,%3,%%g1\n" \ |

1340 | " mulscc %%g1,0,%%g1\n" \ |

1341 | " add %%g1,%%o5,%0\n" \ |

1342 | " rd %%y,%1" \ |

1343 | : "=r" ((USItype) (w1)), \ |

1344 | "=r" ((USItype) (w0)) \ |

1345 | : "%rI" ((USItype) (u)), \ |

1346 | "r" ((USItype) (v)) \ |

1347 | : "g1", "o5" __AND_CLOBBER_CC) |

1348 | #define UMUL_TIME 39 /* 39 instructions */ |

1349 | /* It's quite necessary to add this much assembler for the sparc. |

1350 | The default udiv_qrnnd (in C) is more than 10 times slower! */ |

1351 | #define udiv_qrnnd(__q, __r, __n1, __n0, __d) \ |

1352 | __asm__ ("! Inlined udiv_qrnnd\n" \ |

1353 | " mov 32,%%g1\n" \ |

1354 | " subcc %1,%2,%%g0\n" \ |

1355 | "1: bcs 5f\n" \ |

1356 | " addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n" \ |

1357 | " sub %1,%2,%1 ! this kills msb of n\n" \ |

1358 | " addx %1,%1,%1 ! so this can't give carry\n" \ |

1359 | " subcc %%g1,1,%%g1\n" \ |

1360 | "2: bne 1b\n" \ |

1361 | " subcc %1,%2,%%g0\n" \ |

1362 | " bcs 3f\n" \ |

1363 | " addxcc %0,%0,%0 ! shift n1n0 and a q-bit in lsb\n" \ |

1364 | " b 3f\n" \ |

1365 | " sub %1,%2,%1 ! this kills msb of n\n" \ |

1366 | "4: sub %1,%2,%1\n" \ |

1367 | "5: addxcc %1,%1,%1\n" \ |

1368 | " bcc 2b\n" \ |

1369 | " subcc %%g1,1,%%g1\n" \ |

1370 | "! Got carry from n. Subtract next step to cancel this carry.\n" \ |

1371 | " bne 4b\n" \ |

1372 | " addcc %0,%0,%0 ! shift n1n0 and a 0-bit in lsb\n" \ |

1373 | " sub %1,%2,%1\n" \ |

1374 | "3: xnor %0,0,%0\n" \ |

1375 | " ! End of inline udiv_qrnnd" \ |

1376 | : "=&r" ((USItype) (__q)), \ |

1377 | "=&r" ((USItype) (__r)) \ |

1378 | : "r" ((USItype) (__d)), \ |

1379 | "1" ((USItype) (__n1)), \ |

1380 | "0" ((USItype) (__n0)) : "g1" __AND_CLOBBER_CC) |

1381 | #define UDIV_TIME (3+7*32) /* 7 instructions/iteration. 32 iterations. */ |

1382 | #endif /* __sparclite__ */ |

1383 | #endif /* __sparc_v8__ */ |

1384 | #endif /* __sparc_v9__ */ |

1385 | #endif /* sparc32 */ |

1386 | |

1387 | #if ((defined (__sparc__) && defined (__arch64__)) || defined (__sparcv9)) \ |

1388 | && W_TYPE_SIZE == 64 |

1389 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ |

1390 | do { \ |

1391 | UDItype __carry = 0; \ |

1392 | __asm__ ("addcc\t%r5,%6,%1\n\t" \ |

1393 | "add\t%r3,%4,%0\n\t" \ |

1394 | "movcs\t%%xcc, 1, %2\n\t" \ |

1395 | "add\t%0, %2, %0" \ |

1396 | : "=r" ((UDItype)(sh)), \ |

1397 | "=&r" ((UDItype)(sl)), \ |

1398 | "+r" (__carry) \ |

1399 | : "%rJ" ((UDItype)(ah)), \ |

1400 | "rI" ((UDItype)(bh)), \ |

1401 | "%rJ" ((UDItype)(al)), \ |

1402 | "rI" ((UDItype)(bl)) \ |

1403 | __CLOBBER_CC); \ |

1404 | } while (0) |

1405 | |

1406 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) /* (SH,SL) = (AH,AL) - (BH,BL) as a two-word subtraction. */ \ |

1407 | do { \ |

1408 | UDItype __carry = 0; /* set to 1 by movcs when the low-word subtraction sets carry */ \ |

1409 | __asm__ ("subcc\t%r5,%6,%1\n\t" \ |

1410 | "sub\t%r3,%4,%0\n\t" \ |

1411 | "movcs\t%%xcc, 1, %2\n\t" \ |

1412 | "sub\t%0, %2, %0" \ |

1413 | : "=r" ((UDItype)(sh)), \ |

1414 | "=&r" ((UDItype)(sl)), \ |

1415 | "+r" (__carry) \ |

1416 | : "rJ" ((UDItype)(ah)), /* no '%' modifier: subtraction operands must never be swapped */ \ |

1417 | "rI" ((UDItype)(bh)), \ |

1418 | "rJ" ((UDItype)(al)), /* likewise not commutative */ \ |

1419 | "rI" ((UDItype)(bl)) \ |

1420 | __CLOBBER_CC); \ |

1421 | } while (0) |

1422 | |

1423 | #define umul_ppmm(wh, wl, u, v) \ |

1424 | do { \ |

1425 | UDItype tmp1, tmp2, tmp3, tmp4; \ |

1426 | __asm__ __volatile__ ( \ |

1427 | "srl %7,0,%3\n\t" \ |

1428 | "mulx %3,%6,%1\n\t" \ |

1429 | "srlx %6,32,%2\n\t" \ |

1430 | "mulx %2,%3,%4\n\t" \ |

1431 | "sllx %4,32,%5\n\t" \ |

1432 | "srl %6,0,%3\n\t" \ |

1433 | "sub %1,%5,%5\n\t" \ |

1434 | "srlx %5,32,%5\n\t" \ |

1435 | "addcc %4,%5,%4\n\t" \ |

1436 | "srlx %7,32,%5\n\t" \ |

1437 | "mulx %3,%5,%3\n\t" \ |

1438 | "mulx %2,%5,%5\n\t" \ |

1439 | "sethi %%hi(0x80000000),%2\n\t" \ |

1440 | "addcc %4,%3,%4\n\t" \ |

1441 | "srlx %4,32,%4\n\t" \ |

1442 | "add %2,%2,%2\n\t" \ |

1443 | "movcc %%xcc,%%g0,%2\n\t" \ |

1444 | "addcc %5,%4,%5\n\t" \ |

1445 | "sllx %3,32,%3\n\t" \ |

1446 | "add %1,%3,%1\n\t" \ |

1447 | "add %5,%2,%0" \ |

1448 | : "=r" ((UDItype)(wh)), \ |

1449 | "=&r" ((UDItype)(wl)), \ |

1450 | "=&r" (tmp1), "=&r" (tmp2), "=&r" (tmp3), "=&r" (tmp4) \ |

1451 | : "r" ((UDItype)(u)), \ |

1452 | "r" ((UDItype)(v)) \ |

1453 | __CLOBBER_CC); \ |

1454 | } while (0) |

1455 | #define UMUL_TIME 96 |

1456 | #define UDIV_TIME 230 |

1457 | #endif /* sparc64 */ |

1458 | |

1459 | #if defined (__vax__) && W_TYPE_SIZE == 32 |

1460 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ |

1461 | __asm__ ("addl2 %5,%1\n\tadwc %3,%0" \ |

1462 | : "=g" ((USItype) (sh)), \ |

1463 | "=&g" ((USItype) (sl)) \ |

1464 | : "%0" ((USItype) (ah)), \ |

1465 | "g" ((USItype) (bh)), \ |

1466 | "%1" ((USItype) (al)), \ |

1467 | "g" ((USItype) (bl))) |

1468 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ |

1469 | __asm__ ("subl2 %5,%1\n\tsbwc %3,%0" \ |

1470 | : "=g" ((USItype) (sh)), \ |

1471 | "=&g" ((USItype) (sl)) \ |

1472 | : "0" ((USItype) (ah)), \ |

1473 | "g" ((USItype) (bh)), \ |

1474 | "1" ((USItype) (al)), \ |

1475 | "g" ((USItype) (bl))) |

1476 | #define umul_ppmm(xh, xl, m0, m1) /* (XH,XL) = M0 * M1 unsigned, derived from the emul result plus a high-word correction. */ \ |

1477 | do { \ |

1478 | union { \ |

1479 | UDItype __ll; \ |

1480 | struct {USItype __l, __h;} __i; /* __l is the first member, __h the second */ \ |

1481 | } __xx; \ |

1482 | USItype __m0 = (m0), __m1 = (m1); /* each argument is evaluated once */ \ |

1483 | __asm__ ("emul %1,%2,$0,%0" \ |

1484 | : "=r" (__xx.__ll) \ |

1485 | : "g" (__m0), \ |

1486 | "g" (__m1)); \ |

1487 | (xh) = __xx.__i.__h; \ |

1488 | (xl) = __xx.__i.__l; \ |

1489 | (xh) += ((((SItype) __m0 >> 31) & __m1) /* add __m1 when __m0 has its top bit set (presumably emul multiplies signed; assumes arithmetic >>) */ \ |

1490 | + (((SItype) __m1 >> 31) & __m0)); /* and __m0 when __m1 has its top bit set */ \ |

1491 | } while (0) |

1492 | #define sdiv_qrnnd(q, r, n1, n0, d) \ |

1493 | do { \ |

1494 | union {DItype __ll; \ |

1495 | struct {SItype __l, __h;} __i; \ |

1496 | } __xx; \ |

1497 | __xx.__i.__h = n1; __xx.__i.__l = n0; \ |

1498 | __asm__ ("ediv %3,%2,%0,%1" \ |

1499 | : "=g" (q), "=g" (r) \ |

1500 | : "g" (__xx.__ll), "g" (d)); \ |

1501 | } while (0) |

1502 | #endif /* __vax__ */ |

1503 | |

1504 | #ifdef _TMS320C6X |

1505 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ |

1506 | do \ |

1507 | { \ |

1508 | UDItype __ll; \ |

1509 | __asm__ ("addu .l1 %1, %2, %0" \ |

1510 | : "=a" (__ll) : "a" (al), "a" (bl)); \ |

1511 | (sl) = (USItype)__ll; \ |

1512 | (sh) = ((USItype)(__ll >> 32)) + (ah) + (bh); \ |

1513 | } \ |

1514 | while (0) |

1515 | |

1516 | #ifdef _TMS320C6400_PLUS |

1517 | #define __umulsidi3(u,v) ((UDItype)(USItype)(u)*(USItype)(v)) /* 32x32->64 unsigned product; arguments parenthesized so the casts apply to the whole expression */ |

1518 | #define umul_ppmm(w1, w0, u, v) /* (W1,W0) = U * V as a 32x32->64 unsigned multiply done in C. */ \ |

1519 | do { \ |

1520 | UDItype __prod = (UDItype) (USItype) (u); \ |

1521 | __prod *= (USItype) (v); \ |

1522 | (w1) = (USItype) (__prod >> 32); (w0) = (USItype) __prod; \ |

1523 | } while (0) |

1524 | #endif /* _TMS320C6400_PLUS */ |

1525 | |

1526 | #define count_leading_zeros(count, x) ((count) = __builtin_clz (x)) |

1527 | #ifdef _TMS320C6400 |

1528 | #define count_trailing_zeros(count, x) ((count) = __builtin_ctz (x)) |

1529 | #endif |

1530 | #define UMUL_TIME 4 |

1531 | #define UDIV_TIME 40 |

1532 | #endif /* _TMS320C6X */ |

1533 | |

1534 | #if defined (__xtensa__) && W_TYPE_SIZE == 32 |

1535 | /* This code is not Xtensa-configuration-specific, so rely on the compiler |

1536 | to expand builtin functions depending on what configuration features |

1537 | are available. This avoids library calls when the operation can be |

1538 | performed in-line. */ |

1539 | #define umul_ppmm(w1, w0, u, v) \ |

1540 | do { \ |

1541 | /* Let the compiler expand the widening multiply, then split the result. */ \ |

1542 | DWunion __prod; \ |

1543 | __prod.ll = __builtin_umulsidi3 ((u), (v)); \ |

1544 | (w1) = __prod.s.high; (w0) = __prod.s.low; \ |

1545 | } while (0) |

1546 | #define __umulsidi3(u, v) __builtin_umulsidi3 (u, v) |

1547 | #define count_leading_zeros(COUNT, X) ((COUNT) = __builtin_clz (X)) |

1548 | #define count_trailing_zeros(COUNT, X) ((COUNT) = __builtin_ctz (X)) |

1549 | #endif /* __xtensa__ */ |

1550 | |

1551 | #if defined xstormy16 |

1552 | extern UHItype __stormy16_count_leading_zeros (UHItype); /* NOTE(review): the macro below calls __clzhi2, not this function; confirm which name is intended */ |

1553 | #define count_leading_zeros(count, x) /* Store in COUNT the leading-zero count of X, scanning 16 bits at a time from the top. */ \ |

1554 | do \ |

1555 | { \ |

1556 | UHItype size; \ |

1557 | \ |

1558 | /* We assume that W_TYPE_SIZE is a multiple of 16... */ \ |

1559 | for ((count) = 0, size = W_TYPE_SIZE; size; size -= 16) \ |

1560 | { \ |

1561 | UHItype c; \ |

1562 | \ |

1563 | c = __clzhi2 ((x) >> (size - 16)); /* count for the current 16-bit chunk, highest chunk first */ \ |

1564 | (count) += c; \ |

1565 | if (c != 16) /* a count other than 16 ends the scan; otherwise continue with the next lower chunk */ \ |

1566 | break; \ |

1567 | } \ |

1568 | } \ |

1569 | while (0) |

1570 | #define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE |

1571 | #endif |

1572 | |

1573 | #if defined (__z8000__) && W_TYPE_SIZE == 16 |

1574 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ |

1575 | __asm__ ("add %H1,%H5\n\tadc %H0,%H3" \ |

1576 | : "=r" ((unsigned int)(sh)), \ |

1577 | "=&r" ((unsigned int)(sl)) \ |

1578 | : "%0" ((unsigned int)(ah)), \ |

1579 | "r" ((unsigned int)(bh)), \ |

1580 | "%1" ((unsigned int)(al)), \ |

1581 | "rQR" ((unsigned int)(bl))) |

1582 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ |

1583 | __asm__ ("sub %H1,%H5\n\tsbc %H0,%H3" \ |

1584 | : "=r" ((unsigned int)(sh)), \ |

1585 | "=&r" ((unsigned int)(sl)) \ |

1586 | : "0" ((unsigned int)(ah)), \ |

1587 | "r" ((unsigned int)(bh)), \ |

1588 | "1" ((unsigned int)(al)), \ |

1589 | "rQR" ((unsigned int)(bl))) |

1590 | #define umul_ppmm(xh, xl, m0, m1) \ |

1591 | do { \ |

1592 | union {long int __ll; \ |

1593 | struct {unsigned int __h, __l;} __i; \ |

1594 | } __xx; \ |

1595 | unsigned int __m0 = (m0), __m1 = (m1); \ |

1596 | __asm__ ("mult %S0,%H3" \ |

1597 | : "=r" (__xx.__i.__h), \ |

1598 | "=r" (__xx.__i.__l) \ |

1599 | : "%1" (__m0), \ |

1600 | "rQR" (__m1)); \ |

1601 | (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \ |

1602 | (xh) += ((((signed int) __m0 >> 15) & __m1) \ |

1603 | + (((signed int) __m1 >> 15) & __m0)); \ |

1604 | } while (0) |

1605 | #endif /* __z8000__ */ |

1606 | |

1607 | #endif /* __GNUC__ */ |

1608 | |

1609 | /* If this machine has no inline assembler, use C macros. */ |

1610 | |

1611 | #if !defined (add_ssaaaa) |

1612 | #define add_ssaaaa(sh, sl, ah, al, bh, bl) \ |

1613 | do { \ |

1614 | /* Low-word sum; a result below AL means the addition carried. */ \ |

1615 | UWtype __lo_sum = (al) + (bl); \ |

1616 | (sh) = (ah) + (bh) + (__lo_sum < (al)); \ |

1617 | (sl) = __lo_sum; \ |

1618 | } while (0) |

1619 | #endif |

1620 | |

1621 | #if !defined (sub_ddmmss) |

1622 | #define sub_ddmmss(sh, sl, ah, al, bh, bl) \ |

1623 | do { \ |

1624 | /* Low-word difference; a result above AL means the subtraction borrowed. */ \ |

1625 | UWtype __lo_diff = (al) - (bl); \ |

1626 | (sh) = (ah) - (bh) - (__lo_diff > (al)); \ |

1627 | (sl) = __lo_diff; \ |

1628 | } while (0) |

1629 | #endif |

1630 | |

1631 | /* If we lack umul_ppmm but have smul_ppmm, define umul_ppmm in terms of |

1632 | smul_ppmm. */ |

1633 | #if !defined (umul_ppmm) && defined (smul_ppmm) |

1634 | #define umul_ppmm(w1, w0, u, v) /* Unsigned (W1,W0) = U * V built on smul_ppmm plus a high-word correction. */ \ |

1635 | do { \ |

1636 | UWtype __w1; \ |

1637 | UWtype __xm0 = (u), __xm1 = (v); /* each argument is evaluated once */ \ |

1638 | smul_ppmm (__w1, w0, __xm0, __xm1); /* low word goes straight to W0 */ \ |

1639 | (w1) = __w1 + (-(__xm0 >> (W_TYPE_SIZE - 1)) & __xm1) /* all-ones mask when __xm0 has its top bit set: add __xm1 */ \ |

1640 | + (-(__xm1 >> (W_TYPE_SIZE - 1)) & __xm0); /* and add __xm0 when __xm1 has its top bit set */ \ |

1641 | } while (0) |

1642 | #endif |

1643 | |

1644 | /* If we still don't have umul_ppmm, define it using plain C. */ |

1645 | #if !defined (umul_ppmm) |

1646 | #define umul_ppmm(w1, w0, u, v) /* (W1,W0) = U * V in plain C, built from four half-word partial products. */ \ |

1647 | do { \ |

1648 | UWtype __x0, __x1, __x2, __x3; \ |

1649 | UHWtype __ul, __vl, __uh, __vh; \ |

1650 | \ |

1651 | __ul = __ll_lowpart (u); /* split both operands into half-words (helpers defined elsewhere in this file) */ \ |

1652 | __uh = __ll_highpart (u); \ |

1653 | __vl = __ll_lowpart (v); \ |

1654 | __vh = __ll_highpart (v); \ |

1655 | \ |

1656 | __x0 = (UWtype) __ul * __vl; /* low * low */ \ |

1657 | __x1 = (UWtype) __ul * __vh; /* low * high */ \ |

1658 | __x2 = (UWtype) __uh * __vl; /* high * low */ \ |

1659 | __x3 = (UWtype) __uh * __vh; /* high * high */ \ |

1660 | \ |

1661 | __x1 += __ll_highpart (__x0);/* this can't give carry */ \ |

1662 | __x1 += __x2; /* but this indeed can */ \ |

1663 | if (__x1 < __x2) /* did we get it? */ \ |

1664 | __x3 += __ll_B; /* yes, add it in the proper pos. */ \ |

1665 | \ |

1666 | (w1) = __x3 + __ll_highpart (__x1); \ |

1667 | (w0) = __ll_lowpart (__x1) * __ll_B + __ll_lowpart (__x0); \ |

1668 | } while (0) |

1669 | #endif |

1670 | |

#if !defined (__umulsidi3)
/* __umulsidi3 (u, v) evaluates to the double-word product of U and V.
   The two result words from umul_ppmm are written into the high and
   low members of a DWunion, whose ll member is the value of the (GNU C
   statement) expression.  */
#define __umulsidi3(u, v) \
  ({DWunion __w;                                                        \
    umul_ppmm (__w.s.high, __w.s.low, u, v);                            \
    __w.ll; })
#endif

1677 | |

/* Define this unconditionally, so it can be used for debugging.

   __udiv_qrnnd_c (q, r, n1, n0, d) divides the two-word value (N1,N0)
   by D, storing the quotient in Q and the remainder in R.  The
   quotient is built from two half-word digits, __q1 and __q0.  Each
   digit is first estimated by dividing by the high half of D alone and
   then lowered by at most two while the running remainder is smaller
   than digit * low half of D.

   Where this routine is selected as udiv_qrnnd,
   UDIV_NEEDS_NORMALIZATION is set to 1.  Since only one word of
   quotient is produced, callers are expected to pass N1 < D.  D and N0
   are evaluated several times.  */
#define __udiv_qrnnd_c(q, r, n1, n0, d) \
  do {                                                                  \
    UWtype __d1, __d0, __q1, __q0;                                      \
    UWtype __r1, __r0, __m;                                             \
    __d1 = __ll_highpart (d);                                           \
    __d0 = __ll_lowpart (d);                                            \
                                                                        \
    /* High quotient digit.  */                                         \
    __r1 = (n1) % __d1;                                                 \
    __q1 = (n1) / __d1;                                                 \
    __m = (UWtype) __q1 * __d0;                                         \
    __r1 = __r1 * __ll_B | __ll_highpart (n0);                          \
    if (__r1 < __m)                                                     \
      {                                                                 \
        __q1--, __r1 += (d);                                            \
        if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */\
          if (__r1 < __m)                                               \
            __q1--, __r1 += (d);                                        \
      }                                                                 \
    __r1 -= __m;                                                        \
                                                                        \
    /* Low quotient digit, same procedure on the partial remainder.  */ \
    __r0 = __r1 % __d1;                                                 \
    __q0 = __r1 / __d1;                                                 \
    __m = (UWtype) __q0 * __d0;                                         \
    __r0 = __r0 * __ll_B | __ll_lowpart (n0);                           \
    if (__r0 < __m)                                                     \
      {                                                                 \
        __q0--, __r0 += (d);                                            \
        if (__r0 >= (d))                                                \
          if (__r0 < __m)                                               \
            __q0--, __r0 += (d);                                        \
      }                                                                 \
    __r0 -= __m;                                                        \
                                                                        \
    (q) = (UWtype) __q1 * __ll_B | __q0;                                \
    (r) = __r0;                                                         \
  } while (0)

1715 | |

/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
   __udiv_w_sdiv (defined in libgcc or elsewhere).

   udiv_qrnnd (q, r, nh, nl, d): Q receives the value returned by
   __udiv_w_sdiv, and R receives the word that function stores through
   its first (pointer) argument.  The block-scope prototype lets the
   macro be used without a separate declaration of __udiv_w_sdiv.  */
#if !defined (udiv_qrnnd) && defined (sdiv_qrnnd)
#define udiv_qrnnd(q, r, nh, nl, d) \
  do {                                                                  \
    extern UWtype __udiv_w_sdiv (UWtype *, UWtype, UWtype, UWtype);     \
    UWtype __r;                                                         \
    (q) = __udiv_w_sdiv (&__r, nh, nl, d);                              \
    (r) = __r;                                                          \
  } while (0)
#endif

1727 | |

/* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.
   UDIV_NEEDS_NORMALIZATION is set to 1 along with it, marking that this
   fallback is to be given a normalized divisor.  */
#if !defined (udiv_qrnnd)
#define UDIV_NEEDS_NORMALIZATION 1
#define udiv_qrnnd __udiv_qrnnd_c
#endif

1733 | |

#if !defined (count_leading_zeros)
/* count_leading_zeros (count, x) -- generic C fallback.

   Stores in COUNT the value W_TYPE_SIZE - (__clz_tab[X >> __a] + __a),
   where __a is a shift chosen so that X >> __a is a small table index:
   for W_TYPE_SIZE <= 32 it is 0, 1, 2 or 3 times __BITS4, selected by
   comparing X against powers of two; for wider words it is the
   position of the highest nonzero byte of X, found by scanning from
   the top.  __clz_tab is defined elsewhere; it is assumed to hold the
   bit length of its index (0 for index 0) -- TODO confirm against its
   definition.  Under that assumption the result for X == 0 is
   W_TYPE_SIZE, which is what COUNT_LEADING_ZEROS_0 advertises.  */
#define count_leading_zeros(count, x) \
  do {                                                                  \
    UWtype __xr = (x);                                                  \
    UWtype __a;                                                         \
                                                                        \
    if (W_TYPE_SIZE <= 32)                                              \
      {                                                                 \
        __a = __xr < ((UWtype)1<<2*__BITS4)                             \
          ? (__xr < ((UWtype)1<<__BITS4) ? 0 : __BITS4)                 \
          : (__xr < ((UWtype)1<<3*__BITS4) ? 2*__BITS4 : 3*__BITS4);    \
      }                                                                 \
    else                                                                \
      {                                                                 \
        for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8)                  \
          if (((__xr >> __a) & 0xff) != 0)                              \
            break;                                                      \
      }                                                                 \
                                                                        \
    (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a);             \
  } while (0)
#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
#endif

1757 | |

#if !defined (count_trailing_zeros)
/* Define count_trailing_zeros using count_leading_zeros.  The latter might be
   defined in asm, but if it is not, the C version above is good enough.

   count_trailing_zeros (count, x) stores in COUNT the index of the
   lowest set bit of X.  X & -X isolates that bit; its position is then
   W_TYPE_SIZE - 1 minus the number of leading zeros.  For X == 0 no
   bit is isolated and the result depends on what count_leading_zeros
   yields for 0, so callers should pass a nonzero X.  The __ctz_
   prefixed locals keep clear of the names used inside
   count_leading_zeros.  */
#define count_trailing_zeros(count, x) \
  do {                                                                  \
    UWtype __ctz_x = (x);                                               \
    UWtype __ctz_c;                                                     \
    count_leading_zeros (__ctz_c, __ctz_x & -__ctz_x);                  \
    (count) = W_TYPE_SIZE - 1 - __ctz_c;                                \
  } while (0)
#endif

1769 | |

/* Default: unless a definition above set it, udiv_qrnnd is taken not
   to need a normalized divisor.  */
#ifndef UDIV_NEEDS_NORMALIZATION
#define UDIV_NEEDS_NORMALIZATION 0
#endif

1773 |