1/* Convert string representing a number to integer value, using given locale.
2 Copyright (C) 1997-2018 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
19
20
21#if HAVE_CONFIG_H
22# include <config.h>
23#endif
24
25#ifdef _LIBC
26# define USE_NUMBER_GROUPING
27# define HAVE_LIMITS_H
28#endif
29
30#include <ctype.h>
31#include <errno.h>
32#ifndef __set_errno
33# define __set_errno(Val) errno = (Val)
34#endif
35
36#ifdef HAVE_LIMITS_H
37# include <limits.h>
38#endif
39
40#include <stddef.h>
41#include <stdlib.h>
42#include <string.h>
43#include <locale.h>
44#include <stdint.h>
45#include <bits/wordsize.h>
46
47#ifdef USE_NUMBER_GROUPING
48# include "../locale/localeinfo.h"
49#endif
50
/* UNSIGNED is nonzero when the unsigned conversions (`strtoul',
   `strtoull' and their wide-character counterparts) are being built.
   It is expected to be defined before this point when wanted; if it is
   not defined at all it defaults to 0.  INT is the return type of the
   conversion function and is spelled in terms of LONG, which is
   defined further down.  */
#if defined UNSIGNED
# define INT unsigned LONG int
#else
# define UNSIGNED 0
# define INT LONG int
#endif

/* Pick the public function name from the (UNSIGNED, USE_WIDE_CHAR,
   QUAD) combination.  */
#if UNSIGNED && defined USE_WIDE_CHAR && defined QUAD
# define strtol_l wcstoull_l
#elif UNSIGNED && defined USE_WIDE_CHAR
# define strtol_l wcstoul_l
#elif UNSIGNED && defined QUAD
# define strtol_l strtoull_l
#elif UNSIGNED
# define strtol_l strtoul_l
#elif defined USE_WIDE_CHAR && defined QUAD
# define strtol_l wcstoll_l
#elif defined USE_WIDE_CHAR
# define strtol_l wcstol_l
#elif defined QUAD
# define strtol_l strtoll_l
#else
# define strtol_l strtol_l
#endif

/* __strtol_l is the selected name with a leading double underscore.
   Two levels of helper macro are needed so that strtol_l is expanded
   before the token paste takes place.  */
#define __strtol_l3(fn) __##fn
#define __strtol_l2(fn) __strtol_l3(fn)
#define __strtol_l __strtol_l2(strtol_l)
94
95
/* If QUAD is defined, we are defining `strtoll' or `strtoull',
   operating on `long long int's.  Otherwise the functions operate on
   `long int'.

   LONG is the type keyword(s) placed in front of `int'; the STRTOL_*
   macros are the range limits of the resulting signed and unsigned
   types, used for overflow detection and as the saturated return
   values.  */
#ifdef QUAD
# define LONG long long
# define STRTOL_LONG_MIN LONG_LONG_MIN
# define STRTOL_LONG_MAX LONG_LONG_MAX
# define STRTOL_ULONG_MAX ULONG_LONG_MAX
#else
# define LONG long

/* Fallbacks for configurations in which <limits.h> was not included.
   All three limits need one: STRTOL_LONG_MIN is used unconditionally
   by the signed variants, so leaving LONG_MIN without a fallback would
   make the ULONG_MAX/LONG_MAX fallbacks pointless.  The LONG_MIN
   expression assumes a two's complement representation.  */
# ifndef ULONG_MAX
#  define ULONG_MAX ((unsigned long int) ~(unsigned long int) 0)
# endif
# ifndef LONG_MAX
#  define LONG_MAX ((long int) (ULONG_MAX >> 1))
# endif
# ifndef LONG_MIN
#  define LONG_MIN (-LONG_MAX - 1L)
# endif
# define STRTOL_LONG_MIN LONG_MIN
# define STRTOL_LONG_MAX LONG_MAX
# define STRTOL_ULONG_MAX ULONG_MAX
#endif
116
117
/* We use this code for the extended locale handling where the
   function gets as an additional argument the locale which has to be
   used.  To access the values we have to redefine the _NL_CURRENT and
   _NL_CURRENT_WORD macros.

   Both redefinitions ignore their CATEGORY argument and read from a
   variable named `current', which therefore has to be in scope
   wherever they are expanded.  _NL_CURRENT yields the `.string' member
   of the selected value, _NL_CURRENT_WORD its `.word' member converted
   to uint32_t.  */
#undef _NL_CURRENT
#define _NL_CURRENT(category, item) \
  (current->values[_NL_ITEM_INDEX (item)].string)
#undef _NL_CURRENT_WORD
#define _NL_CURRENT_WORD(category, item) \
  ((uint32_t) current->values[_NL_ITEM_INDEX (item)].word)

#if defined _LIBC || defined HAVE_WCHAR_H
# include <wchar.h>
#endif

/* Character-type abstraction so that one function body serves both
   the narrow and the wide variants:

     L_(Ch)       character literal of the right kind
     UCHAR_TYPE   type used to hold one character while it is examined
     STRING_TYPE  element type of the input string
     ISSPACE      white-space test; expands to a reference to a variable
		  named `loc', which must be in scope at the point of use
     ISALPHA      letter test, always evaluated against _nl_C_locobj_ptr
     TOUPPER      upper-casing, always evaluated against _nl_C_locobj_ptr

   NOTE(review): _nl_C_locobj_ptr is declared outside this file; by its
   name it is the "C" locale object -- confirm.  */
#ifdef USE_WIDE_CHAR
# include <wctype.h>
# define L_(Ch) L##Ch
# define UCHAR_TYPE wint_t
# define STRING_TYPE wchar_t
# define ISSPACE(Ch) __iswspace_l ((Ch), loc)
# define ISALPHA(Ch) __iswalpha_l ((Ch), _nl_C_locobj_ptr)
# define TOUPPER(Ch) __towupper_l ((Ch), _nl_C_locobj_ptr)
#else
/* IN_CTYPE_DOMAIN is defined here but not referenced by any code
   visible in this file.  */
# if defined _LIBC \
     || defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
#  define IN_CTYPE_DOMAIN(c) 1
# else
#  define IN_CTYPE_DOMAIN(c) isascii(c)
# endif
# define L_(Ch) Ch
# define UCHAR_TYPE unsigned char
# define STRING_TYPE char
# define ISSPACE(Ch) __isspace_l ((Ch), loc)
# define ISALPHA(Ch) __isalpha_l ((Ch), _nl_C_locobj_ptr)
# define TOUPPER(Ch) __toupper_l ((Ch), _nl_C_locobj_ptr)
#endif

/* INTERNAL (X) expands X first and then yields __<X>_internal; the
   extra level (INTERNAL1) is what allows X to be a macro itself.
   WEAKNAME forwards to WEAKNAME1, which is not defined in the visible
   code; WEAKNAME is not used in the visible code either.  */
#define INTERNAL(X) INTERNAL1(X)
#define INTERNAL1(X) __##X##_internal
#define WEAKNAME(X) WEAKNAME1(X)

#ifdef USE_NUMBER_GROUPING
/* This file defines a function to check for correct grouping.  */
# include "grouping.h"
#endif
164
165
/* Define tables of maximum values and remainders in order to detect
   overflow.  Do this at compile-time in order to avoid the runtime
   overhead of the division.

   Each table has one entry per base from 2 to 36, so the entry for a
   given base lives at index BASE - 2.  The `max' tables hold the
   largest value of the unsigned type divided by the base, the `rem'
   tables hold the corresponding remainder.  */
extern const unsigned long __strtol_ul_max_tab[] attribute_hidden;
extern const unsigned char __strtol_ul_rem_tab[] attribute_hidden;
#if defined(QUAD) && __WORDSIZE == 32
extern const unsigned long long __strtol_ull_max_tab[] attribute_hidden;
extern const unsigned char __strtol_ull_rem_tab[] attribute_hidden;
#endif

/* DEF (TYPE, NAME) defines the array NAME of element type TYPE whose
   entries are F(2) ... F(36).  F must be defined by the user of DEF
   immediately before each use.  */
#define DEF(TYPE, NAME) \
  const TYPE NAME[] attribute_hidden = \
  { \
    F(2), F(3), F(4), F(5), F(6), F(7), F(8), F(9), F(10), \
    F(11), F(12), F(13), F(14), F(15), F(16), F(17), F(18), F(19), F(20), \
    F(21), F(22), F(23), F(24), F(25), F(26), F(27), F(28), F(29), F(30), \
    F(31), F(32), F(33), F(34), F(35), F(36) \
  }

/* The `unsigned long' tables are defined only when this file is
   compiled as the signed, narrow-character, non-QUAD variant; every
   other variant sees just the extern declarations above.  */
#if !UNSIGNED && !defined (USE_WIDE_CHAR) && !defined (QUAD)
# define F(X) ULONG_MAX / X
  DEF (unsigned long, __strtol_ul_max_tab);
# undef F
# define F(X) ULONG_MAX % X
  DEF (unsigned char, __strtol_ul_rem_tab);
# undef F
#endif
/* Likewise the `unsigned long long' tables are defined only by the
   signed, narrow-character QUAD variant, and only when __WORDSIZE is
   32.  */
#if !UNSIGNED && !defined (USE_WIDE_CHAR) && defined (QUAD) \
    && __WORDSIZE == 32
# define F(X) ULONG_LONG_MAX / X
  DEF (unsigned long long, __strtol_ull_max_tab);
# undef F
# define F(X) ULONG_LONG_MAX % X
  DEF (unsigned char, __strtol_ull_rem_tab);
# undef F
#endif
#undef DEF

/* Define some more readable aliases for these arrays which correspond
   to how they'll be used in the function below.  jmax_tab always
   refers to the `unsigned long' maxima; cutoff_tab/cutlim_tab refer to
   the `unsigned long long' tables only for QUAD with __WORDSIZE == 32
   and to the `unsigned long' tables otherwise.  */
#define jmax_tab __strtol_ul_max_tab
#if defined(QUAD) && __WORDSIZE == 32
# define cutoff_tab __strtol_ull_max_tab
# define cutlim_tab __strtol_ull_rem_tab
#else
# define cutoff_tab __strtol_ul_max_tab
# define cutlim_tab __strtol_ul_rem_tab
#endif
214
215
/* Convert NPTR to an `unsigned long int' or `long int' (or the
   `long long' counterparts when QUAD is defined) in base BASE.

   Leading white space, as classified by ISSPACE for locale LOC, is
   skipped, followed by an optional `+' or `-' sign.

   If BASE is 0 the base is determined by the presence of a leading
   zero, indicating octal or a leading "0x" or "0X", indicating
   hexadecimal; otherwise it is decimal.  A "0x"/"0X" prefix is also
   skipped when BASE is 16.
   If BASE is negative, 1, or greater than 36, errno is set to EINVAL
   and 0 is returned; *ENDPTR is not written in that case.

   If GROUP is nonzero and number grouping support is compiled in
   (USE_NUMBER_GROUPING), the LC_NUMERIC grouping rules and thousands
   separator of LOC are honoured for base 10 input.

   If ENDPTR is not NULL, a pointer to the character after the last
   one converted is stored in *ENDPTR.  If no conversion could be
   performed, 0 is returned and *ENDPTR is set to NPTR, except for the
   "0x without hex digits" case described at the `noconv' label.

   On overflow errno is set to ERANGE and the result saturates:
   STRTOL_ULONG_MAX for the unsigned variants, STRTOL_LONG_MIN or
   STRTOL_LONG_MAX (depending on the sign) for the signed ones.
   Otherwise the converted value is returned, negated if a `-' sign
   was present.  */

INT
INTERNAL (__strtol_l) (const STRING_TYPE *nptr, STRING_TYPE **endptr,
                       int base, int group, locale_t loc)
{
  int negative;
  unsigned LONG int cutoff;
  unsigned int cutlim;
  unsigned LONG int i;
  const STRING_TYPE *s;
  UCHAR_TYPE c;
  const STRING_TYPE *save, *end;
  int overflow;
#ifndef USE_WIDE_CHAR
  size_t cnt;
#endif

#ifdef USE_NUMBER_GROUPING
  /* `current' is what the redefined _NL_CURRENT and _NL_CURRENT_WORD
     macros read from.  */
  struct __locale_data *current = loc->__locales[LC_NUMERIC];
  /* The thousands character of the current locale.  */
# ifdef USE_WIDE_CHAR
  wchar_t thousands = L'\0';
# else
  const char *thousands = NULL;
  size_t thousands_len = 0;
# endif
  /* The numeric grouping specification of the current locale,
     in the format described in <locale.h>.  */
  const char *grouping;

  if (__glibc_unlikely (group))
    {
      grouping = _NL_CURRENT (LC_NUMERIC, GROUPING);
      /* A first grouping byte that is <= 0 or CHAR_MAX switches
         grouping off.  */
      if (*grouping <= 0 || *grouping == CHAR_MAX)
        grouping = NULL;
      else
        {
          /* Figure out the thousands separator character.  An empty
             separator also switches grouping off.  */
# ifdef USE_WIDE_CHAR
#  ifdef _LIBC
          thousands = _NL_CURRENT_WORD (LC_NUMERIC,
                                        _NL_NUMERIC_THOUSANDS_SEP_WC);
#  endif
          if (thousands == L'\0')
            grouping = NULL;
# else
#  ifdef _LIBC
          thousands = _NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP);
#  endif
          if (*thousands == '\0')
            {
              thousands = NULL;
              grouping = NULL;
            }
# endif
        }
    }
  else
    grouping = NULL;
#endif

  /* Reject invalid bases.  Base 0 is valid and means "auto-detect".  */
  if (base < 0 || base == 1 || base > 36)
    {
      __set_errno (EINVAL);
      return 0;
    }

  save = s = nptr;

  /* Skip white space.  */
  while (ISSPACE (*s))
    ++s;
  /* Nothing but white space: no conversion.  SAVE still equals NPTR
     here.  */
  if (__glibc_unlikely (*s == L_('\0')))
    goto noconv;

  /* Check for a sign.  */
  negative = 0;
  if (*s == L_('-'))
    {
      negative = 1;
      ++s;
    }
  else if (*s == L_('+'))
    ++s;

  /* Recognize number prefix and if BASE is zero, figure it out ourselves.  */
  if (*s == L_('0'))
    {
      if ((base == 0 || base == 16) && TOUPPER (s[1]) == L_('X'))
        {
          s += 2;
          base = 16;
        }
      else if (base == 0)
        base = 8;
    }
  else if (base == 0)
    base = 10;

  /* Save the pointer so we can check later if anything happened.
     SAVE now points just past any sign and "0x" prefix.  */
  save = s;

#ifdef USE_NUMBER_GROUPING
  /* Grouping is only considered for decimal input.  */
  if (base != 10)
    grouping = NULL;

  if (__glibc_unlikely (grouping != NULL))
    {
# ifndef USE_WIDE_CHAR
      thousands_len = strlen (thousands);
# endif

      /* Find the end of the digit string and check its grouping.
         This is only done when the input does not start with the
         thousands separator itself; otherwise END keeps the value S.
         The statement expressions in the narrow variant compare the
         bytes at END against the separator and are nonzero when they
         do NOT match.  */
      end = s;
      if (
# ifdef USE_WIDE_CHAR
          *s != thousands
# else
          ({ for (cnt = 0; cnt < thousands_len; ++cnt)
               if (thousands[cnt] != end[cnt])
                 break;
             cnt < thousands_len; })
# endif
          )
        {
          /* Advance END over everything that is a decimal digit, a
             thousands separator, or a letter that is a valid digit in
             BASE.  */
          for (c = *end; c != L_('\0'); c = *++end)
            if (((STRING_TYPE) c < L_('0') || (STRING_TYPE) c > L_('9'))
# ifdef USE_WIDE_CHAR
                && (wchar_t) c != thousands
# else
                && ({ for (cnt = 0; cnt < thousands_len; ++cnt)
                        if (thousands[cnt] != end[cnt])
                          break;
                      cnt < thousands_len; })
# endif
                && (!ISALPHA (c)
                    || (int) (TOUPPER (c) - L_('A') + 10) >= base))
              break;

          /* Let the grouping helper decide where parsing has to stop;
             the conversion loops below break as soon as S reaches
             END.  */
# ifdef USE_WIDE_CHAR
          end = __correctly_grouped_prefixwc (s, end, thousands, grouping);
# else
          end = __correctly_grouped_prefixmb (s, end, thousands, grouping);
# endif
        }
    }
  else
#endif
    /* Without grouping there is no early stop position.  */
    end = NULL;

  /* Avoid runtime division; lookup cutoff and limit.  The tables are
     indexed by BASE - 2.  */
  cutoff = cutoff_tab[base - 2];
  cutlim = cutlim_tab[base - 2];

  overflow = 0;
  i = 0;
  c = *s;
  /* The condition is a compile-time constant.  When the result type
     differs in size from `long int', start accumulating in an
     `unsigned long int' (J) and switch to the `unsigned LONG int'
     accumulator I only once J can hold no further digit.  */
  if (sizeof (long int) != sizeof (LONG int))
    {
      unsigned long int j = 0;
      unsigned long int jmax = jmax_tab[base - 2];

      for (;c != L_('\0'); c = *++s)
        {
          if (s == end)
            break;
          if (c >= L_('0') && c <= L_('9'))
            c -= L_('0');
#ifdef USE_NUMBER_GROUPING
# ifdef USE_WIDE_CHAR
          else if (grouping && (wchar_t) c == thousands)
            continue;
# else
          else if (thousands_len)
            {
              /* Skip a complete thousands separator.  S is left on its
                 last byte because the loop header advances once more.  */
              for (cnt = 0; cnt < thousands_len; ++cnt)
                if (thousands[cnt] != s[cnt])
                  break;
              if (cnt == thousands_len)
                {
                  s += thousands_len - 1;
                  continue;
                }
              if (ISALPHA (c))
                c = TOUPPER (c) - L_('A') + 10;
              else
                break;
            }
# endif
#endif
          else if (ISALPHA (c))
            c = TOUPPER (c) - L_('A') + 10;
          else
            break;
          /* Not a valid digit in this base: stop.  */
          if ((int) c >= base)
            break;
          /* J itself is never allowed to overflow: as soon as it has
             reached JMAX (the jmax_tab entry, ULONG_MAX / BASE) the
             value is handed over to the wider accumulator.  */
          else if (j >= jmax)
            {
              /* J cannot safely take another digit.  Now use the long
                 representation: jump into the body of the second loop,
                 which adds the current digit C to I and carries on
                 from there.  */
              i = (unsigned LONG int) j;
              goto use_long;
            }
          else
            j = j * (unsigned long int) base + c;
        }

      i = (unsigned LONG int) j;
    }
  else
    for (;c != L_('\0'); c = *++s)
      {
        if (s == end)
          break;
        if (c >= L_('0') && c <= L_('9'))
          c -= L_('0');
#ifdef USE_NUMBER_GROUPING
# ifdef USE_WIDE_CHAR
        else if (grouping && (wchar_t) c == thousands)
          continue;
# else
        else if (thousands_len)
          {
            /* Skip a complete thousands separator.  S is left on its
               last byte because the loop header advances once more.  */
            for (cnt = 0; cnt < thousands_len; ++cnt)
              if (thousands[cnt] != s[cnt])
                break;
            if (cnt == thousands_len)
              {
                s += thousands_len - 1;
                continue;
              }
            if (ISALPHA (c))
              c = TOUPPER (c) - L_('A') + 10;
            else
              break;
          }
# endif
#endif
        else if (ISALPHA (c))
          c = TOUPPER (c) - L_('A') + 10;
        else
          break;
        /* Not a valid digit in this base: stop.  */
        if ((int) c >= base)
          break;
        /* Check for overflow.  Once the flag is set I is no longer
           updated, but the remaining digits are still consumed so that
           S ends up after the whole number.  */
        if (i > cutoff || (i == cutoff && c > cutlim))
          overflow = 1;
        else
          {
          /* Entry point for the hand-over from the first loop above.  */
          use_long:
            i *= (unsigned LONG int) base;
            i += c;
          }
      }

  /* Check if anything actually happened.  */
  if (s == save)
    goto noconv;

  /* Store in ENDPTR the address of one character
     past the last character we converted.  */
  if (endptr != NULL)
    *endptr = (STRING_TYPE *) s;

#if !UNSIGNED
  /* Check for a value that is within the range of
     `unsigned LONG int', but outside the range of `LONG int'.
     The limit for negative input is the magnitude of STRTOL_LONG_MIN,
     computed in unsigned arithmetic.  */
  if (overflow == 0
      && i > (negative
              ? -((unsigned LONG int) (STRTOL_LONG_MIN + 1)) + 1
              : (unsigned LONG int) STRTOL_LONG_MAX))
    overflow = 1;
#endif

  if (__glibc_unlikely (overflow))
    {
      __set_errno (ERANGE);
#if UNSIGNED
      return STRTOL_ULONG_MAX;
#else
      return negative ? STRTOL_LONG_MIN : STRTOL_LONG_MAX;
#endif
    }

  /* Return the result of the appropriate sign.  I is unsigned, so the
     negation is carried out in unsigned arithmetic.  */
  return negative ? -i : i;

noconv:
  /* We must handle a special case here: the base is 0 or 16 and the
     first two characters are '0' and 'x', but the rest are no
     hexadecimal digits.  This is no error case.  We return 0 and
     ENDPTR points to the `x`.  */
  if (endptr != NULL)
    {
      if (save - nptr >= 2 && TOUPPER (save[-1]) == L_('X')
          && save[-2] == L_('0'))
        *endptr = (STRING_TYPE *) &save[-1];
      else
        /*  There was no number to convert.  */
        *endptr = (STRING_TYPE *) nptr;
    }

  return 0L;
}
#if defined _LIBC && !defined USE_WIDE_CHAR
libc_hidden_def (INTERNAL (__strtol_l))
#endif
529
530/* External user entry point. */
531
532#if _LIBC - 0 == 0
533
534/* Prototype. */
535extern INT __strtol_l (const STRING_TYPE *nptr, STRING_TYPE **endptr,
536 int base);
537#endif
538
539
540INT
541#ifdef weak_function
542weak_function
543#endif
544__strtol_l (const STRING_TYPE *nptr, STRING_TYPE **endptr,
545 int base, locale_t loc)
546{
547 return INTERNAL (__strtol_l) (nptr, endptr, base, 0, loc);
548}
549libc_hidden_def (__strtol_l)
550weak_alias (__strtol_l, strtol_l)
551