1/* Convert string representing a number to integer value, using given locale.
2 Copyright (C) 1997-2017 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
19
20
21#if HAVE_CONFIG_H
22# include <config.h>
23#endif
24
25#ifdef _LIBC
26# define USE_NUMBER_GROUPING
27# define HAVE_LIMITS_H
28#endif
29
30#include <ctype.h>
31#include <errno.h>
32#ifndef __set_errno
33# define __set_errno(Val) errno = (Val)
34#endif
35
36#ifdef HAVE_LIMITS_H
37# include <limits.h>
38#endif
39
40#include <stddef.h>
41#include <stdlib.h>
42#include <string.h>
43#include <locale.h>
44#include <xlocale.h>
45#include <stdint.h>
46#include <bits/wordsize.h>
47
48#ifdef USE_NUMBER_GROUPING
49# include "../locale/localeinfo.h"
50#endif
51
/* UNSIGNED is nonzero if we are defining `strtoul' or `strtoull',
   operating on unsigned integers.  It defaults to 0 (signed) when the
   including file does not set it.

   INT, the return type of the generated function, is selected from the
   *value* of UNSIGNED rather than from whether it is defined, so that an
   explicit `#define UNSIGNED 0' gives a signed type, in agreement with
   every later `#if UNSIGNED' test in this file.  */
#ifndef UNSIGNED
# define UNSIGNED 0
#endif
#if UNSIGNED
# define INT unsigned LONG int
#else
# define INT LONG int
#endif
60
/* Pick the public name of the function being generated.  Three switches
   take part: UNSIGNED (strtoul family), USE_WIDE_CHAR (wcs* family) and
   QUAD (long long variants).  */
#if UNSIGNED && defined USE_WIDE_CHAR && defined QUAD
# define strtol_l wcstoull_l
#elif UNSIGNED && defined USE_WIDE_CHAR
# define strtol_l wcstoul_l
#elif UNSIGNED && defined QUAD
# define strtol_l strtoull_l
#elif UNSIGNED
# define strtol_l strtoul_l
#elif defined USE_WIDE_CHAR && defined QUAD
# define strtol_l wcstoll_l
#elif defined USE_WIDE_CHAR
# define strtol_l wcstol_l
#elif defined QUAD
# define strtol_l strtoll_l
#else
# define strtol_l strtol_l
#endif

/* __strtol_l is the selected name with a `__' prefix.  The indirection
   through __strtol_l2 makes the preprocessor expand strtol_l before
   __strtol_l3 pastes the prefix onto it.  */
#define __strtol_l3(fn) __##fn
#define __strtol_l2(fn) __strtol_l3 (fn)
#define __strtol_l __strtol_l2 (strtol_l)
95
96
/* If QUAD is defined, we are defining `strtoll' or `strtoull',
   operating on `long long int's.  LONG supplies the width keyword(s)
   used to build the integer types, and the STRTOL_* macros name the
   matching range limits.  */
#ifdef QUAD
# define LONG long long
# define STRTOL_LONG_MIN LONG_LONG_MIN
# define STRTOL_LONG_MAX LONG_LONG_MAX
# define STRTOL_ULONG_MAX ULONG_LONG_MAX
#else
# define LONG long

/* Fallbacks for builds in which <limits.h> was not included.  All three
   limits used below need one; LONG_MIN is derived from LONG_MAX.  */
# ifndef ULONG_MAX
#  define ULONG_MAX ((unsigned long int) ~(unsigned long int) 0)
# endif
# ifndef LONG_MAX
#  define LONG_MAX ((long int) (ULONG_MAX >> 1))
# endif
# ifndef LONG_MIN
#  define LONG_MIN (-LONG_MAX - 1L)
# endif
# define STRTOL_LONG_MIN LONG_MIN
# define STRTOL_LONG_MAX LONG_MAX
# define STRTOL_ULONG_MAX ULONG_MAX
#endif
117
118
/* This code implements the extended locale interface, in which the
   locale to use is passed in as an extra argument.  The locale-data
   accessors are therefore redefined to read from the local variable
   `current'; their first (category) argument is ignored.  */
#undef _NL_CURRENT_WORD
#define _NL_CURRENT_WORD(cat, elem) \
  ((uint32_t) current->values[_NL_ITEM_INDEX (elem)].word)
#undef _NL_CURRENT
#define _NL_CURRENT(cat, elem) \
  (current->values[_NL_ITEM_INDEX (elem)].string)
129
130#if defined _LIBC || defined HAVE_WCHAR_H
131# include <wchar.h>
132#endif
133
/* Character-type abstraction shared by the narrow and wide variants.
   L_ forms a character literal of the right kind, STRING_TYPE is the
   element type of the input string and UCHAR_TYPE the type used to hold
   one character while it is classified.  ISSPACE consults the caller's
   locale LOC; ISALPHA and TOUPPER use _nl_C_locobj_ptr.  */
#ifndef USE_WIDE_CHAR
# if (defined _LIBC || defined STDC_HEADERS \
      || (!defined isascii && !defined HAVE_ISASCII))
#  define IN_CTYPE_DOMAIN(ch) 1
# else
#  define IN_CTYPE_DOMAIN(ch) isascii (ch)
# endif
# define L_(ch) ch
# define UCHAR_TYPE unsigned char
# define STRING_TYPE char
# define ISSPACE(ch) __isspace_l ((ch), loc)
# define ISALPHA(ch) __isalpha_l ((ch), _nl_C_locobj_ptr)
# define TOUPPER(ch) __toupper_l ((ch), _nl_C_locobj_ptr)
#else
# include <wctype.h>
# define L_(ch) L##ch
# define UCHAR_TYPE wint_t
# define STRING_TYPE wchar_t
# define ISSPACE(ch) __iswspace_l ((ch), loc)
# define ISALPHA(ch) __iswalpha_l ((ch), _nl_C_locobj_ptr)
# define TOUPPER(ch) __towupper_l ((ch), _nl_C_locobj_ptr)
#endif
156
/* INTERNAL (name) yields __<name>_internal; the two-step definition lets
   NAME be macro-expanded before the pasting happens.  */
#define INTERNAL1(fn) __##fn##_internal
#define INTERNAL(fn) INTERNAL1 (fn)
/* WEAKNAME forwards to WEAKNAME1, which is not defined in the visible
   part of this file.  */
#define WEAKNAME(fn) WEAKNAME1 (fn)
160
161#ifdef USE_NUMBER_GROUPING
162/* This file defines a function to check for correct grouping. */
163# include "grouping.h"
164#endif
165
166
167/* Define tables of maximum values and remainders in order to detect
168 overflow. Do this at compile-time in order to avoid the runtime
169 overhead of the division. */
170extern const unsigned long __strtol_ul_max_tab[] attribute_hidden;
171extern const unsigned char __strtol_ul_rem_tab[] attribute_hidden;
172#if defined(QUAD) && __WORDSIZE == 32
173extern const unsigned long long __strtol_ull_max_tab[] attribute_hidden;
174extern const unsigned char __strtol_ull_rem_tab[] attribute_hidden;
175#endif
176
177#define DEF(TYPE, NAME) \
178 const TYPE NAME[] attribute_hidden = \
179 { \
180 F(2), F(3), F(4), F(5), F(6), F(7), F(8), F(9), F(10), \
181 F(11), F(12), F(13), F(14), F(15), F(16), F(17), F(18), F(19), F(20), \
182 F(21), F(22), F(23), F(24), F(25), F(26), F(27), F(28), F(29), F(30), \
183 F(31), F(32), F(33), F(34), F(35), F(36) \
184 }
185
186#if !UNSIGNED && !defined (USE_WIDE_CHAR) && !defined (QUAD)
187# define F(X) ULONG_MAX / X
188 DEF (unsigned long, __strtol_ul_max_tab);
189# undef F
190# define F(X) ULONG_MAX % X
191 DEF (unsigned char, __strtol_ul_rem_tab);
192# undef F
193#endif
194#if !UNSIGNED && !defined (USE_WIDE_CHAR) && defined (QUAD) \
195 && __WORDSIZE == 32
196# define F(X) ULONG_LONG_MAX / X
197 DEF (unsigned long long, __strtol_ull_max_tab);
198# undef F
199# define F(X) ULONG_LONG_MAX % X
200 DEF (unsigned char, __strtol_ull_rem_tab);
201# undef F
202#endif
203#undef DEF
204
/* Friendlier names for the tables, matching the roles they play in the
   conversion function below.  jmax_tab is always the `unsigned long'
   maximum table; cutoff_tab/cutlim_tab are the `unsigned long long'
   pair only for QUAD builds with a 32-bit word size.  */
#define jmax_tab __strtol_ul_max_tab
#if !defined (QUAD) || __WORDSIZE != 32
# define cutoff_tab __strtol_ul_max_tab
# define cutlim_tab __strtol_ul_rem_tab
#else
# define cutoff_tab __strtol_ull_max_tab
# define cutlim_tab __strtol_ull_rem_tab
#endif
215
216
217/* Convert NPTR to an `unsigned long int' or `long int' in base BASE.
218 If BASE is 0 the base is determined by the presence of a leading
219 zero, indicating octal or a leading "0x" or "0X", indicating hexadecimal.
220 If BASE is < 2 or > 36, it is reset to 10.
221 If ENDPTR is not NULL, a pointer to the character after the last
222 one converted is stored in *ENDPTR. */
223
224INT
225INTERNAL (__strtol_l) (const STRING_TYPE *nptr, STRING_TYPE **endptr,
226 int base, int group, __locale_t loc)
227{
228 int negative;
229 unsigned LONG int cutoff;
230 unsigned int cutlim;
231 unsigned LONG int i;
232 const STRING_TYPE *s;
233 UCHAR_TYPE c;
234 const STRING_TYPE *save, *end;
235 int overflow;
236#ifndef USE_WIDE_CHAR
237 size_t cnt;
238#endif
239
240#ifdef USE_NUMBER_GROUPING
241 struct __locale_data *current = loc->__locales[LC_NUMERIC];
242 /* The thousands character of the current locale. */
243# ifdef USE_WIDE_CHAR
244 wchar_t thousands = L'\0';
245# else
246 const char *thousands = NULL;
247 size_t thousands_len = 0;
248# endif
249 /* The numeric grouping specification of the current locale,
250 in the format described in <locale.h>. */
251 const char *grouping;
252
253 if (__glibc_unlikely (group))
254 {
255 grouping = _NL_CURRENT (LC_NUMERIC, GROUPING);
256 if (*grouping <= 0 || *grouping == CHAR_MAX)
257 grouping = NULL;
258 else
259 {
260 /* Figure out the thousands separator character. */
261# ifdef USE_WIDE_CHAR
262# ifdef _LIBC
263 thousands = _NL_CURRENT_WORD (LC_NUMERIC,
264 _NL_NUMERIC_THOUSANDS_SEP_WC);
265# endif
266 if (thousands == L'\0')
267 grouping = NULL;
268# else
269# ifdef _LIBC
270 thousands = _NL_CURRENT (LC_NUMERIC, THOUSANDS_SEP);
271# endif
272 if (*thousands == '\0')
273 {
274 thousands = NULL;
275 grouping = NULL;
276 }
277# endif
278 }
279 }
280 else
281 grouping = NULL;
282#endif
283
284 if (base < 0 || base == 1 || base > 36)
285 {
286 __set_errno (EINVAL);
287 return 0;
288 }
289
290 save = s = nptr;
291
292 /* Skip white space. */
293 while (ISSPACE (*s))
294 ++s;
295 if (__glibc_unlikely (*s == L_('\0')))
296 goto noconv;
297
298 /* Check for a sign. */
299 negative = 0;
300 if (*s == L_('-'))
301 {
302 negative = 1;
303 ++s;
304 }
305 else if (*s == L_('+'))
306 ++s;
307
308 /* Recognize number prefix and if BASE is zero, figure it out ourselves. */
309 if (*s == L_('0'))
310 {
311 if ((base == 0 || base == 16) && TOUPPER (s[1]) == L_('X'))
312 {
313 s += 2;
314 base = 16;
315 }
316 else if (base == 0)
317 base = 8;
318 }
319 else if (base == 0)
320 base = 10;
321
322 /* Save the pointer so we can check later if anything happened. */
323 save = s;
324
325#ifdef USE_NUMBER_GROUPING
326 if (base != 10)
327 grouping = NULL;
328
329 if (__glibc_unlikely (grouping != NULL))
330 {
331# ifndef USE_WIDE_CHAR
332 thousands_len = strlen (thousands);
333# endif
334
335 /* Find the end of the digit string and check its grouping. */
336 end = s;
337 if (
338# ifdef USE_WIDE_CHAR
339 *s != thousands
340# else
341 ({ for (cnt = 0; cnt < thousands_len; ++cnt)
342 if (thousands[cnt] != end[cnt])
343 break;
344 cnt < thousands_len; })
345# endif
346 )
347 {
348 for (c = *end; c != L_('\0'); c = *++end)
349 if (((STRING_TYPE) c < L_('0') || (STRING_TYPE) c > L_('9'))
350# ifdef USE_WIDE_CHAR
351 && (wchar_t) c != thousands
352# else
353 && ({ for (cnt = 0; cnt < thousands_len; ++cnt)
354 if (thousands[cnt] != end[cnt])
355 break;
356 cnt < thousands_len; })
357# endif
358 && (!ISALPHA (c)
359 || (int) (TOUPPER (c) - L_('A') + 10) >= base))
360 break;
361
362# ifdef USE_WIDE_CHAR
363 end = __correctly_grouped_prefixwc (s, end, thousands, grouping);
364# else
365 end = __correctly_grouped_prefixmb (s, end, thousands, grouping);
366# endif
367 }
368 }
369 else
370#endif
371 end = NULL;
372
373 /* Avoid runtime division; lookup cutoff and limit. */
374 cutoff = cutoff_tab[base - 2];
375 cutlim = cutlim_tab[base - 2];
376
377 overflow = 0;
378 i = 0;
379 c = *s;
380 if (sizeof (long int) != sizeof (LONG int))
381 {
382 unsigned long int j = 0;
383 unsigned long int jmax = jmax_tab[base - 2];
384
385 for (;c != L_('\0'); c = *++s)
386 {
387 if (s == end)
388 break;
389 if (c >= L_('0') && c <= L_('9'))
390 c -= L_('0');
391#ifdef USE_NUMBER_GROUPING
392# ifdef USE_WIDE_CHAR
393 else if (grouping && (wchar_t) c == thousands)
394 continue;
395# else
396 else if (thousands_len)
397 {
398 for (cnt = 0; cnt < thousands_len; ++cnt)
399 if (thousands[cnt] != s[cnt])
400 break;
401 if (cnt == thousands_len)
402 {
403 s += thousands_len - 1;
404 continue;
405 }
406 if (ISALPHA (c))
407 c = TOUPPER (c) - L_('A') + 10;
408 else
409 break;
410 }
411# endif
412#endif
413 else if (ISALPHA (c))
414 c = TOUPPER (c) - L_('A') + 10;
415 else
416 break;
417 if ((int) c >= base)
418 break;
419 /* Note that we never can have an overflow. */
420 else if (j >= jmax)
421 {
422 /* We have an overflow. Now use the long representation. */
423 i = (unsigned LONG int) j;
424 goto use_long;
425 }
426 else
427 j = j * (unsigned long int) base + c;
428 }
429
430 i = (unsigned LONG int) j;
431 }
432 else
433 for (;c != L_('\0'); c = *++s)
434 {
435 if (s == end)
436 break;
437 if (c >= L_('0') && c <= L_('9'))
438 c -= L_('0');
439#ifdef USE_NUMBER_GROUPING
440# ifdef USE_WIDE_CHAR
441 else if (grouping && (wchar_t) c == thousands)
442 continue;
443# else
444 else if (thousands_len)
445 {
446 for (cnt = 0; cnt < thousands_len; ++cnt)
447 if (thousands[cnt] != s[cnt])
448 break;
449 if (cnt == thousands_len)
450 {
451 s += thousands_len - 1;
452 continue;
453 }
454 if (ISALPHA (c))
455 c = TOUPPER (c) - L_('A') + 10;
456 else
457 break;
458 }
459# endif
460#endif
461 else if (ISALPHA (c))
462 c = TOUPPER (c) - L_('A') + 10;
463 else
464 break;
465 if ((int) c >= base)
466 break;
467 /* Check for overflow. */
468 if (i > cutoff || (i == cutoff && c > cutlim))
469 overflow = 1;
470 else
471 {
472 use_long:
473 i *= (unsigned LONG int) base;
474 i += c;
475 }
476 }
477
478 /* Check if anything actually happened. */
479 if (s == save)
480 goto noconv;
481
482 /* Store in ENDPTR the address of one character
483 past the last character we converted. */
484 if (endptr != NULL)
485 *endptr = (STRING_TYPE *) s;
486
487#if !UNSIGNED
488 /* Check for a value that is within the range of
489 `unsigned LONG int', but outside the range of `LONG int'. */
490 if (overflow == 0
491 && i > (negative
492 ? -((unsigned LONG int) (STRTOL_LONG_MIN + 1)) + 1
493 : (unsigned LONG int) STRTOL_LONG_MAX))
494 overflow = 1;
495#endif
496
497 if (__glibc_unlikely (overflow))
498 {
499 __set_errno (ERANGE);
500#if UNSIGNED
501 return STRTOL_ULONG_MAX;
502#else
503 return negative ? STRTOL_LONG_MIN : STRTOL_LONG_MAX;
504#endif
505 }
506
507 /* Return the result of the appropriate sign. */
508 return negative ? -i : i;
509
510noconv:
511 /* We must handle a special case here: the base is 0 or 16 and the
512 first two characters are '0' and 'x', but the rest are no
513 hexadecimal digits. This is no error case. We return 0 and
514 ENDPTR points to the `x`. */
515 if (endptr != NULL)
516 {
517 if (save - nptr >= 2 && TOUPPER (save[-1]) == L_('X')
518 && save[-2] == L_('0'))
519 *endptr = (STRING_TYPE *) &save[-1];
520 else
521 /* There was no number to convert. */
522 *endptr = (STRING_TYPE *) nptr;
523 }
524
525 return 0L;
526}
527#if defined _LIBC && !defined USE_WIDE_CHAR
528libc_hidden_def (INTERNAL (__strtol_l))
529#endif
530
531/* External user entry point. */
532
533#if _LIBC - 0 == 0
534
535/* Prototype. */
536extern INT __strtol_l (const STRING_TYPE *nptr, STRING_TYPE **endptr,
537 int base);
538#endif
539
540
541INT
542#ifdef weak_function
543weak_function
544#endif
545__strtol_l (const STRING_TYPE *nptr, STRING_TYPE **endptr,
546 int base, __locale_t loc)
547{
548 return INTERNAL (__strtol_l) (nptr, endptr, base, 0, loc);
549}
550libc_hidden_def (__strtol_l)
551weak_alias (__strtol_l, strtol_l)
552