1/* Copyright (C) 1991-2016 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
8
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <http://www.gnu.org/licenses/>. */
17
18#include <assert.h>
19#include <errno.h>
20#include <limits.h>
21#include <ctype.h>
22#include <stdarg.h>
23#include <stdbool.h>
24#include <stdio.h>
25#include <stdint.h>
26#include <stdlib.h>
27#include <string.h>
28#include <wchar.h>
29#include <wctype.h>
30#include <libc-internal.h>
31#include <libc-lock.h>
32#include <locale/localeinfo.h>
33#include <scratch_buffer.h>
34
/* Select the type behind LONGLONG.  GNU C compilers get `long long'
   (and HAVE_LONGLONG is defined to say so); every other compiler
   falls back to plain `long'.  */
#ifndef __GNUC__
# define LONGLONG long
#else
# define HAVE_LONGLONG
# define LONGLONG long long
#endif
41
/* need_longlong is 0 when LONG_MAX equals LONG_LONG_MAX, i.e. when
   `long long' needs no handling separate from `long', and 1
   otherwise.  */
#if LONG_MAX != LONG_LONG_MAX
# define need_longlong 1
#else
# define need_longlong 0
#endif
48
/* need_long is 0 when INT_MAX equals LONG_MAX, i.e. when `long' needs
   no handling separate from `int', and 1 otherwise.  */
#if INT_MAX != LONG_MAX
# define need_long 1
#else
# define need_long 0
#endif
55
/* Bit flags describing the modifiers of the conversion currently
   being processed.  Each flag occupies its own bit so they can be
   combined freely.  */
#define LONG          (1 << 0)   /* l: long or double */
#define LONGDBL       (1 << 1)   /* L: long long or long double */
#define SHORT         (1 << 2)   /* h: short */
#define SUPPRESS      (1 << 3)   /* *: suppress assignment */
#define POINTER       (1 << 4)   /* weird %p pointer (`fake hex') */
#define NOSKIP        (1 << 5)   /* do not skip blanks */
#define NUMBER_SIGNED (1 << 6)   /* signed integer */
#define GROUP         (1 << 7)   /* ': group numbers */
#define GNU_MALLOC    (1 << 8)   /* a: malloc strings */
#define CHAR          (1 << 9)   /* hh: char */
#define I18N          (1 << 10)  /* I: use locale's digits */
#define HEXA_FLOAT    (1 << 11)  /* hexadecimal float */
#define READ_POINTER  (1 << 12)  /* this is a pointer value */
#define POSIX_MALLOC  (1 << 13)  /* m: malloc strings */
/* Either of the two malloc-string flavours.  */
#define MALLOC        (GNU_MALLOC | POSIX_MALLOC)
72
73#include <locale/localeinfo.h>
74#include <libioP.h>
75#include <libio.h>
76
/* Make every `va_list' in this file refer to libio's `_IO_va_list'.  */
#undef va_list
#define va_list _IO_va_list

/* Character-level primitives, selected by COMPILE_WSCANF so that one
   scanner body can be compiled for wide and for narrow streams.

   ungetc, ungetc_not_eof and inchar expand in place: besides their
   arguments they use the variable `read_in' of the expansion site,
   and inchar additionally uses `c', `s' and `inchar_errno'.  The
   narrow ctype wrappers use `loc'.  */
#ifdef COMPILE_WSCANF

/* Unless C is WEOF, decrement READ_IN and push C back onto S.  */
# define ungetc(c, s) \
  ((void) (c != WEOF && (--read_in, _IO_sputbackwc (s, c))))

/* Same as ungetc, without the WEOF test.  */
# define ungetc_not_eof(c, s) \
  ((void) (--read_in, _IO_sputbackwc (s, c)))

/* Fetch the next wide character into `c' and yield it.  A successful
   read increments READ_IN; a failed one stores errno in INCHAR_ERRNO.
   If `c' is already WEOF nothing is read: errno is set from
   INCHAR_ERRNO and WEOF is yielded.  */
# define inchar() \
  (c != WEOF \
   ? ((c = _IO_getwc_unlocked (s)), \
      (void) (c == WEOF \
              ? (size_t) (inchar_errno = errno) \
              : ++read_in), \
      c) \
   : ((errno = inchar_errno), WEOF))

/* Character classification and case mapping.  */
# define ISSPACE(Ch) iswspace (Ch)
# define ISDIGIT(Ch) iswdigit (Ch)
# define ISXDIGIT(Ch) iswxdigit (Ch)
# define TOLOWER(Ch) towlower (Ch)

/* Return WEOF from the enclosing function unless _IO_fwide (s, 1)
   yields 1.  */
# define ORIENT if (_IO_fwide (s, 1) != 1) return WEOF

/* The number parsers are referenced by their narrow names; map those
   onto the wide-character implementations.  */
# define __strtoll_internal __wcstoll_internal
# define __strtoull_internal __wcstoull_internal
# define __strtol_internal __wcstol_internal
# define __strtoul_internal __wcstoul_internal
# define __strtold_internal __wcstold_internal
# define __strtod_internal __wcstod_internal
# define __strtof_internal __wcstof_internal

/* Literal prefixing and the character types in use.  */
# define L_(Str) L##Str
# define CHAR_T wchar_t
# define UCHAR_T unsigned int
# define WINT_T wint_t

/* In this build EOF stands for WEOF.  */
# undef EOF
# define EOF WEOF

#else /* !COMPILE_WSCANF */

/* Unless C (viewed as int) is EOF, decrement READ_IN and push C back
   onto S as an unsigned char.  */
# define ungetc(c, s) \
  ((void) ((int) c != EOF \
           && (--read_in, _IO_sputbackc (s, (unsigned char) c))))

/* Same as ungetc, without the EOF test.  */
# define ungetc_not_eof(c, s) \
  ((void) (--read_in, _IO_sputbackc (s, (unsigned char) c)))

/* Fetch the next byte into `c' and yield it.  A successful read
   increments READ_IN; a failed one stores errno in INCHAR_ERRNO.  If
   `c' is already EOF nothing is read: errno is set from INCHAR_ERRNO
   and EOF is yielded.  */
# define inchar() \
  (c != EOF \
   ? ((c = _IO_getc_unlocked (s)), \
      (void) (c == EOF \
              ? (size_t) (inchar_errno = errno) \
              : ++read_in), \
      c) \
   : ((errno = inchar_errno), EOF))

/* Character classification and case mapping, using locale `loc'.  */
# define ISSPACE(Ch) __isspace_l (Ch, loc)
# define ISDIGIT(Ch) __isdigit_l (Ch, loc)
# define ISXDIGIT(Ch) __isxdigit_l (Ch, loc)
# define TOLOWER(Ch) __tolower_l ((unsigned char) (Ch), loc)

/* Return EOF from the enclosing function when the stream's vtable
   offset is zero and _IO_fwide (s, -1) does not yield -1.  */
# define ORIENT if (_IO_vtable_offset (s) == 0 \
                    && _IO_fwide (s, -1) != -1) \
                  return EOF

/* Literal prefixing and the character types in use.  */
# define L_(Str) Str
# define CHAR_T char
# define UCHAR_T unsigned char
# define WINT_T int

#endif /* COMPILE_WSCANF */
135
/* The three macros below record a failure class in `errval' and jump
   to the `errout' label of the function they are expanded in.  */

/* Encoding failure: errval becomes 4 and errno is set to EILSEQ.  */
#define encode_error() \
  do \
    { \
      errval = 4; \
      __set_errno (EILSEQ); \
      goto errout; \
    } \
  while (0)

/* Conversion (matching) failure: errval becomes 2.  */
#define conv_error() \
  do \
    { \
      errval = 2; \
      goto errout; \
    } \
  while (0)

/* Input failure: errval becomes 1, and `done' is replaced by EOF if
   it is still zero.  */
#define input_error() \
  do \
    { \
      errval = 1; \
      if (done == 0) \
        { \
          done = EOF; \
        } \
      goto errout; \
    } \
  while (0)
/* Append PTR to the list headed by the `ptrs_to_free' variable of the
   expansion site.  When there is no chunk yet, or the current chunk's
   `ptrs' array is full, a new chunk is obtained with alloca and
   linked in front of the list first.  */
#define add_ptr_to_free(ptr) \
  do \
    { \
      if (ptrs_to_free == NULL \
          || (ptrs_to_free->count \
              == (sizeof (ptrs_to_free->ptrs) \
                  / sizeof (ptrs_to_free->ptrs[0])))) \
        { \
          struct ptrs_to_free *fresh_chunk = alloca (sizeof *fresh_chunk); \
          fresh_chunk->next = ptrs_to_free; \
          fresh_chunk->count = 0; \
          ptrs_to_free = fresh_chunk; \
        } \
      ptrs_to_free->ptrs[ptrs_to_free->count] = (ptr); \
      ptrs_to_free->count++; \
    } \
  while (0)
/* Validate the arguments of the scanner and return EOF from the
   enclosing function if they are unusable: a stream S flagged
   _IO_NO_READS yields errno EBADF, a null FORMAT goes through
   MAYBE_SET_EINVAL.  */
#define ARGCHECK(s, format) \
  do \
    { \
      /* Check the file argument for consistency.  */ \
      CHECK_FILE (s, EOF); \
      if (s->_flags & _IO_NO_READS) \
        { \
          __set_errno (EBADF); \
          return EOF; \
        } \
      /* Reached only when the branch above did not return.  */ \
      if (format == NULL) \
        { \
          MAYBE_SET_EINVAL; \
          return EOF; \
        } \
    } \
  while (0)
/* Open a cleanup region that will call _IO_funlockfile on S, then
   lock S.  Expands to two statements, not one.
   NOTE(review): the cleanup-region start/end macros look like a
   begin/end pair -- confirm in <libc-lock.h> before using LOCK_STREAM
   without a matching UNLOCK_STREAM.  */
#define LOCK_STREAM(S) \
  __libc_cleanup_region_start (1, \
                               (void (*) (void *)) &_IO_funlockfile, \
                               (S)); \
  _IO_flockfile (S)

/* Unlock S and close the cleanup region; the handler is passed 0
   here, the unlock having been done explicitly just before.  */
#define UNLOCK_STREAM(S) \
  _IO_funlockfile (S); \
  __libc_cleanup_region_end (0)
187
/* One chunk of the list maintained by add_ptr_to_free.  Each chunk
   stores up to 32 `char **' values.  */
struct ptrs_to_free
{
  /* Number of entries of PTRS that are in use.  */
  size_t count;
  /* The chunk that headed the list before this one, or NULL.  */
  struct ptrs_to_free *next;
  /* The recorded pointers, filled from index 0 upwards.  */
  char **ptrs[32];
};
194
195struct char_buffer {
196 CHAR_T *current;
197 CHAR_T *end;
198 struct scratch_buffer scratch;
199};
200
201/* Returns a pointer to the first CHAR_T object in the buffer. Only
202 valid if char_buffer_add (BUFFER, CH) has been called and
203 char_buffer_error (BUFFER) is false. */
204static inline CHAR_T *
205char_buffer_start (const struct char_buffer *buffer)
206{
207 return (CHAR_T *) buffer->scratch.data;
208}
209
210/* Returns the number of CHAR_T objects in the buffer. Only valid if
211 char_buffer_error (BUFFER) is false. */
212static inline size_t
213char_buffer_size (const struct char_buffer *buffer)
214{
215 return buffer->current - char_buffer_start (buffer);
216}
217
218/* Reinitializes BUFFER->current and BUFFER->end to cover the entire
219 scratch buffer. */
220static inline void
221char_buffer_rewind (struct char_buffer *buffer)
222{
223 buffer->current = char_buffer_start (buffer);
224 buffer->end = buffer->current + buffer->scratch.length / sizeof (CHAR_T);
225}
226
227/* Returns true if a previous call to char_buffer_add (BUFFER, CH)
228 failed. */
229static inline bool
230char_buffer_error (const struct char_buffer *buffer)
231{
232 return __glibc_unlikely (buffer->current == NULL);
233}
234
235/* Slow path for char_buffer_add. */
236static void
237char_buffer_add_slow (struct char_buffer *buffer, CHAR_T ch)
238{
239 if (char_buffer_error (buffer))
240 return;
241 size_t offset = buffer->end - (CHAR_T *) buffer->scratch.data;
242 if (!scratch_buffer_grow_preserve (&buffer->scratch))
243 {
244 buffer->current = NULL;
245 buffer->end = NULL;
246 return;
247 }
248 char_buffer_rewind (buffer);
249 buffer->current += offset;
250 *buffer->current++ = ch;
251}
252
253/* Adds CH to BUFFER. This function does not report any errors, check
254 for them with char_buffer_error. */
255static inline void
256char_buffer_add (struct char_buffer *buffer, CHAR_T ch)
257 __attribute__ ((always_inline));
258static inline void
259char_buffer_add (struct char_buffer *buffer, CHAR_T ch)
260{
261 if (__glibc_unlikely (buffer->current == buffer->end))
262 char_buffer_add_slow (buffer, ch);
263 else
264 *buffer->current++ = ch;
265}
266
267/* Read formatted input from S according to the format string
268 FORMAT, using the argument list in ARG.
269 Return the number of assignments made, or -1 for an input error. */
270#ifdef COMPILE_WSCANF
271int
272_IO_vfwscanf (_IO_FILE *s, const wchar_t *format, _IO_va_list argptr,
273 int *errp)
274#else
275int
276_IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
277 int *errp)
278#endif
279{
280 va_list arg;
281 const CHAR_T *f = format;
282 UCHAR_T fc; /* Current character of the format. */
283 WINT_T done = 0; /* Assignments done. */
284 size_t read_in = 0; /* Chars read in. */
285 WINT_T c = 0; /* Last char read. */
286 int width; /* Maximum field width. */
287 int flags; /* Modifiers for current format element. */
288 int errval = 0;
289#ifndef COMPILE_WSCANF
290 __locale_t loc = _NL_CURRENT_LOCALE;
291 struct __locale_data *const curctype = loc->__locales[LC_CTYPE];
292#endif
293
294 /* Errno of last failed inchar call. */
295 int inchar_errno = 0;
296 /* Status for reading F-P nums. */
297 char got_digit, got_dot, got_e, negative;
298 /* If a [...] is a [^...]. */
299 CHAR_T not_in;
300#define exp_char not_in
301 /* Base for integral numbers. */
302 int base;
303 /* Decimal point character. */
304#ifdef COMPILE_WSCANF
305 wint_t decimal;
306#else
307 const char *decimal;
308#endif
309 /* The thousands character of the current locale. */
310#ifdef COMPILE_WSCANF
311 wint_t thousands;
312#else
313 const char *thousands;
314#endif
315 struct ptrs_to_free *ptrs_to_free = NULL;
316 /* State for the conversions. */
317 mbstate_t state;
318 /* Integral holding variables. */
319 union
320 {
321 long long int q;
322 unsigned long long int uq;
323 long int l;
324 unsigned long int ul;
325 } num;
326 /* Character-buffer pointer. */
327 char *str = NULL;
328 wchar_t *wstr = NULL;
329 char **strptr = NULL;
330 ssize_t strsize = 0;
331 /* We must not react on white spaces immediately because they can
332 possibly be matched even if in the input stream no character is
333 available anymore. */
334 int skip_space = 0;
335 /* Workspace. */
336 CHAR_T *tw; /* Temporary pointer. */
337 struct char_buffer charbuf;
338 scratch_buffer_init (&charbuf.scratch);
339
340#ifdef __va_copy
341 __va_copy (arg, argptr);
342#else
343 arg = (va_list) argptr;
344#endif
345
346#ifdef ORIENT
347 ORIENT;
348#endif
349
350 ARGCHECK (s, format);
351
352 {
353#ifndef COMPILE_WSCANF
354 struct __locale_data *const curnumeric = loc->__locales[LC_NUMERIC];
355#endif
356
357 /* Figure out the decimal point character. */
358#ifdef COMPILE_WSCANF
359 decimal = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_DECIMAL_POINT_WC);
360#else
361 decimal = curnumeric->values[_NL_ITEM_INDEX (DECIMAL_POINT)].string;
362#endif
363 /* Figure out the thousands separator character. */
364#ifdef COMPILE_WSCANF
365 thousands = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_THOUSANDS_SEP_WC);
366#else
367 thousands = curnumeric->values[_NL_ITEM_INDEX (THOUSANDS_SEP)].string;
368 if (*thousands == '\0')
369 thousands = NULL;
370#endif
371 }
372
373 /* Lock the stream. */
374 LOCK_STREAM (s);
375
376
377#ifndef COMPILE_WSCANF
378 /* From now on we use `state' to convert the format string. */
379 memset (&state, '\0', sizeof (state));
380#endif
381
382 /* Run through the format string. */
383 while (*f != '\0')
384 {
385 unsigned int argpos;
386 /* Extract the next argument, which is of type TYPE.
387 For a %N$... spec, this is the Nth argument from the beginning;
388 otherwise it is the next argument after the state now in ARG. */
389#ifdef __va_copy
390# define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
391 ({ unsigned int pos = argpos; \
392 va_list arg; \
393 __va_copy (arg, argptr); \
394 while (--pos > 0) \
395 (void) va_arg (arg, void *); \
396 va_arg (arg, type); \
397 }))
398#else
399# if 0
400 /* XXX Possible optimization. */
401# define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
402 ({ va_list arg = (va_list) argptr; \
403 arg = (va_list) ((char *) arg \
404 + (argpos - 1) \
405 * __va_rounded_size (void *)); \
406 va_arg (arg, type); \
407 }))
408# else
409# define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
410 ({ unsigned int pos = argpos; \
411 va_list arg = (va_list) argptr; \
412 while (--pos > 0) \
413 (void) va_arg (arg, void *); \
414 va_arg (arg, type); \
415 }))
416# endif
417#endif
418
419#ifndef COMPILE_WSCANF
420 if (!isascii ((unsigned char) *f))
421 {
422 /* Non-ASCII, may be a multibyte. */
423 int len = __mbrlen (f, strlen (f), &state);
424 if (len > 0)
425 {
426 do
427 {
428 c = inchar ();
429 if (__glibc_unlikely (c == EOF))
430 input_error ();
431 else if (c != (unsigned char) *f++)
432 {
433 ungetc_not_eof (c, s);
434 conv_error ();
435 }
436 }
437 while (--len > 0);
438 continue;
439 }
440 }
441#endif
442
443 fc = *f++;
444 if (fc != '%')
445 {
446 /* Remember to skip spaces. */
447 if (ISSPACE (fc))
448 {
449 skip_space = 1;
450 continue;
451 }
452
453 /* Read a character. */
454 c = inchar ();
455
456 /* Characters other than format specs must just match. */
457 if (__glibc_unlikely (c == EOF))
458 input_error ();
459
460 /* We saw white space char as the last character in the format
461 string. Now it's time to skip all leading white space. */
462 if (skip_space)
463 {
464 while (ISSPACE (c))
465 if (__glibc_unlikely (inchar () == EOF))
466 input_error ();
467 skip_space = 0;
468 }
469
470 if (__glibc_unlikely (c != fc))
471 {
472 ungetc (c, s);
473 conv_error ();
474 }
475
476 continue;
477 }
478
479 /* This is the start of the conversion string. */
480 flags = 0;
481
482 /* Initialize state of modifiers. */
483 argpos = 0;
484
485 /* Prepare temporary buffer. */
486 char_buffer_rewind (&charbuf);
487
488 /* Check for a positional parameter specification. */
489 if (ISDIGIT ((UCHAR_T) *f))
490 {
491 argpos = (UCHAR_T) *f++ - L_('0');
492 while (ISDIGIT ((UCHAR_T) *f))
493 argpos = argpos * 10 + ((UCHAR_T) *f++ - L_('0'));
494 if (*f == L_('$'))
495 ++f;
496 else
497 {
498 /* Oops; that was actually the field width. */
499 width = argpos;
500 argpos = 0;
501 goto got_width;
502 }
503 }
504
505 /* Check for the assignment-suppressing, the number grouping flag,
506 and the signal to use the locale's digit representation. */
507 while (*f == L_('*') || *f == L_('\'') || *f == L_('I'))
508 switch (*f++)
509 {
510 case L_('*'):
511 flags |= SUPPRESS;
512 break;
513 case L_('\''):
514#ifdef COMPILE_WSCANF
515 if (thousands != L'\0')
516#else
517 if (thousands != NULL)
518#endif
519 flags |= GROUP;
520 break;
521 case L_('I'):
522 flags |= I18N;
523 break;
524 }
525
526 /* Find the maximum field width. */
527 width = 0;
528 while (ISDIGIT ((UCHAR_T) *f))
529 {
530 width *= 10;
531 width += (UCHAR_T) *f++ - L_('0');
532 }
533 got_width:
534 if (width == 0)
535 width = -1;
536
537 /* Check for type modifiers. */
538 switch (*f++)
539 {
540 case L_('h'):
541 /* ints are short ints or chars. */
542 if (*f == L_('h'))
543 {
544 ++f;
545 flags |= CHAR;
546 }
547 else
548 flags |= SHORT;
549 break;
550 case L_('l'):
551 if (*f == L_('l'))
552 {
553 /* A double `l' is equivalent to an `L'. */
554 ++f;
555 flags |= LONGDBL | LONG;
556 }
557 else
558 /* ints are long ints. */
559 flags |= LONG;
560 break;
561 case L_('q'):
562 case L_('L'):
563 /* doubles are long doubles, and ints are long long ints. */
564 flags |= LONGDBL | LONG;
565 break;
566 case L_('a'):
567 /* The `a' is used as a flag only if followed by `s', `S' or
568 `['. */
569 if (*f != L_('s') && *f != L_('S') && *f != L_('['))
570 {
571 --f;
572 break;
573 }
574 /* In __isoc99_*scanf %as, %aS and %a[ extension is not
575 supported at all. */
576 if (s->_flags2 & _IO_FLAGS2_SCANF_STD)
577 {
578 --f;
579 break;
580 }
581 /* String conversions (%s, %[) take a `char **'
582 arg and fill it in with a malloc'd pointer. */
583 flags |= GNU_MALLOC;
584 break;
585 case L_('m'):
586 flags |= POSIX_MALLOC;
587 if (*f == L_('l'))
588 {
589 ++f;
590 flags |= LONG;
591 }
592 break;
593 case L_('z'):
594 if (need_longlong && sizeof (size_t) > sizeof (unsigned long int))
595 flags |= LONGDBL;
596 else if (sizeof (size_t) > sizeof (unsigned int))
597 flags |= LONG;
598 break;
599 case L_('j'):
600 if (need_longlong && sizeof (uintmax_t) > sizeof (unsigned long int))
601 flags |= LONGDBL;
602 else if (sizeof (uintmax_t) > sizeof (unsigned int))
603 flags |= LONG;
604 break;
605 case L_('t'):
606 if (need_longlong && sizeof (ptrdiff_t) > sizeof (long int))
607 flags |= LONGDBL;
608 else if (sizeof (ptrdiff_t) > sizeof (int))
609 flags |= LONG;
610 break;
611 default:
612 /* Not a recognized modifier. Backup. */
613 --f;
614 break;
615 }
616
617 /* End of the format string? */
618 if (__glibc_unlikely (*f == L_('\0')))
619 conv_error ();
620
621 /* Find the conversion specifier. */
622 fc = *f++;
623 if (skip_space || (fc != L_('[') && fc != L_('c')
624 && fc != L_('C') && fc != L_('n')))
625 {
626 /* Eat whitespace. */
627 int save_errno = errno;
628 __set_errno (0);
629 do
630 /* We add the additional test for EOF here since otherwise
631 inchar will restore the old errno value which might be
632 EINTR but does not indicate an interrupt since nothing
633 was read at this time. */
634 if (__builtin_expect ((c == EOF || inchar () == EOF)
635 && errno == EINTR, 0))
636 input_error ();
637 while (ISSPACE (c));
638 __set_errno (save_errno);
639 ungetc (c, s);
640 skip_space = 0;
641 }
642
643 switch (fc)
644 {
645 case L_('%'): /* Must match a literal '%'. */
646 c = inchar ();
647 if (__glibc_unlikely (c == EOF))
648 input_error ();
649 if (__glibc_unlikely (c != fc))
650 {
651 ungetc_not_eof (c, s);
652 conv_error ();
653 }
654 break;
655
656 case L_('n'): /* Answer number of assignments done. */
657 /* Corrigendum 1 to ISO C 1990 describes the allowed flags
658 with the 'n' conversion specifier. */
659 if (!(flags & SUPPRESS))
660 {
661 /* Don't count the read-ahead. */
662 if (need_longlong && (flags & LONGDBL))
663 *ARG (long long int *) = read_in;
664 else if (need_long && (flags & LONG))
665 *ARG (long int *) = read_in;
666 else if (flags & SHORT)
667 *ARG (short int *) = read_in;
668 else if (!(flags & CHAR))
669 *ARG (int *) = read_in;
670 else
671 *ARG (char *) = read_in;
672
673#ifdef NO_BUG_IN_ISO_C_CORRIGENDUM_1
674 /* We have a severe problem here. The ISO C standard
675 contradicts itself in explaining the effect of the %n
676 format in `scanf'. While in ISO C:1990 and the ISO C
677 Amendement 1:1995 the result is described as
678
679 Execution of a %n directive does not effect the
680 assignment count returned at the completion of
681 execution of the f(w)scanf function.
682
683 in ISO C Corrigendum 1:1994 the following was added:
684
685 Subclause 7.9.6.2
686 Add the following fourth example:
687 In:
688 #include <stdio.h>
689 int d1, d2, n1, n2, i;
690 i = sscanf("123", "%d%n%n%d", &d1, &n1, &n2, &d2);
691 the value 123 is assigned to d1 and the value3 to n1.
692 Because %n can never get an input failure the value
693 of 3 is also assigned to n2. The value of d2 is not
694 affected. The value 3 is assigned to i.
695
696 We go for now with the historically correct code from ISO C,
697 i.e., we don't count the %n assignments. When it ever
698 should proof to be wrong just remove the #ifdef above. */
699 ++done;
700#endif
701 }
702 break;
703
704 case L_('c'): /* Match characters. */
705 if ((flags & LONG) == 0)
706 {
707 if (width == -1)
708 width = 1;
709
710#define STRING_ARG(Str, Type, Width) \
711 do if (!(flags & SUPPRESS)) \
712 { \
713 if (flags & MALLOC) \
714 { \
715 /* The string is to be stored in a malloc'd buffer. */ \
716 /* For %mS using char ** is actually wrong, but \
717 shouldn't make a difference on any arch glibc \
718 supports and would unnecessarily complicate \
719 things. */ \
720 strptr = ARG (char **); \
721 if (strptr == NULL) \
722 conv_error (); \
723 /* Allocate an initial buffer. */ \
724 strsize = Width; \
725 *strptr = (char *) malloc (strsize * sizeof (Type)); \
726 Str = (Type *) *strptr; \
727 if (Str != NULL) \
728 add_ptr_to_free (strptr); \
729 else if (flags & POSIX_MALLOC) \
730 { \
731 done = EOF; \
732 goto errout; \
733 } \
734 } \
735 else \
736 Str = ARG (Type *); \
737 if (Str == NULL) \
738 conv_error (); \
739 } while (0)
740#ifdef COMPILE_WSCANF
741 STRING_ARG (str, char, 100);
742#else
743 STRING_ARG (str, char, (width > 1024 ? 1024 : width));
744#endif
745
746 c = inchar ();
747 if (__glibc_unlikely (c == EOF))
748 input_error ();
749
750#ifdef COMPILE_WSCANF
751 /* We have to convert the wide character(s) into multibyte
752 characters and store the result. */
753 memset (&state, '\0', sizeof (state));
754
755 do
756 {
757 size_t n;
758
759 if (!(flags & SUPPRESS) && (flags & POSIX_MALLOC)
760 && str + MB_CUR_MAX >= *strptr + strsize)
761 {
762 /* We have to enlarge the buffer if the `m' flag
763 was given. */
764 size_t strleng = str - *strptr;
765 char *newstr;
766
767 newstr = (char *) realloc (*strptr, strsize * 2);
768 if (newstr == NULL)
769 {
770 /* Can't allocate that much. Last-ditch effort. */
771 newstr = (char *) realloc (*strptr,
772 strleng + MB_CUR_MAX);
773 if (newstr == NULL)
774 {
775 /* c can't have `a' flag, only `m'. */
776 done = EOF;
777 goto errout;
778 }
779 else
780 {
781 *strptr = newstr;
782 str = newstr + strleng;
783 strsize = strleng + MB_CUR_MAX;
784 }
785 }
786 else
787 {
788 *strptr = newstr;
789 str = newstr + strleng;
790 strsize *= 2;
791 }
792 }
793
794 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c, &state);
795 if (__glibc_unlikely (n == (size_t) -1))
796 /* No valid wide character. */
797 input_error ();
798
799 /* Increment the output pointer. Even if we don't
800 write anything. */
801 str += n;
802 }
803 while (--width > 0 && inchar () != EOF);
804#else
805 if (!(flags & SUPPRESS))
806 {
807 do
808 {
809 if ((flags & MALLOC)
810 && (char *) str == *strptr + strsize)
811 {
812 /* Enlarge the buffer. */
813 size_t newsize
814 = strsize
815 + (strsize >= width ? width - 1 : strsize);
816
817 str = (char *) realloc (*strptr, newsize);
818 if (str == NULL)
819 {
820 /* Can't allocate that much. Last-ditch
821 effort. */
822 str = (char *) realloc (*strptr, strsize + 1);
823 if (str == NULL)
824 {
825 /* c can't have `a' flag, only `m'. */
826 done = EOF;
827 goto errout;
828 }
829 else
830 {
831 *strptr = (char *) str;
832 str += strsize;
833 ++strsize;
834 }
835 }
836 else
837 {
838 *strptr = (char *) str;
839 str += strsize;
840 strsize = newsize;
841 }
842 }
843 *str++ = c;
844 }
845 while (--width > 0 && inchar () != EOF);
846 }
847 else
848 while (--width > 0 && inchar () != EOF);
849#endif
850
851 if (!(flags & SUPPRESS))
852 {
853 if ((flags & MALLOC) && str - *strptr != strsize)
854 {
855 char *cp = (char *) realloc (*strptr, str - *strptr);
856 if (cp != NULL)
857 *strptr = cp;
858 }
859 strptr = NULL;
860 ++done;
861 }
862
863 break;
864 }
865 /* FALLTHROUGH */
866 case L_('C'):
867 if (width == -1)
868 width = 1;
869
870 STRING_ARG (wstr, wchar_t, (width > 1024 ? 1024 : width));
871
872 c = inchar ();
873 if (__glibc_unlikely (c == EOF))
874 input_error ();
875
876#ifdef COMPILE_WSCANF
877 /* Just store the incoming wide characters. */
878 if (!(flags & SUPPRESS))
879 {
880 do
881 {
882 if ((flags & MALLOC)
883 && wstr == (wchar_t *) *strptr + strsize)
884 {
885 size_t newsize
886 = strsize + (strsize > width ? width - 1 : strsize);
887 /* Enlarge the buffer. */
888 wstr = (wchar_t *) realloc (*strptr,
889 newsize * sizeof (wchar_t));
890 if (wstr == NULL)
891 {
892 /* Can't allocate that much. Last-ditch effort. */
893 wstr = (wchar_t *) realloc (*strptr,
894 (strsize + 1)
895 * sizeof (wchar_t));
896 if (wstr == NULL)
897 {
898 /* C or lc can't have `a' flag, only `m'
899 flag. */
900 done = EOF;
901 goto errout;
902 }
903 else
904 {
905 *strptr = (char *) wstr;
906 wstr += strsize;
907 ++strsize;
908 }
909 }
910 else
911 {
912 *strptr = (char *) wstr;
913 wstr += strsize;
914 strsize = newsize;
915 }
916 }
917 *wstr++ = c;
918 }
919 while (--width > 0 && inchar () != EOF);
920 }
921 else
922 while (--width > 0 && inchar () != EOF);
923#else
924 {
925 /* We have to convert the multibyte input sequence to wide
926 characters. */
927 char buf[1];
928 mbstate_t cstate;
929
930 memset (&cstate, '\0', sizeof (cstate));
931
932 do
933 {
934 /* This is what we present the mbrtowc function first. */
935 buf[0] = c;
936
937 if (!(flags & SUPPRESS) && (flags & MALLOC)
938 && wstr == (wchar_t *) *strptr + strsize)
939 {
940 size_t newsize
941 = strsize + (strsize > width ? width - 1 : strsize);
942 /* Enlarge the buffer. */
943 wstr = (wchar_t *) realloc (*strptr,
944 newsize * sizeof (wchar_t));
945 if (wstr == NULL)
946 {
947 /* Can't allocate that much. Last-ditch effort. */
948 wstr = (wchar_t *) realloc (*strptr,
949 ((strsize + 1)
950 * sizeof (wchar_t)));
951 if (wstr == NULL)
952 {
953 /* C or lc can't have `a' flag, only `m' flag. */
954 done = EOF;
955 goto errout;
956 }
957 else
958 {
959 *strptr = (char *) wstr;
960 wstr += strsize;
961 ++strsize;
962 }
963 }
964 else
965 {
966 *strptr = (char *) wstr;
967 wstr += strsize;
968 strsize = newsize;
969 }
970 }
971
972 while (1)
973 {
974 size_t n;
975
976 n = __mbrtowc (!(flags & SUPPRESS) ? wstr : NULL,
977 buf, 1, &cstate);
978
979 if (n == (size_t) -2)
980 {
981 /* Possibly correct character, just not enough
982 input. */
983 if (__glibc_unlikely (inchar () == EOF))
984 encode_error ();
985
986 buf[0] = c;
987 continue;
988 }
989
990 if (__glibc_unlikely (n != 1))
991 encode_error ();
992
993 /* We have a match. */
994 break;
995 }
996
997 /* Advance the result pointer. */
998 ++wstr;
999 }
1000 while (--width > 0 && inchar () != EOF);
1001 }
1002#endif
1003
1004 if (!(flags & SUPPRESS))
1005 {
1006 if ((flags & MALLOC) && wstr - (wchar_t *) *strptr != strsize)
1007 {
1008 wchar_t *cp = (wchar_t *) realloc (*strptr,
1009 ((wstr
1010 - (wchar_t *) *strptr)
1011 * sizeof (wchar_t)));
1012 if (cp != NULL)
1013 *strptr = (char *) cp;
1014 }
1015 strptr = NULL;
1016
1017 ++done;
1018 }
1019
1020 break;
1021
1022 case L_('s'): /* Read a string. */
1023 if (!(flags & LONG))
1024 {
1025 STRING_ARG (str, char, 100);
1026
1027 c = inchar ();
1028 if (__glibc_unlikely (c == EOF))
1029 input_error ();
1030
1031#ifdef COMPILE_WSCANF
1032 memset (&state, '\0', sizeof (state));
1033#endif
1034
1035 do
1036 {
1037 if (ISSPACE (c))
1038 {
1039 ungetc_not_eof (c, s);
1040 break;
1041 }
1042
1043#ifdef COMPILE_WSCANF
1044 /* This is quite complicated. We have to convert the
1045 wide characters into multibyte characters and then
1046 store them. */
1047 {
1048 size_t n;
1049
1050 if (!(flags & SUPPRESS) && (flags & MALLOC)
1051 && str + MB_CUR_MAX >= *strptr + strsize)
1052 {
1053 /* We have to enlarge the buffer if the `a' or `m'
1054 flag was given. */
1055 size_t strleng = str - *strptr;
1056 char *newstr;
1057
1058 newstr = (char *) realloc (*strptr, strsize * 2);
1059 if (newstr == NULL)
1060 {
1061 /* Can't allocate that much. Last-ditch
1062 effort. */
1063 newstr = (char *) realloc (*strptr,
1064 strleng + MB_CUR_MAX);
1065 if (newstr == NULL)
1066 {
1067 if (flags & POSIX_MALLOC)
1068 {
1069 done = EOF;
1070 goto errout;
1071 }
1072 /* We lose. Oh well. Terminate the
1073 string and stop converting,
1074 so at least we don't skip any input. */
1075 ((char *) (*strptr))[strleng] = '\0';
1076 strptr = NULL;
1077 ++done;
1078 conv_error ();
1079 }
1080 else
1081 {
1082 *strptr = newstr;
1083 str = newstr + strleng;
1084 strsize = strleng + MB_CUR_MAX;
1085 }
1086 }
1087 else
1088 {
1089 *strptr = newstr;
1090 str = newstr + strleng;
1091 strsize *= 2;
1092 }
1093 }
1094
1095 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c,
1096 &state);
1097 if (__glibc_unlikely (n == (size_t) -1))
1098 encode_error ();
1099
1100 assert (n <= MB_CUR_MAX);
1101 str += n;
1102 }
1103#else
1104 /* This is easy. */
1105 if (!(flags & SUPPRESS))
1106 {
1107 *str++ = c;
1108 if ((flags & MALLOC)
1109 && (char *) str == *strptr + strsize)
1110 {
1111 /* Enlarge the buffer. */
1112 str = (char *) realloc (*strptr, 2 * strsize);
1113 if (str == NULL)
1114 {
1115 /* Can't allocate that much. Last-ditch
1116 effort. */
1117 str = (char *) realloc (*strptr, strsize + 1);
1118 if (str == NULL)
1119 {
1120 if (flags & POSIX_MALLOC)
1121 {
1122 done = EOF;
1123 goto errout;
1124 }
1125 /* We lose. Oh well. Terminate the
1126 string and stop converting,
1127 so at least we don't skip any input. */
1128 ((char *) (*strptr))[strsize - 1] = '\0';
1129 strptr = NULL;
1130 ++done;
1131 conv_error ();
1132 }
1133 else
1134 {
1135 *strptr = (char *) str;
1136 str += strsize;
1137 ++strsize;
1138 }
1139 }
1140 else
1141 {
1142 *strptr = (char *) str;
1143 str += strsize;
1144 strsize *= 2;
1145 }
1146 }
1147 }
1148#endif
1149 }
1150 while ((width <= 0 || --width > 0) && inchar () != EOF);
1151
1152 if (!(flags & SUPPRESS))
1153 {
1154#ifdef COMPILE_WSCANF
1155 /* We have to emit the code to get into the initial
1156 state. */
1157 char buf[MB_LEN_MAX];
1158 size_t n = __wcrtomb (buf, L'\0', &state);
1159 if (n > 0 && (flags & MALLOC)
1160 && str + n >= *strptr + strsize)
1161 {
1162 /* Enlarge the buffer. */
1163 size_t strleng = str - *strptr;
1164 char *newstr;
1165
1166 newstr = (char *) realloc (*strptr, strleng + n + 1);
1167 if (newstr == NULL)
1168 {
1169 if (flags & POSIX_MALLOC)
1170 {
1171 done = EOF;
1172 goto errout;
1173 }
1174 /* We lose. Oh well. Terminate the string
1175 and stop converting, so at least we don't
1176 skip any input. */
1177 ((char *) (*strptr))[strleng] = '\0';
1178 strptr = NULL;
1179 ++done;
1180 conv_error ();
1181 }
1182 else
1183 {
1184 *strptr = newstr;
1185 str = newstr + strleng;
1186 strsize = strleng + n + 1;
1187 }
1188 }
1189
1190 str = __mempcpy (str, buf, n);
1191#endif
1192 *str++ = '\0';
1193
1194 if ((flags & MALLOC) && str - *strptr != strsize)
1195 {
1196 char *cp = (char *) realloc (*strptr, str - *strptr);
1197 if (cp != NULL)
1198 *strptr = cp;
1199 }
1200 strptr = NULL;
1201
1202 ++done;
1203 }
1204 break;
1205 }
1206 /* FALLTHROUGH */
1207
1208 case L_('S'):
1209 {
1210#ifndef COMPILE_WSCANF
1211 mbstate_t cstate;
1212#endif
1213
1214 /* Wide character string. */
1215 STRING_ARG (wstr, wchar_t, 100);
1216
1217 c = inchar ();
1218 if (__builtin_expect (c == EOF, 0))
1219 input_error ();
1220
1221#ifndef COMPILE_WSCANF
1222 memset (&cstate, '\0', sizeof (cstate));
1223#endif
1224
1225 do
1226 {
1227 if (ISSPACE (c))
1228 {
1229 ungetc_not_eof (c, s);
1230 break;
1231 }
1232
1233#ifdef COMPILE_WSCANF
1234 /* This is easy. */
1235 if (!(flags & SUPPRESS))
1236 {
1237 *wstr++ = c;
1238 if ((flags & MALLOC)
1239 && wstr == (wchar_t *) *strptr + strsize)
1240 {
1241 /* Enlarge the buffer. */
1242 wstr = (wchar_t *) realloc (*strptr,
1243 (2 * strsize)
1244 * sizeof (wchar_t));
1245 if (wstr == NULL)
1246 {
1247 /* Can't allocate that much. Last-ditch
1248 effort. */
1249 wstr = (wchar_t *) realloc (*strptr,
1250 (strsize + 1)
1251 * sizeof (wchar_t));
1252 if (wstr == NULL)
1253 {
1254 if (flags & POSIX_MALLOC)
1255 {
1256 done = EOF;
1257 goto errout;
1258 }
1259 /* We lose. Oh well. Terminate the string
1260 and stop converting, so at least we don't
1261 skip any input. */
1262 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
1263 strptr = NULL;
1264 ++done;
1265 conv_error ();
1266 }
1267 else
1268 {
1269 *strptr = (char *) wstr;
1270 wstr += strsize;
1271 ++strsize;
1272 }
1273 }
1274 else
1275 {
1276 *strptr = (char *) wstr;
1277 wstr += strsize;
1278 strsize *= 2;
1279 }
1280 }
1281 }
1282#else
1283 {
1284 char buf[1];
1285
1286 buf[0] = c;
1287
1288 while (1)
1289 {
1290 size_t n;
1291
1292 n = __mbrtowc (!(flags & SUPPRESS) ? wstr : NULL,
1293 buf, 1, &cstate);
1294
1295 if (n == (size_t) -2)
1296 {
1297 /* Possibly correct character, just not enough
1298 input. */
1299 if (__glibc_unlikely (inchar () == EOF))
1300 encode_error ();
1301
1302 buf[0] = c;
1303 continue;
1304 }
1305
1306 if (__glibc_unlikely (n != 1))
1307 encode_error ();
1308
1309 /* We have a match. */
1310 ++wstr;
1311 break;
1312 }
1313
1314 if (!(flags & SUPPRESS) && (flags & MALLOC)
1315 && wstr == (wchar_t *) *strptr + strsize)
1316 {
1317 /* Enlarge the buffer. */
1318 wstr = (wchar_t *) realloc (*strptr,
1319 (2 * strsize
1320 * sizeof (wchar_t)));
1321 if (wstr == NULL)
1322 {
1323 /* Can't allocate that much. Last-ditch effort. */
1324 wstr = (wchar_t *) realloc (*strptr,
1325 ((strsize + 1)
1326 * sizeof (wchar_t)));
1327 if (wstr == NULL)
1328 {
1329 if (flags & POSIX_MALLOC)
1330 {
1331 done = EOF;
1332 goto errout;
1333 }
1334 /* We lose. Oh well. Terminate the
1335 string and stop converting, so at
1336 least we don't skip any input. */
1337 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
1338 strptr = NULL;
1339 ++done;
1340 conv_error ();
1341 }
1342 else
1343 {
1344 *strptr = (char *) wstr;
1345 wstr += strsize;
1346 ++strsize;
1347 }
1348 }
1349 else
1350 {
1351 *strptr = (char *) wstr;
1352 wstr += strsize;
1353 strsize *= 2;
1354 }
1355 }
1356 }
1357#endif
1358 }
1359 while ((width <= 0 || --width > 0) && inchar () != EOF);
1360
1361 if (!(flags & SUPPRESS))
1362 {
1363 *wstr++ = L'\0';
1364
1365 if ((flags & MALLOC) && wstr - (wchar_t *) *strptr != strsize)
1366 {
1367 wchar_t *cp = (wchar_t *) realloc (*strptr,
1368 ((wstr
1369 - (wchar_t *) *strptr)
1370 * sizeof(wchar_t)));
1371 if (cp != NULL)
1372 *strptr = (char *) cp;
1373 }
1374 strptr = NULL;
1375
1376 ++done;
1377 }
1378 }
1379 break;
1380
1381 case L_('x'): /* Hexadecimal integer. */
1382 case L_('X'): /* Ditto. */
1383 base = 16;
1384 goto number;
1385
1386 case L_('o'): /* Octal integer. */
1387 base = 8;
1388 goto number;
1389
1390 case L_('u'): /* Unsigned decimal integer. */
1391 base = 10;
1392 goto number;
1393
1394 case L_('d'): /* Signed decimal integer. */
1395 base = 10;
1396 flags |= NUMBER_SIGNED;
1397 goto number;
1398
1399 case L_('i'): /* Generic number. */
1400 base = 0;
1401 flags |= NUMBER_SIGNED;
1402
1403 number:
1404 c = inchar ();
1405 if (__glibc_unlikely (c == EOF))
1406 input_error ();
1407
1408 /* Check for a sign. */
1409 if (c == L_('-') || c == L_('+'))
1410 {
1411 char_buffer_add (&charbuf, c);
1412 if (width > 0)
1413 --width;
1414 c = inchar ();
1415 }
1416
1417 /* Look for a leading indication of base. */
1418 if (width != 0 && c == L_('0'))
1419 {
1420 if (width > 0)
1421 --width;
1422
1423 char_buffer_add (&charbuf, c);
1424 c = inchar ();
1425
1426 if (width != 0 && TOLOWER (c) == L_('x'))
1427 {
1428 if (base == 0)
1429 base = 16;
1430 if (base == 16)
1431 {
1432 if (width > 0)
1433 --width;
1434 c = inchar ();
1435 }
1436 }
1437 else if (base == 0)
1438 base = 8;
1439 }
1440
1441 if (base == 0)
1442 base = 10;
1443
1444 if (base == 10 && __builtin_expect ((flags & I18N) != 0, 0))
1445 {
1446 int from_level;
1447 int to_level;
1448 int level;
1449#ifdef COMPILE_WSCANF
1450 const wchar_t *wcdigits[10];
1451 const wchar_t *wcdigits_extended[10];
1452#else
1453 const char *mbdigits[10];
1454 const char *mbdigits_extended[10];
1455#endif
1456 /* "to_inpunct" is a map from ASCII digits to their
1457 equivalent in locale. This is defined for locales
1458 which use an extra digits set. */
1459 wctrans_t map = __wctrans ("to_inpunct");
1460 int n;
1461
1462 from_level = 0;
1463#ifdef COMPILE_WSCANF
1464 to_level = _NL_CURRENT_WORD (LC_CTYPE,
1465 _NL_CTYPE_INDIGITS_WC_LEN) - 1;
1466#else
1467 to_level = (uint32_t) curctype->values[_NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN)].word - 1;
1468#endif
1469
1470 /* Get the alternative digit forms if there are any. */
1471 if (__glibc_unlikely (map != NULL))
1472 {
1473 /* Adding new level for extra digits set in locale file. */
1474 ++to_level;
1475
1476 for (n = 0; n < 10; ++n)
1477 {
1478#ifdef COMPILE_WSCANF
1479 wcdigits[n] = (const wchar_t *)
1480 _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
1481
1482 wchar_t *wc_extended = (wchar_t *)
1483 alloca ((to_level + 2) * sizeof (wchar_t));
1484 __wmemcpy (wc_extended, wcdigits[n], to_level);
1485 wc_extended[to_level] = __towctrans (L'0' + n, map);
1486 wc_extended[to_level + 1] = '\0';
1487 wcdigits_extended[n] = wc_extended;
1488#else
1489 mbdigits[n]
1490 = curctype->values[_NL_CTYPE_INDIGITS0_MB + n].string;
1491
1492 /* Get the equivalent wide char in map. */
1493 wint_t extra_wcdigit = __towctrans (L'0' + n, map);
1494
1495 /* Convert it to multibyte representation. */
1496 mbstate_t state;
1497 memset (&state, '\0', sizeof (state));
1498
1499 char extra_mbdigit[MB_LEN_MAX];
1500 size_t mblen
1501 = __wcrtomb (extra_mbdigit, extra_wcdigit, &state);
1502
1503 if (mblen == (size_t) -1)
1504 {
1505 /* Ignore this new level. */
1506 map = NULL;
1507 break;
1508 }
1509
1510 /* Calculate the length of mbdigits[n]. */
1511 const char *last_char = mbdigits[n];
1512 for (level = 0; level < to_level; ++level)
1513 last_char = strchr (last_char, '\0') + 1;
1514
1515 size_t mbdigits_len = last_char - mbdigits[n];
1516
1517 /* Allocate memory for extended multibyte digit. */
1518 char *mb_extended;
1519 mb_extended = (char *) alloca (mbdigits_len + mblen + 1);
1520
1521 /* And get the mbdigits + extra_digit string. */
1522 *(char *) __mempcpy (__mempcpy (mb_extended, mbdigits[n],
1523 mbdigits_len),
1524 extra_mbdigit, mblen) = '\0';
1525 mbdigits_extended[n] = mb_extended;
1526#endif
1527 }
1528 }
1529
1530 /* Read the number into workspace. */
1531 while (c != EOF && width != 0)
1532 {
1533 /* In this round we get the pointer to the digit strings
1534 and also perform the first round of comparisons. */
1535 for (n = 0; n < 10; ++n)
1536 {
1537 /* Get the string for the digits with value N. */
1538#ifdef COMPILE_WSCANF
1539
1540 /* wcdigits_extended[] is fully set in the loop
1541 above, but the test for "map != NULL" is done
1542 inside the loop here and outside the loop there. */
1543 DIAG_PUSH_NEEDS_COMMENT;
1544 DIAG_IGNORE_NEEDS_COMMENT (4.7, "-Wmaybe-uninitialized");
1545
1546 if (__glibc_unlikely (map != NULL))
1547 wcdigits[n] = wcdigits_extended[n];
1548 else
1549 wcdigits[n] = (const wchar_t *)
1550 _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
1551 wcdigits[n] += from_level;
1552
1553 DIAG_POP_NEEDS_COMMENT;
1554
1555 if (c == (wint_t) *wcdigits[n])
1556 {
1557 to_level = from_level;
1558 break;
1559 }
1560
1561 /* Advance the pointer to the next string. */
1562 ++wcdigits[n];
1563#else
1564 const char *cmpp;
1565 int avail = width > 0 ? width : INT_MAX;
1566
1567 if (__glibc_unlikely (map != NULL))
1568 mbdigits[n] = mbdigits_extended[n];
1569 else
1570 mbdigits[n]
1571 = curctype->values[_NL_CTYPE_INDIGITS0_MB + n].string;
1572
1573 for (level = 0; level < from_level; level++)
1574 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1575
1576 cmpp = mbdigits[n];
1577 while ((unsigned char) *cmpp == c && avail >= 0)
1578 {
1579 if (*++cmpp == '\0')
1580 break;
1581 else
1582 {
1583 if (avail == 0 || inchar () == EOF)
1584 break;
1585 --avail;
1586 }
1587 }
1588
1589 if (*cmpp == '\0')
1590 {
1591 if (width > 0)
1592 width = avail;
1593 to_level = from_level;
1594 break;
1595 }
1596
1597 /* We are pushing all read characters back. */
1598 if (cmpp > mbdigits[n])
1599 {
1600 ungetc (c, s);
1601 while (--cmpp > mbdigits[n])
1602 ungetc_not_eof ((unsigned char) *cmpp, s);
1603 c = (unsigned char) *cmpp;
1604 }
1605
1606 /* Advance the pointer to the next string. */
1607 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1608#endif
1609 }
1610
1611 if (n == 10)
1612 {
1613 /* Have not yet found the digit. */
1614 for (level = from_level + 1; level <= to_level; ++level)
1615 {
1616 /* Search all ten digits of this level. */
1617 for (n = 0; n < 10; ++n)
1618 {
1619#ifdef COMPILE_WSCANF
1620 if (c == (wint_t) *wcdigits[n])
1621 break;
1622
1623 /* Advance the pointer to the next string. */
1624 ++wcdigits[n];
1625#else
1626 const char *cmpp;
1627 int avail = width > 0 ? width : INT_MAX;
1628
1629 cmpp = mbdigits[n];
1630 while ((unsigned char) *cmpp == c && avail >= 0)
1631 {
1632 if (*++cmpp == '\0')
1633 break;
1634 else
1635 {
1636 if (avail == 0 || inchar () == EOF)
1637 break;
1638 --avail;
1639 }
1640 }
1641
1642 if (*cmpp == '\0')
1643 {
1644 if (width > 0)
1645 width = avail;
1646 break;
1647 }
1648
1649 /* We are pushing all read characters back. */
1650 if (cmpp > mbdigits[n])
1651 {
1652 ungetc (c, s);
1653 while (--cmpp > mbdigits[n])
1654 ungetc_not_eof ((unsigned char) *cmpp, s);
1655 c = (unsigned char) *cmpp;
1656 }
1657
1658 /* Advance the pointer to the next string. */
1659 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1660#endif
1661 }
1662
1663 if (n < 10)
1664 {
1665 /* Found it. */
1666 from_level = level;
1667 to_level = level;
1668 break;
1669 }
1670 }
1671 }
1672
1673 if (n < 10)
1674 c = L_('0') + n;
1675 else if (flags & GROUP)
1676 {
1677 /* Try matching against the thousands separator. */
1678#ifdef COMPILE_WSCANF
1679 if (c != thousands)
1680 break;
1681#else
1682 const char *cmpp = thousands;
1683 int avail = width > 0 ? width : INT_MAX;
1684
1685 while ((unsigned char) *cmpp == c && avail >= 0)
1686 {
1687 char_buffer_add (&charbuf, c);
1688 if (*++cmpp == '\0')
1689 break;
1690 else
1691 {
1692 if (avail == 0 || inchar () == EOF)
1693 break;
1694 --avail;
1695 }
1696 }
1697
1698 if (char_buffer_error (&charbuf))
1699 {
1700 __set_errno (ENOMEM);
1701 done = EOF;
1702 goto errout;
1703 }
1704
1705 if (*cmpp != '\0')
1706 {
1707 /* We are pushing all read characters back. */
1708 if (cmpp > thousands)
1709 {
1710 charbuf.current -= cmpp - thousands;
1711 ungetc (c, s);
1712 while (--cmpp > thousands)
1713 ungetc_not_eof ((unsigned char) *cmpp, s);
1714 c = (unsigned char) *cmpp;
1715 }
1716 break;
1717 }
1718
1719 if (width > 0)
1720 width = avail;
1721
1722 /* The last thousands character will be added back by
1723 the char_buffer_add below. */
1724 --charbuf.current;
1725#endif
1726 }
1727 else
1728 break;
1729
1730 char_buffer_add (&charbuf, c);
1731 if (width > 0)
1732 --width;
1733
1734 c = inchar ();
1735 }
1736 }
1737 else
1738 /* Read the number into workspace. */
1739 while (c != EOF && width != 0)
1740 {
1741 if (base == 16)
1742 {
1743 if (!ISXDIGIT (c))
1744 break;
1745 }
1746 else if (!ISDIGIT (c) || (int) (c - L_('0')) >= base)
1747 {
1748 if (base == 10 && (flags & GROUP))
1749 {
1750 /* Try matching against the thousands separator. */
1751#ifdef COMPILE_WSCANF
1752 if (c != thousands)
1753 break;
1754#else
1755 const char *cmpp = thousands;
1756 int avail = width > 0 ? width : INT_MAX;
1757
1758 while ((unsigned char) *cmpp == c && avail >= 0)
1759 {
1760 char_buffer_add (&charbuf, c);
1761 if (*++cmpp == '\0')
1762 break;
1763 else
1764 {
1765 if (avail == 0 || inchar () == EOF)
1766 break;
1767 --avail;
1768 }
1769 }
1770
1771 if (char_buffer_error (&charbuf))
1772 {
1773 __set_errno (ENOMEM);
1774 done = EOF;
1775 goto errout;
1776 }
1777
1778 if (*cmpp != '\0')
1779 {
1780 /* We are pushing all read characters back. */
1781 if (cmpp > thousands)
1782 {
1783 charbuf.current -= cmpp - thousands;
1784 ungetc (c, s);
1785 while (--cmpp > thousands)
1786 ungetc_not_eof ((unsigned char) *cmpp, s);
1787 c = (unsigned char) *cmpp;
1788 }
1789 break;
1790 }
1791
1792 if (width > 0)
1793 width = avail;
1794
1795 /* The last thousands character will be added back by
1796 the char_buffer_add below. */
1797 --charbuf.current;
1798#endif
1799 }
1800 else
1801 break;
1802 }
1803 char_buffer_add (&charbuf, c);
1804 if (width > 0)
1805 --width;
1806
1807 c = inchar ();
1808 }
1809
1810 if (char_buffer_error (&charbuf))
1811 {
1812 __set_errno (ENOMEM);
1813 done = EOF;
1814 goto errout;
1815 }
1816
1817 if (char_buffer_size (&charbuf) == 0
1818 || (char_buffer_size (&charbuf) == 1
1819 && (char_buffer_start (&charbuf)[0] == L_('+')
1820 || char_buffer_start (&charbuf)[0] == L_('-'))))
1821 {
1822 /* There was no number. If we are supposed to read a pointer
1823 we must recognize "(nil)" as well. */
1824 if (__builtin_expect (char_buffer_size (&charbuf) == 0
1825 && (flags & READ_POINTER)
1826 && (width < 0 || width >= 5)
1827 && c == '('
1828 && TOLOWER (inchar ()) == L_('n')
1829 && TOLOWER (inchar ()) == L_('i')
1830 && TOLOWER (inchar ()) == L_('l')
1831 && inchar () == L_(')'), 1))
1832 /* We must produce the value of a NULL pointer. A single
1833 '0' digit is enough. */
1834 char_buffer_add (&charbuf, L_('0'));
1835 else
1836 {
1837 /* The last read character is not part of the number
1838 anymore. */
1839 ungetc (c, s);
1840
1841 conv_error ();
1842 }
1843 }
1844 else
1845 /* The just read character is not part of the number anymore. */
1846 ungetc (c, s);
1847
1848 /* Convert the number. */
1849 char_buffer_add (&charbuf, L_('\0'));
1850 if (char_buffer_error (&charbuf))
1851 {
1852 __set_errno (ENOMEM);
1853 done = EOF;
1854 goto errout;
1855 }
1856 if (need_longlong && (flags & LONGDBL))
1857 {
1858 if (flags & NUMBER_SIGNED)
1859 num.q = __strtoll_internal
1860 (char_buffer_start (&charbuf), &tw, base, flags & GROUP);
1861 else
1862 num.uq = __strtoull_internal
1863 (char_buffer_start (&charbuf), &tw, base, flags & GROUP);
1864 }
1865 else
1866 {
1867 if (flags & NUMBER_SIGNED)
1868 num.l = __strtol_internal
1869 (char_buffer_start (&charbuf), &tw, base, flags & GROUP);
1870 else
1871 num.ul = __strtoul_internal
1872 (char_buffer_start (&charbuf), &tw, base, flags & GROUP);
1873 }
1874 if (__glibc_unlikely (char_buffer_start (&charbuf) == tw))
1875 conv_error ();
1876
1877 if (!(flags & SUPPRESS))
1878 {
1879 if (flags & NUMBER_SIGNED)
1880 {
1881 if (need_longlong && (flags & LONGDBL))
1882 *ARG (LONGLONG int *) = num.q;
1883 else if (need_long && (flags & LONG))
1884 *ARG (long int *) = num.l;
1885 else if (flags & SHORT)
1886 *ARG (short int *) = (short int) num.l;
1887 else if (!(flags & CHAR))
1888 *ARG (int *) = (int) num.l;
1889 else
1890 *ARG (signed char *) = (signed char) num.ul;
1891 }
1892 else
1893 {
1894 if (need_longlong && (flags & LONGDBL))
1895 *ARG (unsigned LONGLONG int *) = num.uq;
1896 else if (need_long && (flags & LONG))
1897 *ARG (unsigned long int *) = num.ul;
1898 else if (flags & SHORT)
1899 *ARG (unsigned short int *)
1900 = (unsigned short int) num.ul;
1901 else if (!(flags & CHAR))
1902 *ARG (unsigned int *) = (unsigned int) num.ul;
1903 else
1904 *ARG (unsigned char *) = (unsigned char) num.ul;
1905 }
1906 ++done;
1907 }
1908 break;
1909
1910 case L_('e'): /* Floating-point numbers. */
1911 case L_('E'):
1912 case L_('f'):
1913 case L_('F'):
1914 case L_('g'):
1915 case L_('G'):
1916 case L_('a'):
1917 case L_('A'):
1918 c = inchar ();
1919 if (width > 0)
1920 --width;
1921 if (__glibc_unlikely (c == EOF))
1922 input_error ();
1923
1924 got_digit = got_dot = got_e = 0;
1925
1926 /* Check for a sign. */
1927 if (c == L_('-') || c == L_('+'))
1928 {
1929 negative = c == L_('-');
1930 if (__glibc_unlikely (width == 0 || inchar () == EOF))
1931 /* EOF is only an input error before we read any chars. */
1932 conv_error ();
1933 if (width > 0)
1934 --width;
1935 }
1936 else
1937 negative = 0;
1938
1939 /* Take care of the special arguments "nan" and "inf". */
1940 if (TOLOWER (c) == L_('n'))
1941 {
1942 /* Maybe "nan". */
1943 char_buffer_add (&charbuf, c);
1944 if (__builtin_expect (width == 0
1945 || inchar () == EOF
1946 || TOLOWER (c) != L_('a'), 0))
1947 conv_error ();
1948 if (width > 0)
1949 --width;
1950 char_buffer_add (&charbuf, c);
1951 if (__builtin_expect (width == 0
1952 || inchar () == EOF
1953 || TOLOWER (c) != L_('n'), 0))
1954 conv_error ();
1955 if (width > 0)
1956 --width;
1957 char_buffer_add (&charbuf, c);
1958 /* It is "nan". */
1959 goto scan_float;
1960 }
1961 else if (TOLOWER (c) == L_('i'))
1962 {
1963 /* Maybe "inf" or "infinity". */
1964 char_buffer_add (&charbuf, c);
1965 if (__builtin_expect (width == 0
1966 || inchar () == EOF
1967 || TOLOWER (c) != L_('n'), 0))
1968 conv_error ();
1969 if (width > 0)
1970 --width;
1971 char_buffer_add (&charbuf, c);
1972 if (__builtin_expect (width == 0
1973 || inchar () == EOF
1974 || TOLOWER (c) != L_('f'), 0))
1975 conv_error ();
1976 if (width > 0)
1977 --width;
1978 char_buffer_add (&charbuf, c);
1979 /* It is at least "inf". */
1980 if (width != 0 && inchar () != EOF)
1981 {
1982 if (TOLOWER (c) == L_('i'))
1983 {
1984 if (width > 0)
1985 --width;
1986 /* Now we have to read the rest as well. */
1987 char_buffer_add (&charbuf, c);
1988 if (__builtin_expect (width == 0
1989 || inchar () == EOF
1990 || TOLOWER (c) != L_('n'), 0))
1991 conv_error ();
1992 if (width > 0)
1993 --width;
1994 char_buffer_add (&charbuf, c);
1995 if (__builtin_expect (width == 0
1996 || inchar () == EOF
1997 || TOLOWER (c) != L_('i'), 0))
1998 conv_error ();
1999 if (width > 0)
2000 --width;
2001 char_buffer_add (&charbuf, c);
2002 if (__builtin_expect (width == 0
2003 || inchar () == EOF
2004 || TOLOWER (c) != L_('t'), 0))
2005 conv_error ();
2006 if (width > 0)
2007 --width;
2008 char_buffer_add (&charbuf, c);
2009 if (__builtin_expect (width == 0
2010 || inchar () == EOF
2011 || TOLOWER (c) != L_('y'), 0))
2012 conv_error ();
2013 if (width > 0)
2014 --width;
2015 char_buffer_add (&charbuf, c);
2016 }
2017 else
2018 /* Never mind. */
2019 ungetc (c, s);
2020 }
2021 goto scan_float;
2022 }
2023
2024 exp_char = L_('e');
2025 if (width != 0 && c == L_('0'))
2026 {
2027 char_buffer_add (&charbuf, c);
2028 c = inchar ();
2029 if (width > 0)
2030 --width;
2031 if (width != 0 && TOLOWER (c) == L_('x'))
2032 {
2033 /* It is a number in hexadecimal format. */
2034 char_buffer_add (&charbuf, c);
2035
2036 flags |= HEXA_FLOAT;
2037 exp_char = L_('p');
2038
2039 /* Grouping is not allowed. */
2040 flags &= ~GROUP;
2041 c = inchar ();
2042 if (width > 0)
2043 --width;
2044 }
2045 else
2046 got_digit = 1;
2047 }
2048
2049 while (1)
2050 {
2051 if (char_buffer_error (&charbuf))
2052 {
2053 __set_errno (ENOMEM);
2054 done = EOF;
2055 goto errout;
2056 }
2057 if (ISDIGIT (c))
2058 {
2059 char_buffer_add (&charbuf, c);
2060 got_digit = 1;
2061 }
2062 else if (!got_e && (flags & HEXA_FLOAT) && ISXDIGIT (c))
2063 {
2064 char_buffer_add (&charbuf, c);
2065 got_digit = 1;
2066 }
2067 else if (got_e && charbuf.current[-1] == exp_char
2068 && (c == L_('-') || c == L_('+')))
2069 char_buffer_add (&charbuf, c);
2070 else if (got_digit && !got_e
2071 && (CHAR_T) TOLOWER (c) == exp_char)
2072 {
2073 char_buffer_add (&charbuf, exp_char);
2074 got_e = got_dot = 1;
2075 }
2076 else
2077 {
2078#ifdef COMPILE_WSCANF
2079 if (! got_dot && c == decimal)
2080 {
2081 char_buffer_add (&charbuf, c);
2082 got_dot = 1;
2083 }
2084 else if ((flags & GROUP) != 0 && ! got_dot && c == thousands)
2085 char_buffer_add (&charbuf, c);
2086 else
2087 {
2088 /* The last read character is not part of the number
2089 anymore. */
2090 ungetc (c, s);
2091 break;
2092 }
2093#else
2094 const char *cmpp = decimal;
2095 int avail = width > 0 ? width : INT_MAX;
2096
2097 if (! got_dot)
2098 {
2099 while ((unsigned char) *cmpp == c && avail >= 0)
2100 if (*++cmpp == '\0')
2101 break;
2102 else
2103 {
2104 if (avail == 0 || inchar () == EOF)
2105 break;
2106 --avail;
2107 }
2108 }
2109
2110 if (*cmpp == '\0')
2111 {
2112 /* Add all the characters. */
2113 for (cmpp = decimal; *cmpp != '\0'; ++cmpp)
2114 char_buffer_add (&charbuf, (unsigned char) *cmpp);
2115 if (width > 0)
2116 width = avail;
2117 got_dot = 1;
2118 }
2119 else
2120 {
2121 /* Figure out whether it is a thousands separator.
2122 There is one problem: we possibly read more than
2123 one character. We cannot push them back but since
2124 we know that parts of the `decimal' string matched,
2125 we can compare against it. */
2126 const char *cmp2p = thousands;
2127
2128 if ((flags & GROUP) != 0 && ! got_dot)
2129 {
2130 while (cmp2p - thousands < cmpp - decimal
2131 && *cmp2p == decimal[cmp2p - thousands])
2132 ++cmp2p;
2133 if (cmp2p - thousands == cmpp - decimal)
2134 {
2135 while ((unsigned char) *cmp2p == c && avail >= 0)
2136 if (*++cmp2p == '\0')
2137 break;
2138 else
2139 {
2140 if (avail == 0 || inchar () == EOF)
2141 break;
2142 --avail;
2143 }
2144 }
2145 }
2146
2147 if (cmp2p != NULL && *cmp2p == '\0')
2148 {
2149 /* Add all the characters. */
2150 for (cmpp = thousands; *cmpp != '\0'; ++cmpp)
2151 char_buffer_add (&charbuf, (unsigned char) *cmpp);
2152 if (width > 0)
2153 width = avail;
2154 }
2155 else
2156 {
2157 /* The last read character is not part of the number
2158 anymore. */
2159 ungetc (c, s);
2160 break;
2161 }
2162 }
2163#endif
2164 }
2165
2166 if (width == 0 || inchar () == EOF)
2167 break;
2168
2169 if (width > 0)
2170 --width;
2171 }
2172
2173 if (char_buffer_error (&charbuf))
2174 {
2175 __set_errno (ENOMEM);
2176 done = EOF;
2177 goto errout;
2178 }
2179
2180 wctrans_t map;
2181 if (__builtin_expect ((flags & I18N) != 0, 0)
2182 /* Hexadecimal floats make no sense, fixing localized
2183 digits with ASCII letters. */
2184 && !(flags & HEXA_FLOAT)
2185 /* Minimum requirement. */
2186 && (char_buffer_size (&charbuf) == 0 || got_dot)
2187 && (map = __wctrans ("to_inpunct")) != NULL)
2188 {
2189 /* Reget the first character. */
2190 inchar ();
2191
2192 /* Localized digits, decimal points, and thousands
2193 separator. */
2194 wint_t wcdigits[12];
2195
2196 /* First get decimal equivalent to check if we read it
2197 or not. */
2198 wcdigits[11] = __towctrans (L'.', map);
2199
2200 /* If we have not read any character or have just read
2201 locale decimal point which matches the decimal point
2202 for localized FP numbers, then we may have localized
2203 digits. Note, we test GOT_DOT above. */
2204#ifdef COMPILE_WSCANF
2205 if (char_buffer_size (&charbuf) == 0
2206 || (char_buffer_size (&charbuf) == 1
2207 && wcdigits[11] == decimal))
2208#else
2209 char mbdigits[12][MB_LEN_MAX + 1];
2210
2211 mbstate_t state;
2212 memset (&state, '\0', sizeof (state));
2213
2214 bool match_so_far = char_buffer_size (&charbuf) == 0;
2215 size_t mblen = __wcrtomb (mbdigits[11], wcdigits[11], &state);
2216 if (mblen != (size_t) -1)
2217 {
2218 mbdigits[11][mblen] = '\0';
2219 match_so_far |=
2220 (char_buffer_size (&charbuf) == strlen (decimal)
2221 && strcmp (decimal, mbdigits[11]) == 0);
2222 }
2223 else
2224 {
2225 size_t decimal_len = strlen (decimal);
2226 /* This should always be the case but the data comes
2227 from a file. */
2228 if (decimal_len <= MB_LEN_MAX)
2229 {
2230 match_so_far |= char_buffer_size (&charbuf) == decimal_len;
2231 memcpy (mbdigits[11], decimal, decimal_len + 1);
2232 }
2233 else
2234 match_so_far = false;
2235 }
2236
2237 if (match_so_far)
2238#endif
2239 {
2240 bool have_locthousands = (flags & GROUP) != 0;
2241
2242 /* Now get the digits and the thousands-sep equivalents. */
2243 for (int n = 0; n < 11; ++n)
2244 {
2245 if (n < 10)
2246 wcdigits[n] = __towctrans (L'0' + n, map);
2247 else if (n == 10)
2248 {
2249 wcdigits[10] = __towctrans (L',', map);
2250 have_locthousands &= wcdigits[10] != L'\0';
2251 }
2252
2253#ifndef COMPILE_WSCANF
2254 memset (&state, '\0', sizeof (state));
2255
2256 size_t mblen = __wcrtomb (mbdigits[n], wcdigits[n],
2257 &state);
2258 if (mblen == (size_t) -1)
2259 {
2260 if (n == 10)
2261 {
2262 if (have_locthousands)
2263 {
2264 size_t thousands_len = strlen (thousands);
2265 if (thousands_len <= MB_LEN_MAX)
2266 memcpy (mbdigits[10], thousands,
2267 thousands_len + 1);
2268 else
2269 have_locthousands = false;
2270 }
2271 }
2272 else
2273 /* Ignore checking against localized digits. */
2274 goto no_i18nflt;
2275 }
2276 else
2277 mbdigits[n][mblen] = '\0';
2278#endif
2279 }
2280
2281 /* Start checking against localized digits, if
2282 conversion is done correctly. */
2283 while (1)
2284 {
2285 if (char_buffer_error (&charbuf))
2286 {
2287 __set_errno (ENOMEM);
2288 done = EOF;
2289 goto errout;
2290 }
2291 if (got_e && charbuf.current[-1] == exp_char
2292 && (c == L_('-') || c == L_('+')))
2293 char_buffer_add (&charbuf, c);
2294 else if (char_buffer_size (&charbuf) > 0 && !got_e
2295 && (CHAR_T) TOLOWER (c) == exp_char)
2296 {
2297 char_buffer_add (&charbuf, exp_char);
2298 got_e = got_dot = 1;
2299 }
2300 else
2301 {
2302 /* Check against localized digits, decimal point,
2303 and thousands separator. */
2304 int n;
2305 for (n = 0; n < 12; ++n)
2306 {
2307#ifdef COMPILE_WSCANF
2308 if (c == wcdigits[n])
2309 {
2310 if (n < 10)
2311 char_buffer_add (&charbuf, L_('0') + n);
2312 else if (n == 11 && !got_dot)
2313 {
2314 char_buffer_add (&charbuf, decimal);
2315 got_dot = 1;
2316 }
2317 else if (n == 10 && have_locthousands
2318 && ! got_dot)
2319 char_buffer_add (&charbuf, thousands);
2320 else
2321 /* The last read character is not part
2322 of the number anymore. */
2323 n = 12;
2324
2325 break;
2326 }
2327#else
2328 const char *cmpp = mbdigits[n];
2329 int avail = width > 0 ? width : INT_MAX;
2330
2331 while ((unsigned char) *cmpp == c && avail >= 0)
2332 if (*++cmpp == '\0')
2333 break;
2334 else
2335 {
2336 if (avail == 0 || inchar () == EOF)
2337 break;
2338 --avail;
2339 }
2340 if (*cmpp == '\0')
2341 {
2342 if (width > 0)
2343 width = avail;
2344
2345 if (n < 10)
2346 char_buffer_add (&charbuf, L_('0') + n);
2347 else if (n == 11 && !got_dot)
2348 {
2349 /* Add all the characters. */
2350 for (cmpp = decimal; *cmpp != '\0';
2351 ++cmpp)
2352 char_buffer_add (&charbuf,
2353 (unsigned char) *cmpp);
2354
2355 got_dot = 1;
2356 }
2357 else if (n == 10 && (flags & GROUP) != 0
2358 && ! got_dot)
2359 {
2360 /* Add all the characters. */
2361 for (cmpp = thousands; *cmpp != '\0';
2362 ++cmpp)
2363 char_buffer_add (&charbuf,
2364 (unsigned char) *cmpp);
2365 }
2366 else
2367 /* The last read character is not part
2368 of the number anymore. */
2369 n = 12;
2370
2371 break;
2372 }
2373
2374 /* We are pushing all read characters back. */
2375 if (cmpp > mbdigits[n])
2376 {
2377 ungetc (c, s);
2378 while (--cmpp > mbdigits[n])
2379 ungetc_not_eof ((unsigned char) *cmpp, s);
2380 c = (unsigned char) *cmpp;
2381 }
2382#endif
2383 }
2384
2385 if (n >= 12)
2386 {
2387 /* The last read character is not part
2388 of the number anymore. */
2389 ungetc (c, s);
2390 break;
2391 }
2392 }
2393
2394 if (width == 0 || inchar () == EOF)
2395 break;
2396
2397 if (width > 0)
2398 --width;
2399 }
2400 }
2401
2402#ifndef COMPILE_WSCANF
2403 no_i18nflt:
2404 ;
2405#endif
2406 }
2407
2408 if (char_buffer_error (&charbuf))
2409 {
2410 __set_errno (ENOMEM);
2411 done = EOF;
2412 goto errout;
2413 }
2414
2415 /* Have we read any character? If we try to read a number
2416 in hexadecimal notation and we have read only the `0x'
2417 prefix this is an error. */
2418 if (__glibc_unlikely (char_buffer_size (&charbuf) == 0
2419 || ((flags & HEXA_FLOAT)
2420 && char_buffer_size (&charbuf) == 2)))
2421 conv_error ();
2422
2423 scan_float:
2424 /* Convert the number. */
2425 char_buffer_add (&charbuf, L_('\0'));
2426 if (char_buffer_error (&charbuf))
2427 {
2428 __set_errno (ENOMEM);
2429 done = EOF;
2430 goto errout;
2431 }
2432 if ((flags & LONGDBL) && !__ldbl_is_dbl)
2433 {
2434 long double d = __strtold_internal
2435 (char_buffer_start (&charbuf), &tw, flags & GROUP);
2436 if (!(flags & SUPPRESS) && tw != char_buffer_start (&charbuf))
2437 *ARG (long double *) = negative ? -d : d;
2438 }
2439 else if (flags & (LONG | LONGDBL))
2440 {
2441 double d = __strtod_internal
2442 (char_buffer_start (&charbuf), &tw, flags & GROUP);
2443 if (!(flags & SUPPRESS) && tw != char_buffer_start (&charbuf))
2444 *ARG (double *) = negative ? -d : d;
2445 }
2446 else
2447 {
2448 float d = __strtof_internal
2449 (char_buffer_start (&charbuf), &tw, flags & GROUP);
2450 if (!(flags & SUPPRESS) && tw != char_buffer_start (&charbuf))
2451 *ARG (float *) = negative ? -d : d;
2452 }
2453
2454 if (__glibc_unlikely (tw == char_buffer_start (&charbuf)))
2455 conv_error ();
2456
2457 if (!(flags & SUPPRESS))
2458 ++done;
2459 break;
2460
2461 case L_('['): /* Character class. */
2462 if (flags & LONG)
2463 STRING_ARG (wstr, wchar_t, 100);
2464 else
2465 STRING_ARG (str, char, 100);
2466
2467 if (*f == L_('^'))
2468 {
2469 ++f;
2470 not_in = 1;
2471 }
2472 else
2473 not_in = 0;
2474
2475 if (width < 0)
2476 /* There is no width given so there is also no limit on the
2477 number of characters we read. Therefore we set width to
2478 a very high value to make the algorithm easier. */
2479 width = INT_MAX;
2480
2481#ifdef COMPILE_WSCANF
2482 /* Find the beginning and the end of the scanlist. We are not
2483 creating a lookup table since it would have to be too large.
2484 Instead we search each time through the string. This is not
2485 a constant lookup time but who uses this feature deserves to
2486 be punished. */
2487 tw = (wchar_t *) f; /* Marks the beginning. */
2488
2489 if (*f == L']')
2490 ++f;
2491
2492 while ((fc = *f++) != L'\0' && fc != L']');
2493
2494 if (__glibc_unlikely (fc == L'\0'))
2495 conv_error ();
2496 wchar_t *twend = (wchar_t *) f - 1;
2497#else
2498 /* Fill the scratch buffer with byte flags indexed by character.
2499 We will use this flag map for matching input characters. */
2500 if (!scratch_buffer_set_array_size
2501 (&charbuf.scratch, UCHAR_MAX + 1, 1))
2502 {
2503 done = EOF;
2504 goto errout;
2505 }
2506 memset (charbuf.scratch.data, '\0', UCHAR_MAX + 1);
2507
2508 fc = *f;
2509 if (fc == ']' || fc == '-')
2510 {
2511 /* If ] or - appears before any char in the set, it is not
2512 the terminator or separator, but the first char in the
2513 set. */
2514 ((char *)charbuf.scratch.data)[fc] = 1;
2515 ++f;
2516 }
2517
2518 while ((fc = *f++) != '\0' && fc != ']')
2519 if (fc == '-' && *f != '\0' && *f != ']'
2520 && (unsigned char) f[-2] <= (unsigned char) *f)
2521 {
2522 /* Add all characters from the one before the '-'
2523 up to (but not including) the next format char. */
2524 for (fc = (unsigned char) f[-2]; fc < (unsigned char) *f; ++fc)
2525 ((char *)charbuf.scratch.data)[fc] = 1;
2526 }
2527 else
2528 /* Add the character to the flag map. */
2529 ((char *)charbuf.scratch.data)[fc] = 1;
2530
2531 if (__glibc_unlikely (fc == '\0'))
2532 conv_error();
2533#endif
2534
2535 if (flags & LONG)
2536 {
2537 size_t now = read_in;
2538#ifdef COMPILE_WSCANF
2539 if (__glibc_unlikely (inchar () == WEOF))
2540 input_error ();
2541
2542 do
2543 {
2544 wchar_t *runp;
2545
2546 /* Test whether it's in the scanlist. */
2547 runp = tw;
2548 while (runp < twend)
2549 {
2550 if (runp[0] == L'-' && runp[1] != '\0'
2551 && runp + 1 != twend
2552 && runp != tw
2553 && (unsigned int) runp[-1] <= (unsigned int) runp[1])
2554 {
2555 /* Match against all characters in between the
2556 first and last character of the sequence. */
2557 wchar_t wc;
2558
2559 for (wc = runp[-1] + 1; wc <= runp[1]; ++wc)
2560 if ((wint_t) wc == c)
2561 break;
2562
2563 if (wc <= runp[1] && !not_in)
2564 break;
2565 if (wc <= runp[1] && not_in)
2566 {
2567 /* The current character is not in the
2568 scanset. */
2569 ungetc (c, s);
2570 goto out;
2571 }
2572
2573 runp += 2;
2574 }
2575 else
2576 {
2577 if ((wint_t) *runp == c && !not_in)
2578 break;
2579 if ((wint_t) *runp == c && not_in)
2580 {
2581 ungetc (c, s);
2582 goto out;
2583 }
2584
2585 ++runp;
2586 }
2587 }
2588
2589 if (runp == twend && !not_in)
2590 {
2591 ungetc (c, s);
2592 goto out;
2593 }
2594
2595 if (!(flags & SUPPRESS))
2596 {
2597 *wstr++ = c;
2598
2599 if ((flags & MALLOC)
2600 && wstr == (wchar_t *) *strptr + strsize)
2601 {
2602 /* Enlarge the buffer. */
2603 wstr = (wchar_t *) realloc (*strptr,
2604 (2 * strsize)
2605 * sizeof (wchar_t));
2606 if (wstr == NULL)
2607 {
2608 /* Can't allocate that much. Last-ditch
2609 effort. */
2610 wstr = (wchar_t *)
2611 realloc (*strptr, (strsize + 1)
2612 * sizeof (wchar_t));
2613 if (wstr == NULL)
2614 {
2615 if (flags & POSIX_MALLOC)
2616 {
2617 done = EOF;
2618 goto errout;
2619 }
2620 /* We lose. Oh well. Terminate the string
2621 and stop converting, so at least we don't
2622 skip any input. */
2623 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
2624 strptr = NULL;
2625 ++done;
2626 conv_error ();
2627 }
2628 else
2629 {
2630 *strptr = (char *) wstr;
2631 wstr += strsize;
2632 ++strsize;
2633 }
2634 }
2635 else
2636 {
2637 *strptr = (char *) wstr;
2638 wstr += strsize;
2639 strsize *= 2;
2640 }
2641 }
2642 }
2643 }
2644 while (--width > 0 && inchar () != WEOF);
2645 out:
2646#else
2647 char buf[MB_LEN_MAX];
2648 size_t cnt = 0;
2649 mbstate_t cstate;
2650
2651 if (__glibc_unlikely (inchar () == EOF))
2652 input_error ();
2653
2654 memset (&cstate, '\0', sizeof (cstate));
2655
2656 do
2657 {
2658 if (((char *) charbuf.scratch.data)[c] == not_in)
2659 {
2660 ungetc_not_eof (c, s);
2661 break;
2662 }
2663
2664 /* This is easy. */
2665 if (!(flags & SUPPRESS))
2666 {
2667 size_t n;
2668
2669 /* Convert it into a wide character. */
2670 buf[0] = c;
2671 n = __mbrtowc (wstr, buf, 1, &cstate);
2672
2673 if (n == (size_t) -2)
2674 {
2675 /* Possibly correct character, just not enough
2676 input. */
2677 ++cnt;
2678 assert (cnt < MB_CUR_MAX);
2679 continue;
2680 }
2681 cnt = 0;
2682
2683 ++wstr;
2684 if ((flags & MALLOC)
2685 && wstr == (wchar_t *) *strptr + strsize)
2686 {
2687 /* Enlarge the buffer. */
2688 wstr = (wchar_t *) realloc (*strptr,
2689 (2 * strsize
2690 * sizeof (wchar_t)));
2691 if (wstr == NULL)
2692 {
2693 /* Can't allocate that much. Last-ditch
2694 effort. */
2695 wstr = (wchar_t *)
2696 realloc (*strptr, ((strsize + 1)
2697 * sizeof (wchar_t)));
2698 if (wstr == NULL)
2699 {
2700 if (flags & POSIX_MALLOC)
2701 {
2702 done = EOF;
2703 goto errout;
2704 }
2705 /* We lose. Oh well. Terminate the
2706 string and stop converting,
2707 so at least we don't skip any input. */
2708 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
2709 strptr = NULL;
2710 ++done;
2711 conv_error ();
2712 }
2713 else
2714 {
2715 *strptr = (char *) wstr;
2716 wstr += strsize;
2717 ++strsize;
2718 }
2719 }
2720 else
2721 {
2722 *strptr = (char *) wstr;
2723 wstr += strsize;
2724 strsize *= 2;
2725 }
2726 }
2727 }
2728
2729 if (--width <= 0)
2730 break;
2731 }
2732 while (inchar () != EOF);
2733
2734 if (__glibc_unlikely (cnt != 0))
2735 /* We stopped in the middle of recognizing another
2736 character. That's a problem. */
2737 encode_error ();
2738#endif
2739
2740 if (__glibc_unlikely (now == read_in))
2741 /* We haven't successfully read any character. */
2742 conv_error ();
2743
2744 if (!(flags & SUPPRESS))
2745 {
2746 *wstr++ = L'\0';
2747
2748 if ((flags & MALLOC)
2749 && wstr - (wchar_t *) *strptr != strsize)
2750 {
2751 wchar_t *cp = (wchar_t *)
2752 realloc (*strptr, ((wstr - (wchar_t *) *strptr)
2753 * sizeof(wchar_t)));
2754 if (cp != NULL)
2755 *strptr = (char *) cp;
2756 }
2757 strptr = NULL;
2758
2759 ++done;
2760 }
2761 }
2762 else
2763 {
2764 size_t now = read_in;
2765
2766 if (__glibc_unlikely (inchar () == EOF))
2767 input_error ();
2768
2769#ifdef COMPILE_WSCANF
2770
2771 memset (&state, '\0', sizeof (state));
2772
2773 do
2774 {
2775 wchar_t *runp;
2776 size_t n;
2777
2778 /* Test whether it's in the scanlist. */
2779 runp = tw;
2780 while (runp < twend)
2781 {
2782 if (runp[0] == L'-' && runp[1] != '\0'
2783 && runp + 1 != twend
2784 && runp != tw
2785 && (unsigned int) runp[-1] <= (unsigned int) runp[1])
2786 {
2787 /* Match against all characters in between the
2788 first and last character of the sequence. */
2789 wchar_t wc;
2790
2791 for (wc = runp[-1] + 1; wc <= runp[1]; ++wc)
2792 if ((wint_t) wc == c)
2793 break;
2794
2795 if (wc <= runp[1] && !not_in)
2796 break;
2797 if (wc <= runp[1] && not_in)
2798 {
2799 /* The current character is not in the
2800 scanset. */
2801 ungetc (c, s);
2802 goto out2;
2803 }
2804
2805 runp += 2;
2806 }
2807 else
2808 {
2809 if ((wint_t) *runp == c && !not_in)
2810 break;
2811 if ((wint_t) *runp == c && not_in)
2812 {
2813 ungetc (c, s);
2814 goto out2;
2815 }
2816
2817 ++runp;
2818 }
2819 }
2820
2821 if (runp == twend && !not_in)
2822 {
2823 ungetc (c, s);
2824 goto out2;
2825 }
2826
2827 if (!(flags & SUPPRESS))
2828 {
2829 if ((flags & MALLOC)
2830 && str + MB_CUR_MAX >= *strptr + strsize)
2831 {
2832 /* Enlarge the buffer. */
2833 size_t strleng = str - *strptr;
2834 char *newstr;
2835
2836 newstr = (char *) realloc (*strptr, 2 * strsize);
2837 if (newstr == NULL)
2838 {
2839 /* Can't allocate that much. Last-ditch
2840 effort. */
2841 newstr = (char *) realloc (*strptr,
2842 strleng + MB_CUR_MAX);
2843 if (newstr == NULL)
2844 {
2845 if (flags & POSIX_MALLOC)
2846 {
2847 done = EOF;
2848 goto errout;
2849 }
2850 /* We lose. Oh well. Terminate the string
2851 and stop converting, so at least we don't
2852 skip any input. */
2853 ((char *) (*strptr))[strleng] = '\0';
2854 strptr = NULL;
2855 ++done;
2856 conv_error ();
2857 }
2858 else
2859 {
2860 *strptr = newstr;
2861 str = newstr + strleng;
2862 strsize = strleng + MB_CUR_MAX;
2863 }
2864 }
2865 else
2866 {
2867 *strptr = newstr;
2868 str = newstr + strleng;
2869 strsize *= 2;
2870 }
2871 }
2872 }
2873
2874 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c, &state);
2875 if (__glibc_unlikely (n == (size_t) -1))
2876 encode_error ();
2877
2878 assert (n <= MB_CUR_MAX);
2879 str += n;
2880 }
2881 while (--width > 0 && inchar () != WEOF);
2882 out2:
2883#else
2884 do
2885 {
2886 if (((char *) charbuf.scratch.data)[c] == not_in)
2887 {
2888 ungetc_not_eof (c, s);
2889 break;
2890 }
2891
2892 /* This is easy. */
2893 if (!(flags & SUPPRESS))
2894 {
2895 *str++ = c;
2896 if ((flags & MALLOC)
2897 && (char *) str == *strptr + strsize)
2898 {
2899 /* Enlarge the buffer. */
2900 size_t newsize = 2 * strsize;
2901
2902 allocagain:
2903 str = (char *) realloc (*strptr, newsize);
2904 if (str == NULL)
2905 {
2906 /* Can't allocate that much. Last-ditch
2907 effort. */
2908 if (newsize > strsize + 1)
2909 {
2910 newsize = strsize + 1;
2911 goto allocagain;
2912 }
2913 if (flags & POSIX_MALLOC)
2914 {
2915 done = EOF;
2916 goto errout;
2917 }
2918 /* We lose. Oh well. Terminate the
2919 string and stop converting,
2920 so at least we don't skip any input. */
2921 ((char *) (*strptr))[strsize - 1] = '\0';
2922 strptr = NULL;
2923 ++done;
2924 conv_error ();
2925 }
2926 else
2927 {
2928 *strptr = (char *) str;
2929 str += strsize;
2930 strsize = newsize;
2931 }
2932 }
2933 }
2934 }
2935 while (--width > 0 && inchar () != EOF);
2936#endif
2937
2938 if (__glibc_unlikely (now == read_in))
2939 /* We haven't successfully read any character. */
2940 conv_error ();
2941
2942 if (!(flags & SUPPRESS))
2943 {
2944#ifdef COMPILE_WSCANF
2945 /* We have to emit the code to get into the initial
2946 state. */
2947 char buf[MB_LEN_MAX];
2948 size_t n = __wcrtomb (buf, L'\0', &state);
2949 if (n > 0 && (flags & MALLOC)
2950 && str + n >= *strptr + strsize)
2951 {
2952 /* Enlarge the buffer. */
2953 size_t strleng = str - *strptr;
2954 char *newstr;
2955
2956 newstr = (char *) realloc (*strptr, strleng + n + 1);
2957 if (newstr == NULL)
2958 {
2959 if (flags & POSIX_MALLOC)
2960 {
2961 done = EOF;
2962 goto errout;
2963 }
2964 /* We lose. Oh well. Terminate the string
2965 and stop converting, so at least we don't
2966 skip any input. */
2967 ((char *) (*strptr))[strleng] = '\0';
2968 strptr = NULL;
2969 ++done;
2970 conv_error ();
2971 }
2972 else
2973 {
2974 *strptr = newstr;
2975 str = newstr + strleng;
2976 strsize = strleng + n + 1;
2977 }
2978 }
2979
2980 str = __mempcpy (str, buf, n);
2981#endif
2982 *str++ = '\0';
2983
2984 if ((flags & MALLOC) && str - *strptr != strsize)
2985 {
2986 char *cp = (char *) realloc (*strptr, str - *strptr);
2987 if (cp != NULL)
2988 *strptr = cp;
2989 }
2990 strptr = NULL;
2991
2992 ++done;
2993 }
2994 }
2995 break;
2996
2997 case L_('p'): /* Generic pointer. */
2998 base = 16;
2999 /* A PTR must be the same size as a `long int'. */
3000 flags &= ~(SHORT|LONGDBL);
3001 if (need_long)
3002 flags |= LONG;
3003 flags |= READ_POINTER;
3004 goto number;
3005
3006 default:
3007 /* If this is an unknown format character punt. */
3008 conv_error ();
3009 }
3010 }
3011
3012 /* The last thing we saw in the format string was a white space.
3013 Consume the last white spaces. */
3014 if (skip_space)
3015 {
3016 do
3017 c = inchar ();
3018 while (ISSPACE (c));
3019 ungetc (c, s);
3020 }
3021
3022 errout:
3023 /* Unlock stream. */
3024 UNLOCK_STREAM (s);
3025
3026 scratch_buffer_free (&charbuf.scratch);
3027 if (errp != NULL)
3028 *errp |= errval;
3029
3030 if (__glibc_unlikely (done == EOF))
3031 {
3032 if (__glibc_unlikely (ptrs_to_free != NULL))
3033 {
3034 struct ptrs_to_free *p = ptrs_to_free;
3035 while (p != NULL)
3036 {
3037 for (size_t cnt = 0; cnt < p->count; ++cnt)
3038 {
3039 free (*p->ptrs[cnt]);
3040 *p->ptrs[cnt] = NULL;
3041 }
3042 p = p->next;
3043 ptrs_to_free = p;
3044 }
3045 }
3046 }
3047 else if (__glibc_unlikely (strptr != NULL))
3048 {
3049 free (*strptr);
3050 *strptr = NULL;
3051 }
3052 return done;
3053}
3054
3055#ifdef COMPILE_WSCANF
3056int
3057__vfwscanf (FILE *s, const wchar_t *format, va_list argptr)
3058{
3059 return _IO_vfwscanf (s, format, argptr, NULL);
3060}
3061ldbl_weak_alias (__vfwscanf, vfwscanf)
3062#else
3063int
3064___vfscanf (FILE *s, const char *format, va_list argptr)
3065{
3066 return _IO_vfscanf_internal (s, format, argptr, NULL);
3067}
3068ldbl_strong_alias (_IO_vfscanf_internal, _IO_vfscanf)
3069ldbl_hidden_def (_IO_vfscanf_internal, _IO_vfscanf)
3070ldbl_strong_alias (___vfscanf, __vfscanf)
3071ldbl_hidden_def (___vfscanf, __vfscanf)
3072ldbl_weak_alias (___vfscanf, vfscanf)
3073#endif
3074