1/* Copyright (C) 1991-2018 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
8
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <http://www.gnu.org/licenses/>. */
17
18#include <assert.h>
19#include <errno.h>
20#include <limits.h>
21#include <ctype.h>
22#include <stdarg.h>
23#include <stdbool.h>
24#include <stdio.h>
25#include <stdint.h>
26#include <stdlib.h>
27#include <string.h>
28#include <wchar.h>
29#include <wctype.h>
30#include <libc-diag.h>
31#include <libc-lock.h>
32#include <locale/localeinfo.h>
33#include <scratch_buffer.h>
34
/* Pick the widest integer type this file works with.  GNU compilers
   provide `long long', which is also advertised through
   HAVE_LONGLONG; every other compiler falls back to plain `long'.  */
#ifndef __GNUC__
# define LONGLONG       long
#else
# define HAVE_LONGLONG
# define LONGLONG       long long
#endif
41
/* need_longlong is 1 when the range of `long long' differs from that
   of `long', so that `long long' conversions need code of their own,
   and 0 when the two limits coincide.  */
#if LONG_MAX != LONG_LONG_MAX
# define need_longlong  1
#else
# define need_longlong  0
#endif
48
/* need_long is 1 when the range of `long' differs from that of `int',
   so that `long' conversions need code of their own, and 0 when the
   two limits coincide.  */
#if INT_MAX != LONG_MAX
# define need_long      1
#else
# define need_long      0
#endif
55
/* Bits collected in `flags' while a conversion specification is being
   parsed.  Each flag occupies one bit so that they can be or-ed
   together freely.  */
#define LONG            (1 << 0)   /* `l' modifier: long or double.  */
#define LONGDBL         (1 << 1)   /* `L' modifier: long long or long
                                      double.  */
#define SHORT           (1 << 2)   /* `h' modifier: short.  */
#define SUPPRESS        (1 << 3)   /* `*' flag: do not assign.  */
#define POINTER         (1 << 4)   /* Weird %p pointer (`fake hex').  */
#define NOSKIP          (1 << 5)   /* Do not skip blanks.  */
#define NUMBER_SIGNED   (1 << 6)   /* The integer is signed.  */
#define GROUP           (1 << 7)   /* `'' flag: grouped numbers.  */
#define GNU_MALLOC      (1 << 8)   /* `a' modifier: malloc strings.  */
#define CHAR            (1 << 9)   /* `hh' modifier: char.  */
#define I18N            (1 << 10)  /* `I' flag: use the locale's
                                      digits.  */
#define HEXA_FLOAT      (1 << 11)  /* Hexadecimal float.  */
#define READ_POINTER    (1 << 12)  /* This is a pointer value.  */
#define POSIX_MALLOC    (1 << 13)  /* `m' modifier: malloc strings.  */

/* Either of the two ways to request a malloc'd string.  */
#define MALLOC          (GNU_MALLOC | POSIX_MALLOC)
72
73#include <locale/localeinfo.h>
74#include <libioP.h>
75
/* From here on `va_list' names the libio argument-list type
   _IO_va_list; any earlier macro definition of the name is dropped
   first.  */
#undef va_list
#define va_list _IO_va_list
78
/* Character-level primitives of the scanner.  This source is written
   for two builds: byte streams (the default) and, when COMPILE_WSCANF
   is defined, wide-character streams.

   The input macros operate on variables of the function that uses
   them: `c' (the last character read), `s' (the stream), `read_in'
   (the count of characters consumed) and `inchar_errno' (the errno
   value saved by the last failed read).  The classification macros of
   the byte-stream build additionally use `loc'.  */
#ifndef COMPILE_WSCANF

/* Push C back onto S and take it out of the read_in count, unless C
   is EOF.  */
# define ungetc(c, s) \
  ((void) ((int) c == EOF \
           || (--read_in, \
               _IO_sputbackc (s, (unsigned char) c))))

/* Like ungetc, but without the test for EOF.  */
# define ungetc_not_eof(c, s) \
  ((void) (--read_in, \
           _IO_sputbackc (s, (unsigned char) c)))

/* Yield the next input character, storing it in `c' as well.  Once
   `c' is EOF no further read is attempted: errno is set to the value
   kept in inchar_errno and EOF is yielded again.  A successful read
   increments read_in; a read that returns EOF saves errno in
   inchar_errno.  */
# define inchar() \
  (c == EOF ? ((errno = inchar_errno), EOF) \
   : ((c = _IO_getc_unlocked (s)), \
      (void) (c != EOF \
              ? ++read_in \
              : (size_t) (inchar_errno = errno)), c))

/* Character classification and case mapping, done in locale `loc'.  */
# define ISSPACE(Ch)    __isspace_l (Ch, loc)
# define ISDIGIT(Ch)    __isdigit_l (Ch, loc)
# define ISXDIGIT(Ch)   __isxdigit_l (Ch, loc)
# define TOLOWER(Ch)    __tolower_l ((unsigned char) (Ch), loc)

/* Leave the enclosing function with EOF unless _IO_fwide (s, -1)
   yields -1.  The test is made only if _IO_vtable_offset (s) is
   zero.  */
# define ORIENT \
  if (_IO_vtable_offset (s) == 0 \
      && _IO_fwide (s, -1) != -1) \
    return EOF

/* Literals and character types of the byte-stream build.  */
# define L_(Str)        Str
# define CHAR_T         char
# define UCHAR_T        unsigned char
# define WINT_T         int

#else /* COMPILE_WSCANF */

/* Push C back onto S and take it out of the read_in count, unless C
   is WEOF.  */
# define ungetc(c, s) \
  ((void) (c == WEOF \
           || (--read_in, \
               _IO_sputbackwc (s, c))))

/* Like ungetc, but without the test for WEOF.  */
# define ungetc_not_eof(c, s) \
  ((void) (--read_in, \
           _IO_sputbackwc (s, c)))

/* Yield the next wide input character, storing it in `c' as well.
   Once `c' is WEOF no further read is attempted: errno is set to the
   value kept in inchar_errno and WEOF is yielded again.  A successful
   read increments read_in; a read that returns WEOF saves errno in
   inchar_errno.  */
# define inchar() \
  (c == WEOF ? ((errno = inchar_errno), WEOF) \
   : ((c = _IO_getwc_unlocked (s)), \
      (void) (c != WEOF \
              ? ++read_in \
              : (size_t) (inchar_errno = errno)), c))

/* Character classification and case mapping for wide characters.  */
# define ISSPACE(Ch)    iswspace (Ch)
# define ISDIGIT(Ch)    iswdigit (Ch)
# define ISXDIGIT(Ch)   iswxdigit (Ch)
# define TOLOWER(Ch)    towlower (Ch)

/* Leave the enclosing function with WEOF unless _IO_fwide (s, 1)
   yields 1.  */
# define ORIENT         if (_IO_fwide (s, 1) != 1) return WEOF

/* Map the internal strto* helpers to their wcsto* counterparts.  */
# define __strtoll_internal     __wcstoll_internal
# define __strtoull_internal    __wcstoull_internal
# define __strtol_internal      __wcstol_internal
# define __strtoul_internal     __wcstoul_internal
# define __strtold_internal     __wcstold_internal
# define __strtod_internal      __wcstod_internal
# define __strtof_internal      __wcstof_internal

/* Literals and character types of the wide-character build.  */
# define L_(Str)        L##Str
# define CHAR_T         wchar_t
# define UCHAR_T        unsigned int
# define WINT_T         wint_t

/* In this build every use of EOF below means WEOF.  */
# undef EOF
# define EOF            WEOF

#endif /* COMPILE_WSCANF */
134
135#include "printf-parse.h" /* Use read_int. */
136
/* Give up because of an invalid character encoding: record error
   class 4 in `errval', set errno to EILSEQ and jump to the `errout'
   label of the enclosing function.  */
#define encode_error() \
  do \
    { \
      errval = 4; \
      __set_errno (EILSEQ); \
      goto errout; \
    } \
  while (0)
/* Give up because the input does not match the conversion: record
   error class 2 in `errval' and jump to the `errout' label of the
   enclosing function.  */
#define conv_error() \
  do \
    { \
      errval = 2; \
      goto errout; \
    } \
  while (0)
/* Give up because no more input could be read: record error class 1
   in `errval', turn a `done' count of zero into EOF (a non-zero count
   is kept) and jump to the `errout' label of the enclosing
   function.  */
#define input_error() \
  do \
    { \
      errval = 1; \
      if (done == 0) \
        done = EOF; \
      goto errout; \
    } \
  while (0)
/* Record PTR in the chunk list headed by the local variable
   `ptrs_to_free'.  When there is no chunk yet, or the head chunk has
   all of its slots in use, a new empty chunk is obtained with alloca
   and linked in front of the list before PTR is stored.  */
#define add_ptr_to_free(ptr) \
  do \
    { \
      if (ptrs_to_free == NULL \
          || (ptrs_to_free->count \
              == (sizeof (ptrs_to_free->ptrs) \
                  / sizeof (ptrs_to_free->ptrs[0])))) \
        { \
          struct ptrs_to_free *fresh_chunk \
            = alloca (sizeof (*fresh_chunk)); \
          fresh_chunk->next = ptrs_to_free; \
          fresh_chunk->count = 0; \
          ptrs_to_free = fresh_chunk; \
        } \
      ptrs_to_free->ptrs[ptrs_to_free->count++] = (ptr); \
    } \
  while (0)
/* Validate the arguments of the scanning function and return EOF
   from it if they cannot be used:
     - CHECK_FILE (s, EOF) checks the stream object for consistency
       (macro defined elsewhere);
     - a stream with _IO_NO_READS set is rejected with errno set to
       EBADF;
     - a null FORMAT is rejected after MAYBE_SET_EINVAL (macro defined
       elsewhere; presumably it sets errno to EINVAL).  */
#define ARGCHECK(s, format) \
  do \
    { \
      CHECK_FILE (s, EOF); \
      if (s->_flags & _IO_NO_READS) \
        { \
          __set_errno (EBADF); \
          return EOF; \
        } \
      if (format == NULL) \
        { \
          MAYBE_SET_EINVAL; \
          return EOF; \
        } \
    } \
  while (0)
/* Lock stream S.  Before the lock is taken, _IO_funlockfile is
   registered for S through __libc_cleanup_region_start.  The
   expansion consists of two statements that are deliberately not
   wrapped in a block; presumably the cleanup region opened here has
   to be closed by UNLOCK_STREAM in the same scope.  */
#define LOCK_STREAM(S) \
  __libc_cleanup_region_start (1, \
                               (void (*) (void *)) &_IO_funlockfile, \
                               (S)); \
  _IO_flockfile (S)

/* Unlock stream S and end the cleanup region; the argument 0 is
   passed to __libc_cleanup_region_end (presumably so that the
   registered handler is not run a second time).  */
#define UNLOCK_STREAM(S) \
  _IO_funlockfile (S); \
  __libc_cleanup_region_end (0)
188
/* One chunk of the list maintained by add_ptr_to_free.  Each chunk
   remembers up to 32 `char **' values, i.e. the locations of string
   pointers handed out to the caller (judging by the name, so that
   they can be freed later).  */
struct ptrs_to_free
{
  /* Number of entries of PTRS that are in use.  */
  size_t count;
  /* The chunk that was the list head before this one, or NULL.  */
  struct ptrs_to_free *next;
  /* The recorded pointer locations.  */
  char **ptrs[32];
};
195
196struct char_buffer {
197 CHAR_T *current;
198 CHAR_T *end;
199 struct scratch_buffer scratch;
200};
201
202/* Returns a pointer to the first CHAR_T object in the buffer. Only
203 valid if char_buffer_add (BUFFER, CH) has been called and
204 char_buffer_error (BUFFER) is false. */
205static inline CHAR_T *
206char_buffer_start (const struct char_buffer *buffer)
207{
208 return (CHAR_T *) buffer->scratch.data;
209}
210
211/* Returns the number of CHAR_T objects in the buffer. Only valid if
212 char_buffer_error (BUFFER) is false. */
213static inline size_t
214char_buffer_size (const struct char_buffer *buffer)
215{
216 return buffer->current - char_buffer_start (buffer);
217}
218
219/* Reinitializes BUFFER->current and BUFFER->end to cover the entire
220 scratch buffer. */
221static inline void
222char_buffer_rewind (struct char_buffer *buffer)
223{
224 buffer->current = char_buffer_start (buffer);
225 buffer->end = buffer->current + buffer->scratch.length / sizeof (CHAR_T);
226}
227
228/* Returns true if a previous call to char_buffer_add (BUFFER, CH)
229 failed. */
230static inline bool
231char_buffer_error (const struct char_buffer *buffer)
232{
233 return __glibc_unlikely (buffer->current == NULL);
234}
235
236/* Slow path for char_buffer_add. */
237static void
238char_buffer_add_slow (struct char_buffer *buffer, CHAR_T ch)
239{
240 if (char_buffer_error (buffer))
241 return;
242 size_t offset = buffer->end - (CHAR_T *) buffer->scratch.data;
243 if (!scratch_buffer_grow_preserve (&buffer->scratch))
244 {
245 buffer->current = NULL;
246 buffer->end = NULL;
247 return;
248 }
249 char_buffer_rewind (buffer);
250 buffer->current += offset;
251 *buffer->current++ = ch;
252}
253
254/* Adds CH to BUFFER. This function does not report any errors, check
255 for them with char_buffer_error. */
256static inline void
257char_buffer_add (struct char_buffer *buffer, CHAR_T ch)
258 __attribute__ ((always_inline));
259static inline void
260char_buffer_add (struct char_buffer *buffer, CHAR_T ch)
261{
262 if (__glibc_unlikely (buffer->current == buffer->end))
263 char_buffer_add_slow (buffer, ch);
264 else
265 *buffer->current++ = ch;
266}
267
268/* Read formatted input from S according to the format string
269 FORMAT, using the argument list in ARG.
270 Return the number of assignments made, or -1 for an input error. */
271#ifdef COMPILE_WSCANF
272int
273_IO_vfwscanf (_IO_FILE *s, const wchar_t *format, _IO_va_list argptr,
274 int *errp)
275#else
276int
277_IO_vfscanf_internal (_IO_FILE *s, const char *format, _IO_va_list argptr,
278 int *errp)
279#endif
280{
281 va_list arg;
282 const CHAR_T *f = format;
283 UCHAR_T fc; /* Current character of the format. */
284 WINT_T done = 0; /* Assignments done. */
285 size_t read_in = 0; /* Chars read in. */
286 WINT_T c = 0; /* Last char read. */
287 int width; /* Maximum field width. */
288 int flags; /* Modifiers for current format element. */
289 int errval = 0;
290#ifndef COMPILE_WSCANF
291 locale_t loc = _NL_CURRENT_LOCALE;
292 struct __locale_data *const curctype = loc->__locales[LC_CTYPE];
293#endif
294
295 /* Errno of last failed inchar call. */
296 int inchar_errno = 0;
297 /* Status for reading F-P nums. */
298 char got_digit, got_dot, got_e, negative;
299 /* If a [...] is a [^...]. */
300 CHAR_T not_in;
301#define exp_char not_in
302 /* Base for integral numbers. */
303 int base;
304 /* Decimal point character. */
305#ifdef COMPILE_WSCANF
306 wint_t decimal;
307#else
308 const char *decimal;
309#endif
310 /* The thousands character of the current locale. */
311#ifdef COMPILE_WSCANF
312 wint_t thousands;
313#else
314 const char *thousands;
315#endif
316 struct ptrs_to_free *ptrs_to_free = NULL;
317 /* State for the conversions. */
318 mbstate_t state;
319 /* Integral holding variables. */
320 union
321 {
322 long long int q;
323 unsigned long long int uq;
324 long int l;
325 unsigned long int ul;
326 } num;
327 /* Character-buffer pointer. */
328 char *str = NULL;
329 wchar_t *wstr = NULL;
330 char **strptr = NULL;
331 ssize_t strsize = 0;
332 /* We must not react on white spaces immediately because they can
333 possibly be matched even if in the input stream no character is
334 available anymore. */
335 int skip_space = 0;
336 /* Workspace. */
337 CHAR_T *tw; /* Temporary pointer. */
338 struct char_buffer charbuf;
339 scratch_buffer_init (&charbuf.scratch);
340
341#ifdef __va_copy
342 __va_copy (arg, argptr);
343#else
344 arg = (va_list) argptr;
345#endif
346
347#ifdef ORIENT
348 ORIENT;
349#endif
350
351 ARGCHECK (s, format);
352
353 {
354#ifndef COMPILE_WSCANF
355 struct __locale_data *const curnumeric = loc->__locales[LC_NUMERIC];
356#endif
357
358 /* Figure out the decimal point character. */
359#ifdef COMPILE_WSCANF
360 decimal = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_DECIMAL_POINT_WC);
361#else
362 decimal = curnumeric->values[_NL_ITEM_INDEX (DECIMAL_POINT)].string;
363#endif
364 /* Figure out the thousands separator character. */
365#ifdef COMPILE_WSCANF
366 thousands = _NL_CURRENT_WORD (LC_NUMERIC, _NL_NUMERIC_THOUSANDS_SEP_WC);
367#else
368 thousands = curnumeric->values[_NL_ITEM_INDEX (THOUSANDS_SEP)].string;
369 if (*thousands == '\0')
370 thousands = NULL;
371#endif
372 }
373
374 /* Lock the stream. */
375 LOCK_STREAM (s);
376
377
378#ifndef COMPILE_WSCANF
379 /* From now on we use `state' to convert the format string. */
380 memset (&state, '\0', sizeof (state));
381#endif
382
383 /* Run through the format string. */
384 while (*f != '\0')
385 {
386 unsigned int argpos;
387 /* Extract the next argument, which is of type TYPE.
388 For a %N$... spec, this is the Nth argument from the beginning;
389 otherwise it is the next argument after the state now in ARG. */
390#ifdef __va_copy
391# define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
392 ({ unsigned int pos = argpos; \
393 va_list arg; \
394 __va_copy (arg, argptr); \
395 while (--pos > 0) \
396 (void) va_arg (arg, void *); \
397 va_arg (arg, type); \
398 }))
399#else
400# if 0
401 /* XXX Possible optimization. */
402# define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
403 ({ va_list arg = (va_list) argptr; \
404 arg = (va_list) ((char *) arg \
405 + (argpos - 1) \
406 * __va_rounded_size (void *)); \
407 va_arg (arg, type); \
408 }))
409# else
410# define ARG(type) (argpos == 0 ? va_arg (arg, type) : \
411 ({ unsigned int pos = argpos; \
412 va_list arg = (va_list) argptr; \
413 while (--pos > 0) \
414 (void) va_arg (arg, void *); \
415 va_arg (arg, type); \
416 }))
417# endif
418#endif
419
420#ifndef COMPILE_WSCANF
421 if (!isascii ((unsigned char) *f))
422 {
423 /* Non-ASCII, may be a multibyte. */
424 int len = __mbrlen (f, strlen (f), &state);
425 if (len > 0)
426 {
427 do
428 {
429 c = inchar ();
430 if (__glibc_unlikely (c == EOF))
431 input_error ();
432 else if (c != (unsigned char) *f++)
433 {
434 ungetc_not_eof (c, s);
435 conv_error ();
436 }
437 }
438 while (--len > 0);
439 continue;
440 }
441 }
442#endif
443
444 fc = *f++;
445 if (fc != '%')
446 {
447 /* Remember to skip spaces. */
448 if (ISSPACE (fc))
449 {
450 skip_space = 1;
451 continue;
452 }
453
454 /* Read a character. */
455 c = inchar ();
456
457 /* Characters other than format specs must just match. */
458 if (__glibc_unlikely (c == EOF))
459 input_error ();
460
461 /* We saw white space char as the last character in the format
462 string. Now it's time to skip all leading white space. */
463 if (skip_space)
464 {
465 while (ISSPACE (c))
466 if (__glibc_unlikely (inchar () == EOF))
467 input_error ();
468 skip_space = 0;
469 }
470
471 if (__glibc_unlikely (c != fc))
472 {
473 ungetc (c, s);
474 conv_error ();
475 }
476
477 continue;
478 }
479
480 /* This is the start of the conversion string. */
481 flags = 0;
482
483 /* Initialize state of modifiers. */
484 argpos = 0;
485
486 /* Prepare temporary buffer. */
487 char_buffer_rewind (&charbuf);
488
489 /* Check for a positional parameter specification. */
490 if (ISDIGIT ((UCHAR_T) *f))
491 {
492 argpos = read_int ((const UCHAR_T **) &f);
493 if (*f == L_('$'))
494 ++f;
495 else
496 {
497 /* Oops; that was actually the field width. */
498 width = argpos;
499 argpos = 0;
500 goto got_width;
501 }
502 }
503
504 /* Check for the assignment-suppressing, the number grouping flag,
505 and the signal to use the locale's digit representation. */
506 while (*f == L_('*') || *f == L_('\'') || *f == L_('I'))
507 switch (*f++)
508 {
509 case L_('*'):
510 flags |= SUPPRESS;
511 break;
512 case L_('\''):
513#ifdef COMPILE_WSCANF
514 if (thousands != L'\0')
515#else
516 if (thousands != NULL)
517#endif
518 flags |= GROUP;
519 break;
520 case L_('I'):
521 flags |= I18N;
522 break;
523 }
524
525 /* Find the maximum field width. */
526 width = 0;
527 if (ISDIGIT ((UCHAR_T) *f))
528 width = read_int ((const UCHAR_T **) &f);
529 got_width:
530 if (width == 0)
531 width = -1;
532
533 /* Check for type modifiers. */
534 switch (*f++)
535 {
536 case L_('h'):
537 /* ints are short ints or chars. */
538 if (*f == L_('h'))
539 {
540 ++f;
541 flags |= CHAR;
542 }
543 else
544 flags |= SHORT;
545 break;
546 case L_('l'):
547 if (*f == L_('l'))
548 {
549 /* A double `l' is equivalent to an `L'. */
550 ++f;
551 flags |= LONGDBL | LONG;
552 }
553 else
554 /* ints are long ints. */
555 flags |= LONG;
556 break;
557 case L_('q'):
558 case L_('L'):
559 /* doubles are long doubles, and ints are long long ints. */
560 flags |= LONGDBL | LONG;
561 break;
562 case L_('a'):
563 /* The `a' is used as a flag only if followed by `s', `S' or
564 `['. */
565 if (*f != L_('s') && *f != L_('S') && *f != L_('['))
566 {
567 --f;
568 break;
569 }
570 /* In __isoc99_*scanf %as, %aS and %a[ extension is not
571 supported at all. */
572 if (s->_flags2 & _IO_FLAGS2_SCANF_STD)
573 {
574 --f;
575 break;
576 }
577 /* String conversions (%s, %[) take a `char **'
578 arg and fill it in with a malloc'd pointer. */
579 flags |= GNU_MALLOC;
580 break;
581 case L_('m'):
582 flags |= POSIX_MALLOC;
583 if (*f == L_('l'))
584 {
585 ++f;
586 flags |= LONG;
587 }
588 break;
589 case L_('z'):
590 if (need_longlong && sizeof (size_t) > sizeof (unsigned long int))
591 flags |= LONGDBL;
592 else if (sizeof (size_t) > sizeof (unsigned int))
593 flags |= LONG;
594 break;
595 case L_('j'):
596 if (need_longlong && sizeof (uintmax_t) > sizeof (unsigned long int))
597 flags |= LONGDBL;
598 else if (sizeof (uintmax_t) > sizeof (unsigned int))
599 flags |= LONG;
600 break;
601 case L_('t'):
602 if (need_longlong && sizeof (ptrdiff_t) > sizeof (long int))
603 flags |= LONGDBL;
604 else if (sizeof (ptrdiff_t) > sizeof (int))
605 flags |= LONG;
606 break;
607 default:
608 /* Not a recognized modifier. Backup. */
609 --f;
610 break;
611 }
612
613 /* End of the format string? */
614 if (__glibc_unlikely (*f == L_('\0')))
615 conv_error ();
616
617 /* Find the conversion specifier. */
618 fc = *f++;
619 if (skip_space || (fc != L_('[') && fc != L_('c')
620 && fc != L_('C') && fc != L_('n')))
621 {
622 /* Eat whitespace. */
623 int save_errno = errno;
624 __set_errno (0);
625 do
626 /* We add the additional test for EOF here since otherwise
627 inchar will restore the old errno value which might be
628 EINTR but does not indicate an interrupt since nothing
629 was read at this time. */
630 if (__builtin_expect ((c == EOF || inchar () == EOF)
631 && errno == EINTR, 0))
632 input_error ();
633 while (ISSPACE (c));
634 __set_errno (save_errno);
635 ungetc (c, s);
636 skip_space = 0;
637 }
638
639 switch (fc)
640 {
641 case L_('%'): /* Must match a literal '%'. */
642 c = inchar ();
643 if (__glibc_unlikely (c == EOF))
644 input_error ();
645 if (__glibc_unlikely (c != fc))
646 {
647 ungetc_not_eof (c, s);
648 conv_error ();
649 }
650 break;
651
652 case L_('n'): /* Answer number of assignments done. */
653 /* Corrigendum 1 to ISO C 1990 describes the allowed flags
654 with the 'n' conversion specifier. */
655 if (!(flags & SUPPRESS))
656 {
657 /* Don't count the read-ahead. */
658 if (need_longlong && (flags & LONGDBL))
659 *ARG (long long int *) = read_in;
660 else if (need_long && (flags & LONG))
661 *ARG (long int *) = read_in;
662 else if (flags & SHORT)
663 *ARG (short int *) = read_in;
664 else if (!(flags & CHAR))
665 *ARG (int *) = read_in;
666 else
667 *ARG (char *) = read_in;
668
669#ifdef NO_BUG_IN_ISO_C_CORRIGENDUM_1
670 /* We have a severe problem here. The ISO C standard
671 contradicts itself in explaining the effect of the %n
672 format in `scanf'. While in ISO C:1990 and the ISO C
673 Amendement 1:1995 the result is described as
674
675 Execution of a %n directive does not effect the
676 assignment count returned at the completion of
677 execution of the f(w)scanf function.
678
679 in ISO C Corrigendum 1:1994 the following was added:
680
681 Subclause 7.9.6.2
682 Add the following fourth example:
683 In:
684 #include <stdio.h>
685 int d1, d2, n1, n2, i;
686 i = sscanf("123", "%d%n%n%d", &d1, &n1, &n2, &d2);
687 the value 123 is assigned to d1 and the value3 to n1.
688 Because %n can never get an input failure the value
689 of 3 is also assigned to n2. The value of d2 is not
690 affected. The value 3 is assigned to i.
691
692 We go for now with the historically correct code from ISO C,
693 i.e., we don't count the %n assignments. When it ever
694 should proof to be wrong just remove the #ifdef above. */
695 ++done;
696#endif
697 }
698 break;
699
700 case L_('c'): /* Match characters. */
701 if ((flags & LONG) == 0)
702 {
703 if (width == -1)
704 width = 1;
705
706#define STRING_ARG(Str, Type, Width) \
707 do if (!(flags & SUPPRESS)) \
708 { \
709 if (flags & MALLOC) \
710 { \
711 /* The string is to be stored in a malloc'd buffer. */ \
712 /* For %mS using char ** is actually wrong, but \
713 shouldn't make a difference on any arch glibc \
714 supports and would unnecessarily complicate \
715 things. */ \
716 strptr = ARG (char **); \
717 if (strptr == NULL) \
718 conv_error (); \
719 /* Allocate an initial buffer. */ \
720 strsize = Width; \
721 *strptr = (char *) malloc (strsize * sizeof (Type)); \
722 Str = (Type *) *strptr; \
723 if (Str != NULL) \
724 add_ptr_to_free (strptr); \
725 else if (flags & POSIX_MALLOC) \
726 { \
727 done = EOF; \
728 goto errout; \
729 } \
730 } \
731 else \
732 Str = ARG (Type *); \
733 if (Str == NULL) \
734 conv_error (); \
735 } while (0)
736#ifdef COMPILE_WSCANF
737 STRING_ARG (str, char, 100);
738#else
739 STRING_ARG (str, char, (width > 1024 ? 1024 : width));
740#endif
741
742 c = inchar ();
743 if (__glibc_unlikely (c == EOF))
744 input_error ();
745
746#ifdef COMPILE_WSCANF
747 /* We have to convert the wide character(s) into multibyte
748 characters and store the result. */
749 memset (&state, '\0', sizeof (state));
750
751 do
752 {
753 size_t n;
754
755 if (!(flags & SUPPRESS) && (flags & POSIX_MALLOC)
756 && *strptr + strsize - str <= MB_LEN_MAX)
757 {
758 /* We have to enlarge the buffer if the `m' flag
759 was given. */
760 size_t strleng = str - *strptr;
761 char *newstr;
762
763 newstr = (char *) realloc (*strptr, strsize * 2);
764 if (newstr == NULL)
765 {
766 /* Can't allocate that much. Last-ditch effort. */
767 newstr = (char *) realloc (*strptr,
768 strleng + MB_LEN_MAX);
769 if (newstr == NULL)
770 {
771 /* c can't have `a' flag, only `m'. */
772 done = EOF;
773 goto errout;
774 }
775 else
776 {
777 *strptr = newstr;
778 str = newstr + strleng;
779 strsize = strleng + MB_LEN_MAX;
780 }
781 }
782 else
783 {
784 *strptr = newstr;
785 str = newstr + strleng;
786 strsize *= 2;
787 }
788 }
789
790 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c, &state);
791 if (__glibc_unlikely (n == (size_t) -1))
792 /* No valid wide character. */
793 input_error ();
794
795 /* Increment the output pointer. Even if we don't
796 write anything. */
797 str += n;
798 }
799 while (--width > 0 && inchar () != EOF);
800#else
801 if (!(flags & SUPPRESS))
802 {
803 do
804 {
805 if ((flags & MALLOC)
806 && (char *) str == *strptr + strsize)
807 {
808 /* Enlarge the buffer. */
809 size_t newsize
810 = strsize
811 + (strsize >= width ? width - 1 : strsize);
812
813 str = (char *) realloc (*strptr, newsize);
814 if (str == NULL)
815 {
816 /* Can't allocate that much. Last-ditch
817 effort. */
818 str = (char *) realloc (*strptr, strsize + 1);
819 if (str == NULL)
820 {
821 /* c can't have `a' flag, only `m'. */
822 done = EOF;
823 goto errout;
824 }
825 else
826 {
827 *strptr = (char *) str;
828 str += strsize;
829 ++strsize;
830 }
831 }
832 else
833 {
834 *strptr = (char *) str;
835 str += strsize;
836 strsize = newsize;
837 }
838 }
839 *str++ = c;
840 }
841 while (--width > 0 && inchar () != EOF);
842 }
843 else
844 while (--width > 0 && inchar () != EOF);
845#endif
846
847 if (!(flags & SUPPRESS))
848 {
849 if ((flags & MALLOC) && str - *strptr != strsize)
850 {
851 char *cp = (char *) realloc (*strptr, str - *strptr);
852 if (cp != NULL)
853 *strptr = cp;
854 }
855 strptr = NULL;
856 ++done;
857 }
858
859 break;
860 }
861 /* FALLTHROUGH */
862 case L_('C'):
863 if (width == -1)
864 width = 1;
865
866 STRING_ARG (wstr, wchar_t, (width > 1024 ? 1024 : width));
867
868 c = inchar ();
869 if (__glibc_unlikely (c == EOF))
870 input_error ();
871
872#ifdef COMPILE_WSCANF
873 /* Just store the incoming wide characters. */
874 if (!(flags & SUPPRESS))
875 {
876 do
877 {
878 if ((flags & MALLOC)
879 && wstr == (wchar_t *) *strptr + strsize)
880 {
881 size_t newsize
882 = strsize + (strsize > width ? width - 1 : strsize);
883 /* Enlarge the buffer. */
884 wstr = (wchar_t *) realloc (*strptr,
885 newsize * sizeof (wchar_t));
886 if (wstr == NULL)
887 {
888 /* Can't allocate that much. Last-ditch effort. */
889 wstr = (wchar_t *) realloc (*strptr,
890 (strsize + 1)
891 * sizeof (wchar_t));
892 if (wstr == NULL)
893 {
894 /* C or lc can't have `a' flag, only `m'
895 flag. */
896 done = EOF;
897 goto errout;
898 }
899 else
900 {
901 *strptr = (char *) wstr;
902 wstr += strsize;
903 ++strsize;
904 }
905 }
906 else
907 {
908 *strptr = (char *) wstr;
909 wstr += strsize;
910 strsize = newsize;
911 }
912 }
913 *wstr++ = c;
914 }
915 while (--width > 0 && inchar () != EOF);
916 }
917 else
918 while (--width > 0 && inchar () != EOF);
919#else
920 {
921 /* We have to convert the multibyte input sequence to wide
922 characters. */
923 char buf[1];
924 mbstate_t cstate;
925
926 memset (&cstate, '\0', sizeof (cstate));
927
928 do
929 {
930 /* This is what we present the mbrtowc function first. */
931 buf[0] = c;
932
933 if (!(flags & SUPPRESS) && (flags & MALLOC)
934 && wstr == (wchar_t *) *strptr + strsize)
935 {
936 size_t newsize
937 = strsize + (strsize > width ? width - 1 : strsize);
938 /* Enlarge the buffer. */
939 wstr = (wchar_t *) realloc (*strptr,
940 newsize * sizeof (wchar_t));
941 if (wstr == NULL)
942 {
943 /* Can't allocate that much. Last-ditch effort. */
944 wstr = (wchar_t *) realloc (*strptr,
945 ((strsize + 1)
946 * sizeof (wchar_t)));
947 if (wstr == NULL)
948 {
949 /* C or lc can't have `a' flag, only `m' flag. */
950 done = EOF;
951 goto errout;
952 }
953 else
954 {
955 *strptr = (char *) wstr;
956 wstr += strsize;
957 ++strsize;
958 }
959 }
960 else
961 {
962 *strptr = (char *) wstr;
963 wstr += strsize;
964 strsize = newsize;
965 }
966 }
967
968 while (1)
969 {
970 size_t n;
971
972 n = __mbrtowc (!(flags & SUPPRESS) ? wstr : NULL,
973 buf, 1, &cstate);
974
975 if (n == (size_t) -2)
976 {
977 /* Possibly correct character, just not enough
978 input. */
979 if (__glibc_unlikely (inchar () == EOF))
980 encode_error ();
981
982 buf[0] = c;
983 continue;
984 }
985
986 if (__glibc_unlikely (n != 1))
987 encode_error ();
988
989 /* We have a match. */
990 break;
991 }
992
993 /* Advance the result pointer. */
994 ++wstr;
995 }
996 while (--width > 0 && inchar () != EOF);
997 }
998#endif
999
1000 if (!(flags & SUPPRESS))
1001 {
1002 if ((flags & MALLOC) && wstr - (wchar_t *) *strptr != strsize)
1003 {
1004 wchar_t *cp = (wchar_t *) realloc (*strptr,
1005 ((wstr
1006 - (wchar_t *) *strptr)
1007 * sizeof (wchar_t)));
1008 if (cp != NULL)
1009 *strptr = (char *) cp;
1010 }
1011 strptr = NULL;
1012
1013 ++done;
1014 }
1015
1016 break;
1017
1018 case L_('s'): /* Read a string. */
1019 if (!(flags & LONG))
1020 {
1021 STRING_ARG (str, char, 100);
1022
1023 c = inchar ();
1024 if (__glibc_unlikely (c == EOF))
1025 input_error ();
1026
1027#ifdef COMPILE_WSCANF
1028 memset (&state, '\0', sizeof (state));
1029#endif
1030
1031 do
1032 {
1033 if (ISSPACE (c))
1034 {
1035 ungetc_not_eof (c, s);
1036 break;
1037 }
1038
1039#ifdef COMPILE_WSCANF
1040 /* This is quite complicated. We have to convert the
1041 wide characters into multibyte characters and then
1042 store them. */
1043 {
1044 size_t n;
1045
1046 if (!(flags & SUPPRESS) && (flags & MALLOC)
1047 && *strptr + strsize - str <= MB_LEN_MAX)
1048 {
1049 /* We have to enlarge the buffer if the `a' or `m'
1050 flag was given. */
1051 size_t strleng = str - *strptr;
1052 char *newstr;
1053
1054 newstr = (char *) realloc (*strptr, strsize * 2);
1055 if (newstr == NULL)
1056 {
1057 /* Can't allocate that much. Last-ditch
1058 effort. */
1059 newstr = (char *) realloc (*strptr,
1060 strleng + MB_LEN_MAX);
1061 if (newstr == NULL)
1062 {
1063 if (flags & POSIX_MALLOC)
1064 {
1065 done = EOF;
1066 goto errout;
1067 }
1068 /* We lose. Oh well. Terminate the
1069 string and stop converting,
1070 so at least we don't skip any input. */
1071 ((char *) (*strptr))[strleng] = '\0';
1072 strptr = NULL;
1073 ++done;
1074 conv_error ();
1075 }
1076 else
1077 {
1078 *strptr = newstr;
1079 str = newstr + strleng;
1080 strsize = strleng + MB_LEN_MAX;
1081 }
1082 }
1083 else
1084 {
1085 *strptr = newstr;
1086 str = newstr + strleng;
1087 strsize *= 2;
1088 }
1089 }
1090
1091 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c,
1092 &state);
1093 if (__glibc_unlikely (n == (size_t) -1))
1094 encode_error ();
1095
1096 assert (n <= MB_LEN_MAX);
1097 str += n;
1098 }
1099#else
1100 /* This is easy. */
1101 if (!(flags & SUPPRESS))
1102 {
1103 *str++ = c;
1104 if ((flags & MALLOC)
1105 && (char *) str == *strptr + strsize)
1106 {
1107 /* Enlarge the buffer. */
1108 str = (char *) realloc (*strptr, 2 * strsize);
1109 if (str == NULL)
1110 {
1111 /* Can't allocate that much. Last-ditch
1112 effort. */
1113 str = (char *) realloc (*strptr, strsize + 1);
1114 if (str == NULL)
1115 {
1116 if (flags & POSIX_MALLOC)
1117 {
1118 done = EOF;
1119 goto errout;
1120 }
1121 /* We lose. Oh well. Terminate the
1122 string and stop converting,
1123 so at least we don't skip any input. */
1124 ((char *) (*strptr))[strsize - 1] = '\0';
1125 strptr = NULL;
1126 ++done;
1127 conv_error ();
1128 }
1129 else
1130 {
1131 *strptr = (char *) str;
1132 str += strsize;
1133 ++strsize;
1134 }
1135 }
1136 else
1137 {
1138 *strptr = (char *) str;
1139 str += strsize;
1140 strsize *= 2;
1141 }
1142 }
1143 }
1144#endif
1145 }
1146 while ((width <= 0 || --width > 0) && inchar () != EOF);
1147
1148 if (!(flags & SUPPRESS))
1149 {
1150#ifdef COMPILE_WSCANF
1151 /* We have to emit the code to get into the initial
1152 state. */
1153 char buf[MB_LEN_MAX];
1154 size_t n = __wcrtomb (buf, L'\0', &state);
1155 if (n > 0 && (flags & MALLOC)
1156 && str + n >= *strptr + strsize)
1157 {
1158 /* Enlarge the buffer. */
1159 size_t strleng = str - *strptr;
1160 char *newstr;
1161
1162 newstr = (char *) realloc (*strptr, strleng + n + 1);
1163 if (newstr == NULL)
1164 {
1165 if (flags & POSIX_MALLOC)
1166 {
1167 done = EOF;
1168 goto errout;
1169 }
1170 /* We lose. Oh well. Terminate the string
1171 and stop converting, so at least we don't
1172 skip any input. */
1173 ((char *) (*strptr))[strleng] = '\0';
1174 strptr = NULL;
1175 ++done;
1176 conv_error ();
1177 }
1178 else
1179 {
1180 *strptr = newstr;
1181 str = newstr + strleng;
1182 strsize = strleng + n + 1;
1183 }
1184 }
1185
1186 str = __mempcpy (str, buf, n);
1187#endif
1188 *str++ = '\0';
1189
1190 if ((flags & MALLOC) && str - *strptr != strsize)
1191 {
1192 char *cp = (char *) realloc (*strptr, str - *strptr);
1193 if (cp != NULL)
1194 *strptr = cp;
1195 }
1196 strptr = NULL;
1197
1198 ++done;
1199 }
1200 break;
1201 }
1202 /* FALLTHROUGH */
1203
1204 case L_('S'):
1205 {
1206#ifndef COMPILE_WSCANF
1207 mbstate_t cstate;
1208#endif
1209
1210 /* Wide character string. */
1211 STRING_ARG (wstr, wchar_t, 100);
1212
1213 c = inchar ();
1214 if (__builtin_expect (c == EOF, 0))
1215 input_error ();
1216
1217#ifndef COMPILE_WSCANF
1218 memset (&cstate, '\0', sizeof (cstate));
1219#endif
1220
1221 do
1222 {
1223 if (ISSPACE (c))
1224 {
1225 ungetc_not_eof (c, s);
1226 break;
1227 }
1228
1229#ifdef COMPILE_WSCANF
1230 /* This is easy. */
1231 if (!(flags & SUPPRESS))
1232 {
1233 *wstr++ = c;
1234 if ((flags & MALLOC)
1235 && wstr == (wchar_t *) *strptr + strsize)
1236 {
1237 /* Enlarge the buffer. */
1238 wstr = (wchar_t *) realloc (*strptr,
1239 (2 * strsize)
1240 * sizeof (wchar_t));
1241 if (wstr == NULL)
1242 {
1243 /* Can't allocate that much. Last-ditch
1244 effort. */
1245 wstr = (wchar_t *) realloc (*strptr,
1246 (strsize + 1)
1247 * sizeof (wchar_t));
1248 if (wstr == NULL)
1249 {
1250 if (flags & POSIX_MALLOC)
1251 {
1252 done = EOF;
1253 goto errout;
1254 }
1255 /* We lose. Oh well. Terminate the string
1256 and stop converting, so at least we don't
1257 skip any input. */
1258 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
1259 strptr = NULL;
1260 ++done;
1261 conv_error ();
1262 }
1263 else
1264 {
1265 *strptr = (char *) wstr;
1266 wstr += strsize;
1267 ++strsize;
1268 }
1269 }
1270 else
1271 {
1272 *strptr = (char *) wstr;
1273 wstr += strsize;
1274 strsize *= 2;
1275 }
1276 }
1277 }
1278#else
1279 {
1280 char buf[1];
1281
1282 buf[0] = c;
1283
1284 while (1)
1285 {
1286 size_t n;
1287
1288 n = __mbrtowc (!(flags & SUPPRESS) ? wstr : NULL,
1289 buf, 1, &cstate);
1290
1291 if (n == (size_t) -2)
1292 {
1293 /* Possibly correct character, just not enough
1294 input. */
1295 if (__glibc_unlikely (inchar () == EOF))
1296 encode_error ();
1297
1298 buf[0] = c;
1299 continue;
1300 }
1301
1302 if (__glibc_unlikely (n != 1))
1303 encode_error ();
1304
1305 /* We have a match. */
1306 ++wstr;
1307 break;
1308 }
1309
1310 if (!(flags & SUPPRESS) && (flags & MALLOC)
1311 && wstr == (wchar_t *) *strptr + strsize)
1312 {
1313 /* Enlarge the buffer. */
1314 wstr = (wchar_t *) realloc (*strptr,
1315 (2 * strsize
1316 * sizeof (wchar_t)));
1317 if (wstr == NULL)
1318 {
1319 /* Can't allocate that much. Last-ditch effort. */
1320 wstr = (wchar_t *) realloc (*strptr,
1321 ((strsize + 1)
1322 * sizeof (wchar_t)));
1323 if (wstr == NULL)
1324 {
1325 if (flags & POSIX_MALLOC)
1326 {
1327 done = EOF;
1328 goto errout;
1329 }
1330 /* We lose. Oh well. Terminate the
1331 string and stop converting, so at
1332 least we don't skip any input. */
1333 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
1334 strptr = NULL;
1335 ++done;
1336 conv_error ();
1337 }
1338 else
1339 {
1340 *strptr = (char *) wstr;
1341 wstr += strsize;
1342 ++strsize;
1343 }
1344 }
1345 else
1346 {
1347 *strptr = (char *) wstr;
1348 wstr += strsize;
1349 strsize *= 2;
1350 }
1351 }
1352 }
1353#endif
1354 }
1355 while ((width <= 0 || --width > 0) && inchar () != EOF);
1356
1357 if (!(flags & SUPPRESS))
1358 {
1359 *wstr++ = L'\0';
1360
1361 if ((flags & MALLOC) && wstr - (wchar_t *) *strptr != strsize)
1362 {
1363 wchar_t *cp = (wchar_t *) realloc (*strptr,
1364 ((wstr
1365 - (wchar_t *) *strptr)
1366 * sizeof(wchar_t)));
1367 if (cp != NULL)
1368 *strptr = (char *) cp;
1369 }
1370 strptr = NULL;
1371
1372 ++done;
1373 }
1374 }
1375 break;
1376
1377 case L_('x'): /* Hexadecimal integer. */
1378 case L_('X'): /* Ditto. */
1379 base = 16;
1380 goto number;
1381
1382 case L_('o'): /* Octal integer. */
1383 base = 8;
1384 goto number;
1385
1386 case L_('u'): /* Unsigned decimal integer. */
1387 base = 10;
1388 goto number;
1389
1390 case L_('d'): /* Signed decimal integer. */
1391 base = 10;
1392 flags |= NUMBER_SIGNED;
1393 goto number;
1394
1395 case L_('i'): /* Generic number. */
1396 base = 0;
1397 flags |= NUMBER_SIGNED;
1398
1399 number:
1400 c = inchar ();
1401 if (__glibc_unlikely (c == EOF))
1402 input_error ();
1403
1404 /* Check for a sign. */
1405 if (c == L_('-') || c == L_('+'))
1406 {
1407 char_buffer_add (&charbuf, c);
1408 if (width > 0)
1409 --width;
1410 c = inchar ();
1411 }
1412
1413 /* Look for a leading indication of base. */
1414 if (width != 0 && c == L_('0'))
1415 {
1416 if (width > 0)
1417 --width;
1418
1419 char_buffer_add (&charbuf, c);
1420 c = inchar ();
1421
1422 if (width != 0 && TOLOWER (c) == L_('x'))
1423 {
1424 if (base == 0)
1425 base = 16;
1426 if (base == 16)
1427 {
1428 if (width > 0)
1429 --width;
1430 c = inchar ();
1431 }
1432 }
1433 else if (base == 0)
1434 base = 8;
1435 }
1436
1437 if (base == 0)
1438 base = 10;
1439
1440 if (base == 10 && __builtin_expect ((flags & I18N) != 0, 0))
1441 {
1442 int from_level;
1443 int to_level;
1444 int level;
1445#ifdef COMPILE_WSCANF
1446 const wchar_t *wcdigits[10];
1447 const wchar_t *wcdigits_extended[10];
1448#else
1449 const char *mbdigits[10];
1450 const char *mbdigits_extended[10];
1451#endif
1452 /* "to_inpunct" is a map from ASCII digits to their
1453 equivalent in locale. This is defined for locales
1454 which use an extra digits set. */
1455 wctrans_t map = __wctrans ("to_inpunct");
1456 int n;
1457
1458 from_level = 0;
1459#ifdef COMPILE_WSCANF
1460 to_level = _NL_CURRENT_WORD (LC_CTYPE,
1461 _NL_CTYPE_INDIGITS_WC_LEN) - 1;
1462#else
1463 to_level = (uint32_t) curctype->values[_NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN)].word - 1;
1464#endif
1465
1466 /* Get the alternative digit forms if there are any. */
1467 if (__glibc_unlikely (map != NULL))
1468 {
1469 /* Adding new level for extra digits set in locale file. */
1470 ++to_level;
1471
1472 for (n = 0; n < 10; ++n)
1473 {
1474#ifdef COMPILE_WSCANF
1475 wcdigits[n] = (const wchar_t *)
1476 _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
1477
1478 wchar_t *wc_extended = (wchar_t *)
1479 alloca ((to_level + 2) * sizeof (wchar_t));
1480 __wmemcpy (wc_extended, wcdigits[n], to_level);
1481 wc_extended[to_level] = __towctrans (L'0' + n, map);
1482 wc_extended[to_level + 1] = '\0';
1483 wcdigits_extended[n] = wc_extended;
1484#else
1485 mbdigits[n]
1486 = curctype->values[_NL_CTYPE_INDIGITS0_MB + n].string;
1487
1488 /* Get the equivalent wide char in map. */
1489 wint_t extra_wcdigit = __towctrans (L'0' + n, map);
1490
1491 /* Convert it to multibyte representation. */
1492 mbstate_t state;
1493 memset (&state, '\0', sizeof (state));
1494
1495 char extra_mbdigit[MB_LEN_MAX];
1496 size_t mblen
1497 = __wcrtomb (extra_mbdigit, extra_wcdigit, &state);
1498
1499 if (mblen == (size_t) -1)
1500 {
1501 /* Ignore this new level. */
1502 map = NULL;
1503 break;
1504 }
1505
1506 /* Calculate the length of mbdigits[n]. */
1507 const char *last_char = mbdigits[n];
1508 for (level = 0; level < to_level; ++level)
1509 last_char = strchr (last_char, '\0') + 1;
1510
1511 size_t mbdigits_len = last_char - mbdigits[n];
1512
1513 /* Allocate memory for extended multibyte digit. */
1514 char *mb_extended;
1515 mb_extended = (char *) alloca (mbdigits_len + mblen + 1);
1516
1517 /* And get the mbdigits + extra_digit string. */
1518 *(char *) __mempcpy (__mempcpy (mb_extended, mbdigits[n],
1519 mbdigits_len),
1520 extra_mbdigit, mblen) = '\0';
1521 mbdigits_extended[n] = mb_extended;
1522#endif
1523 }
1524 }
1525
1526 /* Read the number into workspace. */
1527 while (c != EOF && width != 0)
1528 {
1529 /* In this round we get the pointer to the digit strings
1530 and also perform the first round of comparisons. */
1531 for (n = 0; n < 10; ++n)
1532 {
1533 /* Get the string for the digits with value N. */
1534#ifdef COMPILE_WSCANF
1535
1536 /* wcdigits_extended[] is fully set in the loop
1537 above, but the test for "map != NULL" is done
1538 inside the loop here and outside the loop there. */
1539 DIAG_PUSH_NEEDS_COMMENT;
1540 DIAG_IGNORE_NEEDS_COMMENT (4.7, "-Wmaybe-uninitialized");
1541
1542 if (__glibc_unlikely (map != NULL))
1543 wcdigits[n] = wcdigits_extended[n];
1544 else
1545 wcdigits[n] = (const wchar_t *)
1546 _NL_CURRENT (LC_CTYPE, _NL_CTYPE_INDIGITS0_WC + n);
1547 wcdigits[n] += from_level;
1548
1549 DIAG_POP_NEEDS_COMMENT;
1550
1551 if (c == (wint_t) *wcdigits[n])
1552 {
1553 to_level = from_level;
1554 break;
1555 }
1556
1557 /* Advance the pointer to the next string. */
1558 ++wcdigits[n];
1559#else
1560 const char *cmpp;
1561 int avail = width > 0 ? width : INT_MAX;
1562
1563 if (__glibc_unlikely (map != NULL))
1564 mbdigits[n] = mbdigits_extended[n];
1565 else
1566 mbdigits[n]
1567 = curctype->values[_NL_CTYPE_INDIGITS0_MB + n].string;
1568
1569 for (level = 0; level < from_level; level++)
1570 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1571
1572 cmpp = mbdigits[n];
1573 while ((unsigned char) *cmpp == c && avail >= 0)
1574 {
1575 if (*++cmpp == '\0')
1576 break;
1577 else
1578 {
1579 if (avail == 0 || inchar () == EOF)
1580 break;
1581 --avail;
1582 }
1583 }
1584
1585 if (*cmpp == '\0')
1586 {
1587 if (width > 0)
1588 width = avail;
1589 to_level = from_level;
1590 break;
1591 }
1592
1593 /* We are pushing all read characters back. */
1594 if (cmpp > mbdigits[n])
1595 {
1596 ungetc (c, s);
1597 while (--cmpp > mbdigits[n])
1598 ungetc_not_eof ((unsigned char) *cmpp, s);
1599 c = (unsigned char) *cmpp;
1600 }
1601
1602 /* Advance the pointer to the next string. */
1603 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1604#endif
1605 }
1606
1607 if (n == 10)
1608 {
1609 /* Have not yet found the digit. */
1610 for (level = from_level + 1; level <= to_level; ++level)
1611 {
1612 /* Search all ten digits of this level. */
1613 for (n = 0; n < 10; ++n)
1614 {
1615#ifdef COMPILE_WSCANF
1616 if (c == (wint_t) *wcdigits[n])
1617 break;
1618
1619 /* Advance the pointer to the next string. */
1620 ++wcdigits[n];
1621#else
1622 const char *cmpp;
1623 int avail = width > 0 ? width : INT_MAX;
1624
1625 cmpp = mbdigits[n];
1626 while ((unsigned char) *cmpp == c && avail >= 0)
1627 {
1628 if (*++cmpp == '\0')
1629 break;
1630 else
1631 {
1632 if (avail == 0 || inchar () == EOF)
1633 break;
1634 --avail;
1635 }
1636 }
1637
1638 if (*cmpp == '\0')
1639 {
1640 if (width > 0)
1641 width = avail;
1642 break;
1643 }
1644
1645 /* We are pushing all read characters back. */
1646 if (cmpp > mbdigits[n])
1647 {
1648 ungetc (c, s);
1649 while (--cmpp > mbdigits[n])
1650 ungetc_not_eof ((unsigned char) *cmpp, s);
1651 c = (unsigned char) *cmpp;
1652 }
1653
1654 /* Advance the pointer to the next string. */
1655 mbdigits[n] = strchr (mbdigits[n], '\0') + 1;
1656#endif
1657 }
1658
1659 if (n < 10)
1660 {
1661 /* Found it. */
1662 from_level = level;
1663 to_level = level;
1664 break;
1665 }
1666 }
1667 }
1668
1669 if (n < 10)
1670 c = L_('0') + n;
1671 else if (flags & GROUP)
1672 {
1673 /* Try matching against the thousands separator. */
1674#ifdef COMPILE_WSCANF
1675 if (c != thousands)
1676 break;
1677#else
1678 const char *cmpp = thousands;
1679 int avail = width > 0 ? width : INT_MAX;
1680
1681 while ((unsigned char) *cmpp == c && avail >= 0)
1682 {
1683 char_buffer_add (&charbuf, c);
1684 if (*++cmpp == '\0')
1685 break;
1686 else
1687 {
1688 if (avail == 0 || inchar () == EOF)
1689 break;
1690 --avail;
1691 }
1692 }
1693
1694 if (char_buffer_error (&charbuf))
1695 {
1696 __set_errno (ENOMEM);
1697 done = EOF;
1698 goto errout;
1699 }
1700
1701 if (*cmpp != '\0')
1702 {
1703 /* We are pushing all read characters back. */
1704 if (cmpp > thousands)
1705 {
1706 charbuf.current -= cmpp - thousands;
1707 ungetc (c, s);
1708 while (--cmpp > thousands)
1709 ungetc_not_eof ((unsigned char) *cmpp, s);
1710 c = (unsigned char) *cmpp;
1711 }
1712 break;
1713 }
1714
1715 if (width > 0)
1716 width = avail;
1717
1718 /* The last thousands character will be added back by
1719 the char_buffer_add below. */
1720 --charbuf.current;
1721#endif
1722 }
1723 else
1724 break;
1725
1726 char_buffer_add (&charbuf, c);
1727 if (width > 0)
1728 --width;
1729
1730 c = inchar ();
1731 }
1732 }
1733 else
1734 /* Read the number into workspace. */
1735 while (c != EOF && width != 0)
1736 {
1737 if (base == 16)
1738 {
1739 if (!ISXDIGIT (c))
1740 break;
1741 }
1742 else if (!ISDIGIT (c) || (int) (c - L_('0')) >= base)
1743 {
1744 if (base == 10 && (flags & GROUP))
1745 {
1746 /* Try matching against the thousands separator. */
1747#ifdef COMPILE_WSCANF
1748 if (c != thousands)
1749 break;
1750#else
1751 const char *cmpp = thousands;
1752 int avail = width > 0 ? width : INT_MAX;
1753
1754 while ((unsigned char) *cmpp == c && avail >= 0)
1755 {
1756 char_buffer_add (&charbuf, c);
1757 if (*++cmpp == '\0')
1758 break;
1759 else
1760 {
1761 if (avail == 0 || inchar () == EOF)
1762 break;
1763 --avail;
1764 }
1765 }
1766
1767 if (char_buffer_error (&charbuf))
1768 {
1769 __set_errno (ENOMEM);
1770 done = EOF;
1771 goto errout;
1772 }
1773
1774 if (*cmpp != '\0')
1775 {
1776 /* We are pushing all read characters back. */
1777 if (cmpp > thousands)
1778 {
1779 charbuf.current -= cmpp - thousands;
1780 ungetc (c, s);
1781 while (--cmpp > thousands)
1782 ungetc_not_eof ((unsigned char) *cmpp, s);
1783 c = (unsigned char) *cmpp;
1784 }
1785 break;
1786 }
1787
1788 if (width > 0)
1789 width = avail;
1790
1791 /* The last thousands character will be added back by
1792 the char_buffer_add below. */
1793 --charbuf.current;
1794#endif
1795 }
1796 else
1797 break;
1798 }
1799 char_buffer_add (&charbuf, c);
1800 if (width > 0)
1801 --width;
1802
1803 c = inchar ();
1804 }
1805
1806 if (char_buffer_error (&charbuf))
1807 {
1808 __set_errno (ENOMEM);
1809 done = EOF;
1810 goto errout;
1811 }
1812
1813 if (char_buffer_size (&charbuf) == 0
1814 || (char_buffer_size (&charbuf) == 1
1815 && (char_buffer_start (&charbuf)[0] == L_('+')
1816 || char_buffer_start (&charbuf)[0] == L_('-'))))
1817 {
1818 /* There was no number. If we are supposed to read a pointer
1819 we must recognize "(nil)" as well. */
1820 if (__builtin_expect (char_buffer_size (&charbuf) == 0
1821 && (flags & READ_POINTER)
1822 && (width < 0 || width >= 5)
1823 && c == '('
1824 && TOLOWER (inchar ()) == L_('n')
1825 && TOLOWER (inchar ()) == L_('i')
1826 && TOLOWER (inchar ()) == L_('l')
1827 && inchar () == L_(')'), 1))
1828 /* We must produce the value of a NULL pointer. A single
1829 '0' digit is enough. */
1830 char_buffer_add (&charbuf, L_('0'));
1831 else
1832 {
1833 /* The last read character is not part of the number
1834 anymore. */
1835 ungetc (c, s);
1836
1837 conv_error ();
1838 }
1839 }
1840 else
1841 /* The just read character is not part of the number anymore. */
1842 ungetc (c, s);
1843
1844 /* Convert the number. */
1845 char_buffer_add (&charbuf, L_('\0'));
1846 if (char_buffer_error (&charbuf))
1847 {
1848 __set_errno (ENOMEM);
1849 done = EOF;
1850 goto errout;
1851 }
1852 if (need_longlong && (flags & LONGDBL))
1853 {
1854 if (flags & NUMBER_SIGNED)
1855 num.q = __strtoll_internal
1856 (char_buffer_start (&charbuf), &tw, base, flags & GROUP);
1857 else
1858 num.uq = __strtoull_internal
1859 (char_buffer_start (&charbuf), &tw, base, flags & GROUP);
1860 }
1861 else
1862 {
1863 if (flags & NUMBER_SIGNED)
1864 num.l = __strtol_internal
1865 (char_buffer_start (&charbuf), &tw, base, flags & GROUP);
1866 else
1867 num.ul = __strtoul_internal
1868 (char_buffer_start (&charbuf), &tw, base, flags & GROUP);
1869 }
1870 if (__glibc_unlikely (char_buffer_start (&charbuf) == tw))
1871 conv_error ();
1872
1873 if (!(flags & SUPPRESS))
1874 {
1875 if (flags & NUMBER_SIGNED)
1876 {
1877 if (need_longlong && (flags & LONGDBL))
1878 *ARG (LONGLONG int *) = num.q;
1879 else if (need_long && (flags & LONG))
1880 *ARG (long int *) = num.l;
1881 else if (flags & SHORT)
1882 *ARG (short int *) = (short int) num.l;
1883 else if (!(flags & CHAR))
1884 *ARG (int *) = (int) num.l;
1885 else
1886 *ARG (signed char *) = (signed char) num.ul;
1887 }
1888 else
1889 {
1890 if (need_longlong && (flags & LONGDBL))
1891 *ARG (unsigned LONGLONG int *) = num.uq;
1892 else if (need_long && (flags & LONG))
1893 *ARG (unsigned long int *) = num.ul;
1894 else if (flags & SHORT)
1895 *ARG (unsigned short int *)
1896 = (unsigned short int) num.ul;
1897 else if (!(flags & CHAR))
1898 *ARG (unsigned int *) = (unsigned int) num.ul;
1899 else
1900 *ARG (unsigned char *) = (unsigned char) num.ul;
1901 }
1902 ++done;
1903 }
1904 break;
1905
1906 case L_('e'): /* Floating-point numbers. */
1907 case L_('E'):
1908 case L_('f'):
1909 case L_('F'):
1910 case L_('g'):
1911 case L_('G'):
1912 case L_('a'):
1913 case L_('A'):
1914 c = inchar ();
1915 if (width > 0)
1916 --width;
1917 if (__glibc_unlikely (c == EOF))
1918 input_error ();
1919
1920 got_digit = got_dot = got_e = 0;
1921
1922 /* Check for a sign. */
1923 if (c == L_('-') || c == L_('+'))
1924 {
1925 negative = c == L_('-');
1926 if (__glibc_unlikely (width == 0 || inchar () == EOF))
1927 /* EOF is only an input error before we read any chars. */
1928 conv_error ();
1929 if (width > 0)
1930 --width;
1931 }
1932 else
1933 negative = 0;
1934
1935 /* Take care for the special arguments "nan" and "inf". */
1936 if (TOLOWER (c) == L_('n'))
1937 {
1938 /* Maybe "nan". */
1939 char_buffer_add (&charbuf, c);
1940 if (__builtin_expect (width == 0
1941 || inchar () == EOF
1942 || TOLOWER (c) != L_('a'), 0))
1943 conv_error ();
1944 if (width > 0)
1945 --width;
1946 char_buffer_add (&charbuf, c);
1947 if (__builtin_expect (width == 0
1948 || inchar () == EOF
1949 || TOLOWER (c) != L_('n'), 0))
1950 conv_error ();
1951 if (width > 0)
1952 --width;
1953 char_buffer_add (&charbuf, c);
1954 /* It is "nan". */
1955 goto scan_float;
1956 }
1957 else if (TOLOWER (c) == L_('i'))
1958 {
1959 /* Maybe "inf" or "infinity". */
1960 char_buffer_add (&charbuf, c);
1961 if (__builtin_expect (width == 0
1962 || inchar () == EOF
1963 || TOLOWER (c) != L_('n'), 0))
1964 conv_error ();
1965 if (width > 0)
1966 --width;
1967 char_buffer_add (&charbuf, c);
1968 if (__builtin_expect (width == 0
1969 || inchar () == EOF
1970 || TOLOWER (c) != L_('f'), 0))
1971 conv_error ();
1972 if (width > 0)
1973 --width;
1974 char_buffer_add (&charbuf, c);
1975 /* It is as least "inf". */
1976 if (width != 0 && inchar () != EOF)
1977 {
1978 if (TOLOWER (c) == L_('i'))
1979 {
1980 if (width > 0)
1981 --width;
1982 /* Now we have to read the rest as well. */
1983 char_buffer_add (&charbuf, c);
1984 if (__builtin_expect (width == 0
1985 || inchar () == EOF
1986 || TOLOWER (c) != L_('n'), 0))
1987 conv_error ();
1988 if (width > 0)
1989 --width;
1990 char_buffer_add (&charbuf, c);
1991 if (__builtin_expect (width == 0
1992 || inchar () == EOF
1993 || TOLOWER (c) != L_('i'), 0))
1994 conv_error ();
1995 if (width > 0)
1996 --width;
1997 char_buffer_add (&charbuf, c);
1998 if (__builtin_expect (width == 0
1999 || inchar () == EOF
2000 || TOLOWER (c) != L_('t'), 0))
2001 conv_error ();
2002 if (width > 0)
2003 --width;
2004 char_buffer_add (&charbuf, c);
2005 if (__builtin_expect (width == 0
2006 || inchar () == EOF
2007 || TOLOWER (c) != L_('y'), 0))
2008 conv_error ();
2009 if (width > 0)
2010 --width;
2011 char_buffer_add (&charbuf, c);
2012 }
2013 else
2014 /* Never mind. */
2015 ungetc (c, s);
2016 }
2017 goto scan_float;
2018 }
2019
2020 exp_char = L_('e');
2021 if (width != 0 && c == L_('0'))
2022 {
2023 char_buffer_add (&charbuf, c);
2024 c = inchar ();
2025 if (width > 0)
2026 --width;
2027 if (width != 0 && TOLOWER (c) == L_('x'))
2028 {
2029 /* It is a number in hexadecimal format. */
2030 char_buffer_add (&charbuf, c);
2031
2032 flags |= HEXA_FLOAT;
2033 exp_char = L_('p');
2034
2035 /* Grouping is not allowed. */
2036 flags &= ~GROUP;
2037 c = inchar ();
2038 if (width > 0)
2039 --width;
2040 }
2041 else
2042 got_digit = 1;
2043 }
2044
2045 while (1)
2046 {
2047 if (char_buffer_error (&charbuf))
2048 {
2049 __set_errno (ENOMEM);
2050 done = EOF;
2051 goto errout;
2052 }
2053 if (ISDIGIT (c))
2054 {
2055 char_buffer_add (&charbuf, c);
2056 got_digit = 1;
2057 }
2058 else if (!got_e && (flags & HEXA_FLOAT) && ISXDIGIT (c))
2059 {
2060 char_buffer_add (&charbuf, c);
2061 got_digit = 1;
2062 }
2063 else if (got_e && charbuf.current[-1] == exp_char
2064 && (c == L_('-') || c == L_('+')))
2065 char_buffer_add (&charbuf, c);
2066 else if (got_digit && !got_e
2067 && (CHAR_T) TOLOWER (c) == exp_char)
2068 {
2069 char_buffer_add (&charbuf, exp_char);
2070 got_e = got_dot = 1;
2071 }
2072 else
2073 {
2074#ifdef COMPILE_WSCANF
2075 if (! got_dot && c == decimal)
2076 {
2077 char_buffer_add (&charbuf, c);
2078 got_dot = 1;
2079 }
2080 else if ((flags & GROUP) != 0 && ! got_dot && c == thousands)
2081 char_buffer_add (&charbuf, c);
2082 else
2083 {
2084 /* The last read character is not part of the number
2085 anymore. */
2086 ungetc (c, s);
2087 break;
2088 }
2089#else
2090 const char *cmpp = decimal;
2091 int avail = width > 0 ? width : INT_MAX;
2092
2093 if (! got_dot)
2094 {
2095 while ((unsigned char) *cmpp == c && avail >= 0)
2096 if (*++cmpp == '\0')
2097 break;
2098 else
2099 {
2100 if (avail == 0 || inchar () == EOF)
2101 break;
2102 --avail;
2103 }
2104 }
2105
2106 if (*cmpp == '\0')
2107 {
2108 /* Add all the characters. */
2109 for (cmpp = decimal; *cmpp != '\0'; ++cmpp)
2110 char_buffer_add (&charbuf, (unsigned char) *cmpp);
2111 if (width > 0)
2112 width = avail;
2113 got_dot = 1;
2114 }
2115 else
2116 {
2117 /* Figure out whether it is a thousands separator.
2118 There is one problem: we possibly read more than
2119 one character. We cannot push them back but since
2120 we know that parts of the `decimal' string matched,
2121 we can compare against it. */
2122 const char *cmp2p = thousands;
2123
2124 if ((flags & GROUP) != 0 && ! got_dot)
2125 {
2126 while (cmp2p - thousands < cmpp - decimal
2127 && *cmp2p == decimal[cmp2p - thousands])
2128 ++cmp2p;
2129 if (cmp2p - thousands == cmpp - decimal)
2130 {
2131 while ((unsigned char) *cmp2p == c && avail >= 0)
2132 if (*++cmp2p == '\0')
2133 break;
2134 else
2135 {
2136 if (avail == 0 || inchar () == EOF)
2137 break;
2138 --avail;
2139 }
2140 }
2141 }
2142
2143 if (cmp2p != NULL && *cmp2p == '\0')
2144 {
2145 /* Add all the characters. */
2146 for (cmpp = thousands; *cmpp != '\0'; ++cmpp)
2147 char_buffer_add (&charbuf, (unsigned char) *cmpp);
2148 if (width > 0)
2149 width = avail;
2150 }
2151 else
2152 {
2153 /* The last read character is not part of the number
2154 anymore. */
2155 ungetc (c, s);
2156 break;
2157 }
2158 }
2159#endif
2160 }
2161
2162 if (width == 0 || inchar () == EOF)
2163 break;
2164
2165 if (width > 0)
2166 --width;
2167 }
2168
2169 if (char_buffer_error (&charbuf))
2170 {
2171 __set_errno (ENOMEM);
2172 done = EOF;
2173 goto errout;
2174 }
2175
2176 wctrans_t map;
2177 if (__builtin_expect ((flags & I18N) != 0, 0)
2178 /* Hexadecimal floats make no sense, fixing localized
2179 digits with ASCII letters. */
2180 && !(flags & HEXA_FLOAT)
2181 /* Minimum requirement. */
2182 && (char_buffer_size (&charbuf) == 0 || got_dot)
2183 && (map = __wctrans ("to_inpunct")) != NULL)
2184 {
2185 /* Reget the first character. */
2186 inchar ();
2187
2188 /* Localized digits, decimal points, and thousands
2189 separator. */
2190 wint_t wcdigits[12];
2191
2192 /* First get decimal equivalent to check if we read it
2193 or not. */
2194 wcdigits[11] = __towctrans (L'.', map);
2195
2196 /* If we have not read any character or have just read
2197 locale decimal point which matches the decimal point
2198 for localized FP numbers, then we may have localized
2199 digits. Note, we test GOT_DOT above. */
2200#ifdef COMPILE_WSCANF
2201 if (char_buffer_size (&charbuf) == 0
2202 || (char_buffer_size (&charbuf) == 1
2203 && wcdigits[11] == decimal))
2204#else
2205 char mbdigits[12][MB_LEN_MAX + 1];
2206
2207 mbstate_t state;
2208 memset (&state, '\0', sizeof (state));
2209
2210 bool match_so_far = char_buffer_size (&charbuf) == 0;
2211 size_t mblen = __wcrtomb (mbdigits[11], wcdigits[11], &state);
2212 if (mblen != (size_t) -1)
2213 {
2214 mbdigits[11][mblen] = '\0';
2215 match_so_far |=
2216 (char_buffer_size (&charbuf) == strlen (decimal)
2217 && strcmp (decimal, mbdigits[11]) == 0);
2218 }
2219 else
2220 {
2221 size_t decimal_len = strlen (decimal);
2222 /* This should always be the case but the data comes
2223 from a file. */
2224 if (decimal_len <= MB_LEN_MAX)
2225 {
2226 match_so_far |= char_buffer_size (&charbuf) == decimal_len;
2227 memcpy (mbdigits[11], decimal, decimal_len + 1);
2228 }
2229 else
2230 match_so_far = false;
2231 }
2232
2233 if (match_so_far)
2234#endif
2235 {
2236 bool have_locthousands = (flags & GROUP) != 0;
2237
2238 /* Now get the digits and the thousands-sep equivalents. */
2239 for (int n = 0; n < 11; ++n)
2240 {
2241 if (n < 10)
2242 wcdigits[n] = __towctrans (L'0' + n, map);
2243 else if (n == 10)
2244 {
2245 wcdigits[10] = __towctrans (L',', map);
2246 have_locthousands &= wcdigits[10] != L'\0';
2247 }
2248
2249#ifndef COMPILE_WSCANF
2250 memset (&state, '\0', sizeof (state));
2251
2252 size_t mblen = __wcrtomb (mbdigits[n], wcdigits[n],
2253 &state);
2254 if (mblen == (size_t) -1)
2255 {
2256 if (n == 10)
2257 {
2258 if (have_locthousands)
2259 {
2260 size_t thousands_len = strlen (thousands);
2261 if (thousands_len <= MB_LEN_MAX)
2262 memcpy (mbdigits[10], thousands,
2263 thousands_len + 1);
2264 else
2265 have_locthousands = false;
2266 }
2267 }
2268 else
2269 /* Ignore checking against localized digits. */
2270 goto no_i18nflt;
2271 }
2272 else
2273 mbdigits[n][mblen] = '\0';
2274#endif
2275 }
2276
2277 /* Start checking against localized digits, if
2278 conversion is done correctly. */
2279 while (1)
2280 {
2281 if (char_buffer_error (&charbuf))
2282 {
2283 __set_errno (ENOMEM);
2284 done = EOF;
2285 goto errout;
2286 }
2287 if (got_e && charbuf.current[-1] == exp_char
2288 && (c == L_('-') || c == L_('+')))
2289 char_buffer_add (&charbuf, c);
2290 else if (char_buffer_size (&charbuf) > 0 && !got_e
2291 && (CHAR_T) TOLOWER (c) == exp_char)
2292 {
2293 char_buffer_add (&charbuf, exp_char);
2294 got_e = got_dot = 1;
2295 }
2296 else
2297 {
2298 /* Check against localized digits, decimal point,
2299 and thousands separator. */
2300 int n;
2301 for (n = 0; n < 12; ++n)
2302 {
2303#ifdef COMPILE_WSCANF
2304 if (c == wcdigits[n])
2305 {
2306 if (n < 10)
2307 char_buffer_add (&charbuf, L_('0') + n);
2308 else if (n == 11 && !got_dot)
2309 {
2310 char_buffer_add (&charbuf, decimal);
2311 got_dot = 1;
2312 }
2313 else if (n == 10 && have_locthousands
2314 && ! got_dot)
2315 char_buffer_add (&charbuf, thousands);
2316 else
2317 /* The last read character is not part
2318 of the number anymore. */
2319 n = 12;
2320
2321 break;
2322 }
2323#else
2324 const char *cmpp = mbdigits[n];
2325 int avail = width > 0 ? width : INT_MAX;
2326
2327 while ((unsigned char) *cmpp == c && avail >= 0)
2328 if (*++cmpp == '\0')
2329 break;
2330 else
2331 {
2332 if (avail == 0 || inchar () == EOF)
2333 break;
2334 --avail;
2335 }
2336 if (*cmpp == '\0')
2337 {
2338 if (width > 0)
2339 width = avail;
2340
2341 if (n < 10)
2342 char_buffer_add (&charbuf, L_('0') + n);
2343 else if (n == 11 && !got_dot)
2344 {
2345 /* Add all the characters. */
2346 for (cmpp = decimal; *cmpp != '\0';
2347 ++cmpp)
2348 char_buffer_add (&charbuf,
2349 (unsigned char) *cmpp);
2350
2351 got_dot = 1;
2352 }
2353 else if (n == 10 && (flags & GROUP) != 0
2354 && ! got_dot)
2355 {
2356 /* Add all the characters. */
2357 for (cmpp = thousands; *cmpp != '\0';
2358 ++cmpp)
2359 char_buffer_add (&charbuf,
2360 (unsigned char) *cmpp);
2361 }
2362 else
2363 /* The last read character is not part
2364 of the number anymore. */
2365 n = 12;
2366
2367 break;
2368 }
2369
2370 /* We are pushing all read characters back. */
2371 if (cmpp > mbdigits[n])
2372 {
2373 ungetc (c, s);
2374 while (--cmpp > mbdigits[n])
2375 ungetc_not_eof ((unsigned char) *cmpp, s);
2376 c = (unsigned char) *cmpp;
2377 }
2378#endif
2379 }
2380
2381 if (n >= 12)
2382 {
2383 /* The last read character is not part
2384 of the number anymore. */
2385 ungetc (c, s);
2386 break;
2387 }
2388 }
2389
2390 if (width == 0 || inchar () == EOF)
2391 break;
2392
2393 if (width > 0)
2394 --width;
2395 }
2396 }
2397
2398#ifndef COMPILE_WSCANF
2399 no_i18nflt:
2400 ;
2401#endif
2402 }
2403
2404 if (char_buffer_error (&charbuf))
2405 {
2406 __set_errno (ENOMEM);
2407 done = EOF;
2408 goto errout;
2409 }
2410
2411 /* Have we read any character? If we try to read a number
2412 in hexadecimal notation and we have read only the `0x'
2413 prefix this is an error. */
2414 if (__glibc_unlikely (char_buffer_size (&charbuf) == 0
2415 || ((flags & HEXA_FLOAT)
2416 && char_buffer_size (&charbuf) == 2)))
2417 conv_error ();
2418
2419 scan_float:
2420 /* Convert the number. */
2421 char_buffer_add (&charbuf, L_('\0'));
2422 if (char_buffer_error (&charbuf))
2423 {
2424 __set_errno (ENOMEM);
2425 done = EOF;
2426 goto errout;
2427 }
2428 if ((flags & LONGDBL) && !__ldbl_is_dbl)
2429 {
2430 long double d = __strtold_internal
2431 (char_buffer_start (&charbuf), &tw, flags & GROUP);
2432 if (!(flags & SUPPRESS) && tw != char_buffer_start (&charbuf))
2433 *ARG (long double *) = negative ? -d : d;
2434 }
2435 else if (flags & (LONG | LONGDBL))
2436 {
2437 double d = __strtod_internal
2438 (char_buffer_start (&charbuf), &tw, flags & GROUP);
2439 if (!(flags & SUPPRESS) && tw != char_buffer_start (&charbuf))
2440 *ARG (double *) = negative ? -d : d;
2441 }
2442 else
2443 {
2444 float d = __strtof_internal
2445 (char_buffer_start (&charbuf), &tw, flags & GROUP);
2446 if (!(flags & SUPPRESS) && tw != char_buffer_start (&charbuf))
2447 *ARG (float *) = negative ? -d : d;
2448 }
2449
2450 if (__glibc_unlikely (tw == char_buffer_start (&charbuf)))
2451 conv_error ();
2452
2453 if (!(flags & SUPPRESS))
2454 ++done;
2455 break;
2456
2457 case L_('['): /* Character class. */
2458 if (flags & LONG)
2459 STRING_ARG (wstr, wchar_t, 100);
2460 else
2461 STRING_ARG (str, char, 100);
2462
2463 if (*f == L_('^'))
2464 {
2465 ++f;
2466 not_in = 1;
2467 }
2468 else
2469 not_in = 0;
2470
2471 if (width < 0)
2472 /* There is no width given so there is also no limit on the
2473 number of characters we read. Therefore we set width to
2474 a very high value to make the algorithm easier. */
2475 width = INT_MAX;
2476
2477#ifdef COMPILE_WSCANF
2478 /* Find the beginning and the end of the scanlist. We are not
2479 creating a lookup table since it would have to be too large.
2480 Instead we search each time through the string. This is not
2481 a constant lookup time but who uses this feature deserves to
2482 be punished. */
2483 tw = (wchar_t *) f; /* Marks the beginning. */
2484
2485 if (*f == L']')
2486 ++f;
2487
2488 while ((fc = *f++) != L'\0' && fc != L']');
2489
2490 if (__glibc_unlikely (fc == L'\0'))
2491 conv_error ();
2492 wchar_t *twend = (wchar_t *) f - 1;
2493#else
2494 /* Fill WP with byte flags indexed by character.
2495 We will use this flag map for matching input characters. */
2496 if (!scratch_buffer_set_array_size
2497 (&charbuf.scratch, UCHAR_MAX + 1, 1))
2498 {
2499 done = EOF;
2500 goto errout;
2501 }
2502 memset (charbuf.scratch.data, '\0', UCHAR_MAX + 1);
2503
2504 fc = *f;
2505 if (fc == ']' || fc == '-')
2506 {
2507 /* If ] or - appears before any char in the set, it is not
2508 the terminator or separator, but the first char in the
2509 set. */
2510 ((char *)charbuf.scratch.data)[fc] = 1;
2511 ++f;
2512 }
2513
2514 while ((fc = *f++) != '\0' && fc != ']')
2515 if (fc == '-' && *f != '\0' && *f != ']'
2516 && (unsigned char) f[-2] <= (unsigned char) *f)
2517 {
2518 /* Add all characters from the one before the '-'
2519 up to (but not including) the next format char. */
2520 for (fc = (unsigned char) f[-2]; fc < (unsigned char) *f; ++fc)
2521 ((char *)charbuf.scratch.data)[fc] = 1;
2522 }
2523 else
2524 /* Add the character to the flag map. */
2525 ((char *)charbuf.scratch.data)[fc] = 1;
2526
2527 if (__glibc_unlikely (fc == '\0'))
2528 conv_error();
2529#endif
2530
2531 if (flags & LONG)
2532 {
2533 size_t now = read_in;
2534#ifdef COMPILE_WSCANF
2535 if (__glibc_unlikely (inchar () == WEOF))
2536 input_error ();
2537
2538 do
2539 {
2540 wchar_t *runp;
2541
2542 /* Test whether it's in the scanlist. */
2543 runp = tw;
2544 while (runp < twend)
2545 {
2546 if (runp[0] == L'-' && runp[1] != '\0'
2547 && runp + 1 != twend
2548 && runp != tw
2549 && (unsigned int) runp[-1] <= (unsigned int) runp[1])
2550 {
2551 /* Match against all characters in between the
2552 first and last character of the sequence. */
2553 wchar_t wc;
2554
2555 for (wc = runp[-1] + 1; wc <= runp[1]; ++wc)
2556 if ((wint_t) wc == c)
2557 break;
2558
2559 if (wc <= runp[1] && !not_in)
2560 break;
2561 if (wc <= runp[1] && not_in)
2562 {
2563 /* The current character is not in the
2564 scanset. */
2565 ungetc (c, s);
2566 goto out;
2567 }
2568
2569 runp += 2;
2570 }
2571 else
2572 {
2573 if ((wint_t) *runp == c && !not_in)
2574 break;
2575 if ((wint_t) *runp == c && not_in)
2576 {
2577 ungetc (c, s);
2578 goto out;
2579 }
2580
2581 ++runp;
2582 }
2583 }
2584
2585 if (runp == twend && !not_in)
2586 {
2587 ungetc (c, s);
2588 goto out;
2589 }
2590
2591 if (!(flags & SUPPRESS))
2592 {
2593 *wstr++ = c;
2594
2595 if ((flags & MALLOC)
2596 && wstr == (wchar_t *) *strptr + strsize)
2597 {
2598 /* Enlarge the buffer. */
2599 wstr = (wchar_t *) realloc (*strptr,
2600 (2 * strsize)
2601 * sizeof (wchar_t));
2602 if (wstr == NULL)
2603 {
2604 /* Can't allocate that much. Last-ditch
2605 effort. */
2606 wstr = (wchar_t *)
2607 realloc (*strptr, (strsize + 1)
2608 * sizeof (wchar_t));
2609 if (wstr == NULL)
2610 {
2611 if (flags & POSIX_MALLOC)
2612 {
2613 done = EOF;
2614 goto errout;
2615 }
2616 /* We lose. Oh well. Terminate the string
2617 and stop converting, so at least we don't
2618 skip any input. */
2619 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
2620 strptr = NULL;
2621 ++done;
2622 conv_error ();
2623 }
2624 else
2625 {
2626 *strptr = (char *) wstr;
2627 wstr += strsize;
2628 ++strsize;
2629 }
2630 }
2631 else
2632 {
2633 *strptr = (char *) wstr;
2634 wstr += strsize;
2635 strsize *= 2;
2636 }
2637 }
2638 }
2639 }
2640 while (--width > 0 && inchar () != WEOF);
2641 out:
2642#else
2643 char buf[MB_LEN_MAX];
2644 size_t cnt = 0;
2645 mbstate_t cstate;
2646
2647 if (__glibc_unlikely (inchar () == EOF))
2648 input_error ();
2649
2650 memset (&cstate, '\0', sizeof (cstate));
2651
2652 do
2653 {
2654 if (((char *) charbuf.scratch.data)[c] == not_in)
2655 {
2656 ungetc_not_eof (c, s);
2657 break;
2658 }
2659
2660 /* This is easy. */
2661 if (!(flags & SUPPRESS))
2662 {
2663 size_t n;
2664
2665 /* Convert it into a wide character. */
2666 buf[0] = c;
2667 n = __mbrtowc (wstr, buf, 1, &cstate);
2668
2669 if (n == (size_t) -2)
2670 {
2671 /* Possibly correct character, just not enough
2672 input. */
2673 ++cnt;
2674 assert (cnt < MB_LEN_MAX);
2675 continue;
2676 }
2677 cnt = 0;
2678
2679 ++wstr;
2680 if ((flags & MALLOC)
2681 && wstr == (wchar_t *) *strptr + strsize)
2682 {
2683 /* Enlarge the buffer. */
2684 wstr = (wchar_t *) realloc (*strptr,
2685 (2 * strsize
2686 * sizeof (wchar_t)));
2687 if (wstr == NULL)
2688 {
2689 /* Can't allocate that much. Last-ditch
2690 effort. */
2691 wstr = (wchar_t *)
2692 realloc (*strptr, ((strsize + 1)
2693 * sizeof (wchar_t)));
2694 if (wstr == NULL)
2695 {
2696 if (flags & POSIX_MALLOC)
2697 {
2698 done = EOF;
2699 goto errout;
2700 }
2701 /* We lose. Oh well. Terminate the
2702 string and stop converting,
2703 so at least we don't skip any input. */
2704 ((wchar_t *) (*strptr))[strsize - 1] = L'\0';
2705 strptr = NULL;
2706 ++done;
2707 conv_error ();
2708 }
2709 else
2710 {
2711 *strptr = (char *) wstr;
2712 wstr += strsize;
2713 ++strsize;
2714 }
2715 }
2716 else
2717 {
2718 *strptr = (char *) wstr;
2719 wstr += strsize;
2720 strsize *= 2;
2721 }
2722 }
2723 }
2724
2725 if (--width <= 0)
2726 break;
2727 }
2728 while (inchar () != EOF);
2729
2730 if (__glibc_unlikely (cnt != 0))
2731 /* We stopped in the middle of recognizing another
2732 character. That's a problem. */
2733 encode_error ();
2734#endif
2735
2736 if (__glibc_unlikely (now == read_in))
2737 /* We haven't successfully read any character. */
2738 conv_error ();
2739
2740 if (!(flags & SUPPRESS))
2741 {
2742 *wstr++ = L'\0';
2743
2744 if ((flags & MALLOC)
2745 && wstr - (wchar_t *) *strptr != strsize)
2746 {
2747 wchar_t *cp = (wchar_t *)
2748 realloc (*strptr, ((wstr - (wchar_t *) *strptr)
2749 * sizeof(wchar_t)));
2750 if (cp != NULL)
2751 *strptr = (char *) cp;
2752 }
2753 strptr = NULL;
2754
2755 ++done;
2756 }
2757 }
2758 else
2759 {
2760 size_t now = read_in;
2761
2762 if (__glibc_unlikely (inchar () == EOF))
2763 input_error ();
2764
2765#ifdef COMPILE_WSCANF
2766
2767 memset (&state, '\0', sizeof (state));
2768
2769 do
2770 {
2771 wchar_t *runp;
2772 size_t n;
2773
2774 /* Test whether it's in the scanlist. */
2775 runp = tw;
2776 while (runp < twend)
2777 {
2778 if (runp[0] == L'-' && runp[1] != '\0'
2779 && runp + 1 != twend
2780 && runp != tw
2781 && (unsigned int) runp[-1] <= (unsigned int) runp[1])
2782 {
2783 /* Match against all characters in between the
2784 first and last character of the sequence. */
2785 wchar_t wc;
2786
2787 for (wc = runp[-1] + 1; wc <= runp[1]; ++wc)
2788 if ((wint_t) wc == c)
2789 break;
2790
2791 if (wc <= runp[1] && !not_in)
2792 break;
2793 if (wc <= runp[1] && not_in)
2794 {
2795 /* The current character is not in the
2796 scanset. */
2797 ungetc (c, s);
2798 goto out2;
2799 }
2800
2801 runp += 2;
2802 }
2803 else
2804 {
2805 if ((wint_t) *runp == c && !not_in)
2806 break;
2807 if ((wint_t) *runp == c && not_in)
2808 {
2809 ungetc (c, s);
2810 goto out2;
2811 }
2812
2813 ++runp;
2814 }
2815 }
2816
2817 if (runp == twend && !not_in)
2818 {
2819 ungetc (c, s);
2820 goto out2;
2821 }
2822
2823 if (!(flags & SUPPRESS))
2824 {
2825 if ((flags & MALLOC)
2826 && *strptr + strsize - str <= MB_LEN_MAX)
2827 {
2828 /* Enlarge the buffer. */
2829 size_t strleng = str - *strptr;
2830 char *newstr;
2831
2832 newstr = (char *) realloc (*strptr, 2 * strsize);
2833 if (newstr == NULL)
2834 {
2835 /* Can't allocate that much. Last-ditch
2836 effort. */
2837 newstr = (char *) realloc (*strptr,
2838 strleng + MB_LEN_MAX);
2839 if (newstr == NULL)
2840 {
2841 if (flags & POSIX_MALLOC)
2842 {
2843 done = EOF;
2844 goto errout;
2845 }
2846 /* We lose. Oh well. Terminate the string
2847 and stop converting, so at least we don't
2848 skip any input. */
2849 ((char *) (*strptr))[strleng] = '\0';
2850 strptr = NULL;
2851 ++done;
2852 conv_error ();
2853 }
2854 else
2855 {
2856 *strptr = newstr;
2857 str = newstr + strleng;
2858 strsize = strleng + MB_LEN_MAX;
2859 }
2860 }
2861 else
2862 {
2863 *strptr = newstr;
2864 str = newstr + strleng;
2865 strsize *= 2;
2866 }
2867 }
2868 }
2869
2870 n = __wcrtomb (!(flags & SUPPRESS) ? str : NULL, c, &state);
2871 if (__glibc_unlikely (n == (size_t) -1))
2872 encode_error ();
2873
2874 assert (n <= MB_LEN_MAX);
2875 str += n;
2876 }
2877 while (--width > 0 && inchar () != WEOF);
2878 out2:
2879#else
2880 do
2881 {
2882 if (((char *) charbuf.scratch.data)[c] == not_in)
2883 {
2884 ungetc_not_eof (c, s);
2885 break;
2886 }
2887
2888 /* This is easy. */
2889 if (!(flags & SUPPRESS))
2890 {
2891 *str++ = c;
2892 if ((flags & MALLOC)
2893 && (char *) str == *strptr + strsize)
2894 {
2895 /* Enlarge the buffer. */
2896 size_t newsize = 2 * strsize;
2897
2898 allocagain:
2899 str = (char *) realloc (*strptr, newsize);
2900 if (str == NULL)
2901 {
2902 /* Can't allocate that much. Last-ditch
2903 effort. */
2904 if (newsize > strsize + 1)
2905 {
2906 newsize = strsize + 1;
2907 goto allocagain;
2908 }
2909 if (flags & POSIX_MALLOC)
2910 {
2911 done = EOF;
2912 goto errout;
2913 }
2914 /* We lose. Oh well. Terminate the
2915 string and stop converting,
2916 so at least we don't skip any input. */
2917 ((char *) (*strptr))[strsize - 1] = '\0';
2918 strptr = NULL;
2919 ++done;
2920 conv_error ();
2921 }
2922 else
2923 {
2924 *strptr = (char *) str;
2925 str += strsize;
2926 strsize = newsize;
2927 }
2928 }
2929 }
2930 }
2931 while (--width > 0 && inchar () != EOF);
2932#endif
2933
2934 if (__glibc_unlikely (now == read_in))
2935 /* We haven't successfully read any character. */
2936 conv_error ();
2937
2938 if (!(flags & SUPPRESS))
2939 {
2940#ifdef COMPILE_WSCANF
2941 /* We have to emit the code to get into the initial
2942 state. */
2943 char buf[MB_LEN_MAX];
2944 size_t n = __wcrtomb (buf, L'\0', &state);
2945 if (n > 0 && (flags & MALLOC)
2946 && str + n >= *strptr + strsize)
2947 {
2948 /* Enlarge the buffer. */
2949 size_t strleng = str - *strptr;
2950 char *newstr;
2951
2952 newstr = (char *) realloc (*strptr, strleng + n + 1);
2953 if (newstr == NULL)
2954 {
2955 if (flags & POSIX_MALLOC)
2956 {
2957 done = EOF;
2958 goto errout;
2959 }
2960 /* We lose. Oh well. Terminate the string
2961 and stop converting, so at least we don't
2962 skip any input. */
2963 ((char *) (*strptr))[strleng] = '\0';
2964 strptr = NULL;
2965 ++done;
2966 conv_error ();
2967 }
2968 else
2969 {
2970 *strptr = newstr;
2971 str = newstr + strleng;
2972 strsize = strleng + n + 1;
2973 }
2974 }
2975
2976 str = __mempcpy (str, buf, n);
2977#endif
2978 *str++ = '\0';
2979
2980 if ((flags & MALLOC) && str - *strptr != strsize)
2981 {
2982 char *cp = (char *) realloc (*strptr, str - *strptr);
2983 if (cp != NULL)
2984 *strptr = cp;
2985 }
2986 strptr = NULL;
2987
2988 ++done;
2989 }
2990 }
2991 break;
2992
2993 case L_('p'): /* Generic pointer. */
2994 base = 16;
2995 /* A PTR must be the same size as a `long int'. */
2996 flags &= ~(SHORT|LONGDBL);
2997 if (need_long)
2998 flags |= LONG;
2999 flags |= READ_POINTER;
3000 goto number;
3001
3002 default:
3003 /* If this is an unknown format character punt. */
3004 conv_error ();
3005 }
3006 }
3007
3008 /* The last thing we saw in the format string was a white space.
3009 Consume the last white spaces. */
3010 if (skip_space)
3011 {
3012 do
3013 c = inchar ();
3014 while (ISSPACE (c));
3015 ungetc (c, s);
3016 }
3017
3018 errout:
3019 /* Unlock stream. */
3020 UNLOCK_STREAM (s);
3021
3022 scratch_buffer_free (&charbuf.scratch);
3023 if (errp != NULL)
3024 *errp |= errval;
3025
3026 if (__glibc_unlikely (done == EOF))
3027 {
3028 if (__glibc_unlikely (ptrs_to_free != NULL))
3029 {
3030 struct ptrs_to_free *p = ptrs_to_free;
3031 while (p != NULL)
3032 {
3033 for (size_t cnt = 0; cnt < p->count; ++cnt)
3034 {
3035 free (*p->ptrs[cnt]);
3036 *p->ptrs[cnt] = NULL;
3037 }
3038 p = p->next;
3039 ptrs_to_free = p;
3040 }
3041 }
3042 }
3043 else if (__glibc_unlikely (strptr != NULL))
3044 {
3045 free (*strptr);
3046 *strptr = NULL;
3047 }
3048 return done;
3049}
3050
3051#ifdef COMPILE_WSCANF
/* Wide-character formatted-input entry point taking a va_list.
   Forwards S, FORMAT and ARGPTR unchanged to _IO_vfwscanf and returns
   that call's result as-is.  The trailing NULL is presumably the
   optional error-flag output pointer (the scanner's exit path above
   only stores through `errp' when it is non-null) -- TODO confirm
   against the _IO_vfwscanf definition, which is not visible here.  */
3052int
3053__vfwscanf (FILE *s, const wchar_t *format, va_list argptr)
3054{
3055 return _IO_vfwscanf (s, format, argptr, NULL);
3056}
/* NOTE(review): ldbl_weak_alias is defined outside this file; judging
   by its name and arguments it presumably exports `vfwscanf' as a weak
   alias of __vfwscanf -- confirm against the macro definition.  */
3057ldbl_weak_alias (__vfwscanf, vfwscanf)
3058#else
/* Formatted-input entry point for a narrow (char) format string,
   taking a va_list.  Forwards S, FORMAT and ARGPTR unchanged to
   _IO_vfscanf_internal and returns that call's result as-is.  The
   trailing NULL is presumably the optional error-flag output pointer
   (the scanner's exit path above only stores through `errp' when it
   is non-null) -- TODO confirm against the _IO_vfscanf_internal
   definition, which is not visible here.  */
3059int
3060___vfscanf (FILE *s, const char *format, va_list argptr)
3061{
3062 return _IO_vfscanf_internal (s, format, argptr, NULL);
3063}
/* NOTE(review): the ldbl_* macros below are defined outside this file.
   Judging by their names and arguments they presumably:
     - make _IO_vfscanf a strong alias of _IO_vfscanf_internal and
       provide its hidden definition;
     - make __vfscanf a strong alias of ___vfscanf and provide its
       hidden definition;
     - export the public name `vfscanf' as a weak alias of ___vfscanf.
   Confirm against the macro definitions before relying on this.  */
3064ldbl_strong_alias (_IO_vfscanf_internal, _IO_vfscanf)
3065ldbl_hidden_def (_IO_vfscanf_internal, _IO_vfscanf)
3066ldbl_strong_alias (___vfscanf, __vfscanf)
3067ldbl_hidden_def (___vfscanf, __vfscanf)
3068ldbl_weak_alias (___vfscanf, vfscanf)
3069#endif
3070