1 | /* Helper functions for parsing printf format strings. |
2 | Copyright (C) 1995-2016 Free Software Foundation, Inc. |
   This file is part of the GNU C Library.
4 | |
5 | The GNU C Library is free software; you can redistribute it and/or |
6 | modify it under the terms of the GNU Lesser General Public |
7 | License as published by the Free Software Foundation; either |
8 | version 2.1 of the License, or (at your option) any later version. |
9 | |
10 | The GNU C Library is distributed in the hope that it will be useful, |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
13 | Lesser General Public License for more details. |
14 | |
15 | You should have received a copy of the GNU Lesser General Public |
16 | License along with the GNU C Library; if not, see |
17 | <http://www.gnu.org/licenses/>. */ |
18 | |
19 | #include <ctype.h> |
20 | #include <limits.h> |
21 | #include <stdlib.h> |
22 | #include <string.h> |
23 | #include <sys/param.h> |
24 | #include <wchar.h> |
25 | #include <wctype.h> |
26 | |
/* Character-type configuration.  Both a narrow and a wide set of
   definitions are provided; COMPILE_WPRINTF selects the wide set.  */
#ifdef COMPILE_WPRINTF
/* Wide-character format strings.  */
# define CHAR_T wchar_t
# define UCHAR_T unsigned int
# define INT_T wint_t
/* Turn a character or string literal into its wide form.  */
# define L_(Str) L##Str
# define ISDIGIT(Ch) iswdigit (Ch)
# define HANDLE_REGISTERED_MODIFIER __handle_registered_modifier_wc
#else
/* Narrow-character format strings.  */
# define CHAR_T char
# define UCHAR_T unsigned char
# define INT_T int
/* Literals are used unchanged.  */
# define L_(Str) Str
# define ISDIGIT(Ch) isdigit (Ch)
# define HANDLE_REGISTERED_MODIFIER __handle_registered_modifier_mb
#endif
42 | |
43 | #include "printf-parse.h" |
44 | |
/* NDEBUG must be defined before <assert.h> is included so that the
   assert () calls in this file expand to nothing.  */
#define NDEBUG 1
#include <assert.h>
47 | |
48 | |
49 | |
50 | /* FORMAT must point to a '%' at the beginning of a spec. Fills in *SPEC |
51 | with the parsed details. POSN is the number of arguments already |
52 | consumed. At most MAXTYPES - POSN types are filled in TYPES. Return |
53 | the number of args consumed by this spec; *MAX_REF_ARG is updated so it |
54 | remains the highest argument index used. */ |
55 | size_t |
56 | attribute_hidden |
57 | #ifdef COMPILE_WPRINTF |
58 | __parse_one_specwc (const UCHAR_T *format, size_t posn, |
59 | struct printf_spec *spec, size_t *max_ref_arg) |
60 | #else |
61 | __parse_one_specmb (const UCHAR_T *format, size_t posn, |
62 | struct printf_spec *spec, size_t *max_ref_arg) |
63 | #endif |
64 | { |
65 | unsigned int n; |
66 | size_t nargs = 0; |
67 | |
68 | /* Skip the '%'. */ |
69 | ++format; |
70 | |
71 | /* Clear information structure. */ |
72 | spec->data_arg = -1; |
73 | spec->info.alt = 0; |
74 | spec->info.space = 0; |
75 | spec->info.left = 0; |
76 | spec->info.showsign = 0; |
77 | spec->info.group = 0; |
78 | spec->info.i18n = 0; |
79 | spec->info.extra = 0; |
80 | spec->info.pad = ' '; |
81 | spec->info.wide = sizeof (UCHAR_T) > 1; |
82 | |
83 | /* Test for positional argument. */ |
84 | if (ISDIGIT (*format)) |
85 | { |
86 | const UCHAR_T *begin = format; |
87 | |
88 | n = read_int (&format); |
89 | |
90 | if (n != 0 && *format == L_('$')) |
91 | /* Is positional parameter. */ |
92 | { |
93 | ++format; /* Skip the '$'. */ |
94 | if (n != -1) |
95 | { |
96 | spec->data_arg = n - 1; |
97 | *max_ref_arg = MAX (*max_ref_arg, n); |
98 | } |
99 | } |
100 | else |
101 | /* Oops; that was actually the width and/or 0 padding flag. |
102 | Step back and read it again. */ |
103 | format = begin; |
104 | } |
105 | |
106 | /* Check for spec modifiers. */ |
107 | do |
108 | { |
109 | switch (*format) |
110 | { |
111 | case L_(' '): |
112 | /* Output a space in place of a sign, when there is no sign. */ |
113 | spec->info.space = 1; |
114 | continue; |
115 | case L_('+'): |
116 | /* Always output + or - for numbers. */ |
117 | spec->info.showsign = 1; |
118 | continue; |
119 | case L_('-'): |
120 | /* Left-justify things. */ |
121 | spec->info.left = 1; |
122 | continue; |
123 | case L_('#'): |
124 | /* Use the "alternate form": |
125 | Hex has 0x or 0X, FP always has a decimal point. */ |
126 | spec->info.alt = 1; |
127 | continue; |
128 | case L_('0'): |
129 | /* Pad with 0s. */ |
130 | spec->info.pad = '0'; |
131 | continue; |
132 | case L_('\''): |
133 | /* Show grouping in numbers if the locale information |
134 | indicates any. */ |
135 | spec->info.group = 1; |
136 | continue; |
137 | case L_('I'): |
138 | /* Use the internationalized form of the output. Currently |
139 | means to use the `outdigits' of the current locale. */ |
140 | spec->info.i18n = 1; |
141 | continue; |
142 | default: |
143 | break; |
144 | } |
145 | break; |
146 | } |
147 | while (*++format); |
148 | |
149 | if (spec->info.left) |
150 | spec->info.pad = ' '; |
151 | |
152 | /* Get the field width. */ |
153 | spec->width_arg = -1; |
154 | spec->info.width = 0; |
155 | if (*format == L_('*')) |
156 | { |
157 | /* The field width is given in an argument. |
158 | A negative field width indicates left justification. */ |
159 | const UCHAR_T *begin = ++format; |
160 | |
161 | if (ISDIGIT (*format)) |
162 | { |
163 | /* The width argument might be found in a positional parameter. */ |
164 | n = read_int (&format); |
165 | |
166 | if (n != 0 && *format == L_('$')) |
167 | { |
168 | if (n != -1) |
169 | { |
170 | spec->width_arg = n - 1; |
171 | *max_ref_arg = MAX (*max_ref_arg, n); |
172 | } |
173 | ++format; /* Skip '$'. */ |
174 | } |
175 | } |
176 | |
177 | if (spec->width_arg < 0) |
178 | { |
179 | /* Not in a positional parameter. Consume one argument. */ |
180 | spec->width_arg = posn++; |
181 | ++nargs; |
182 | format = begin; /* Step back and reread. */ |
183 | } |
184 | } |
185 | else if (ISDIGIT (*format)) |
186 | { |
187 | int n = read_int (&format); |
188 | |
189 | /* Constant width specification. */ |
190 | if (n != -1) |
191 | spec->info.width = n; |
192 | } |
193 | /* Get the precision. */ |
194 | spec->prec_arg = -1; |
195 | /* -1 means none given; 0 means explicit 0. */ |
196 | spec->info.prec = -1; |
197 | if (*format == L_('.')) |
198 | { |
199 | ++format; |
200 | if (*format == L_('*')) |
201 | { |
202 | /* The precision is given in an argument. */ |
203 | const UCHAR_T *begin = ++format; |
204 | |
205 | if (ISDIGIT (*format)) |
206 | { |
207 | n = read_int (&format); |
208 | |
209 | if (n != 0 && *format == L_('$')) |
210 | { |
211 | if (n != -1) |
212 | { |
213 | spec->prec_arg = n - 1; |
214 | *max_ref_arg = MAX (*max_ref_arg, n); |
215 | } |
216 | ++format; |
217 | } |
218 | } |
219 | |
220 | if (spec->prec_arg < 0) |
221 | { |
222 | /* Not in a positional parameter. */ |
223 | spec->prec_arg = posn++; |
224 | ++nargs; |
225 | format = begin; |
226 | } |
227 | } |
228 | else if (ISDIGIT (*format)) |
229 | { |
230 | int n = read_int (&format); |
231 | |
232 | if (n != -1) |
233 | spec->info.prec = n; |
234 | } |
235 | else |
236 | /* "%.?" is treated like "%.0?". */ |
237 | spec->info.prec = 0; |
238 | } |
239 | |
240 | /* Check for type modifiers. */ |
241 | spec->info.is_long_double = 0; |
242 | spec->info.is_short = 0; |
243 | spec->info.is_long = 0; |
244 | spec->info.is_char = 0; |
245 | spec->info.user = 0; |
246 | |
247 | if (__builtin_expect (__printf_modifier_table == NULL, 1) |
248 | || __printf_modifier_table[*format] == NULL |
249 | || HANDLE_REGISTERED_MODIFIER (&format, &spec->info) != 0) |
250 | switch (*format++) |
251 | { |
252 | case L_('h'): |
253 | /* ints are short ints or chars. */ |
254 | if (*format != L_('h')) |
255 | spec->info.is_short = 1; |
256 | else |
257 | { |
258 | ++format; |
259 | spec->info.is_char = 1; |
260 | } |
261 | break; |
262 | case L_('l'): |
263 | /* ints are long ints. */ |
264 | spec->info.is_long = 1; |
265 | if (*format != L_('l')) |
266 | break; |
267 | ++format; |
268 | /* FALLTHROUGH */ |
269 | case L_('L'): |
270 | /* doubles are long doubles, and ints are long long ints. */ |
271 | case L_('q'): |
272 | /* 4.4 uses this for long long. */ |
273 | spec->info.is_long_double = 1; |
274 | break; |
275 | case L_('z'): |
276 | case L_('Z'): |
277 | /* ints are size_ts. */ |
278 | assert (sizeof (size_t) <= sizeof (unsigned long long int)); |
279 | #if LONG_MAX != LONG_LONG_MAX |
280 | spec->info.is_long_double = (sizeof (size_t) |
281 | > sizeof (unsigned long int)); |
282 | #endif |
283 | spec->info.is_long = sizeof (size_t) > sizeof (unsigned int); |
284 | break; |
285 | case L_('t'): |
286 | assert (sizeof (ptrdiff_t) <= sizeof (long long int)); |
287 | #if LONG_MAX != LONG_LONG_MAX |
288 | spec->info.is_long_double = (sizeof (ptrdiff_t) > sizeof (long int)); |
289 | #endif |
290 | spec->info.is_long = sizeof (ptrdiff_t) > sizeof (int); |
291 | break; |
292 | case L_('j'): |
293 | assert (sizeof (uintmax_t) <= sizeof (unsigned long long int)); |
294 | #if LONG_MAX != LONG_LONG_MAX |
295 | spec->info.is_long_double = (sizeof (uintmax_t) |
296 | > sizeof (unsigned long int)); |
297 | #endif |
298 | spec->info.is_long = sizeof (uintmax_t) > sizeof (unsigned int); |
299 | break; |
300 | default: |
301 | /* Not a recognized modifier. Backup. */ |
302 | --format; |
303 | break; |
304 | } |
305 | |
306 | /* Get the format specification. */ |
307 | spec->info.spec = (wchar_t) *format++; |
308 | spec->size = -1; |
309 | if (__builtin_expect (__printf_function_table == NULL, 1) |
310 | || spec->info.spec > UCHAR_MAX |
311 | || __printf_arginfo_table[spec->info.spec] == NULL |
312 | /* We don't try to get the types for all arguments if the format |
313 | uses more than one. The normal case is covered though. If |
314 | the call returns -1 we continue with the normal specifiers. */ |
315 | || (int) (spec->ndata_args = (*__printf_arginfo_table[spec->info.spec]) |
316 | (&spec->info, 1, &spec->data_arg_type, |
317 | &spec->size)) < 0) |
318 | { |
319 | /* Find the data argument types of a built-in spec. */ |
320 | spec->ndata_args = 1; |
321 | |
322 | switch (spec->info.spec) |
323 | { |
324 | case L'i': |
325 | case L'd': |
326 | case L'u': |
327 | case L'o': |
328 | case L'X': |
329 | case L'x': |
330 | #if LONG_MAX != LONG_LONG_MAX |
331 | if (spec->info.is_long_double) |
332 | spec->data_arg_type = PA_INT|PA_FLAG_LONG_LONG; |
333 | else |
334 | #endif |
335 | if (spec->info.is_long) |
336 | spec->data_arg_type = PA_INT|PA_FLAG_LONG; |
337 | else if (spec->info.is_short) |
338 | spec->data_arg_type = PA_INT|PA_FLAG_SHORT; |
339 | else if (spec->info.is_char) |
340 | spec->data_arg_type = PA_CHAR; |
341 | else |
342 | spec->data_arg_type = PA_INT; |
343 | break; |
344 | case L'e': |
345 | case L'E': |
346 | case L'f': |
347 | case L'F': |
348 | case L'g': |
349 | case L'G': |
350 | case L'a': |
351 | case L'A': |
352 | if (spec->info.is_long_double) |
353 | spec->data_arg_type = PA_DOUBLE|PA_FLAG_LONG_DOUBLE; |
354 | else |
355 | spec->data_arg_type = PA_DOUBLE; |
356 | break; |
357 | case L'c': |
358 | spec->data_arg_type = PA_CHAR; |
359 | break; |
360 | case L'C': |
361 | spec->data_arg_type = PA_WCHAR; |
362 | break; |
363 | case L's': |
364 | spec->data_arg_type = PA_STRING; |
365 | break; |
366 | case L'S': |
367 | spec->data_arg_type = PA_WSTRING; |
368 | break; |
369 | case L'p': |
370 | spec->data_arg_type = PA_POINTER; |
371 | break; |
372 | case L'n': |
373 | spec->data_arg_type = PA_INT|PA_FLAG_PTR; |
374 | break; |
375 | |
376 | case L'm': |
377 | default: |
378 | /* An unknown spec will consume no args. */ |
379 | spec->ndata_args = 0; |
380 | break; |
381 | } |
382 | } |
383 | |
384 | if (spec->data_arg == -1 && spec->ndata_args > 0) |
385 | { |
386 | /* There are args consumed, but no positional spec. Use the |
387 | next sequential arg position. */ |
388 | spec->data_arg = posn; |
389 | nargs += spec->ndata_args; |
390 | } |
391 | |
392 | if (spec->info.spec == L'\0') |
393 | /* Format ended before this spec was complete. */ |
394 | spec->end_of_fmt = spec->next_fmt = format - 1; |
395 | else |
396 | { |
397 | /* Find the next format spec. */ |
398 | spec->end_of_fmt = format; |
399 | #ifdef COMPILE_WPRINTF |
400 | spec->next_fmt = __find_specwc (format); |
401 | #else |
402 | spec->next_fmt = __find_specmb (format); |
403 | #endif |
404 | } |
405 | |
406 | return nargs; |
407 | } |
408 | |