1 | /* Copyright (C) 1991-2017 Free Software Foundation, Inc. |
2 | This file is part of the GNU C Library. |
3 | |
4 | The GNU C Library is free software; you can redistribute it and/or |
5 | modify it under the terms of the GNU Lesser General Public |
6 | License as published by the Free Software Foundation; either |
7 | version 2.1 of the License, or (at your option) any later version. |
8 | |
9 | The GNU C Library is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | Lesser General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU Lesser General Public |
15 | License along with the GNU C Library; if not, see |
16 | <http://www.gnu.org/licenses/>. */ |
17 | |
18 | #if HAVE_CONFIG_H |
19 | # include <config.h> |
20 | #endif |
21 | |
22 | /* Enable GNU extensions in fnmatch.h. */ |
23 | #ifndef _GNU_SOURCE |
24 | # define _GNU_SOURCE 1 |
25 | #endif |
26 | |
27 | #include <assert.h> |
28 | #include <errno.h> |
29 | #include <fnmatch.h> |
30 | #include <ctype.h> |
31 | #include <string.h> |
32 | |
33 | #if defined STDC_HEADERS || defined _LIBC |
34 | # include <stdlib.h> |
35 | #endif |
36 | |
#ifdef _LIBC
# include <alloca.h>
#else
/* Outside glibc no alloca accounting is done: allocate SIZE bytes with
   plain alloca and leave the running-total argument VAR untouched.  */
# define alloca_account(size, var) alloca (size)
#endif
42 | |
/* For platforms which support the ISO C Amendment 1 functionality we
   support user-defined character classes.  */
45 | #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) |
46 | /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */ |
47 | # include <wchar.h> |
48 | # include <wctype.h> |
49 | #endif |
50 | |
51 | /* We need some of the locale data (the collation sequence information) |
52 | but there is no interface to get this information in general. Therefore |
53 | we support a correct implementation only in glibc. */ |
54 | #ifdef _LIBC |
55 | # include "../locale/localeinfo.h" |
56 | # include "../locale/elem-hash.h" |
57 | # include "../locale/coll-lookup.h" |
58 | # include <shlib-compat.h> |
59 | |
60 | # define CONCAT(a,b) __CONCAT(a,b) |
61 | # define mbsrtowcs __mbsrtowcs |
62 | # define fnmatch __fnmatch |
63 | extern int fnmatch (const char *pattern, const char *string, int flags); |
64 | #endif |
65 | |
/* We often have to test for FNM_FILE_NAME and FNM_PERIOD being both set.
   FLAGS is parenthesized so that an expression argument (for example
   `a | b') is masked as a whole rather than only its last operand.  */
#define NO_LEADING_PERIOD(flags) \
  (((flags) & (FNM_FILE_NAME | FNM_PERIOD)) == (FNM_FILE_NAME | FNM_PERIOD))
69 | |
70 | /* Comment out all this code if we are using the GNU C Library, and are not |
71 | actually compiling the library itself. This code is part of the GNU C |
72 | Library, but also included in many other GNU distributions. Compiling |
73 | and linking in this code is a waste when using the GNU C library |
74 | (especially if it is a shared library). Rather than having every GNU |
75 | program understand `configure --with-gnu-libc' and omit the object files, |
76 | it is simpler to just do this in the source for each such file. */ |
77 | |
78 | #if defined _LIBC || !defined __GNU_LIBRARY__ |
79 | |
80 | |
/* Character classification wrappers around <ctype.h>.

   ISASCII is the guard used by the wrappers below: it is the constant 1
   when STDC_HEADERS is defined or when isascii is not available as a
   macro, and isascii (c) otherwise.  */
# if defined STDC_HEADERS || !defined isascii
#  define ISASCII(c) 1
# else
#  define ISASCII(c) isascii(c)
# endif

/* isblank and isgraph are only used when they exist as macros; otherwise
   the hand-written fallback expressions are used instead.  */
# ifdef isblank
#  define ISBLANK(c) (ISASCII (c) && isblank (c))
# else
#  define ISBLANK(c) ((c) == ' ' || (c) == '\t')
# endif
# ifdef isgraph
#  define ISGRAPH(c) (ISASCII (c) && isgraph (c))
# else
#  define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
# endif

/* Each of these evaluates C twice (once for ISASCII, once for the
   classification call) unless ISASCII is the constant 1.  */
# define ISPRINT(c) (ISASCII (c) && isprint (c))
# define ISDIGIT(c) (ISASCII (c) && isdigit (c))
# define ISALNUM(c) (ISASCII (c) && isalnum (c))
# define ISALPHA(c) (ISASCII (c) && isalpha (c))
# define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
# define ISLOWER(c) (ISASCII (c) && islower (c))
# define ISPUNCT(c) (ISASCII (c) && ispunct (c))
# define ISSPACE(c) (ISASCII (c) && isspace (c))
# define ISUPPER(c) (ISASCII (c) && isupper (c))
# define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))

/* Nonzero when the strings S1 and S2 compare equal with strcmp.  */
# define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
110 | |
/* Character-class support.  With <wctype.h>/<wchar.h> (or inside glibc)
   class names are resolved through wctype; otherwise only the twelve
   fixed names listed in the fallback IS_CHAR_CLASS below are accepted.  */
# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
/* The GNU C library provides support for user-defined character classes
   and the functions from ISO C Amendment 1.  */
#  ifdef CHARCLASS_NAME_MAX
#   define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
#  else
/* This shouldn't happen but some implementations might still have this
   problem.  Use a reasonable default value.  */
#   define CHAR_CLASS_MAX_LENGTH 256
#  endif

/* Look up the class name STRING; inside glibc the internal __wctype
   entry point is used.  */
#  ifdef _LIBC
#   define IS_CHAR_CLASS(string) __wctype (string)
#  else
#   define IS_CHAR_CLASS(string) wctype (string)
#  endif

/* Test whether the wide character WC belongs to the class WT.  */
#  ifdef _LIBC
#   define ISWCTYPE(WC, WT) __iswctype (WC, WT)
#  else
#   define ISWCTYPE(WC, WT) iswctype (WC, WT)
#  endif

#  if (HAVE_MBSTATE_T && HAVE_MBSRTOWCS) || _LIBC
/* In this case we are implementing the multibyte character handling.  */
#   define HANDLE_MULTIBYTE 1
#  endif

# else
#  define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */

/* Without wctype, a class name is valid only if it is one of these
   twelve literal names.  */
#  define IS_CHAR_CLASS(string) \
  (STREQ (string, "alpha") || STREQ (string, "upper") \
   || STREQ (string, "lower") || STREQ (string, "digit") \
   || STREQ (string, "alnum") || STREQ (string, "xdigit") \
   || STREQ (string, "space") || STREQ (string, "print") \
   || STREQ (string, "punct") || STREQ (string, "graph") \
   || STREQ (string, "cntrl") || STREQ (string, "blank"))
# endif
150 | |
151 | /* Avoid depending on library functions or files |
152 | whose names are inconsistent. */ |
153 | |
154 | # if !defined _LIBC && !defined getenv |
155 | extern char *getenv (); |
156 | # endif |
157 | |
158 | # ifndef errno |
159 | extern int errno; |
160 | # endif |
161 | |
162 | /* Global variable. */ |
163 | static int posixly_correct; |
164 | |
165 | /* This function doesn't exist on most systems. */ |
166 | |
167 | # if !defined HAVE___STRCHRNUL && !defined _LIBC |
/* Return a pointer to the first occurrence of C in S, or to the
   terminating null byte of S when C does not occur.  */
static char *
__strchrnul (const char *s, int c)
{
  char *hit = strchr (s, c);

  return hit != NULL ? hit : strchr (s, '\0');
}
176 | # endif |
177 | |
178 | # if HANDLE_MULTIBYTE && !defined HAVE___STRCHRNUL && !defined _LIBC |
/* Wide-character counterpart of __strchrnul: return a pointer to the
   first occurrence of C in S, or to the terminating null wide character
   of S when C does not occur.  */
static wchar_t *
__wcschrnul (const wchar_t *s, wint_t c)
{
  wchar_t *hit = wcschr (s, c);

  return hit != NULL ? hit : wcschr (s, '\0');
}
187 | # endif |
188 | |
189 | # ifndef internal_function |
/* Inside GNU libc we mark some functions in a special way.  In other
   environments simply ignore the marking.  */
192 | # define internal_function |
193 | # endif |
194 | |
/* Parameter macros for the single-byte pass over fnmatch_loop.c, which is
   included at the end of this group.  FCT is set to internal_fnmatch, the
   name fnmatch below calls for the non-multibyte case; presumably the
   included file defines the functions named by FCT, EXT and END -- confirm
   in fnmatch_loop.c.  */

/* Note that this evaluates C many times.  FOLD also expands to a
   reference to a variable named `flags' at the point of use.  */
# ifdef _LIBC
#  define FOLD(c) ((flags & FNM_CASEFOLD) ? tolower (c) : (c))
# else
#  define FOLD(c) ((flags & FNM_CASEFOLD) && ISUPPER (c) ? tolower (c) : (c))
# endif
/* Character types and generated identifier names for this pass.  */
# define CHAR char
# define UCHAR unsigned char
# define INT int
# define FCT internal_fnmatch
# define EXT ext_match
# define END end_pattern
# define STRUCT fnmatch_struct
/* Literals are used unchanged (no L prefix) in the narrow pass.  */
# define L(CS) CS
# ifdef _LIBC
#  define BTOWC(C) __btowc (C)
# else
#  define BTOWC(C) btowc (C)
# endif
/* String primitives matching CHAR.  */
# define STRLEN(S) strlen (S)
# define STRCAT(D, S) strcat (D, S)
# define MEMPCPY(D, S, N) __mempcpy (D, S, N)
# define MEMCHR(S, C, N) memchr (S, C, N)
# define STRCOLL(S1, S2) strcoll (S1, S2)
# define WIDE_CHAR_VERSION 0
# include <locale/weight.h>
# define FINDIDX findidx
# include "fnmatch_loop.c"
223 | |
224 | |
225 | # if HANDLE_MULTIBYTE |
/* Parameter macros for the wide-character pass over fnmatch_loop.c (the
   include itself follows is_char_class below).  FCT is set to
   internal_fnwmatch, the name fnmatch calls for multibyte locales.
   NOTE(review): the macros from the narrow pass are redefined here
   without an #undef in this file; presumably fnmatch_loop.c undefines
   them at its end -- confirm.  */

/* Note that this evaluates C many times.  */
#  ifdef _LIBC
#   define FOLD(c) ((flags & FNM_CASEFOLD) ? __towlower (c) : (c))
#  else
#   define FOLD(c) ((flags & FNM_CASEFOLD) && ISUPPER (c) ? towlower (c) : (c))
#  endif
/* Character types and generated identifier names for this pass.  */
#  define CHAR wchar_t
#  define UCHAR wint_t
#  define INT wint_t
#  define FCT internal_fnwmatch
#  define EXT ext_wmatch
#  define END end_wpattern
#  define STRUCT fnwmatch_struct
/* Literals get an L prefix, making them wide literals.  */
#  define L(CS) L##CS
/* The input is already a wide character, so no conversion is applied.  */
#  define BTOWC(C) (C)
/* Wide-character string primitives matching CHAR.  */
#  define STRLEN(S) __wcslen (S)
#  define STRCAT(D, S) __wcscat (D, S)
#  define MEMPCPY(D, S, N) __wmempcpy (D, S, N)
#  define MEMCHR(S, C, N) __wmemchr (S, C, N)
#  define STRCOLL(S1, S2) wcscoll (S1, S2)
#  define WIDE_CHAR_VERSION 1
/* Change the name the header defines so it doesn't conflict with
   the <locale/weight.h> version included above.  */
#  define findidx findidxwc
#  include <locale/weightwc.h>
#  undef findidx
#  define FINDIDX findidxwc
254 | # undef IS_CHAR_CLASS |
255 | /* We have to convert the wide character string in a multibyte string. But |
256 | we know that the character class names consist of alphanumeric characters |
257 | from the portable character set, and since the wide character encoding |
258 | for a member of the portable character set is the same code point as |
259 | its single-byte encoding, we can use a simplified method to convert the |
260 | string to a multibyte character string. */ |
261 | static wctype_t |
262 | is_char_class (const wchar_t *wcs) |
263 | { |
264 | char s[CHAR_CLASS_MAX_LENGTH + 1]; |
265 | char *cp = s; |
266 | |
267 | do |
268 | { |
269 | /* Test for a printable character from the portable character set. */ |
270 | # ifdef _LIBC |
271 | if (*wcs < 0x20 || *wcs > 0x7e |
272 | || *wcs == 0x24 || *wcs == 0x40 || *wcs == 0x60) |
273 | return (wctype_t) 0; |
274 | # else |
275 | switch (*wcs) |
276 | { |
277 | case L' ': case L'!': case L'"': case L'#': case L'%': |
278 | case L'&': case L'\'': case L'(': case L')': case L'*': |
279 | case L'+': case L',': case L'-': case L'.': case L'/': |
280 | case L'0': case L'1': case L'2': case L'3': case L'4': |
281 | case L'5': case L'6': case L'7': case L'8': case L'9': |
282 | case L':': case L';': case L'<': case L'=': case L'>': |
283 | case L'?': |
284 | case L'A': case L'B': case L'C': case L'D': case L'E': |
285 | case L'F': case L'G': case L'H': case L'I': case L'J': |
286 | case L'K': case L'L': case L'M': case L'N': case L'O': |
287 | case L'P': case L'Q': case L'R': case L'S': case L'T': |
288 | case L'U': case L'V': case L'W': case L'X': case L'Y': |
289 | case L'Z': |
290 | case L'[': case L'\\': case L']': case L'^': case L'_': |
291 | case L'a': case L'b': case L'c': case L'd': case L'e': |
292 | case L'f': case L'g': case L'h': case L'i': case L'j': |
293 | case L'k': case L'l': case L'm': case L'n': case L'o': |
294 | case L'p': case L'q': case L'r': case L's': case L't': |
295 | case L'u': case L'v': case L'w': case L'x': case L'y': |
296 | case L'z': case L'{': case L'|': case L'}': case L'~': |
297 | break; |
298 | default: |
299 | return (wctype_t) 0; |
300 | } |
301 | # endif |
302 | |
303 | /* Avoid overrunning the buffer. */ |
304 | if (cp == s + CHAR_CLASS_MAX_LENGTH) |
305 | return (wctype_t) 0; |
306 | |
307 | *cp++ = (char) *wcs++; |
308 | } |
309 | while (*wcs != L'\0'); |
310 | |
311 | *cp = '\0'; |
312 | |
313 | # ifdef _LIBC |
314 | return __wctype (s); |
315 | # else |
316 | return wctype (s); |
317 | # endif |
318 | } |
319 | # define IS_CHAR_CLASS(string) is_char_class (string) |
320 | |
321 | # include "fnmatch_loop.c" |
322 | # endif |
323 | |
324 | |
/* Match STRING against the wildcard PATTERN under control of FLAGS.

   When multibyte handling is compiled in (HANDLE_MULTIBYTE) and
   MB_CUR_MAX is not 1, both arguments are first converted to
   wide-character strings and matched with internal_fnwmatch.  Otherwise
   the byte strings are matched directly with internal_fnmatch.

   Returns the value produced by the internal matcher, or
     -1  if mbsrtowcs fails while converting PATTERN or STRING,
     -2  if a wide-character buffer cannot be obtained from malloc, or
         the required element count is too large to multiply by
         sizeof (wchar_t) (errno is set to ENOMEM in that case).  */
int
fnmatch (const char *pattern, const char *string, int flags)
{
# if HANDLE_MULTIBYTE
  if (__builtin_expect (MB_CUR_MAX, 1) != 1)
    {
      mbstate_t ps;
      size_t n;
      const char *p;
      /* The *_malloc pointers are non-NULL only when the corresponding
         buffer came from malloc, so they can be passed to free
         unconditionally on every exit path.  */
      wchar_t *wpattern_malloc = NULL;
      wchar_t *wpattern;
      wchar_t *wstring_malloc = NULL;
      wchar_t *wstring;
      size_t alloca_used = 0;

      /* Convert the strings into wide characters.  */
      memset (&ps, '\0', sizeof (ps));
      p = pattern;
#ifdef _LIBC
      n = __strnlen (pattern, 1024);
#else
      n = strlen (pattern);
#endif
      /* Patterns shorter than 1024 bytes are converted into an alloca
         buffer of n + 1 wide characters; longer ones use malloc.  */
      if (__glibc_likely (n < 1024))
        {
          wpattern = (wchar_t *) alloca_account ((n + 1) * sizeof (wchar_t),
                                                 alloca_used);
          n = mbsrtowcs (wpattern, &p, n + 1, &ps);
          if (__glibc_unlikely (n == (size_t) -1))
            /* Something wrong.
               XXX Do we have to set `errno' to something which mbsrtowcs
               hasn't already done?  */
            return -1;
          if (p)
            {
              /* mbsrtowcs left P non-NULL, i.e. it did not reach the
                 terminating null: reset the state and redo the
                 conversion on the malloc path below.  */
              memset (&ps, '\0', sizeof (ps));
              goto prepare_wpattern;
            }
        }
      else
        {
        prepare_wpattern:
          /* First call with a NULL destination only computes the number
             of wide characters needed.  */
          n = mbsrtowcs (NULL, &pattern, 0, &ps);
          if (__glibc_unlikely (n == (size_t) -1))
            /* Something wrong.
               XXX Do we have to set `errno' to something which mbsrtowcs
               hasn't already done?  */
            return -1;
          /* Refuse sizes for which (n + 1) * sizeof (wchar_t) would
             overflow.  */
          if (__glibc_unlikely (n >= (size_t) -1 / sizeof (wchar_t)))
            {
              __set_errno (ENOMEM);
              return -2;
            }
          wpattern_malloc = wpattern
            = (wchar_t *) malloc ((n + 1) * sizeof (wchar_t));
          assert (mbsinit (&ps));
          if (wpattern == NULL)
            return -2;
          (void) mbsrtowcs (wpattern, &pattern, n + 1, &ps);
        }

      /* Same two-tier conversion for STRING.  From here on every error
         exit must release wpattern_malloc.  */
      assert (mbsinit (&ps));
#ifdef _LIBC
      n = __strnlen (string, 1024);
#else
      n = strlen (string);
#endif
      p = string;
      if (__glibc_likely (n < 1024))
        {
          wstring = (wchar_t *) alloca_account ((n + 1) * sizeof (wchar_t),
                                                alloca_used);
          n = mbsrtowcs (wstring, &p, n + 1, &ps);
          if (__glibc_unlikely (n == (size_t) -1))
            {
              /* Something wrong.
                 XXX Do we have to set `errno' to something which
                 mbsrtowcs hasn't already done?  */
            free_return:
              /* Shared error exit, also reached by goto from the malloc
                 path below.  */
              free (wpattern_malloc);
              return -1;
            }
          if (p)
            {
              /* Conversion stopped before the terminator; retry on the
                 malloc path with a fresh state.  */
              memset (&ps, '\0', sizeof (ps));
              goto prepare_wstring;
            }
        }
      else
        {
        prepare_wstring:
          /* Count the wide characters needed before allocating.  */
          n = mbsrtowcs (NULL, &string, 0, &ps);
          if (__glibc_unlikely (n == (size_t) -1))
            /* Something wrong.
               XXX Do we have to set `errno' to something which mbsrtowcs
               hasn't already done?  */
            goto free_return;
          if (__glibc_unlikely (n >= (size_t) -1 / sizeof (wchar_t)))
            {
              free (wpattern_malloc);
              __set_errno (ENOMEM);
              return -2;
            }

          wstring_malloc = wstring
            = (wchar_t *) malloc ((n + 1) * sizeof (wchar_t));
          if (wstring == NULL)
            {
              free (wpattern_malloc);
              return -2;
            }
          assert (mbsinit (&ps));
          (void) mbsrtowcs (wstring, &string, n + 1, &ps);
        }

      /* N is now the length of WSTRING in wide characters, so
         wstring + n is the end pointer handed to the matcher.  */
      int res = internal_fnwmatch (wpattern, wstring, wstring + n,
                                   flags & FNM_PERIOD, flags, NULL,
                                   alloca_used);

      free (wstring_malloc);
      free (wpattern_malloc);

      return res;
    }
# endif  /* mbstate_t and mbsrtowcs or _LIBC.  */

  /* Single-byte locale, or no multibyte support compiled in: match the
     byte strings directly.  */
  return internal_fnmatch (pattern, string, string + strlen (string),
                           flags & FNM_PERIOD, flags, NULL, 0);
}
454 | |
/* Inside glibc the function above is compiled under the name __fnmatch
   (see the `# define fnmatch __fnmatch' near the top of the file); drop
   that alias here and export the public name.
   NOTE(review): versioned_symbol, compat_symbol, strong_alias and
   libc_hidden_ver are glibc-internal macros that are not defined in this
   file; the lines below presumably bind fnmatch to __fnmatch at version
   GLIBC_2_2_3 and keep a GLIBC_2_0 compatibility alias -- confirm
   against <shlib-compat.h>.  */
# ifdef _LIBC
#  undef fnmatch
versioned_symbol (libc, __fnmatch, fnmatch, GLIBC_2_2_3);
#  if SHLIB_COMPAT(libc, GLIBC_2_0, GLIBC_2_2_3)
strong_alias (__fnmatch, __fnmatch_old)
compat_symbol (libc, __fnmatch_old, fnmatch, GLIBC_2_0);
#  endif
libc_hidden_ver (__fnmatch, fnmatch)
# endif
464 | |
465 | #endif /* _LIBC or not __GNU_LIBRARY__. */ |
466 | |