1 | /* Copyright (C) 1991-2019 Free Software Foundation, Inc. |
2 | This file is part of the GNU C Library. |
3 | |
4 | The GNU C Library is free software; you can redistribute it and/or |
5 | modify it under the terms of the GNU Lesser General Public |
6 | License as published by the Free Software Foundation; either |
7 | version 2.1 of the License, or (at your option) any later version. |
8 | |
9 | The GNU C Library is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | Lesser General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU Lesser General Public |
15 | License along with the GNU C Library; if not, see |
16 | <http://www.gnu.org/licenses/>. */ |
17 | |
18 | #if HAVE_CONFIG_H |
19 | # include <config.h> |
20 | #endif |
21 | |
22 | /* Enable GNU extensions in fnmatch.h. */ |
23 | #ifndef _GNU_SOURCE |
24 | # define _GNU_SOURCE 1 |
25 | #endif |
26 | |
27 | #include <assert.h> |
28 | #include <errno.h> |
29 | #include <fnmatch.h> |
30 | #include <ctype.h> |
31 | #include <string.h> |
32 | |
33 | #if defined STDC_HEADERS || defined _LIBC |
34 | # include <stdlib.h> |
35 | #endif |
36 | |
/* alloca_account (SIZE, VAR) yields SIZE bytes of stack storage.  Inside
   glibc the macro is expected to come from <alloca.h> (presumably it also
   records the allocation in VAR -- confirm there).  Elsewhere fall back to
   plain alloca and leave VAR untouched.  */
#ifdef _LIBC
# include <alloca.h>
#else
# define alloca_account(size, var) alloca (size)
#endif
42 | |
/* For platforms which support the ISO C Amendment 1 functionality we
   support user-defined character classes.  */
45 | #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) |
46 | /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */ |
47 | # include <wchar.h> |
48 | # include <wctype.h> |
49 | #endif |
50 | |
51 | /* We need some of the locale data (the collation sequence information) |
52 | but there is no interface to get this information in general. Therefore |
53 | we support a correct implementation only in glibc. */ |
#ifdef _LIBC
# include "../locale/localeinfo.h"
# include "../locale/coll-lookup.h"
# include <shlib-compat.h>

/* CONCAT is an alias for __CONCAT.  */
# define CONCAT(a,b) __CONCAT(a,b)
/* Route mbsrtowcs calls in this file to the double-underscore name.  */
# define mbsrtowcs __mbsrtowcs
/* From here on every `fnmatch' token, including the function definition
   further down, is renamed to __fnmatch until the macro is undefined.  */
# define fnmatch __fnmatch
/* Because of the macro above, this line declares __fnmatch.  */
extern int fnmatch (const char *pattern, const char *string, int flags);
#endif
64 | |
/* We often have to test for FNM_FILE_NAME and FNM_PERIOD being both set.
   Evaluates to nonzero iff both bits are present in FLAGS.  FLAGS is
   parenthesized so that an argument built with a low-precedence operator
   (for instance `a | b') is masked as a whole.  */
#define NO_LEADING_PERIOD(flags) \
  (((flags) & (FNM_FILE_NAME | FNM_PERIOD)) == (FNM_FILE_NAME | FNM_PERIOD))
68 | |
69 | /* Comment out all this code if we are using the GNU C Library, and are not |
70 | actually compiling the library itself. This code is part of the GNU C |
71 | Library, but also included in many other GNU distributions. Compiling |
72 | and linking in this code is a waste when using the GNU C library |
73 | (especially if it is a shared library). Rather than having every GNU |
74 | program understand `configure --with-gnu-libc' and omit the object files, |
75 | it is simpler to just do this in the source for each such file. */ |
76 | |
77 | #if defined _LIBC || !defined __GNU_LIBRARY__ |
78 | |
79 | |
/* ISASCII (C) guards the <ctype.h> calls in the wrappers below.  When
   STDC_HEADERS is defined, or when there is no isascii macro, it is the
   constant 1 and the guard disappears; otherwise the isascii macro is
   consulted before the classification function is called.  */
# if defined STDC_HEADERS || !defined isascii
# define ISASCII(c) 1
# else
# define ISASCII(c) isascii(c)
# endif

/* Use the isblank macro when there is one; otherwise treat exactly space
   and horizontal tab as blank.  */
# ifdef isblank
# define ISBLANK(c) (ISASCII (c) && isblank (c))
# else
# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
# endif
/* Likewise for isgraph: without the macro, "graphic" is spelled out as
   printable but not white space.  */
# ifdef isgraph
# define ISGRAPH(c) (ISASCII (c) && isgraph (c))
# else
# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
# endif

/* The remaining wrappers pair ISASCII with the <ctype.h> function of the
   same name.  Each one may evaluate C twice.  */
# define ISPRINT(c) (ISASCII (c) && isprint (c))
# define ISDIGIT(c) (ISASCII (c) && isdigit (c))
# define ISALNUM(c) (ISASCII (c) && isalnum (c))
# define ISALPHA(c) (ISASCII (c) && isalpha (c))
# define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
# define ISLOWER(c) (ISASCII (c) && islower (c))
# define ISPUNCT(c) (ISASCII (c) && ispunct (c))
# define ISSPACE(c) (ISASCII (c) && isspace (c))
# define ISUPPER(c) (ISASCII (c) && isupper (c))
# define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
107 | |
/* Evaluates to 1 when the strings S1 and S2 compare equal, else 0.  */
# define STREQ(s1, s2) (!strcmp (s1, s2))
109 | |
# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
/* The GNU C library provides support for user-defined character classes
   and the functions from ISO C Amendment 1.  */
# ifdef CHARCLASS_NAME_MAX
# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
# else
/* This shouldn't happen but some implementation might still have this
   problem. Use a reasonable default value. */
# define CHAR_CLASS_MAX_LENGTH 256
# endif

/* IS_CHAR_CLASS (STRING) looks the class name up with wctype, or with the
   double-underscore name inside glibc.  */
# ifdef _LIBC
# define IS_CHAR_CLASS(string) __wctype (string)
# else
# define IS_CHAR_CLASS(string) wctype (string)
# endif

/* ISWCTYPE (WC, WT) forwards to iswctype in the same fashion.  */
# ifdef _LIBC
# define ISWCTYPE(WC, WT) __iswctype (WC, WT)
# else
# define ISWCTYPE(WC, WT) iswctype (WC, WT)
# endif

# if (HAVE_MBSTATE_T && HAVE_MBSRTOWCS) || _LIBC
/* In this case we are implementing the multibyte character handling. */
# define HANDLE_MULTIBYTE 1
# endif

# else
/* Without <wctype.h> and <wchar.h> only the twelve fixed class names
   listed below are recognized, and HANDLE_MULTIBYTE stays undefined.  */
# define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */

# define IS_CHAR_CLASS(string) \
(STREQ (string, "alpha") || STREQ (string, "upper") \
|| STREQ (string, "lower") || STREQ (string, "digit") \
|| STREQ (string, "alnum") || STREQ (string, "xdigit") \
|| STREQ (string, "space") || STREQ (string, "print") \
|| STREQ (string, "punct") || STREQ (string, "graph") \
|| STREQ (string, "cntrl") || STREQ (string, "blank"))
# endif
149 | |
150 | /* Avoid depending on library functions or files |
151 | whose names are inconsistent. */ |
152 | |
/* Declare getenv ourselves when building outside glibc and it is not a
   macro.  A full prototype is given: an empty parameter list is an
   obsolescent form that means `(void)' in C23 and would then clash with
   the real function.  */
# if !defined _LIBC && !defined getenv
extern char *getenv (const char *name);
# endif

/* Only declared when errno is not already a macro.  */
# ifndef errno
extern int errno;
# endif

/* Global variable.  Nothing in this part of the file reads or writes it;
   presumably the included fnmatch_loop.c does -- confirm there.  */
static int posixly_correct;
163 | |
164 | /* This function doesn't exist on most systems. */ |
165 | |
166 | # if !defined HAVE___STRCHRNUL && !defined _LIBC |
/* Return a pointer to the first occurrence of C in S, or to the
   terminating null byte of S when C does not occur.  */
static char *
__strchrnul (const char *s, int c)
{
  const char *hit = strchr (s, c);

  /* No match: the terminator sits strlen (S) bytes past the start.  */
  if (hit == NULL)
    hit = s + strlen (s);

  return (char *) hit;
}
175 | # endif |
176 | |
177 | # if HANDLE_MULTIBYTE && !defined HAVE___STRCHRNUL && !defined _LIBC |
/* Wide-character counterpart of __strchrnul: return a pointer to the first
   occurrence of C in S, or to the terminating null wide character of S
   when C does not occur.  */
static wchar_t *
__wcschrnul (const wchar_t *s, wint_t c)
{
  const wchar_t *hit = wcschr (s, c);

  /* No match: the terminator sits wcslen (S) elements past the start.  */
  if (hit == NULL)
    hit = s + wcslen (s);

  return (wchar_t *) hit;
}
186 | # endif |
187 | |
/* Parameters for the single-byte inclusion of fnmatch_loop.c.  The macros
   below choose the character types, the names of the generated entities
   (FCT, EXT, END, STRUCT) and the string primitives; the included file is
   presumably written purely in terms of them -- see fnmatch_loop.c.
   fnmatch below calls the resulting internal_fnmatch.  */

/* Fold C to lower case when FNM_CASEFOLD is set in a variable named
   `flags' that must be in scope at the point of use.
   Note that this evaluates C many times. */
# ifdef _LIBC
# define FOLD(c) ((flags & FNM_CASEFOLD) ? tolower (c) : (c))
# else
# define FOLD(c) ((flags & FNM_CASEFOLD) && ISUPPER (c) ? tolower (c) : (c))
# endif
# define CHAR char
# define UCHAR unsigned char
# define INT int
# define FCT internal_fnmatch
# define EXT ext_match
# define END end_pattern
# define STRUCT fnmatch_struct
/* L (CS) leaves a literal unchanged in the single-byte variant.  */
# define L(CS) CS
# ifdef _LIBC
# define BTOWC(C) __btowc (C)
# else
# define BTOWC(C) btowc (C)
# endif
# define STRLEN(S) strlen (S)
# define STRCAT(D, S) strcat (D, S)
# define MEMPCPY(D, S, N) __mempcpy (D, S, N)
# define MEMCHR(S, C, N) memchr (S, C, N)
# define STRCOLL(S1, S2) strcoll (S1, S2)
# define WIDE_CHAR_VERSION 0
# include <locale/weight.h>
# define FINDIDX findidx
# include "fnmatch_loop.c"
216 | |
217 | |
218 | # if HANDLE_MULTIBYTE |
/* Fold the wide character C to lower case when FNM_CASEFOLD is set in a
   variable named `flags' that must be in scope at the point of use.
   Note that this evaluates C many times.
   Outside glibc the wide-character classifier iswupper is used: passing a
   value that does not fit in unsigned char to the narrow isupper is
   undefined, and it could never recognize non-ASCII upper-case letters.  */
# ifdef _LIBC
# define FOLD(c) ((flags & FNM_CASEFOLD) ? __towlower (c) : (c))
# else
# define FOLD(c) ((flags & FNM_CASEFOLD) && iswupper (c) ? towlower (c) : (c))
# endif
/* Parameters for the wide-character inclusion of fnmatch_loop.c: the same
   macro names as for the single-byte variant, now bound to wchar_t types
   and wide-string primitives.  fnmatch below calls the resulting
   internal_fnwmatch.  */
# define CHAR wchar_t
# define UCHAR wint_t
# define INT wint_t
# define FCT internal_fnwmatch
# define EXT ext_wmatch
# define END end_wpattern
# define STRUCT fnwmatch_struct
/* L (CS) turns a literal into its wide form by pasting an `L' prefix.  */
# define L(CS) L##CS
/* The argument is already a wide character; no conversion is needed.  */
# define BTOWC(C) (C)
# define STRLEN(S) __wcslen (S)
# define STRCAT(D, S) __wcscat (D, S)
# define MEMPCPY(D, S, N) __wmempcpy (D, S, N)
# define MEMCHR(S, C, N) __wmemchr (S, C, N)
# define STRCOLL(S1, S2) wcscoll (S1, S2)
# ifdef _LIBC
# define WMEMCMP(S1, S2, N) __wmemcmp (S1, S2, N)
# else
# define WMEMCMP(S1, S2, N) wmemcmp (S1, S2, N)
# endif
# define WIDE_CHAR_VERSION 1
/* Change the name the header defines so it doesn't conflict with
   the <locale/weight.h> version included above. */
# define findidx findidxwc
# include <locale/weightwc.h>
# undef findidx
# define FINDIDX findidxwc
252 | # undef IS_CHAR_CLASS |
/* We have to convert the wide character string into a multibyte string.
   But we know that the character class names consist of alphanumeric
   characters from the portable character set, and since the wide character
   encoding for a member of the portable character set is the same code
   point as its single-byte encoding, we can use a simplified method to
   convert the string to a multibyte character string.  */
259 | static wctype_t |
260 | is_char_class (const wchar_t *wcs) |
261 | { |
262 | char s[CHAR_CLASS_MAX_LENGTH + 1]; |
263 | char *cp = s; |
264 | |
265 | do |
266 | { |
267 | /* Test for a printable character from the portable character set. */ |
268 | # ifdef _LIBC |
269 | if (*wcs < 0x20 || *wcs > 0x7e |
270 | || *wcs == 0x24 || *wcs == 0x40 || *wcs == 0x60) |
271 | return (wctype_t) 0; |
272 | # else |
273 | switch (*wcs) |
274 | { |
275 | case L' ': case L'!': case L'"': case L'#': case L'%': |
276 | case L'&': case L'\'': case L'(': case L')': case L'*': |
277 | case L'+': case L',': case L'-': case L'.': case L'/': |
278 | case L'0': case L'1': case L'2': case L'3': case L'4': |
279 | case L'5': case L'6': case L'7': case L'8': case L'9': |
280 | case L':': case L';': case L'<': case L'=': case L'>': |
281 | case L'?': |
282 | case L'A': case L'B': case L'C': case L'D': case L'E': |
283 | case L'F': case L'G': case L'H': case L'I': case L'J': |
284 | case L'K': case L'L': case L'M': case L'N': case L'O': |
285 | case L'P': case L'Q': case L'R': case L'S': case L'T': |
286 | case L'U': case L'V': case L'W': case L'X': case L'Y': |
287 | case L'Z': |
288 | case L'[': case L'\\': case L']': case L'^': case L'_': |
289 | case L'a': case L'b': case L'c': case L'd': case L'e': |
290 | case L'f': case L'g': case L'h': case L'i': case L'j': |
291 | case L'k': case L'l': case L'm': case L'n': case L'o': |
292 | case L'p': case L'q': case L'r': case L's': case L't': |
293 | case L'u': case L'v': case L'w': case L'x': case L'y': |
294 | case L'z': case L'{': case L'|': case L'}': case L'~': |
295 | break; |
296 | default: |
297 | return (wctype_t) 0; |
298 | } |
299 | # endif |
300 | |
301 | /* Avoid overrunning the buffer. */ |
302 | if (cp == s + CHAR_CLASS_MAX_LENGTH) |
303 | return (wctype_t) 0; |
304 | |
305 | *cp++ = (char) *wcs++; |
306 | } |
307 | while (*wcs != L'\0'); |
308 | |
309 | *cp = '\0'; |
310 | |
311 | # ifdef _LIBC |
312 | return __wctype (s); |
313 | # else |
314 | return wctype (s); |
315 | # endif |
316 | } |
317 | # define IS_CHAR_CLASS(string) is_char_class (string) |
318 | |
319 | # include "fnmatch_loop.c" |
320 | # endif |
321 | |
322 | |
/* Match STRING against PATTERN under FLAGS.

   When multibyte handling is compiled in and MB_CUR_MAX is not 1, both
   arguments are converted to wide-character strings and passed to
   internal_fnwmatch; otherwise the byte strings go straight to
   internal_fnmatch.  In both cases the matcher also receives
   FLAGS & FNM_PERIOD as a separate argument.

   An input shorter than 1024 bytes is converted into a stack buffer
   obtained with alloca_account; anything longer (or anything the first
   attempt did not finish) is measured with mbsrtowcs and converted into
   a malloc'd buffer that is freed before returning.

   Returns the matcher's result, or
     -1 when a multibyte-to-wide conversion fails,
     -2 when a wide buffer cannot be allocated (errno is set to ENOMEM
        explicitly when the size computation would overflow).  */
int
fnmatch (const char *pattern, const char *string, int flags)
{
# if HANDLE_MULTIBYTE
  /* MB_CUR_MAX is expected to be 1; take the wide path when it is not.  */
  if (__builtin_expect (MB_CUR_MAX, 1) != 1)
    {
      mbstate_t ps;
      size_t n;
      const char *p;
      /* The *_malloc pointers stay NULL unless the corresponding buffer
         came from malloc, so they can be passed to free on every path.  */
      wchar_t *wpattern_malloc = NULL;
      wchar_t *wpattern;
      wchar_t *wstring_malloc = NULL;
      wchar_t *wstring;
      /* Handed to alloca_account below and to the matcher at the end.  */
      size_t alloca_used = 0;

      /* Convert the strings into wide characters. */
      memset (&ps, '\0', sizeof (ps));
      p = pattern;
#ifdef _LIBC
      /* Inside glibc the scan stops at 1024 bytes; only "shorter than
         1024 or not" matters for the test below.  */
      n = __strnlen (pattern, 1024);
#else
      n = strlen (pattern);
#endif
      if (__glibc_likely (n < 1024))
        {
          /* N bytes convert to at most N wide characters plus the null.  */
          wpattern = (wchar_t *) alloca_account ((n + 1) * sizeof (wchar_t),
                                                 alloca_used);
          n = mbsrtowcs (wpattern, &p, n + 1, &ps);
          if (__glibc_unlikely (n == (size_t) -1))
            /* Something wrong.
               XXX Do we have to set `errno' to something which mbsrtowcs
               hasn't already done? */
            return -1;
          /* mbsrtowcs sets P to NULL once it has converted the terminating
             null.  A non-null P means it stopped early: reset the state
             and redo the conversion through the malloc path.  */
          if (p)
            {
              memset (&ps, '\0', sizeof (ps));
              goto prepare_wpattern;
            }
        }
      else
        {
        prepare_wpattern:
          /* First pass with a NULL destination only counts the wide
             characters PATTERN needs.  */
          n = mbsrtowcs (NULL, &pattern, 0, &ps);
          if (__glibc_unlikely (n == (size_t) -1))
            /* Something wrong.
               XXX Do we have to set `errno' to something which mbsrtowcs
               hasn't already done? */
            return -1;
          /* Refuse sizes for which (n + 1) * sizeof (wchar_t) would
             overflow size_t.  */
          if (__glibc_unlikely (n >= (size_t) -1 / sizeof (wchar_t)))
            {
              __set_errno (ENOMEM);
              return -2;
            }
          wpattern_malloc = wpattern
            = (wchar_t *) malloc ((n + 1) * sizeof (wchar_t));
          assert (mbsinit (&ps));
          if (wpattern == NULL)
            return -2;
          (void) mbsrtowcs (wpattern, &pattern, n + 1, &ps);
        }

      /* Same procedure for STRING.  From here on every error path must
         release wpattern_malloc.  */
      assert (mbsinit (&ps));
#ifdef _LIBC
      n = __strnlen (string, 1024);
#else
      n = strlen (string);
#endif
      p = string;
      if (__glibc_likely (n < 1024))
        {
          wstring = (wchar_t *) alloca_account ((n + 1) * sizeof (wchar_t),
                                                alloca_used);
          n = mbsrtowcs (wstring, &p, n + 1, &ps);
          if (__glibc_unlikely (n == (size_t) -1))
            {
              /* Something wrong.
                 XXX Do we have to set `errno' to something which
                 mbsrtowcs hasn't already done? */
              /* Shared exit for conversion failures of STRING; also the
                 target of a goto in the malloc branch below.  */
            free_return:
              free (wpattern_malloc);
              return -1;
            }
          if (p)
            {
              memset (&ps, '\0', sizeof (ps));
              goto prepare_wstring;
            }
        }
      else
        {
        prepare_wstring:
          n = mbsrtowcs (NULL, &string, 0, &ps);
          if (__glibc_unlikely (n == (size_t) -1))
            /* Something wrong.
               XXX Do we have to set `errno' to something which mbsrtowcs
               hasn't already done? */
            goto free_return;
          if (__glibc_unlikely (n >= (size_t) -1 / sizeof (wchar_t)))
            {
              free (wpattern_malloc);
              __set_errno (ENOMEM);
              return -2;
            }

          wstring_malloc = wstring
            = (wchar_t *) malloc ((n + 1) * sizeof (wchar_t));
          if (wstring == NULL)
            {
              free (wpattern_malloc);
              return -2;
            }
          assert (mbsinit (&ps));
          (void) mbsrtowcs (wstring, &string, n + 1, &ps);
        }

      /* N is now the length of WSTRING in wide characters, so
         WSTRING + N points at its terminating null.  */
      int res = internal_fnwmatch (wpattern, wstring, wstring + n,
                                   flags & FNM_PERIOD, flags, NULL,
                                   alloca_used);

      free (wstring_malloc);
      free (wpattern_malloc);

      return res;
    }
# endif /* mbstate_t and mbsrtowcs or _LIBC. */

  /* Single-byte case: match the byte strings directly.  */
  return internal_fnmatch (pattern, string, string + strlen (string),
                           flags & FNM_PERIOD, flags, NULL, 0);
}
452 | |
# ifdef _LIBC
/* Drop the fnmatch -> __fnmatch renaming so that the public name can be
   used in the symbol definitions below.  */
# undef fnmatch
/* Presumably publishes __fnmatch as fnmatch at version GLIBC_2_2_3 -- see
   the definition of versioned_symbol.  */
versioned_symbol (libc, __fnmatch, fnmatch, GLIBC_2_2_3);
# if SHLIB_COMPAT(libc, GLIBC_2_0, GLIBC_2_2_3)
/* Compatibility entry for GLIBC_2_0, going through the alias
   __fnmatch_old of the same function.  */
strong_alias (__fnmatch, __fnmatch_old)
compat_symbol (libc, __fnmatch_old, fnmatch, GLIBC_2_0);
# endif
libc_hidden_ver (__fnmatch, fnmatch)
# endif
462 | |
463 | #endif /* _LIBC or not __GNU_LIBRARY__. */ |
464 | |