1 | /* Copyright (C) 1991-2018 Free Software Foundation, Inc. |
2 | This file is part of the GNU C Library. |
3 | |
4 | The GNU C Library is free software; you can redistribute it and/or |
5 | modify it under the terms of the GNU Lesser General Public |
6 | License as published by the Free Software Foundation; either |
7 | version 2.1 of the License, or (at your option) any later version. |
8 | |
9 | The GNU C Library is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | Lesser General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU Lesser General Public |
15 | License along with the GNU C Library; if not, see |
16 | <http://www.gnu.org/licenses/>. */ |
17 | |
18 | #if HAVE_CONFIG_H |
19 | # include <config.h> |
20 | #endif |
21 | |
22 | /* Enable GNU extensions in fnmatch.h. */ |
23 | #ifndef _GNU_SOURCE |
24 | # define _GNU_SOURCE 1 |
25 | #endif |
26 | |
27 | #include <assert.h> |
28 | #include <errno.h> |
29 | #include <fnmatch.h> |
30 | #include <ctype.h> |
31 | #include <string.h> |
32 | |
33 | #if defined STDC_HEADERS || defined _LIBC |
34 | # include <stdlib.h> |
35 | #endif |
36 | |
#ifdef _LIBC
# include <alloca.h>
#else
/* Fallback used when this file is built outside glibc: allocate SIZE
   bytes with plain alloca.  VAR is accepted only so that callers can be
   written the same way in both configurations; it is neither read nor
   modified here.  */
# define alloca_account(size, var) alloca (size)
#endif
42 | |
/* For platforms which support the ISO C Amendment 1 functionality we
   support user-defined character classes.  */
45 | #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) |
46 | /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>. */ |
47 | # include <wchar.h> |
48 | # include <wctype.h> |
49 | #endif |
50 | |
51 | /* We need some of the locale data (the collation sequence information) |
52 | but there is no interface to get this information in general. Therefore |
53 | we support a correct implementation only in glibc. */ |
54 | #ifdef _LIBC |
55 | # include "../locale/localeinfo.h" |
56 | # include "../locale/elem-hash.h" |
57 | # include "../locale/coll-lookup.h" |
58 | # include <shlib-compat.h> |
59 | |
60 | # define CONCAT(a,b) __CONCAT(a,b) |
61 | # define mbsrtowcs __mbsrtowcs |
62 | # define fnmatch __fnmatch |
63 | extern int fnmatch (const char *pattern, const char *string, int flags); |
64 | #endif |
65 | |
/* We often have to test for FNM_FILE_NAME and FNM_PERIOD being both set.
   Evaluates to nonzero iff both bits are present in FLAGS.  FLAGS is
   parenthesized in the expansion so that an arbitrary expression (for
   example `a | b') can be passed without operator-precedence surprises.  */
#define NO_LEADING_PERIOD(flags) \
  (((flags) & (FNM_FILE_NAME | FNM_PERIOD)) == (FNM_FILE_NAME | FNM_PERIOD))
69 | |
70 | /* Comment out all this code if we are using the GNU C Library, and are not |
71 | actually compiling the library itself. This code is part of the GNU C |
72 | Library, but also included in many other GNU distributions. Compiling |
73 | and linking in this code is a waste when using the GNU C library |
74 | (especially if it is a shared library). Rather than having every GNU |
75 | program understand `configure --with-gnu-libc' and omit the object files, |
76 | it is simpler to just do this in the source for each such file. */ |
77 | |
78 | #if defined _LIBC || !defined __GNU_LIBRARY__ |
79 | |
80 | |
/* ISASCII (C) guards every <ctype.h> classifier wrapped below.  It is
   the constant 1 when STDC_HEADERS is defined or when isascii is not
   available as a macro; otherwise it defers to isascii.  */
# if defined STDC_HEADERS || !defined isascii
# define ISASCII(c) 1
# else
# define ISASCII(c) isascii(c)
# endif

/* Use the library's isblank when it exists as a macro; otherwise treat
   exactly space and horizontal tab as blank.  */
# ifdef isblank
# define ISBLANK(c) (ISASCII (c) && isblank (c))
# else
# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
# endif
/* Use the library's isgraph when it exists as a macro; otherwise
   approximate it as "printable and not white space".  */
# ifdef isgraph
# define ISGRAPH(c) (ISASCII (c) && isgraph (c))
# else
# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
# endif

/* Guarded wrappers around the remaining <ctype.h> classifiers.  Each one
   may evaluate its argument more than once.  */
# define ISPRINT(c) (ISASCII (c) && isprint (c))
# define ISDIGIT(c) (ISASCII (c) && isdigit (c))
# define ISALNUM(c) (ISASCII (c) && isalnum (c))
# define ISALPHA(c) (ISASCII (c) && isalpha (c))
# define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
# define ISLOWER(c) (ISASCII (c) && islower (c))
# define ISPUNCT(c) (ISASCII (c) && ispunct (c))
# define ISSPACE(c) (ISASCII (c) && isspace (c))
# define ISUPPER(c) (ISASCII (c) && isupper (c))
# define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))

/* Nonzero iff the strings S1 and S2 compare equal under strcmp.  */
# define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
110 | |
/* Character-class support.  Two configurations are possible: one built
   on the wide-character classification functions (wctype/iswctype), and
   a fallback that only recognizes a fixed list of class names.  */
# if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
/* The GNU C library provides support for user-defined character classes
   and the functions from ISO C Amendment 1.  */
# ifdef CHARCLASS_NAME_MAX
# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
# else
/* This shouldn't happen but some implementation might still have this
   problem.  Use a reasonable default value.  */
# define CHAR_CLASS_MAX_LENGTH 256
# endif

/* IS_CHAR_CLASS (STRING) looks a class name up with wctype; inside glibc
   the internal name __wctype is used instead.  */
# ifdef _LIBC
# define IS_CHAR_CLASS(string) __wctype (string)
# else
# define IS_CHAR_CLASS(string) wctype (string)
# endif

/* ISWCTYPE (WC, WT) tests the wide character WC against the class
   descriptor WT, again via the internal name when building glibc.  */
# ifdef _LIBC
# define ISWCTYPE(WC, WT) __iswctype (WC, WT)
# else
# define ISWCTYPE(WC, WT) iswctype (WC, WT)
# endif

# if (HAVE_MBSTATE_T && HAVE_MBSRTOWCS) || _LIBC
/* In this case we are implementing the multibyte character handling.  */
# define HANDLE_MULTIBYTE 1
# endif

# else
/* No wide-character classification available: only the twelve class
   names listed below are recognized, compared with STREQ.  */
# define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */

# define IS_CHAR_CLASS(string) \
  (STREQ (string, "alpha") || STREQ (string, "upper") \
   || STREQ (string, "lower") || STREQ (string, "digit") \
   || STREQ (string, "alnum") || STREQ (string, "xdigit") \
   || STREQ (string, "space") || STREQ (string, "print") \
   || STREQ (string, "punct") || STREQ (string, "graph") \
   || STREQ (string, "cntrl") || STREQ (string, "blank"))
# endif
150 | |
/* Avoid depending on library functions or files
   whose names are inconsistent.  */

# if !defined _LIBC && !defined getenv
/* Declare getenv with a full prototype.  An empty parameter list is an
   obsolescent non-prototype declaration; from C23 on it means `(void)'
   and would conflict with the declaration in <stdlib.h>.  */
extern char *getenv (const char *name);
# endif

# ifndef errno
/* Fallback declaration for systems where errno is not a macro.  */
extern int errno;
# endif

/* File-local flag, zero-initialized.  It is not referenced in the part
   of this file visible here; presumably the included fnmatch_loop.c
   consults it (TODO confirm).  */
static int posixly_correct;
164 | |
165 | /* This function doesn't exist on most systems. */ |
166 | |
167 | # if !defined HAVE___STRCHRNUL && !defined _LIBC |
/* Return a pointer to the first occurrence of C in S, or to the
   terminating null byte of S when C does not occur in it.  */
static char *
__strchrnul (const char *s, int c)
{
  char *hit = strchr (s, c);

  return hit != NULL ? hit : (char *) (s + strlen (s));
}
176 | # endif |
177 | |
178 | # if HANDLE_MULTIBYTE && !defined HAVE___STRCHRNUL && !defined _LIBC |
/* Wide-character counterpart of __strchrnul: return a pointer to the
   first occurrence of C in S, or to the terminating null wide character
   of S when C does not occur in it.  */
static wchar_t *
__wcschrnul (const wchar_t *s, wint_t c)
{
  wchar_t *hit = wcschr (s, c);

  return hit != NULL ? hit : (wchar_t *) (s + wcslen (s));
}
187 | # endif |
188 | |
/* Parameters for the single-byte instantiation of the matcher.  The
   macros below are set up immediately before fnmatch_loop.c is included;
   FCT, EXT, END and STRUCT supply the identifiers internal_fnmatch,
   ext_match, end_pattern and fnmatch_struct, presumably naming the
   functions and types that the included file defines (TODO confirm
   against fnmatch_loop.c).  */

/* Note that this evaluates C many times.  FOLD refers to a variable
   named `flags', which must be in scope wherever the macro is used.  */
# ifdef _LIBC
# define FOLD(c) ((flags & FNM_CASEFOLD) ? tolower (c) : (c))
# else
# define FOLD(c) ((flags & FNM_CASEFOLD) && ISUPPER (c) ? tolower (c) : (c))
# endif
/* Character types used by this instantiation.  */
# define CHAR char
# define UCHAR unsigned char
# define INT int
# define FCT internal_fnmatch
# define EXT ext_match
# define END end_pattern
# define STRUCT fnmatch_struct
/* L (CS) leaves a character or string literal as a narrow literal.  */
# define L(CS) CS
# ifdef _LIBC
# define BTOWC(C) __btowc (C)
# else
# define BTOWC(C) btowc (C)
# endif
/* String primitives, mapped to their narrow-string implementations.  */
# define STRLEN(S) strlen (S)
# define STRCAT(D, S) strcat (D, S)
# define MEMPCPY(D, S, N) __mempcpy (D, S, N)
# define MEMCHR(S, C, N) memchr (S, C, N)
# define STRCOLL(S1, S2) strcoll (S1, S2)
# define WIDE_CHAR_VERSION 0
# include <locale/weight.h>
# define FINDIDX findidx
# include "fnmatch_loop.c"
217 | |
218 | |
219 | # if HANDLE_MULTIBYTE |
220 | /* Note that this evaluates C many times. */ |
221 | # ifdef _LIBC |
222 | # define FOLD(c) ((flags & FNM_CASEFOLD) ? __towlower (c) : (c)) |
223 | # else |
224 | # define FOLD(c) ((flags & FNM_CASEFOLD) && ISUPPER (c) ? towlower (c) : (c)) |
225 | # endif |
226 | # define CHAR wchar_t |
227 | # define UCHAR wint_t |
228 | # define INT wint_t |
229 | # define FCT internal_fnwmatch |
230 | # define EXT ext_wmatch |
231 | # define END end_wpattern |
232 | # define STRUCT fnwmatch_struct |
233 | # define L(CS) L##CS |
234 | # define BTOWC(C) (C) |
235 | # define STRLEN(S) __wcslen (S) |
236 | # define STRCAT(D, S) __wcscat (D, S) |
237 | # define MEMPCPY(D, S, N) __wmempcpy (D, S, N) |
238 | # define MEMCHR(S, C, N) __wmemchr (S, C, N) |
239 | # define STRCOLL(S1, S2) wcscoll (S1, S2) |
240 | # define WIDE_CHAR_VERSION 1 |
241 | /* Change the name the header defines so it doesn't conflict with |
242 | the <locale/weight.h> version included above. */ |
243 | # define findidx findidxwc |
244 | # include <locale/weightwc.h> |
245 | # undef findidx |
246 | # define FINDIDX findidxwc |
247 | |
248 | # undef IS_CHAR_CLASS |
249 | /* We have to convert the wide character string in a multibyte string. But |
250 | we know that the character class names consist of alphanumeric characters |
251 | from the portable character set, and since the wide character encoding |
252 | for a member of the portable character set is the same code point as |
253 | its single-byte encoding, we can use a simplified method to convert the |
254 | string to a multibyte character string. */ |
255 | static wctype_t |
256 | is_char_class (const wchar_t *wcs) |
257 | { |
258 | char s[CHAR_CLASS_MAX_LENGTH + 1]; |
259 | char *cp = s; |
260 | |
261 | do |
262 | { |
263 | /* Test for a printable character from the portable character set. */ |
264 | # ifdef _LIBC |
265 | if (*wcs < 0x20 || *wcs > 0x7e |
266 | || *wcs == 0x24 || *wcs == 0x40 || *wcs == 0x60) |
267 | return (wctype_t) 0; |
268 | # else |
269 | switch (*wcs) |
270 | { |
271 | case L' ': case L'!': case L'"': case L'#': case L'%': |
272 | case L'&': case L'\'': case L'(': case L')': case L'*': |
273 | case L'+': case L',': case L'-': case L'.': case L'/': |
274 | case L'0': case L'1': case L'2': case L'3': case L'4': |
275 | case L'5': case L'6': case L'7': case L'8': case L'9': |
276 | case L':': case L';': case L'<': case L'=': case L'>': |
277 | case L'?': |
278 | case L'A': case L'B': case L'C': case L'D': case L'E': |
279 | case L'F': case L'G': case L'H': case L'I': case L'J': |
280 | case L'K': case L'L': case L'M': case L'N': case L'O': |
281 | case L'P': case L'Q': case L'R': case L'S': case L'T': |
282 | case L'U': case L'V': case L'W': case L'X': case L'Y': |
283 | case L'Z': |
284 | case L'[': case L'\\': case L']': case L'^': case L'_': |
285 | case L'a': case L'b': case L'c': case L'd': case L'e': |
286 | case L'f': case L'g': case L'h': case L'i': case L'j': |
287 | case L'k': case L'l': case L'm': case L'n': case L'o': |
288 | case L'p': case L'q': case L'r': case L's': case L't': |
289 | case L'u': case L'v': case L'w': case L'x': case L'y': |
290 | case L'z': case L'{': case L'|': case L'}': case L'~': |
291 | break; |
292 | default: |
293 | return (wctype_t) 0; |
294 | } |
295 | # endif |
296 | |
297 | /* Avoid overrunning the buffer. */ |
298 | if (cp == s + CHAR_CLASS_MAX_LENGTH) |
299 | return (wctype_t) 0; |
300 | |
301 | *cp++ = (char) *wcs++; |
302 | } |
303 | while (*wcs != L'\0'); |
304 | |
305 | *cp = '\0'; |
306 | |
307 | # ifdef _LIBC |
308 | return __wctype (s); |
309 | # else |
310 | return wctype (s); |
311 | # endif |
312 | } |
313 | # define IS_CHAR_CLASS(string) is_char_class (string) |
314 | |
315 | # include "fnmatch_loop.c" |
316 | # endif |
317 | |
318 | |
/* Match STRING against the wildcard PATTERN under the control of FLAGS.

   When multibyte handling is compiled in (HANDLE_MULTIBYTE) and
   MB_CUR_MAX is not 1, both arguments are first converted to wide
   strings with mbsrtowcs and matched by internal_fnwmatch; otherwise
   the bytes are matched directly by internal_fnmatch.

   Returns the value produced by the internal matcher.  Before matching
   starts it may instead return -1 when mbsrtowcs fails on PATTERN or
   STRING, or -2 when a wide buffer cannot be obtained (size overflow,
   with errno set to ENOMEM, or malloc failure).  */
int
fnmatch (const char *pattern, const char *string, int flags)
{
# if HANDLE_MULTIBYTE
  if (__builtin_expect (MB_CUR_MAX, 1) != 1)
    {
      mbstate_t ps;
      size_t n;
      const char *p;
      /* The *_malloc pointers stay NULL unless the corresponding wide
         buffer was obtained from malloc, so they can always be handed
         to free.  */
      wchar_t *wpattern_malloc = NULL;
      wchar_t *wpattern;
      wchar_t *wstring_malloc = NULL;
      wchar_t *wstring;
      /* Passed to alloca_account for both buffers and then on to the
         matcher.  */
      size_t alloca_used = 0;

      /* Convert the strings into wide characters.  */
      memset (&ps, '\0', sizeof (ps));
      p = pattern;
      /* Inside glibc only "shorter than 1024 bytes or not" matters, so
         the scan is bounded.  */
#ifdef _LIBC
      n = __strnlen (pattern, 1024);
#else
      n = strlen (pattern);
#endif
      if (__glibc_likely (n < 1024))
        {
          /* Short pattern: convert into a stack buffer with room for one
             wide character per input byte plus the terminator.  */
          wpattern = (wchar_t *) alloca_account ((n + 1) * sizeof (wchar_t),
                                                 alloca_used);
          n = mbsrtowcs (wpattern, &p, n + 1, &ps);
          if (__glibc_unlikely (n == (size_t) -1))
            /* Something wrong.
               XXX Do we have to set `errno' to something which mbsrtowcs
               hasn't already done?  */
            return -1;
          /* mbsrtowcs sets P to NULL once the terminating null has been
             converted.  If it is still set the conversion stopped early:
             reset the state and redo the work with a heap buffer.  */
          if (p)
            {
              memset (&ps, '\0', sizeof (ps));
              goto prepare_wpattern;
            }
        }
      else
        {
        prepare_wpattern:
          /* With a NULL destination mbsrtowcs only counts the wide
             characters needed.  */
          n = mbsrtowcs (NULL, &pattern, 0, &ps);
          if (__glibc_unlikely (n == (size_t) -1))
            /* Something wrong.
               XXX Do we have to set `errno' to something which mbsrtowcs
               hasn't already done?  */
            return -1;
          /* Guard the size computation below against overflow.  */
          if (__glibc_unlikely (n >= (size_t) -1 / sizeof (wchar_t)))
            {
              __set_errno (ENOMEM);
              return -2;
            }
          wpattern_malloc = wpattern
            = (wchar_t *) malloc ((n + 1) * sizeof (wchar_t));
          assert (mbsinit (&ps));
          if (wpattern == NULL)
            return -2;
          /* Second pass: the actual conversion into the heap buffer.  */
          (void) mbsrtowcs (wpattern, &pattern, n + 1, &ps);
        }

      /* Same procedure for STRING.  From here on every failure path
         must release the pattern buffer if it came from malloc.  */
      assert (mbsinit (&ps));
#ifdef _LIBC
      n = __strnlen (string, 1024);
#else
      n = strlen (string);
#endif
      p = string;
      if (__glibc_likely (n < 1024))
        {
          wstring = (wchar_t *) alloca_account ((n + 1) * sizeof (wchar_t),
                                                alloca_used);
          n = mbsrtowcs (wstring, &p, n + 1, &ps);
          if (__glibc_unlikely (n == (size_t) -1))
            {
              /* Something wrong.
                 XXX Do we have to set `errno' to something which
                 mbsrtowcs hasn't already done?  */
              /* Shared conversion-failure exit; also reached by goto
                 from the heap path below.  */
            free_return:
              free (wpattern_malloc);
              return -1;
            }
          /* Conversion stopped before the terminator: retry on the
             heap with a fresh state.  */
          if (p)
            {
              memset (&ps, '\0', sizeof (ps));
              goto prepare_wstring;
            }
        }
      else
        {
        prepare_wstring:
          /* Counting pass, as for the pattern.  */
          n = mbsrtowcs (NULL, &string, 0, &ps);
          if (__glibc_unlikely (n == (size_t) -1))
            /* Something wrong.
               XXX Do we have to set `errno' to something which mbsrtowcs
               hasn't already done?  */
            goto free_return;
          if (__glibc_unlikely (n >= (size_t) -1 / sizeof (wchar_t)))
            {
              free (wpattern_malloc);
              __set_errno (ENOMEM);
              return -2;
            }

          wstring_malloc = wstring
            = (wchar_t *) malloc ((n + 1) * sizeof (wchar_t));
          if (wstring == NULL)
            {
              free (wpattern_malloc);
              return -2;
            }
          assert (mbsinit (&ps));
          (void) mbsrtowcs (wstring, &string, n + 1, &ps);
        }

      /* N is now the length of WSTRING in wide characters, so
         WSTRING + N points at its terminator.  */
      int res = internal_fnwmatch (wpattern, wstring, wstring + n,
                                   flags & FNM_PERIOD, flags, NULL,
                                   alloca_used);

      free (wstring_malloc);
      free (wpattern_malloc);

      return res;
    }
# endif /* mbstate_t and mbsrtowcs or _LIBC.  */

  /* Single-byte locale, or no multibyte support compiled in: match the
     bytes directly.  */
  return internal_fnmatch (pattern, string, string + strlen (string),
                           flags & FNM_PERIOD, flags, NULL, 0);
}
448 | |
/* Inside glibc `fnmatch' is #defined to `__fnmatch' earlier in this
   file, so the function above is compiled under that name.  Remove the
   macro and publish the symbol.  The macros used here come from glibc's
   build infrastructure (presumably <shlib-compat.h> and the libc symbol
   headers); judging by their names and arguments they bind __fnmatch to
   `fnmatch' at version GLIBC_2.2.3 and, when compatibility with
   GLIBC_2.0 is enabled, also export an alias at that older version --
   TODO confirm against those headers.  */
# ifdef _LIBC
# undef fnmatch
versioned_symbol (libc, __fnmatch, fnmatch, GLIBC_2_2_3);
# if SHLIB_COMPAT(libc, GLIBC_2_0, GLIBC_2_2_3)
strong_alias (__fnmatch, __fnmatch_old)
compat_symbol (libc, __fnmatch_old, fnmatch, GLIBC_2_0);
# endif
libc_hidden_ver (__fnmatch, fnmatch)
# endif
458 | |
459 | #endif /* _LIBC or not __GNU_LIBRARY__. */ |
460 | |