/* Copyright (C) 1991-2019 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
17 | |
18 | #include <stdint.h> |
19 | |
20 | struct STRUCT |
21 | { |
22 | const CHAR *pattern; |
23 | const CHAR *string; |
24 | int no_leading_period; |
25 | }; |
26 | |
27 | /* Match STRING against the filename pattern PATTERN, returning zero if |
28 | it matches, nonzero if not. */ |
29 | static int FCT (const CHAR *pattern, const CHAR *string, |
30 | const CHAR *string_end, int no_leading_period, int flags, |
31 | struct STRUCT *ends, size_t alloca_used); |
32 | static int EXT (INT opt, const CHAR *pattern, const CHAR *string, |
33 | const CHAR *string_end, int no_leading_period, int flags, |
34 | size_t alloca_used); |
35 | static const CHAR *END (const CHAR *patternp); |
36 | |
37 | static int |
38 | FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, |
39 | int no_leading_period, int flags, struct STRUCT *ends, size_t alloca_used) |
40 | { |
41 | const CHAR *p = pattern, *n = string; |
42 | UCHAR c; |
43 | #ifdef _LIBC |
44 | # if WIDE_CHAR_VERSION |
45 | const char *collseq = (const char *) |
46 | _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC); |
47 | # else |
48 | const UCHAR *collseq = (const UCHAR *) |
49 | _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB); |
50 | # endif |
51 | #endif |
52 | |
53 | while ((c = *p++) != L('\0')) |
54 | { |
55 | int new_no_leading_period = 0; |
56 | c = FOLD (c); |
57 | |
58 | switch (c) |
59 | { |
60 | case L('?'): |
61 | if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(') |
62 | { |
63 | int res = EXT (c, p, n, string_end, no_leading_period, |
64 | flags, alloca_used); |
65 | if (res != -1) |
66 | return res; |
67 | } |
68 | |
69 | if (n == string_end) |
70 | return FNM_NOMATCH; |
71 | else if (*n == L('/') && (flags & FNM_FILE_NAME)) |
72 | return FNM_NOMATCH; |
73 | else if (*n == L('.') && no_leading_period) |
74 | return FNM_NOMATCH; |
75 | break; |
76 | |
77 | case L('\\'): |
78 | if (!(flags & FNM_NOESCAPE)) |
79 | { |
80 | c = *p++; |
81 | if (c == L('\0')) |
82 | /* Trailing \ loses. */ |
83 | return FNM_NOMATCH; |
84 | c = FOLD (c); |
85 | } |
86 | if (n == string_end || FOLD ((UCHAR) *n) != c) |
87 | return FNM_NOMATCH; |
88 | break; |
89 | |
90 | case L('*'): |
91 | if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(') |
92 | { |
93 | int res = EXT (c, p, n, string_end, no_leading_period, |
94 | flags, alloca_used); |
95 | if (res != -1) |
96 | return res; |
97 | } |
98 | else if (ends != NULL) |
99 | { |
100 | ends->pattern = p - 1; |
101 | ends->string = n; |
102 | ends->no_leading_period = no_leading_period; |
103 | return 0; |
104 | } |
105 | |
106 | if (n != string_end && *n == L('.') && no_leading_period) |
107 | return FNM_NOMATCH; |
108 | |
109 | for (c = *p++; c == L('?') || c == L('*'); c = *p++) |
110 | { |
111 | if (*p == L('(') && (flags & FNM_EXTMATCH) != 0) |
112 | { |
113 | const CHAR *endp = END (p); |
114 | if (endp != p) |
115 | { |
116 | /* This is a pattern. Skip over it. */ |
117 | p = endp; |
118 | continue; |
119 | } |
120 | } |
121 | |
122 | if (c == L('?')) |
123 | { |
124 | /* A ? needs to match one character. */ |
125 | if (n == string_end) |
126 | /* There isn't another character; no match. */ |
127 | return FNM_NOMATCH; |
128 | else if (*n == L('/') |
129 | && __builtin_expect (flags & FNM_FILE_NAME, 0)) |
130 | /* A slash does not match a wildcard under |
131 | FNM_FILE_NAME. */ |
132 | return FNM_NOMATCH; |
133 | else |
134 | /* One character of the string is consumed in matching |
135 | this ? wildcard, so *??? won't match if there are |
136 | less than three characters. */ |
137 | ++n; |
138 | } |
139 | } |
140 | |
141 | if (c == L('\0')) |
142 | /* The wildcard(s) is/are the last element of the pattern. |
143 | If the name is a file name and contains another slash |
144 | this means it cannot match, unless the FNM_LEADING_DIR |
145 | flag is set. */ |
146 | { |
147 | int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH; |
148 | |
149 | if (flags & FNM_FILE_NAME) |
150 | { |
151 | if (flags & FNM_LEADING_DIR) |
152 | result = 0; |
153 | else |
154 | { |
155 | if (MEMCHR (n, L('/'), string_end - n) == NULL) |
156 | result = 0; |
157 | } |
158 | } |
159 | |
160 | return result; |
161 | } |
162 | else |
163 | { |
164 | const CHAR *endp; |
165 | struct STRUCT end; |
166 | |
167 | end.pattern = NULL; |
168 | endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'), |
169 | string_end - n); |
170 | if (endp == NULL) |
171 | endp = string_end; |
172 | |
173 | if (c == L('[') |
174 | || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0 |
175 | && (c == L('@') || c == L('+') || c == L('!')) |
176 | && *p == L('('))) |
177 | { |
178 | int flags2 = ((flags & FNM_FILE_NAME) |
179 | ? flags : (flags & ~FNM_PERIOD)); |
180 | |
181 | for (--p; n < endp; ++n, no_leading_period = 0) |
182 | if (FCT (p, n, string_end, no_leading_period, flags2, |
183 | &end, alloca_used) == 0) |
184 | goto found; |
185 | } |
186 | else if (c == L('/') && (flags & FNM_FILE_NAME)) |
187 | { |
188 | while (n < string_end && *n != L('/')) |
189 | ++n; |
190 | if (n < string_end && *n == L('/') |
191 | && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags, |
192 | NULL, alloca_used) == 0)) |
193 | return 0; |
194 | } |
195 | else |
196 | { |
197 | int flags2 = ((flags & FNM_FILE_NAME) |
198 | ? flags : (flags & ~FNM_PERIOD)); |
199 | |
200 | if (c == L('\\') && !(flags & FNM_NOESCAPE)) |
201 | c = *p; |
202 | c = FOLD (c); |
203 | for (--p; n < endp; ++n, no_leading_period = 0) |
204 | if (FOLD ((UCHAR) *n) == c |
205 | && (FCT (p, n, string_end, no_leading_period, flags2, |
206 | &end, alloca_used) == 0)) |
207 | { |
208 | found: |
209 | if (end.pattern == NULL) |
210 | return 0; |
211 | break; |
212 | } |
213 | if (end.pattern != NULL) |
214 | { |
215 | p = end.pattern; |
216 | n = end.string; |
217 | no_leading_period = end.no_leading_period; |
218 | continue; |
219 | } |
220 | } |
221 | } |
222 | |
223 | /* If we come here no match is possible with the wildcard. */ |
224 | return FNM_NOMATCH; |
225 | |
226 | case L('['): |
227 | { |
228 | /* Nonzero if the sense of the character class is inverted. */ |
229 | const CHAR *p_init = p; |
230 | const CHAR *n_init = n; |
231 | int not; |
232 | CHAR cold; |
233 | UCHAR fn; |
234 | |
235 | if (posixly_correct == 0) |
236 | posixly_correct = getenv ("POSIXLY_CORRECT" ) != NULL ? 1 : -1; |
237 | |
238 | if (n == string_end) |
239 | return FNM_NOMATCH; |
240 | |
241 | if (*n == L('.') && no_leading_period) |
242 | return FNM_NOMATCH; |
243 | |
244 | if (*n == L('/') && (flags & FNM_FILE_NAME)) |
245 | /* `/' cannot be matched. */ |
246 | return FNM_NOMATCH; |
247 | |
248 | not = (*p == L('!') || (posixly_correct < 0 && *p == L('^'))); |
249 | if (not) |
250 | ++p; |
251 | |
252 | fn = FOLD ((UCHAR) *n); |
253 | |
254 | c = *p++; |
255 | for (;;) |
256 | { |
257 | if (!(flags & FNM_NOESCAPE) && c == L('\\')) |
258 | { |
259 | if (*p == L('\0')) |
260 | return FNM_NOMATCH; |
261 | c = FOLD ((UCHAR) *p); |
262 | ++p; |
263 | |
264 | goto normal_bracket; |
265 | } |
266 | else if (c == L('[') && *p == L(':')) |
267 | { |
268 | /* Leave room for the null. */ |
269 | CHAR str[CHAR_CLASS_MAX_LENGTH + 1]; |
270 | size_t c1 = 0; |
271 | #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) |
272 | wctype_t wt; |
273 | #endif |
274 | const CHAR *startp = p; |
275 | |
276 | for (;;) |
277 | { |
278 | if (c1 == CHAR_CLASS_MAX_LENGTH) |
279 | /* The name is too long and therefore the pattern |
280 | is ill-formed. */ |
281 | return FNM_NOMATCH; |
282 | |
283 | c = *++p; |
284 | if (c == L(':') && p[1] == L(']')) |
285 | { |
286 | p += 2; |
287 | break; |
288 | } |
289 | if (c < L('a') || c >= L('z')) |
290 | { |
291 | /* This cannot possibly be a character class name. |
292 | Match it as a normal range. */ |
293 | p = startp; |
294 | c = L('['); |
295 | goto normal_bracket; |
296 | } |
297 | str[c1++] = c; |
298 | } |
299 | str[c1] = L('\0'); |
300 | |
301 | #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) |
302 | wt = IS_CHAR_CLASS (str); |
303 | if (wt == 0) |
304 | /* Invalid character class name. */ |
305 | return FNM_NOMATCH; |
306 | |
307 | # if defined _LIBC && ! WIDE_CHAR_VERSION |
308 | /* The following code is glibc specific but does |
309 | there a good job in speeding up the code since |
310 | we can avoid the btowc() call. */ |
311 | if (_ISCTYPE ((UCHAR) *n, wt)) |
312 | goto matched; |
313 | # else |
314 | if (ISWCTYPE (BTOWC ((UCHAR) *n), wt)) |
315 | goto matched; |
316 | # endif |
317 | #else |
318 | if ((STREQ (str, L("alnum" )) && ISALNUM ((UCHAR) *n)) |
319 | || (STREQ (str, L("alpha" )) && ISALPHA ((UCHAR) *n)) |
320 | || (STREQ (str, L("blank" )) && ISBLANK ((UCHAR) *n)) |
321 | || (STREQ (str, L("cntrl" )) && ISCNTRL ((UCHAR) *n)) |
322 | || (STREQ (str, L("digit" )) && ISDIGIT ((UCHAR) *n)) |
323 | || (STREQ (str, L("graph" )) && ISGRAPH ((UCHAR) *n)) |
324 | || (STREQ (str, L("lower" )) && ISLOWER ((UCHAR) *n)) |
325 | || (STREQ (str, L("print" )) && ISPRINT ((UCHAR) *n)) |
326 | || (STREQ (str, L("punct" )) && ISPUNCT ((UCHAR) *n)) |
327 | || (STREQ (str, L("space" )) && ISSPACE ((UCHAR) *n)) |
328 | || (STREQ (str, L("upper" )) && ISUPPER ((UCHAR) *n)) |
329 | || (STREQ (str, L("xdigit" )) && ISXDIGIT ((UCHAR) *n))) |
330 | goto matched; |
331 | #endif |
332 | c = *p++; |
333 | } |
334 | #ifdef _LIBC |
335 | else if (c == L('[') && *p == L('=')) |
336 | { |
337 | /* It's important that STR be a scalar variable rather |
338 | than a one-element array, because GCC (at least 4.9.2 |
339 | -O2 on x86-64) can be confused by the array and |
340 | diagnose a "used initialized" in a dead branch in the |
341 | findidx function. */ |
342 | UCHAR str; |
343 | uint32_t nrules = |
344 | _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); |
345 | const CHAR *startp = p; |
346 | |
347 | c = *++p; |
348 | if (c == L('\0')) |
349 | { |
350 | p = startp; |
351 | c = L('['); |
352 | goto normal_bracket; |
353 | } |
354 | str = c; |
355 | |
356 | c = *++p; |
357 | if (c != L('=') || p[1] != L(']')) |
358 | { |
359 | p = startp; |
360 | c = L('['); |
361 | goto normal_bracket; |
362 | } |
363 | p += 2; |
364 | |
365 | if (nrules == 0) |
366 | { |
367 | if ((UCHAR) *n == str) |
368 | goto matched; |
369 | } |
370 | else |
371 | { |
372 | const int32_t *table; |
373 | # if WIDE_CHAR_VERSION |
374 | const int32_t *weights; |
375 | const wint_t *extra; |
376 | # else |
377 | const unsigned char *weights; |
378 | const unsigned char *; |
379 | # endif |
380 | const int32_t *indirect; |
381 | int32_t idx; |
382 | const UCHAR *cp = (const UCHAR *) &str; |
383 | |
384 | # if WIDE_CHAR_VERSION |
385 | table = (const int32_t *) |
386 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC); |
387 | weights = (const int32_t *) |
388 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC); |
389 | extra = (const wint_t *) |
390 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC); |
391 | indirect = (const int32_t *) |
392 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC); |
393 | # else |
394 | table = (const int32_t *) |
395 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); |
396 | weights = (const unsigned char *) |
397 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); |
398 | extra = (const unsigned char *) |
399 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); |
400 | indirect = (const int32_t *) |
401 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); |
402 | # endif |
403 | |
404 | idx = FINDIDX (table, indirect, extra, &cp, 1); |
405 | if (idx != 0) |
406 | { |
407 | /* We found a table entry. Now see whether the |
408 | character we are currently at has the same |
409 | equivalance class value. */ |
410 | int len = weights[idx & 0xffffff]; |
411 | int32_t idx2; |
412 | const UCHAR *np = (const UCHAR *) n; |
413 | |
414 | idx2 = FINDIDX (table, indirect, extra, |
415 | &np, string_end - n); |
416 | if (idx2 != 0 |
417 | && (idx >> 24) == (idx2 >> 24) |
418 | && len == weights[idx2 & 0xffffff]) |
419 | { |
420 | int cnt = 0; |
421 | |
422 | idx &= 0xffffff; |
423 | idx2 &= 0xffffff; |
424 | |
425 | while (cnt < len |
426 | && (weights[idx + 1 + cnt] |
427 | == weights[idx2 + 1 + cnt])) |
428 | ++cnt; |
429 | |
430 | if (cnt == len) |
431 | goto matched; |
432 | } |
433 | } |
434 | } |
435 | |
436 | c = *p++; |
437 | } |
438 | #endif |
439 | else if (c == L('\0')) |
440 | { |
441 | /* [ unterminated, treat as normal character. */ |
442 | p = p_init; |
443 | n = n_init; |
444 | c = L('['); |
445 | goto normal_match; |
446 | } |
447 | else |
448 | { |
449 | int is_range = 0; |
450 | |
451 | #ifdef _LIBC |
452 | int is_seqval = 0; |
453 | |
454 | if (c == L('[') && *p == L('.')) |
455 | { |
456 | uint32_t nrules = |
457 | _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); |
458 | const CHAR *startp = p; |
459 | size_t c1 = 0; |
460 | |
461 | while (1) |
462 | { |
463 | c = *++p; |
464 | if (c == L('.') && p[1] == L(']')) |
465 | { |
466 | p += 2; |
467 | break; |
468 | } |
469 | if (c == '\0') |
470 | return FNM_NOMATCH; |
471 | ++c1; |
472 | } |
473 | |
474 | /* We have to handling the symbols differently in |
475 | ranges since then the collation sequence is |
476 | important. */ |
477 | is_range = *p == L('-') && p[1] != L('\0'); |
478 | |
479 | if (nrules == 0) |
480 | { |
481 | /* There are no names defined in the collation |
482 | data. Therefore we only accept the trivial |
483 | names consisting of the character itself. */ |
484 | if (c1 != 1) |
485 | return FNM_NOMATCH; |
486 | |
487 | if (!is_range && *n == startp[1]) |
488 | goto matched; |
489 | |
490 | cold = startp[1]; |
491 | c = *p++; |
492 | } |
493 | else |
494 | { |
495 | int32_t table_size; |
496 | const int32_t *symb_table; |
497 | const unsigned char *; |
498 | int32_t idx; |
499 | int32_t elem; |
500 | # if WIDE_CHAR_VERSION |
501 | CHAR *wextra; |
502 | # endif |
503 | |
504 | table_size = |
505 | _NL_CURRENT_WORD (LC_COLLATE, |
506 | _NL_COLLATE_SYMB_HASH_SIZEMB); |
507 | symb_table = (const int32_t *) |
508 | _NL_CURRENT (LC_COLLATE, |
509 | _NL_COLLATE_SYMB_TABLEMB); |
510 | extra = (const unsigned char *) |
511 | _NL_CURRENT (LC_COLLATE, |
512 | _NL_COLLATE_SYMB_EXTRAMB); |
513 | |
514 | for (elem = 0; elem < table_size; elem++) |
515 | if (symb_table[2 * elem] != 0) |
516 | { |
517 | idx = symb_table[2 * elem + 1]; |
518 | /* Skip the name of collating element. */ |
519 | idx += 1 + extra[idx]; |
520 | # if WIDE_CHAR_VERSION |
521 | /* Skip the byte sequence of the |
522 | collating element. */ |
523 | idx += 1 + extra[idx]; |
524 | /* Adjust for the alignment. */ |
525 | idx = (idx + 3) & ~3; |
526 | |
527 | wextra = (CHAR *) &extra[idx + 4]; |
528 | |
529 | if (/* Compare the length of the sequence. */ |
530 | c1 == wextra[0] |
531 | /* Compare the wide char sequence. */ |
532 | && WMEMCMP (startp + 1, &wextra[1], |
533 | c1) == 0) |
534 | /* Yep, this is the entry. */ |
535 | break; |
536 | # else |
537 | if (/* Compare the length of the sequence. */ |
538 | c1 == extra[idx] |
539 | /* Compare the byte sequence. */ |
540 | && memcmp (startp + 1, |
541 | &extra[idx + 1], c1) == 0) |
542 | /* Yep, this is the entry. */ |
543 | break; |
544 | # endif |
545 | } |
546 | |
547 | if (elem < table_size) |
548 | { |
549 | /* Compare the byte sequence but only if |
550 | this is not part of a range. */ |
551 | if (! is_range |
552 | |
553 | # if WIDE_CHAR_VERSION |
554 | && WMEMCMP (n, &wextra[1], c1) == 0 |
555 | # else |
556 | && memcmp (n, &extra[idx + 1], c1) == 0 |
557 | # endif |
558 | ) |
559 | { |
560 | n += c1 - 1; |
561 | goto matched; |
562 | } |
563 | |
564 | /* Get the collation sequence value. */ |
565 | is_seqval = 1; |
566 | # if WIDE_CHAR_VERSION |
567 | cold = wextra[1 + wextra[idx]]; |
568 | # else |
569 | idx += 1 + extra[idx]; |
570 | /* Adjust for the alignment. */ |
571 | idx = (idx + 3) & ~3; |
572 | cold = *((int32_t *) &extra[idx]); |
573 | # endif |
574 | |
575 | c = *p++; |
576 | } |
577 | else if (c1 == 1) |
578 | { |
579 | /* No valid character. Match it as a |
580 | single byte. */ |
581 | if (!is_range && *n == startp[1]) |
582 | goto matched; |
583 | |
584 | cold = startp[1]; |
585 | c = *p++; |
586 | } |
587 | else |
588 | return FNM_NOMATCH; |
589 | } |
590 | } |
591 | else |
592 | #endif |
593 | { |
594 | c = FOLD (c); |
595 | normal_bracket: |
596 | |
597 | /* We have to handling the symbols differently in |
598 | ranges since then the collation sequence is |
599 | important. */ |
600 | is_range = (*p == L('-') && p[1] != L('\0') |
601 | && p[1] != L(']')); |
602 | |
603 | if (!is_range && c == fn) |
604 | goto matched; |
605 | |
606 | /* This is needed if we goto normal_bracket; from |
607 | outside of is_seqval's scope. */ |
608 | is_seqval = 0; |
609 | cold = c; |
610 | c = *p++; |
611 | } |
612 | |
613 | if (c == L('-') && *p != L(']')) |
614 | { |
615 | #if _LIBC |
616 | /* We have to find the collation sequence |
617 | value for C. Collation sequence is nothing |
618 | we can regularly access. The sequence |
619 | value is defined by the order in which the |
620 | definitions of the collation values for the |
621 | various characters appear in the source |
622 | file. A strange concept, nowhere |
623 | documented. */ |
624 | uint32_t fcollseq; |
625 | uint32_t lcollseq; |
626 | UCHAR cend = *p++; |
627 | |
628 | # if WIDE_CHAR_VERSION |
629 | /* Search in the `names' array for the characters. */ |
630 | fcollseq = __collseq_table_lookup (collseq, fn); |
631 | if (fcollseq == ~((uint32_t) 0)) |
632 | /* XXX We don't know anything about the character |
633 | we are supposed to match. This means we are |
634 | failing. */ |
635 | goto range_not_matched; |
636 | |
637 | if (is_seqval) |
638 | lcollseq = cold; |
639 | else |
640 | lcollseq = __collseq_table_lookup (collseq, cold); |
641 | # else |
642 | fcollseq = collseq[fn]; |
643 | lcollseq = is_seqval ? cold : collseq[(UCHAR) cold]; |
644 | # endif |
645 | |
646 | is_seqval = 0; |
647 | if (cend == L('[') && *p == L('.')) |
648 | { |
649 | uint32_t nrules = |
650 | _NL_CURRENT_WORD (LC_COLLATE, |
651 | _NL_COLLATE_NRULES); |
652 | const CHAR *startp = p; |
653 | size_t c1 = 0; |
654 | |
655 | while (1) |
656 | { |
657 | c = *++p; |
658 | if (c == L('.') && p[1] == L(']')) |
659 | { |
660 | p += 2; |
661 | break; |
662 | } |
663 | if (c == '\0') |
664 | return FNM_NOMATCH; |
665 | ++c1; |
666 | } |
667 | |
668 | if (nrules == 0) |
669 | { |
670 | /* There are no names defined in the |
671 | collation data. Therefore we only |
672 | accept the trivial names consisting |
673 | of the character itself. */ |
674 | if (c1 != 1) |
675 | return FNM_NOMATCH; |
676 | |
677 | cend = startp[1]; |
678 | } |
679 | else |
680 | { |
681 | int32_t table_size; |
682 | const int32_t *symb_table; |
683 | const unsigned char *; |
684 | int32_t idx; |
685 | int32_t elem; |
686 | # if WIDE_CHAR_VERSION |
687 | CHAR *wextra; |
688 | # endif |
689 | |
690 | table_size = |
691 | _NL_CURRENT_WORD (LC_COLLATE, |
692 | _NL_COLLATE_SYMB_HASH_SIZEMB); |
693 | symb_table = (const int32_t *) |
694 | _NL_CURRENT (LC_COLLATE, |
695 | _NL_COLLATE_SYMB_TABLEMB); |
696 | extra = (const unsigned char *) |
697 | _NL_CURRENT (LC_COLLATE, |
698 | _NL_COLLATE_SYMB_EXTRAMB); |
699 | |
700 | for (elem = 0; elem < table_size; elem++) |
701 | if (symb_table[2 * elem] != 0) |
702 | { |
703 | idx = symb_table[2 * elem + 1]; |
704 | /* Skip the name of collating |
705 | element. */ |
706 | idx += 1 + extra[idx]; |
707 | # if WIDE_CHAR_VERSION |
708 | /* Skip the byte sequence of the |
709 | collating element. */ |
710 | idx += 1 + extra[idx]; |
711 | /* Adjust for the alignment. */ |
712 | idx = (idx + 3) & ~3; |
713 | |
714 | wextra = (CHAR *) &extra[idx + 4]; |
715 | |
716 | if (/* Compare the length of the |
717 | sequence. */ |
718 | c1 == wextra[0] |
719 | /* Compare the wide char sequence. */ |
720 | && WMEMCMP (startp + 1, &wextra[1], |
721 | c1) == 0) |
722 | /* Yep, this is the entry. */ |
723 | break; |
724 | # else |
725 | if (/* Compare the length of the |
726 | sequence. */ |
727 | c1 == extra[idx] |
728 | /* Compare the byte sequence. */ |
729 | && memcmp (startp + 1, |
730 | &extra[idx + 1], c1) == 0) |
731 | /* Yep, this is the entry. */ |
732 | break; |
733 | # endif |
734 | } |
735 | |
736 | if (elem < table_size) |
737 | { |
738 | /* Get the collation sequence value. */ |
739 | is_seqval = 1; |
740 | # if WIDE_CHAR_VERSION |
741 | cend = wextra[1 + wextra[idx]]; |
742 | # else |
743 | idx += 1 + extra[idx]; |
744 | /* Adjust for the alignment. */ |
745 | idx = (idx + 3) & ~3; |
746 | cend = *((int32_t *) &extra[idx]); |
747 | # endif |
748 | } |
749 | else if (c1 == 1) |
750 | { |
751 | cend = startp[1]; |
752 | c = *p++; |
753 | } |
754 | else |
755 | return FNM_NOMATCH; |
756 | } |
757 | } |
758 | else |
759 | { |
760 | if (!(flags & FNM_NOESCAPE) && cend == L('\\')) |
761 | cend = *p++; |
762 | if (cend == L('\0')) |
763 | return FNM_NOMATCH; |
764 | cend = FOLD (cend); |
765 | } |
766 | |
767 | /* XXX It is not entirely clear to me how to handle |
768 | characters which are not mentioned in the |
769 | collation specification. */ |
770 | if ( |
771 | # if WIDE_CHAR_VERSION |
772 | lcollseq == 0xffffffff || |
773 | # endif |
774 | lcollseq <= fcollseq) |
775 | { |
776 | /* We have to look at the upper bound. */ |
777 | uint32_t hcollseq; |
778 | |
779 | if (is_seqval) |
780 | hcollseq = cend; |
781 | else |
782 | { |
783 | # if WIDE_CHAR_VERSION |
784 | hcollseq = |
785 | __collseq_table_lookup (collseq, cend); |
786 | if (hcollseq == ~((uint32_t) 0)) |
787 | { |
788 | /* Hum, no information about the upper |
789 | bound. The matching succeeds if the |
790 | lower bound is matched exactly. */ |
791 | if (lcollseq != fcollseq) |
792 | goto range_not_matched; |
793 | |
794 | goto matched; |
795 | } |
796 | # else |
797 | hcollseq = collseq[cend]; |
798 | # endif |
799 | } |
800 | |
801 | if (lcollseq <= hcollseq && fcollseq <= hcollseq) |
802 | goto matched; |
803 | } |
804 | # if WIDE_CHAR_VERSION |
805 | range_not_matched: |
806 | # endif |
807 | #else |
808 | /* We use a boring value comparison of the character |
809 | values. This is better than comparing using |
810 | `strcoll' since the latter would have surprising |
811 | and sometimes fatal consequences. */ |
812 | UCHAR cend = *p++; |
813 | |
814 | if (!(flags & FNM_NOESCAPE) && cend == L('\\')) |
815 | cend = *p++; |
816 | if (cend == L('\0')) |
817 | return FNM_NOMATCH; |
818 | |
819 | /* It is a range. */ |
820 | if (cold <= fn && fn <= cend) |
821 | goto matched; |
822 | #endif |
823 | |
824 | c = *p++; |
825 | } |
826 | } |
827 | |
828 | if (c == L(']')) |
829 | break; |
830 | } |
831 | |
832 | if (!not) |
833 | return FNM_NOMATCH; |
834 | break; |
835 | |
836 | matched: |
837 | /* Skip the rest of the [...] that already matched. */ |
838 | while ((c = *p++) != L (']')) |
839 | { |
840 | if (c == L('\0')) |
841 | /* [... (unterminated) loses. */ |
842 | return FNM_NOMATCH; |
843 | |
844 | if (!(flags & FNM_NOESCAPE) && c == L('\\')) |
845 | { |
846 | if (*p == L('\0')) |
847 | return FNM_NOMATCH; |
848 | /* XXX 1003.2d11 is unclear if this is right. */ |
849 | ++p; |
850 | } |
851 | else if (c == L('[') && *p == L(':')) |
852 | { |
853 | int c1 = 0; |
854 | const CHAR *startp = p; |
855 | |
856 | while (1) |
857 | { |
858 | c = *++p; |
859 | if (++c1 == CHAR_CLASS_MAX_LENGTH) |
860 | return FNM_NOMATCH; |
861 | |
862 | if (*p == L(':') && p[1] == L(']')) |
863 | break; |
864 | |
865 | if (c < L('a') || c >= L('z')) |
866 | { |
867 | p = startp - 2; |
868 | break; |
869 | } |
870 | } |
871 | p += 2; |
872 | } |
873 | else if (c == L('[') && *p == L('=')) |
874 | { |
875 | c = *++p; |
876 | if (c == L('\0')) |
877 | return FNM_NOMATCH; |
878 | c = *++p; |
879 | if (c != L('=') || p[1] != L(']')) |
880 | return FNM_NOMATCH; |
881 | p += 2; |
882 | } |
883 | else if (c == L('[') && *p == L('.')) |
884 | { |
885 | while (1) |
886 | { |
887 | c = *++p; |
888 | if (c == L('\0')) |
889 | return FNM_NOMATCH; |
890 | |
891 | if (c == L('.') && p[1] == L(']')) |
892 | break; |
893 | } |
894 | p += 2; |
895 | } |
896 | } |
897 | if (not) |
898 | return FNM_NOMATCH; |
899 | } |
900 | break; |
901 | |
902 | case L('+'): |
903 | case L('@'): |
904 | case L('!'): |
905 | if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(') |
906 | { |
907 | int res = EXT (c, p, n, string_end, no_leading_period, flags, |
908 | alloca_used); |
909 | if (res != -1) |
910 | return res; |
911 | } |
912 | goto normal_match; |
913 | |
914 | case L('/'): |
915 | if (NO_LEADING_PERIOD (flags)) |
916 | { |
917 | if (n == string_end || c != (UCHAR) *n) |
918 | return FNM_NOMATCH; |
919 | |
920 | new_no_leading_period = 1; |
921 | break; |
922 | } |
923 | /* FALLTHROUGH */ |
924 | default: |
925 | normal_match: |
926 | if (n == string_end || c != FOLD ((UCHAR) *n)) |
927 | return FNM_NOMATCH; |
928 | } |
929 | |
930 | no_leading_period = new_no_leading_period; |
931 | ++n; |
932 | } |
933 | |
934 | if (n == string_end) |
935 | return 0; |
936 | |
937 | if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/')) |
938 | /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */ |
939 | return 0; |
940 | |
941 | return FNM_NOMATCH; |
942 | } |
943 | |
944 | |
945 | static const CHAR * |
946 | END (const CHAR *pattern) |
947 | { |
948 | const CHAR *p = pattern; |
949 | |
950 | while (1) |
951 | if (*++p == L('\0')) |
952 | /* This is an invalid pattern. */ |
953 | return pattern; |
954 | else if (*p == L('[')) |
955 | { |
956 | /* Handle brackets special. */ |
957 | if (posixly_correct == 0) |
958 | posixly_correct = getenv ("POSIXLY_CORRECT" ) != NULL ? 1 : -1; |
959 | |
960 | /* Skip the not sign. We have to recognize it because of a possibly |
961 | following ']'. */ |
962 | if (*++p == L('!') || (posixly_correct < 0 && *p == L('^'))) |
963 | ++p; |
964 | /* A leading ']' is recognized as such. */ |
965 | if (*p == L(']')) |
966 | ++p; |
967 | /* Skip over all characters of the list. */ |
968 | while (*p != L(']')) |
969 | if (*p++ == L('\0')) |
970 | /* This is no valid pattern. */ |
971 | return pattern; |
972 | } |
973 | else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@') |
974 | || *p == L('!')) && p[1] == L('(')) |
975 | { |
976 | p = END (p + 1); |
977 | if (*p == L('\0')) |
978 | /* This is an invalid pattern. */ |
979 | return pattern; |
980 | } |
981 | else if (*p == L(')')) |
982 | break; |
983 | |
984 | return p + 1; |
985 | } |
986 | |
987 | |
988 | static int |
989 | EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end, |
990 | int no_leading_period, int flags, size_t alloca_used) |
991 | { |
992 | const CHAR *startp; |
993 | int level; |
994 | struct patternlist |
995 | { |
996 | struct patternlist *next; |
997 | CHAR malloced; |
998 | CHAR str[0]; |
999 | } *list = NULL; |
1000 | struct patternlist **lastp = &list; |
1001 | size_t pattern_len = STRLEN (pattern); |
1002 | int any_malloced = 0; |
1003 | const CHAR *p; |
1004 | const CHAR *rs; |
1005 | int retval = 0; |
1006 | |
1007 | /* Parse the pattern. Store the individual parts in the list. */ |
1008 | level = 0; |
1009 | for (startp = p = pattern + 1; level >= 0; ++p) |
1010 | if (*p == L('\0')) |
1011 | { |
1012 | /* This is an invalid pattern. */ |
1013 | retval = -1; |
1014 | goto out; |
1015 | } |
1016 | else if (*p == L('[')) |
1017 | { |
1018 | /* Handle brackets special. */ |
1019 | if (posixly_correct == 0) |
1020 | posixly_correct = getenv ("POSIXLY_CORRECT" ) != NULL ? 1 : -1; |
1021 | |
1022 | /* Skip the not sign. We have to recognize it because of a possibly |
1023 | following ']'. */ |
1024 | if (*++p == L('!') || (posixly_correct < 0 && *p == L('^'))) |
1025 | ++p; |
1026 | /* A leading ']' is recognized as such. */ |
1027 | if (*p == L(']')) |
1028 | ++p; |
1029 | /* Skip over all characters of the list. */ |
1030 | while (*p != L(']')) |
1031 | if (*p++ == L('\0')) |
1032 | { |
1033 | /* This is no valid pattern. */ |
1034 | retval = -1; |
1035 | goto out; |
1036 | } |
1037 | } |
1038 | else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@') |
1039 | || *p == L('!')) && p[1] == L('(')) |
1040 | /* Remember the nesting level. */ |
1041 | ++level; |
1042 | else if (*p == L(')')) |
1043 | { |
1044 | if (level-- == 0) |
1045 | { |
1046 | /* This means we found the end of the pattern. */ |
1047 | #define NEW_PATTERN \ |
1048 | struct patternlist *newp; \ |
1049 | size_t slen = (opt == L('?') || opt == L('@') \ |
1050 | ? pattern_len : (p - startp + 1)); \ |
1051 | slen = sizeof (struct patternlist) + (slen * sizeof (CHAR)); \ |
1052 | int malloced = ! __libc_use_alloca (alloca_used + slen); \ |
1053 | if (__builtin_expect (malloced, 0)) \ |
1054 | { \ |
1055 | newp = malloc (slen); \ |
1056 | if (newp == NULL) \ |
1057 | { \ |
1058 | retval = -2; \ |
1059 | goto out; \ |
1060 | } \ |
1061 | any_malloced = 1; \ |
1062 | } \ |
1063 | else \ |
1064 | newp = alloca_account (slen, alloca_used); \ |
1065 | newp->next = NULL; \ |
1066 | newp->malloced = malloced; \ |
1067 | *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0'); \ |
1068 | *lastp = newp; \ |
1069 | lastp = &newp->next |
1070 | NEW_PATTERN; |
1071 | } |
1072 | } |
1073 | else if (*p == L('|')) |
1074 | { |
1075 | if (level == 0) |
1076 | { |
1077 | NEW_PATTERN; |
1078 | startp = p + 1; |
1079 | } |
1080 | } |
1081 | assert (list != NULL); |
1082 | assert (p[-1] == L(')')); |
1083 | #undef NEW_PATTERN |
1084 | |
1085 | switch (opt) |
1086 | { |
1087 | case L('*'): |
1088 | if (FCT (p, string, string_end, no_leading_period, flags, NULL, |
1089 | alloca_used) == 0) |
1090 | goto success; |
1091 | /* FALLTHROUGH */ |
1092 | |
1093 | case L('+'): |
1094 | do |
1095 | { |
1096 | for (rs = string; rs <= string_end; ++rs) |
1097 | /* First match the prefix with the current pattern with the |
1098 | current pattern. */ |
1099 | if (FCT (list->str, string, rs, no_leading_period, |
1100 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, |
1101 | NULL, alloca_used) == 0 |
1102 | /* This was successful. Now match the rest with the rest |
1103 | of the pattern. */ |
1104 | && (FCT (p, rs, string_end, |
1105 | rs == string |
1106 | ? no_leading_period |
1107 | : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0, |
1108 | flags & FNM_FILE_NAME |
1109 | ? flags : flags & ~FNM_PERIOD, NULL, alloca_used) == 0 |
1110 | /* This didn't work. Try the whole pattern. */ |
1111 | || (rs != string |
1112 | && FCT (pattern - 1, rs, string_end, |
1113 | rs == string |
1114 | ? no_leading_period |
1115 | : (rs[-1] == '/' && NO_LEADING_PERIOD (flags) |
1116 | ? 1 : 0), |
1117 | flags & FNM_FILE_NAME |
1118 | ? flags : flags & ~FNM_PERIOD, NULL, |
1119 | alloca_used) == 0))) |
1120 | /* It worked. Signal success. */ |
1121 | goto success; |
1122 | } |
1123 | while ((list = list->next) != NULL); |
1124 | |
1125 | /* None of the patterns lead to a match. */ |
1126 | retval = FNM_NOMATCH; |
1127 | break; |
1128 | |
1129 | case L('?'): |
1130 | if (FCT (p, string, string_end, no_leading_period, flags, NULL, |
1131 | alloca_used) == 0) |
1132 | goto success; |
1133 | /* FALLTHROUGH */ |
1134 | |
1135 | case L('@'): |
1136 | do |
1137 | /* I cannot believe it but `strcat' is actually acceptable |
1138 | here. Match the entire string with the prefix from the |
1139 | pattern list and the rest of the pattern following the |
1140 | pattern list. */ |
1141 | if (FCT (STRCAT (list->str, p), string, string_end, |
1142 | no_leading_period, |
1143 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, |
1144 | NULL, alloca_used) == 0) |
1145 | /* It worked. Signal success. */ |
1146 | goto success; |
1147 | while ((list = list->next) != NULL); |
1148 | |
1149 | /* None of the patterns lead to a match. */ |
1150 | retval = FNM_NOMATCH; |
1151 | break; |
1152 | |
1153 | case L('!'): |
1154 | for (rs = string; rs <= string_end; ++rs) |
1155 | { |
1156 | struct patternlist *runp; |
1157 | |
1158 | for (runp = list; runp != NULL; runp = runp->next) |
1159 | if (FCT (runp->str, string, rs, no_leading_period, |
1160 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, |
1161 | NULL, alloca_used) == 0) |
1162 | break; |
1163 | |
1164 | /* If none of the patterns matched see whether the rest does. */ |
1165 | if (runp == NULL |
1166 | && (FCT (p, rs, string_end, |
1167 | rs == string |
1168 | ? no_leading_period |
1169 | : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0, |
1170 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, |
1171 | NULL, alloca_used) == 0)) |
1172 | /* This is successful. */ |
1173 | goto success; |
1174 | } |
1175 | |
1176 | /* None of the patterns together with the rest of the pattern |
1177 | lead to a match. */ |
1178 | retval = FNM_NOMATCH; |
1179 | break; |
1180 | |
1181 | default: |
1182 | assert (! "Invalid extended matching operator" ); |
1183 | retval = -1; |
1184 | break; |
1185 | } |
1186 | |
1187 | success: |
1188 | out: |
1189 | if (any_malloced) |
1190 | while (list != NULL) |
1191 | { |
1192 | struct patternlist *old = list; |
1193 | list = list->next; |
1194 | if (old->malloced) |
1195 | free (old); |
1196 | } |
1197 | |
1198 | return retval; |
1199 | } |
1200 | |
1201 | |
/* Drop every template macro again so that this file can be included
   once more with a different set of definitions.  */

/* Character types and literal helpers.  */
#undef CHAR
#undef UCHAR
#undef INT
#undef L
#undef FOLD
#undef BTOWC
#undef WIDE_CHAR_VERSION

/* Names of the entities defined in this file.  */
#undef FCT
#undef EXT
#undef END
#undef STRUCT

/* String and collation helpers.  */
#undef MEMPCPY
#undef MEMCHR
#undef STRCOLL
#undef STRLEN
#undef STRCAT
#undef FINDIDX
1219 | |