1/* Copyright (C) 1991-2016 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
8
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <http://www.gnu.org/licenses/>. */
17
18#include <stdint.h>
19
/* Snapshot of a matcher position.  FCT fills one of these in (through its
   ENDS argument) when it stops at a `*' wildcard, so that the caller can
   resume matching from exactly that point.  */
struct STRUCT
{
  /* Position in the pattern; FCT stores the address of the `*' here.  */
  const CHAR *pattern;
  /* Position in the string at which the `*' was reached.  */
  const CHAR *string;
  /* Value of the no_leading_period state at that position.  */
  int no_leading_period;
};
26
/* Match STRING (which ends at STRING_END) against the filename pattern
   PATTERN, returning zero if it matches, nonzero if not.  */
static int FCT (const CHAR *pattern, const CHAR *string,
                const CHAR *string_end, int no_leading_period, int flags,
                struct STRUCT *ends, size_t alloca_used)
     internal_function;
/* Handle one extended-matching group.  OPT is the operator character that
   preceded the group and PATTERN points at its opening parenthesis.  */
static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
                const CHAR *string_end, int no_leading_period, int flags,
                size_t alloca_used)
     internal_function;
/* Return a pointer just past the end of the parenthesized group that
   PATTERNP points at, or PATTERNP itself if the group is malformed.  */
static const CHAR *END (const CHAR *patternp) internal_function;
38
39static int
40internal_function
41FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
42 int no_leading_period, int flags, struct STRUCT *ends, size_t alloca_used)
43{
44 const CHAR *p = pattern, *n = string;
45 UCHAR c;
46#ifdef _LIBC
47# if WIDE_CHAR_VERSION
48 const char *collseq = (const char *)
49 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
50# else
51 const UCHAR *collseq = (const UCHAR *)
52 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
53# endif
54#endif
55
56 while ((c = *p++) != L('\0'))
57 {
58 int new_no_leading_period = 0;
59 c = FOLD (c);
60
61 switch (c)
62 {
63 case L('?'):
64 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
65 {
66 int res = EXT (c, p, n, string_end, no_leading_period,
67 flags, alloca_used);
68 if (res != -1)
69 return res;
70 }
71
72 if (n == string_end)
73 return FNM_NOMATCH;
74 else if (*n == L('/') && (flags & FNM_FILE_NAME))
75 return FNM_NOMATCH;
76 else if (*n == L('.') && no_leading_period)
77 return FNM_NOMATCH;
78 break;
79
80 case L('\\'):
81 if (!(flags & FNM_NOESCAPE))
82 {
83 c = *p++;
84 if (c == L('\0'))
85 /* Trailing \ loses. */
86 return FNM_NOMATCH;
87 c = FOLD (c);
88 }
89 if (n == string_end || FOLD ((UCHAR) *n) != c)
90 return FNM_NOMATCH;
91 break;
92
93 case L('*'):
94 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
95 {
96 int res = EXT (c, p, n, string_end, no_leading_period,
97 flags, alloca_used);
98 if (res != -1)
99 return res;
100 }
101 else if (ends != NULL)
102 {
103 ends->pattern = p - 1;
104 ends->string = n;
105 ends->no_leading_period = no_leading_period;
106 return 0;
107 }
108
109 if (n != string_end && *n == L('.') && no_leading_period)
110 return FNM_NOMATCH;
111
112 for (c = *p++; c == L('?') || c == L('*'); c = *p++)
113 {
114 if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
115 {
116 const CHAR *endp = END (p);
117 if (endp != p)
118 {
119 /* This is a pattern. Skip over it. */
120 p = endp;
121 continue;
122 }
123 }
124
125 if (c == L('?'))
126 {
127 /* A ? needs to match one character. */
128 if (n == string_end)
129 /* There isn't another character; no match. */
130 return FNM_NOMATCH;
131 else if (*n == L('/')
132 && __builtin_expect (flags & FNM_FILE_NAME, 0))
133 /* A slash does not match a wildcard under
134 FNM_FILE_NAME. */
135 return FNM_NOMATCH;
136 else
137 /* One character of the string is consumed in matching
138 this ? wildcard, so *??? won't match if there are
139 less than three characters. */
140 ++n;
141 }
142 }
143
144 if (c == L('\0'))
145 /* The wildcard(s) is/are the last element of the pattern.
146 If the name is a file name and contains another slash
147 this means it cannot match, unless the FNM_LEADING_DIR
148 flag is set. */
149 {
150 int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
151
152 if (flags & FNM_FILE_NAME)
153 {
154 if (flags & FNM_LEADING_DIR)
155 result = 0;
156 else
157 {
158 if (MEMCHR (n, L('/'), string_end - n) == NULL)
159 result = 0;
160 }
161 }
162
163 return result;
164 }
165 else
166 {
167 const CHAR *endp;
168 struct STRUCT end;
169
170 end.pattern = NULL;
171 endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
172 string_end - n);
173 if (endp == NULL)
174 endp = string_end;
175
176 if (c == L('[')
177 || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
178 && (c == L('@') || c == L('+') || c == L('!'))
179 && *p == L('(')))
180 {
181 int flags2 = ((flags & FNM_FILE_NAME)
182 ? flags : (flags & ~FNM_PERIOD));
183
184 for (--p; n < endp; ++n, no_leading_period = 0)
185 if (FCT (p, n, string_end, no_leading_period, flags2,
186 &end, alloca_used) == 0)
187 goto found;
188 }
189 else if (c == L('/') && (flags & FNM_FILE_NAME))
190 {
191 while (n < string_end && *n != L('/'))
192 ++n;
193 if (n < string_end && *n == L('/')
194 && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags,
195 NULL, alloca_used) == 0))
196 return 0;
197 }
198 else
199 {
200 int flags2 = ((flags & FNM_FILE_NAME)
201 ? flags : (flags & ~FNM_PERIOD));
202
203 if (c == L('\\') && !(flags & FNM_NOESCAPE))
204 c = *p;
205 c = FOLD (c);
206 for (--p; n < endp; ++n, no_leading_period = 0)
207 if (FOLD ((UCHAR) *n) == c
208 && (FCT (p, n, string_end, no_leading_period, flags2,
209 &end, alloca_used) == 0))
210 {
211 found:
212 if (end.pattern == NULL)
213 return 0;
214 break;
215 }
216 if (end.pattern != NULL)
217 {
218 p = end.pattern;
219 n = end.string;
220 no_leading_period = end.no_leading_period;
221 continue;
222 }
223 }
224 }
225
226 /* If we come here no match is possible with the wildcard. */
227 return FNM_NOMATCH;
228
229 case L('['):
230 {
231 /* Nonzero if the sense of the character class is inverted. */
232 const CHAR *p_init = p;
233 const CHAR *n_init = n;
234 int not;
235 CHAR cold;
236 UCHAR fn;
237
238 if (posixly_correct == 0)
239 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
240
241 if (n == string_end)
242 return FNM_NOMATCH;
243
244 if (*n == L('.') && no_leading_period)
245 return FNM_NOMATCH;
246
247 if (*n == L('/') && (flags & FNM_FILE_NAME))
248 /* `/' cannot be matched. */
249 return FNM_NOMATCH;
250
251 not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
252 if (not)
253 ++p;
254
255 fn = FOLD ((UCHAR) *n);
256
257 c = *p++;
258 for (;;)
259 {
260 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
261 {
262 if (*p == L('\0'))
263 return FNM_NOMATCH;
264 c = FOLD ((UCHAR) *p);
265 ++p;
266
267 goto normal_bracket;
268 }
269 else if (c == L('[') && *p == L(':'))
270 {
271 /* Leave room for the null. */
272 CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
273 size_t c1 = 0;
274#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
275 wctype_t wt;
276#endif
277 const CHAR *startp = p;
278
279 for (;;)
280 {
281 if (c1 == CHAR_CLASS_MAX_LENGTH)
282 /* The name is too long and therefore the pattern
283 is ill-formed. */
284 return FNM_NOMATCH;
285
286 c = *++p;
287 if (c == L(':') && p[1] == L(']'))
288 {
289 p += 2;
290 break;
291 }
292 if (c < L('a') || c >= L('z'))
293 {
294 /* This cannot possibly be a character class name.
295 Match it as a normal range. */
296 p = startp;
297 c = L('[');
298 goto normal_bracket;
299 }
300 str[c1++] = c;
301 }
302 str[c1] = L('\0');
303
304#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
305 wt = IS_CHAR_CLASS (str);
306 if (wt == 0)
307 /* Invalid character class name. */
308 return FNM_NOMATCH;
309
310# if defined _LIBC && ! WIDE_CHAR_VERSION
311 /* The following code is glibc specific but does
312 there a good job in speeding up the code since
313 we can avoid the btowc() call. */
314 if (_ISCTYPE ((UCHAR) *n, wt))
315 goto matched;
316# else
317 if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
318 goto matched;
319# endif
320#else
321 if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
322 || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
323 || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
324 || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
325 || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
326 || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
327 || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
328 || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
329 || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
330 || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
331 || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
332 || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
333 goto matched;
334#endif
335 c = *p++;
336 }
337#ifdef _LIBC
338 else if (c == L('[') && *p == L('='))
339 {
340 /* It's important that STR be a scalar variable rather
341 than a one-element array, because GCC (at least 4.9.2
342 -O2 on x86-64) can be confused by the array and
343 diagnose a "used initialized" in a dead branch in the
344 findidx function. */
345 UCHAR str;
346 uint32_t nrules =
347 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
348 const CHAR *startp = p;
349
350 c = *++p;
351 if (c == L('\0'))
352 {
353 p = startp;
354 c = L('[');
355 goto normal_bracket;
356 }
357 str = c;
358
359 c = *++p;
360 if (c != L('=') || p[1] != L(']'))
361 {
362 p = startp;
363 c = L('[');
364 goto normal_bracket;
365 }
366 p += 2;
367
368 if (nrules == 0)
369 {
370 if ((UCHAR) *n == str)
371 goto matched;
372 }
373 else
374 {
375 const int32_t *table;
376# if WIDE_CHAR_VERSION
377 const int32_t *weights;
378 const wint_t *extra;
379# else
380 const unsigned char *weights;
381 const unsigned char *extra;
382# endif
383 const int32_t *indirect;
384 int32_t idx;
385 const UCHAR *cp = (const UCHAR *) &str;
386
387# if WIDE_CHAR_VERSION
388 table = (const int32_t *)
389 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
390 weights = (const int32_t *)
391 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
392 extra = (const wint_t *)
393 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
394 indirect = (const int32_t *)
395 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
396# else
397 table = (const int32_t *)
398 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
399 weights = (const unsigned char *)
400 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
401 extra = (const unsigned char *)
402 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
403 indirect = (const int32_t *)
404 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
405# endif
406
407 idx = FINDIDX (table, indirect, extra, &cp, 1);
408 if (idx != 0)
409 {
410 /* We found a table entry. Now see whether the
411 character we are currently at has the same
412 equivalance class value. */
413 int len = weights[idx & 0xffffff];
414 int32_t idx2;
415 const UCHAR *np = (const UCHAR *) n;
416
417 idx2 = FINDIDX (table, indirect, extra,
418 &np, string_end - n);
419 if (idx2 != 0
420 && (idx >> 24) == (idx2 >> 24)
421 && len == weights[idx2 & 0xffffff])
422 {
423 int cnt = 0;
424
425 idx &= 0xffffff;
426 idx2 &= 0xffffff;
427
428 while (cnt < len
429 && (weights[idx + 1 + cnt]
430 == weights[idx2 + 1 + cnt]))
431 ++cnt;
432
433 if (cnt == len)
434 goto matched;
435 }
436 }
437 }
438
439 c = *p++;
440 }
441#endif
442 else if (c == L('\0'))
443 {
444 /* [ unterminated, treat as normal character. */
445 p = p_init;
446 n = n_init;
447 c = L('[');
448 goto normal_match;
449 }
450 else
451 {
452 int is_range = 0;
453
454#ifdef _LIBC
455 int is_seqval = 0;
456
457 if (c == L('[') && *p == L('.'))
458 {
459 uint32_t nrules =
460 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
461 const CHAR *startp = p;
462 size_t c1 = 0;
463
464 while (1)
465 {
466 c = *++p;
467 if (c == L('.') && p[1] == L(']'))
468 {
469 p += 2;
470 break;
471 }
472 if (c == '\0')
473 return FNM_NOMATCH;
474 ++c1;
475 }
476
477 /* We have to handling the symbols differently in
478 ranges since then the collation sequence is
479 important. */
480 is_range = *p == L('-') && p[1] != L('\0');
481
482 if (nrules == 0)
483 {
484 /* There are no names defined in the collation
485 data. Therefore we only accept the trivial
486 names consisting of the character itself. */
487 if (c1 != 1)
488 return FNM_NOMATCH;
489
490 if (!is_range && *n == startp[1])
491 goto matched;
492
493 cold = startp[1];
494 c = *p++;
495 }
496 else
497 {
498 int32_t table_size;
499 const int32_t *symb_table;
500# if WIDE_CHAR_VERSION
501 char str[c1];
502 unsigned int strcnt;
503# else
504# define str (startp + 1)
505# endif
506 const unsigned char *extra;
507 int32_t idx;
508 int32_t elem;
509 int32_t second;
510 int32_t hash;
511
512# if WIDE_CHAR_VERSION
513 /* We have to convert the name to a single-byte
514 string. This is possible since the names
515 consist of ASCII characters and the internal
516 representation is UCS4. */
517 for (strcnt = 0; strcnt < c1; ++strcnt)
518 str[strcnt] = startp[1 + strcnt];
519#endif
520
521 table_size =
522 _NL_CURRENT_WORD (LC_COLLATE,
523 _NL_COLLATE_SYMB_HASH_SIZEMB);
524 symb_table = (const int32_t *)
525 _NL_CURRENT (LC_COLLATE,
526 _NL_COLLATE_SYMB_TABLEMB);
527 extra = (const unsigned char *)
528 _NL_CURRENT (LC_COLLATE,
529 _NL_COLLATE_SYMB_EXTRAMB);
530
531 /* Locate the character in the hashing table. */
532 hash = elem_hash (str, c1);
533
534 idx = 0;
535 elem = hash % table_size;
536 if (symb_table[2 * elem] != 0)
537 {
538 second = hash % (table_size - 2) + 1;
539
540 do
541 {
542 /* First compare the hashing value. */
543 if (symb_table[2 * elem] == hash
544 && (c1
545 == extra[symb_table[2 * elem + 1]])
546 && memcmp (str,
547 &extra[symb_table[2 * elem
548 + 1]
549 + 1], c1) == 0)
550 {
551 /* Yep, this is the entry. */
552 idx = symb_table[2 * elem + 1];
553 idx += 1 + extra[idx];
554 break;
555 }
556
557 /* Next entry. */
558 elem += second;
559 }
560 while (symb_table[2 * elem] != 0);
561 }
562
563 if (symb_table[2 * elem] != 0)
564 {
565 /* Compare the byte sequence but only if
566 this is not part of a range. */
567# if WIDE_CHAR_VERSION
568 int32_t *wextra;
569
570 idx += 1 + extra[idx];
571 /* Adjust for the alignment. */
572 idx = (idx + 3) & ~3;
573
574 wextra = (int32_t *) &extra[idx + 4];
575# endif
576
577 if (! is_range)
578 {
579# if WIDE_CHAR_VERSION
580 for (c1 = 0;
581 (int32_t) c1 < wextra[idx];
582 ++c1)
583 if (n[c1] != wextra[1 + c1])
584 break;
585
586 if ((int32_t) c1 == wextra[idx])
587 goto matched;
588# else
589 for (c1 = 0; c1 < extra[idx]; ++c1)
590 if (n[c1] != extra[1 + c1])
591 break;
592
593 if (c1 == extra[idx])
594 goto matched;
595# endif
596 }
597
598 /* Get the collation sequence value. */
599 is_seqval = 1;
600# if WIDE_CHAR_VERSION
601 cold = wextra[1 + wextra[idx]];
602# else
603 /* Adjust for the alignment. */
604 idx += 1 + extra[idx];
605 idx = (idx + 3) & ~4;
606 cold = *((int32_t *) &extra[idx]);
607# endif
608
609 c = *p++;
610 }
611 else if (c1 == 1)
612 {
613 /* No valid character. Match it as a
614 single byte. */
615 if (!is_range && *n == str[0])
616 goto matched;
617
618 cold = str[0];
619 c = *p++;
620 }
621 else
622 return FNM_NOMATCH;
623 }
624 }
625 else
626# undef str
627#endif
628 {
629 c = FOLD (c);
630 normal_bracket:
631
632 /* We have to handling the symbols differently in
633 ranges since then the collation sequence is
634 important. */
635 is_range = (*p == L('-') && p[1] != L('\0')
636 && p[1] != L(']'));
637
638 if (!is_range && c == fn)
639 goto matched;
640
641 /* This is needed if we goto normal_bracket; from
642 outside of is_seqval's scope. */
643 is_seqval = 0;
644 cold = c;
645 c = *p++;
646 }
647
648 if (c == L('-') && *p != L(']'))
649 {
650#if _LIBC
651 /* We have to find the collation sequence
652 value for C. Collation sequence is nothing
653 we can regularly access. The sequence
654 value is defined by the order in which the
655 definitions of the collation values for the
656 various characters appear in the source
657 file. A strange concept, nowhere
658 documented. */
659 uint32_t fcollseq;
660 uint32_t lcollseq;
661 UCHAR cend = *p++;
662
663# if WIDE_CHAR_VERSION
664 /* Search in the `names' array for the characters. */
665 fcollseq = __collseq_table_lookup (collseq, fn);
666 if (fcollseq == ~((uint32_t) 0))
667 /* XXX We don't know anything about the character
668 we are supposed to match. This means we are
669 failing. */
670 goto range_not_matched;
671
672 if (is_seqval)
673 lcollseq = cold;
674 else
675 lcollseq = __collseq_table_lookup (collseq, cold);
676# else
677 fcollseq = collseq[fn];
678 lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
679# endif
680
681 is_seqval = 0;
682 if (cend == L('[') && *p == L('.'))
683 {
684 uint32_t nrules =
685 _NL_CURRENT_WORD (LC_COLLATE,
686 _NL_COLLATE_NRULES);
687 const CHAR *startp = p;
688 size_t c1 = 0;
689
690 while (1)
691 {
692 c = *++p;
693 if (c == L('.') && p[1] == L(']'))
694 {
695 p += 2;
696 break;
697 }
698 if (c == '\0')
699 return FNM_NOMATCH;
700 ++c1;
701 }
702
703 if (nrules == 0)
704 {
705 /* There are no names defined in the
706 collation data. Therefore we only
707 accept the trivial names consisting
708 of the character itself. */
709 if (c1 != 1)
710 return FNM_NOMATCH;
711
712 cend = startp[1];
713 }
714 else
715 {
716 int32_t table_size;
717 const int32_t *symb_table;
718# if WIDE_CHAR_VERSION
719 char str[c1];
720 unsigned int strcnt;
721# else
722# define str (startp + 1)
723# endif
724 const unsigned char *extra;
725 int32_t idx;
726 int32_t elem;
727 int32_t second;
728 int32_t hash;
729
730# if WIDE_CHAR_VERSION
731 /* We have to convert the name to a single-byte
732 string. This is possible since the names
733 consist of ASCII characters and the internal
734 representation is UCS4. */
735 for (strcnt = 0; strcnt < c1; ++strcnt)
736 str[strcnt] = startp[1 + strcnt];
737# endif
738
739 table_size =
740 _NL_CURRENT_WORD (LC_COLLATE,
741 _NL_COLLATE_SYMB_HASH_SIZEMB);
742 symb_table = (const int32_t *)
743 _NL_CURRENT (LC_COLLATE,
744 _NL_COLLATE_SYMB_TABLEMB);
745 extra = (const unsigned char *)
746 _NL_CURRENT (LC_COLLATE,
747 _NL_COLLATE_SYMB_EXTRAMB);
748
749 /* Locate the character in the hashing
750 table. */
751 hash = elem_hash (str, c1);
752
753 idx = 0;
754 elem = hash % table_size;
755 if (symb_table[2 * elem] != 0)
756 {
757 second = hash % (table_size - 2) + 1;
758
759 do
760 {
761 /* First compare the hashing value. */
762 if (symb_table[2 * elem] == hash
763 && (c1
764 == extra[symb_table[2 * elem + 1]])
765 && memcmp (str,
766 &extra[symb_table[2 * elem + 1]
767 + 1], c1) == 0)
768 {
769 /* Yep, this is the entry. */
770 idx = symb_table[2 * elem + 1];
771 idx += 1 + extra[idx];
772 break;
773 }
774
775 /* Next entry. */
776 elem += second;
777 }
778 while (symb_table[2 * elem] != 0);
779 }
780
781 if (symb_table[2 * elem] != 0)
782 {
783 /* Compare the byte sequence but only if
784 this is not part of a range. */
785# if WIDE_CHAR_VERSION
786 int32_t *wextra;
787
788 idx += 1 + extra[idx];
789 /* Adjust for the alignment. */
790 idx = (idx + 3) & ~4;
791
792 wextra = (int32_t *) &extra[idx + 4];
793# endif
794 /* Get the collation sequence value. */
795 is_seqval = 1;
796# if WIDE_CHAR_VERSION
797 cend = wextra[1 + wextra[idx]];
798# else
799 /* Adjust for the alignment. */
800 idx += 1 + extra[idx];
801 idx = (idx + 3) & ~4;
802 cend = *((int32_t *) &extra[idx]);
803# endif
804 }
805 else if (symb_table[2 * elem] != 0 && c1 == 1)
806 {
807 cend = str[0];
808 c = *p++;
809 }
810 else
811 return FNM_NOMATCH;
812 }
813# undef str
814 }
815 else
816 {
817 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
818 cend = *p++;
819 if (cend == L('\0'))
820 return FNM_NOMATCH;
821 cend = FOLD (cend);
822 }
823
824 /* XXX It is not entirely clear to me how to handle
825 characters which are not mentioned in the
826 collation specification. */
827 if (
828# if WIDE_CHAR_VERSION
829 lcollseq == 0xffffffff ||
830# endif
831 lcollseq <= fcollseq)
832 {
833 /* We have to look at the upper bound. */
834 uint32_t hcollseq;
835
836 if (is_seqval)
837 hcollseq = cend;
838 else
839 {
840# if WIDE_CHAR_VERSION
841 hcollseq =
842 __collseq_table_lookup (collseq, cend);
843 if (hcollseq == ~((uint32_t) 0))
844 {
845 /* Hum, no information about the upper
846 bound. The matching succeeds if the
847 lower bound is matched exactly. */
848 if (lcollseq != fcollseq)
849 goto range_not_matched;
850
851 goto matched;
852 }
853# else
854 hcollseq = collseq[cend];
855# endif
856 }
857
858 if (lcollseq <= hcollseq && fcollseq <= hcollseq)
859 goto matched;
860 }
861# if WIDE_CHAR_VERSION
862 range_not_matched:
863# endif
864#else
865 /* We use a boring value comparison of the character
866 values. This is better than comparing using
867 `strcoll' since the latter would have surprising
868 and sometimes fatal consequences. */
869 UCHAR cend = *p++;
870
871 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
872 cend = *p++;
873 if (cend == L('\0'))
874 return FNM_NOMATCH;
875
876 /* It is a range. */
877 if (cold <= fn && fn <= cend)
878 goto matched;
879#endif
880
881 c = *p++;
882 }
883 }
884
885 if (c == L(']'))
886 break;
887 }
888
889 if (!not)
890 return FNM_NOMATCH;
891 break;
892
893 matched:
894 /* Skip the rest of the [...] that already matched. */
895 while ((c = *p++) != L (']'))
896 {
897 if (c == L('\0'))
898 /* [... (unterminated) loses. */
899 return FNM_NOMATCH;
900
901 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
902 {
903 if (*p == L('\0'))
904 return FNM_NOMATCH;
905 /* XXX 1003.2d11 is unclear if this is right. */
906 ++p;
907 }
908 else if (c == L('[') && *p == L(':'))
909 {
910 int c1 = 0;
911 const CHAR *startp = p;
912
913 while (1)
914 {
915 c = *++p;
916 if (++c1 == CHAR_CLASS_MAX_LENGTH)
917 return FNM_NOMATCH;
918
919 if (*p == L(':') && p[1] == L(']'))
920 break;
921
922 if (c < L('a') || c >= L('z'))
923 {
924 p = startp - 2;
925 break;
926 }
927 }
928 p += 2;
929 }
930 else if (c == L('[') && *p == L('='))
931 {
932 c = *++p;
933 if (c == L('\0'))
934 return FNM_NOMATCH;
935 c = *++p;
936 if (c != L('=') || p[1] != L(']'))
937 return FNM_NOMATCH;
938 p += 2;
939 }
940 else if (c == L('[') && *p == L('.'))
941 {
942 while (1)
943 {
944 c = *++p;
945 if (c == L('\0'))
946 return FNM_NOMATCH;
947
948 if (c == L('.') && p[1] == L(']'))
949 break;
950 }
951 p += 2;
952 }
953 }
954 if (not)
955 return FNM_NOMATCH;
956 }
957 break;
958
959 case L('+'):
960 case L('@'):
961 case L('!'):
962 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
963 {
964 int res = EXT (c, p, n, string_end, no_leading_period, flags,
965 alloca_used);
966 if (res != -1)
967 return res;
968 }
969 goto normal_match;
970
971 case L('/'):
972 if (NO_LEADING_PERIOD (flags))
973 {
974 if (n == string_end || c != (UCHAR) *n)
975 return FNM_NOMATCH;
976
977 new_no_leading_period = 1;
978 break;
979 }
980 /* FALLTHROUGH */
981 default:
982 normal_match:
983 if (n == string_end || c != FOLD ((UCHAR) *n))
984 return FNM_NOMATCH;
985 }
986
987 no_leading_period = new_no_leading_period;
988 ++n;
989 }
990
991 if (n == string_end)
992 return 0;
993
994 if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
995 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
996 return 0;
997
998 return FNM_NOMATCH;
999}
1000
1001
1002static const CHAR *
1003internal_function
1004END (const CHAR *pattern)
1005{
1006 const CHAR *p = pattern;
1007
1008 while (1)
1009 if (*++p == L('\0'))
1010 /* This is an invalid pattern. */
1011 return pattern;
1012 else if (*p == L('['))
1013 {
1014 /* Handle brackets special. */
1015 if (posixly_correct == 0)
1016 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1017
1018 /* Skip the not sign. We have to recognize it because of a possibly
1019 following ']'. */
1020 if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1021 ++p;
1022 /* A leading ']' is recognized as such. */
1023 if (*p == L(']'))
1024 ++p;
1025 /* Skip over all characters of the list. */
1026 while (*p != L(']'))
1027 if (*p++ == L('\0'))
1028 /* This is no valid pattern. */
1029 return pattern;
1030 }
1031 else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1032 || *p == L('!')) && p[1] == L('('))
1033 {
1034 p = END (p + 1);
1035 if (*p == L('\0'))
1036 /* This is an invalid pattern. */
1037 return pattern;
1038 }
1039 else if (*p == L(')'))
1040 break;
1041
1042 return p + 1;
1043}
1044
1045
1046static int
1047internal_function
1048EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1049 int no_leading_period, int flags, size_t alloca_used)
1050{
1051 const CHAR *startp;
1052 int level;
1053 struct patternlist
1054 {
1055 struct patternlist *next;
1056 CHAR malloced;
1057 CHAR str[0];
1058 } *list = NULL;
1059 struct patternlist **lastp = &list;
1060 size_t pattern_len = STRLEN (pattern);
1061 int any_malloced = 0;
1062 const CHAR *p;
1063 const CHAR *rs;
1064 int retval = 0;
1065
1066 /* Parse the pattern. Store the individual parts in the list. */
1067 level = 0;
1068 for (startp = p = pattern + 1; level >= 0; ++p)
1069 if (*p == L('\0'))
1070 {
1071 /* This is an invalid pattern. */
1072 retval = -1;
1073 goto out;
1074 }
1075 else if (*p == L('['))
1076 {
1077 /* Handle brackets special. */
1078 if (posixly_correct == 0)
1079 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1080
1081 /* Skip the not sign. We have to recognize it because of a possibly
1082 following ']'. */
1083 if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1084 ++p;
1085 /* A leading ']' is recognized as such. */
1086 if (*p == L(']'))
1087 ++p;
1088 /* Skip over all characters of the list. */
1089 while (*p != L(']'))
1090 if (*p++ == L('\0'))
1091 {
1092 /* This is no valid pattern. */
1093 retval = -1;
1094 goto out;
1095 }
1096 }
1097 else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1098 || *p == L('!')) && p[1] == L('('))
1099 /* Remember the nesting level. */
1100 ++level;
1101 else if (*p == L(')'))
1102 {
1103 if (level-- == 0)
1104 {
1105 /* This means we found the end of the pattern. */
1106#define NEW_PATTERN \
1107 struct patternlist *newp; \
1108 size_t slen = (opt == L('?') || opt == L('@') \
1109 ? pattern_len : (p - startp + 1)); \
1110 slen = sizeof (struct patternlist) + (slen * sizeof (CHAR)); \
1111 int malloced = ! __libc_use_alloca (alloca_used + slen); \
1112 if (__builtin_expect (malloced, 0)) \
1113 { \
1114 newp = malloc (slen); \
1115 if (newp == NULL) \
1116 { \
1117 retval = -2; \
1118 goto out; \
1119 } \
1120 any_malloced = 1; \
1121 } \
1122 else \
1123 newp = alloca_account (slen, alloca_used); \
1124 newp->next = NULL; \
1125 newp->malloced = malloced; \
1126 *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0'); \
1127 *lastp = newp; \
1128 lastp = &newp->next
1129 NEW_PATTERN;
1130 }
1131 }
1132 else if (*p == L('|'))
1133 {
1134 if (level == 0)
1135 {
1136 NEW_PATTERN;
1137 startp = p + 1;
1138 }
1139 }
1140 assert (list != NULL);
1141 assert (p[-1] == L(')'));
1142#undef NEW_PATTERN
1143
1144 switch (opt)
1145 {
1146 case L('*'):
1147 if (FCT (p, string, string_end, no_leading_period, flags, NULL,
1148 alloca_used) == 0)
1149 goto success;
1150 /* FALLTHROUGH */
1151
1152 case L('+'):
1153 do
1154 {
1155 for (rs = string; rs <= string_end; ++rs)
1156 /* First match the prefix with the current pattern with the
1157 current pattern. */
1158 if (FCT (list->str, string, rs, no_leading_period,
1159 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1160 NULL, alloca_used) == 0
1161 /* This was successful. Now match the rest with the rest
1162 of the pattern. */
1163 && (FCT (p, rs, string_end,
1164 rs == string
1165 ? no_leading_period
1166 : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1167 flags & FNM_FILE_NAME
1168 ? flags : flags & ~FNM_PERIOD, NULL, alloca_used) == 0
1169 /* This didn't work. Try the whole pattern. */
1170 || (rs != string
1171 && FCT (pattern - 1, rs, string_end,
1172 rs == string
1173 ? no_leading_period
1174 : (rs[-1] == '/' && NO_LEADING_PERIOD (flags)
1175 ? 1 : 0),
1176 flags & FNM_FILE_NAME
1177 ? flags : flags & ~FNM_PERIOD, NULL,
1178 alloca_used) == 0)))
1179 /* It worked. Signal success. */
1180 goto success;
1181 }
1182 while ((list = list->next) != NULL);
1183
1184 /* None of the patterns lead to a match. */
1185 retval = FNM_NOMATCH;
1186 break;
1187
1188 case L('?'):
1189 if (FCT (p, string, string_end, no_leading_period, flags, NULL,
1190 alloca_used) == 0)
1191 goto success;
1192 /* FALLTHROUGH */
1193
1194 case L('@'):
1195 do
1196 /* I cannot believe it but `strcat' is actually acceptable
1197 here. Match the entire string with the prefix from the
1198 pattern list and the rest of the pattern following the
1199 pattern list. */
1200 if (FCT (STRCAT (list->str, p), string, string_end,
1201 no_leading_period,
1202 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1203 NULL, alloca_used) == 0)
1204 /* It worked. Signal success. */
1205 goto success;
1206 while ((list = list->next) != NULL);
1207
1208 /* None of the patterns lead to a match. */
1209 retval = FNM_NOMATCH;
1210 break;
1211
1212 case L('!'):
1213 for (rs = string; rs <= string_end; ++rs)
1214 {
1215 struct patternlist *runp;
1216
1217 for (runp = list; runp != NULL; runp = runp->next)
1218 if (FCT (runp->str, string, rs, no_leading_period,
1219 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1220 NULL, alloca_used) == 0)
1221 break;
1222
1223 /* If none of the patterns matched see whether the rest does. */
1224 if (runp == NULL
1225 && (FCT (p, rs, string_end,
1226 rs == string
1227 ? no_leading_period
1228 : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1229 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1230 NULL, alloca_used) == 0))
1231 /* This is successful. */
1232 goto success;
1233 }
1234
1235 /* None of the patterns together with the rest of the pattern
1236 lead to a match. */
1237 retval = FNM_NOMATCH;
1238 break;
1239
1240 default:
1241 assert (! "Invalid extended matching operator");
1242 retval = -1;
1243 break;
1244 }
1245
1246 success:
1247 out:
1248 if (any_malloced)
1249 while (list != NULL)
1250 {
1251 struct patternlist *old = list;
1252 list = list->next;
1253 if (old->malloced)
1254 free (old);
1255 }
1256
1257 return retval;
1258}
1259
1260
/* Remove the macros this file references so that the including file can
   define them again (for example with a different character type) before
   including this file another time.  */
#undef FOLD
#undef CHAR
#undef UCHAR
#undef INT
#undef FCT
#undef EXT
#undef END
#undef STRUCT
#undef MEMPCPY
#undef MEMCHR
#undef STRCOLL
#undef STRLEN
#undef STRCAT
#undef L
#undef BTOWC
#undef WIDE_CHAR_VERSION
#undef FINDIDX
1278