1/* Copyright (C) 1991-2018 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
8
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <http://www.gnu.org/licenses/>. */
17
18#include <stdint.h>
19
20struct STRUCT
21{
22 const CHAR *pattern;
23 const CHAR *string;
24 int no_leading_period;
25};
26
27/* Match STRING against the filename pattern PATTERN, returning zero if
28 it matches, nonzero if not. */
29static int FCT (const CHAR *pattern, const CHAR *string,
30 const CHAR *string_end, int no_leading_period, int flags,
31 struct STRUCT *ends, size_t alloca_used);
32static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
33 const CHAR *string_end, int no_leading_period, int flags,
34 size_t alloca_used);
35static const CHAR *END (const CHAR *patternp);
36
37static int
38FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end,
39 int no_leading_period, int flags, struct STRUCT *ends, size_t alloca_used)
40{
41 const CHAR *p = pattern, *n = string;
42 UCHAR c;
43#ifdef _LIBC
44# if WIDE_CHAR_VERSION
45 const char *collseq = (const char *)
46 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
47# else
48 const UCHAR *collseq = (const UCHAR *)
49 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB);
50# endif
51#endif
52
53 while ((c = *p++) != L('\0'))
54 {
55 int new_no_leading_period = 0;
56 c = FOLD (c);
57
58 switch (c)
59 {
60 case L('?'):
61 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
62 {
63 int res = EXT (c, p, n, string_end, no_leading_period,
64 flags, alloca_used);
65 if (res != -1)
66 return res;
67 }
68
69 if (n == string_end)
70 return FNM_NOMATCH;
71 else if (*n == L('/') && (flags & FNM_FILE_NAME))
72 return FNM_NOMATCH;
73 else if (*n == L('.') && no_leading_period)
74 return FNM_NOMATCH;
75 break;
76
77 case L('\\'):
78 if (!(flags & FNM_NOESCAPE))
79 {
80 c = *p++;
81 if (c == L('\0'))
82 /* Trailing \ loses. */
83 return FNM_NOMATCH;
84 c = FOLD (c);
85 }
86 if (n == string_end || FOLD ((UCHAR) *n) != c)
87 return FNM_NOMATCH;
88 break;
89
90 case L('*'):
91 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
92 {
93 int res = EXT (c, p, n, string_end, no_leading_period,
94 flags, alloca_used);
95 if (res != -1)
96 return res;
97 }
98 else if (ends != NULL)
99 {
100 ends->pattern = p - 1;
101 ends->string = n;
102 ends->no_leading_period = no_leading_period;
103 return 0;
104 }
105
106 if (n != string_end && *n == L('.') && no_leading_period)
107 return FNM_NOMATCH;
108
109 for (c = *p++; c == L('?') || c == L('*'); c = *p++)
110 {
111 if (*p == L('(') && (flags & FNM_EXTMATCH) != 0)
112 {
113 const CHAR *endp = END (p);
114 if (endp != p)
115 {
116 /* This is a pattern. Skip over it. */
117 p = endp;
118 continue;
119 }
120 }
121
122 if (c == L('?'))
123 {
124 /* A ? needs to match one character. */
125 if (n == string_end)
126 /* There isn't another character; no match. */
127 return FNM_NOMATCH;
128 else if (*n == L('/')
129 && __builtin_expect (flags & FNM_FILE_NAME, 0))
130 /* A slash does not match a wildcard under
131 FNM_FILE_NAME. */
132 return FNM_NOMATCH;
133 else
134 /* One character of the string is consumed in matching
135 this ? wildcard, so *??? won't match if there are
136 less than three characters. */
137 ++n;
138 }
139 }
140
141 if (c == L('\0'))
142 /* The wildcard(s) is/are the last element of the pattern.
143 If the name is a file name and contains another slash
144 this means it cannot match, unless the FNM_LEADING_DIR
145 flag is set. */
146 {
147 int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH;
148
149 if (flags & FNM_FILE_NAME)
150 {
151 if (flags & FNM_LEADING_DIR)
152 result = 0;
153 else
154 {
155 if (MEMCHR (n, L('/'), string_end - n) == NULL)
156 result = 0;
157 }
158 }
159
160 return result;
161 }
162 else
163 {
164 const CHAR *endp;
165 struct STRUCT end;
166
167 end.pattern = NULL;
168 endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'),
169 string_end - n);
170 if (endp == NULL)
171 endp = string_end;
172
173 if (c == L('[')
174 || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0
175 && (c == L('@') || c == L('+') || c == L('!'))
176 && *p == L('(')))
177 {
178 int flags2 = ((flags & FNM_FILE_NAME)
179 ? flags : (flags & ~FNM_PERIOD));
180
181 for (--p; n < endp; ++n, no_leading_period = 0)
182 if (FCT (p, n, string_end, no_leading_period, flags2,
183 &end, alloca_used) == 0)
184 goto found;
185 }
186 else if (c == L('/') && (flags & FNM_FILE_NAME))
187 {
188 while (n < string_end && *n != L('/'))
189 ++n;
190 if (n < string_end && *n == L('/')
191 && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags,
192 NULL, alloca_used) == 0))
193 return 0;
194 }
195 else
196 {
197 int flags2 = ((flags & FNM_FILE_NAME)
198 ? flags : (flags & ~FNM_PERIOD));
199
200 if (c == L('\\') && !(flags & FNM_NOESCAPE))
201 c = *p;
202 c = FOLD (c);
203 for (--p; n < endp; ++n, no_leading_period = 0)
204 if (FOLD ((UCHAR) *n) == c
205 && (FCT (p, n, string_end, no_leading_period, flags2,
206 &end, alloca_used) == 0))
207 {
208 found:
209 if (end.pattern == NULL)
210 return 0;
211 break;
212 }
213 if (end.pattern != NULL)
214 {
215 p = end.pattern;
216 n = end.string;
217 no_leading_period = end.no_leading_period;
218 continue;
219 }
220 }
221 }
222
223 /* If we come here no match is possible with the wildcard. */
224 return FNM_NOMATCH;
225
226 case L('['):
227 {
228 /* Nonzero if the sense of the character class is inverted. */
229 const CHAR *p_init = p;
230 const CHAR *n_init = n;
231 int not;
232 CHAR cold;
233 UCHAR fn;
234
235 if (posixly_correct == 0)
236 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
237
238 if (n == string_end)
239 return FNM_NOMATCH;
240
241 if (*n == L('.') && no_leading_period)
242 return FNM_NOMATCH;
243
244 if (*n == L('/') && (flags & FNM_FILE_NAME))
245 /* `/' cannot be matched. */
246 return FNM_NOMATCH;
247
248 not = (*p == L('!') || (posixly_correct < 0 && *p == L('^')));
249 if (not)
250 ++p;
251
252 fn = FOLD ((UCHAR) *n);
253
254 c = *p++;
255 for (;;)
256 {
257 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
258 {
259 if (*p == L('\0'))
260 return FNM_NOMATCH;
261 c = FOLD ((UCHAR) *p);
262 ++p;
263
264 goto normal_bracket;
265 }
266 else if (c == L('[') && *p == L(':'))
267 {
268 /* Leave room for the null. */
269 CHAR str[CHAR_CLASS_MAX_LENGTH + 1];
270 size_t c1 = 0;
271#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
272 wctype_t wt;
273#endif
274 const CHAR *startp = p;
275
276 for (;;)
277 {
278 if (c1 == CHAR_CLASS_MAX_LENGTH)
279 /* The name is too long and therefore the pattern
280 is ill-formed. */
281 return FNM_NOMATCH;
282
283 c = *++p;
284 if (c == L(':') && p[1] == L(']'))
285 {
286 p += 2;
287 break;
288 }
289 if (c < L('a') || c >= L('z'))
290 {
291 /* This cannot possibly be a character class name.
292 Match it as a normal range. */
293 p = startp;
294 c = L('[');
295 goto normal_bracket;
296 }
297 str[c1++] = c;
298 }
299 str[c1] = L('\0');
300
301#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
302 wt = IS_CHAR_CLASS (str);
303 if (wt == 0)
304 /* Invalid character class name. */
305 return FNM_NOMATCH;
306
307# if defined _LIBC && ! WIDE_CHAR_VERSION
308 /* The following code is glibc specific but does
309 there a good job in speeding up the code since
310 we can avoid the btowc() call. */
311 if (_ISCTYPE ((UCHAR) *n, wt))
312 goto matched;
313# else
314 if (ISWCTYPE (BTOWC ((UCHAR) *n), wt))
315 goto matched;
316# endif
317#else
318 if ((STREQ (str, L("alnum")) && ISALNUM ((UCHAR) *n))
319 || (STREQ (str, L("alpha")) && ISALPHA ((UCHAR) *n))
320 || (STREQ (str, L("blank")) && ISBLANK ((UCHAR) *n))
321 || (STREQ (str, L("cntrl")) && ISCNTRL ((UCHAR) *n))
322 || (STREQ (str, L("digit")) && ISDIGIT ((UCHAR) *n))
323 || (STREQ (str, L("graph")) && ISGRAPH ((UCHAR) *n))
324 || (STREQ (str, L("lower")) && ISLOWER ((UCHAR) *n))
325 || (STREQ (str, L("print")) && ISPRINT ((UCHAR) *n))
326 || (STREQ (str, L("punct")) && ISPUNCT ((UCHAR) *n))
327 || (STREQ (str, L("space")) && ISSPACE ((UCHAR) *n))
328 || (STREQ (str, L("upper")) && ISUPPER ((UCHAR) *n))
329 || (STREQ (str, L("xdigit")) && ISXDIGIT ((UCHAR) *n)))
330 goto matched;
331#endif
332 c = *p++;
333 }
334#ifdef _LIBC
335 else if (c == L('[') && *p == L('='))
336 {
337 /* It's important that STR be a scalar variable rather
338 than a one-element array, because GCC (at least 4.9.2
339 -O2 on x86-64) can be confused by the array and
340 diagnose a "used initialized" in a dead branch in the
341 findidx function. */
342 UCHAR str;
343 uint32_t nrules =
344 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
345 const CHAR *startp = p;
346
347 c = *++p;
348 if (c == L('\0'))
349 {
350 p = startp;
351 c = L('[');
352 goto normal_bracket;
353 }
354 str = c;
355
356 c = *++p;
357 if (c != L('=') || p[1] != L(']'))
358 {
359 p = startp;
360 c = L('[');
361 goto normal_bracket;
362 }
363 p += 2;
364
365 if (nrules == 0)
366 {
367 if ((UCHAR) *n == str)
368 goto matched;
369 }
370 else
371 {
372 const int32_t *table;
373# if WIDE_CHAR_VERSION
374 const int32_t *weights;
375 const wint_t *extra;
376# else
377 const unsigned char *weights;
378 const unsigned char *extra;
379# endif
380 const int32_t *indirect;
381 int32_t idx;
382 const UCHAR *cp = (const UCHAR *) &str;
383
384# if WIDE_CHAR_VERSION
385 table = (const int32_t *)
386 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
387 weights = (const int32_t *)
388 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
389 extra = (const wint_t *)
390 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
391 indirect = (const int32_t *)
392 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
393# else
394 table = (const int32_t *)
395 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
396 weights = (const unsigned char *)
397 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
398 extra = (const unsigned char *)
399 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
400 indirect = (const int32_t *)
401 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
402# endif
403
404 idx = FINDIDX (table, indirect, extra, &cp, 1);
405 if (idx != 0)
406 {
407 /* We found a table entry. Now see whether the
408 character we are currently at has the same
409 equivalance class value. */
410 int len = weights[idx & 0xffffff];
411 int32_t idx2;
412 const UCHAR *np = (const UCHAR *) n;
413
414 idx2 = FINDIDX (table, indirect, extra,
415 &np, string_end - n);
416 if (idx2 != 0
417 && (idx >> 24) == (idx2 >> 24)
418 && len == weights[idx2 & 0xffffff])
419 {
420 int cnt = 0;
421
422 idx &= 0xffffff;
423 idx2 &= 0xffffff;
424
425 while (cnt < len
426 && (weights[idx + 1 + cnt]
427 == weights[idx2 + 1 + cnt]))
428 ++cnt;
429
430 if (cnt == len)
431 goto matched;
432 }
433 }
434 }
435
436 c = *p++;
437 }
438#endif
439 else if (c == L('\0'))
440 {
441 /* [ unterminated, treat as normal character. */
442 p = p_init;
443 n = n_init;
444 c = L('[');
445 goto normal_match;
446 }
447 else
448 {
449 int is_range = 0;
450
451#ifdef _LIBC
452 int is_seqval = 0;
453
454 if (c == L('[') && *p == L('.'))
455 {
456 uint32_t nrules =
457 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
458 const CHAR *startp = p;
459 size_t c1 = 0;
460
461 while (1)
462 {
463 c = *++p;
464 if (c == L('.') && p[1] == L(']'))
465 {
466 p += 2;
467 break;
468 }
469 if (c == '\0')
470 return FNM_NOMATCH;
471 ++c1;
472 }
473
474 /* We have to handling the symbols differently in
475 ranges since then the collation sequence is
476 important. */
477 is_range = *p == L('-') && p[1] != L('\0');
478
479 if (nrules == 0)
480 {
481 /* There are no names defined in the collation
482 data. Therefore we only accept the trivial
483 names consisting of the character itself. */
484 if (c1 != 1)
485 return FNM_NOMATCH;
486
487 if (!is_range && *n == startp[1])
488 goto matched;
489
490 cold = startp[1];
491 c = *p++;
492 }
493 else
494 {
495 int32_t table_size;
496 const int32_t *symb_table;
497# if WIDE_CHAR_VERSION
498 char str[c1];
499 unsigned int strcnt;
500# else
501# define str (startp + 1)
502# endif
503 const unsigned char *extra;
504 int32_t idx;
505 int32_t elem;
506 int32_t second;
507 int32_t hash;
508
509# if WIDE_CHAR_VERSION
510 /* We have to convert the name to a single-byte
511 string. This is possible since the names
512 consist of ASCII characters and the internal
513 representation is UCS4. */
514 for (strcnt = 0; strcnt < c1; ++strcnt)
515 str[strcnt] = startp[1 + strcnt];
516#endif
517
518 table_size =
519 _NL_CURRENT_WORD (LC_COLLATE,
520 _NL_COLLATE_SYMB_HASH_SIZEMB);
521 symb_table = (const int32_t *)
522 _NL_CURRENT (LC_COLLATE,
523 _NL_COLLATE_SYMB_TABLEMB);
524 extra = (const unsigned char *)
525 _NL_CURRENT (LC_COLLATE,
526 _NL_COLLATE_SYMB_EXTRAMB);
527
528 /* Locate the character in the hashing table. */
529 hash = elem_hash (str, c1);
530
531 idx = 0;
532 elem = hash % table_size;
533 if (symb_table[2 * elem] != 0)
534 {
535 second = hash % (table_size - 2) + 1;
536
537 do
538 {
539 /* First compare the hashing value. */
540 if (symb_table[2 * elem] == hash
541 && (c1
542 == extra[symb_table[2 * elem + 1]])
543 && memcmp (str,
544 &extra[symb_table[2 * elem
545 + 1]
546 + 1], c1) == 0)
547 {
548 /* Yep, this is the entry. */
549 idx = symb_table[2 * elem + 1];
550 idx += 1 + extra[idx];
551 break;
552 }
553
554 /* Next entry. */
555 elem += second;
556 }
557 while (symb_table[2 * elem] != 0);
558 }
559
560 if (symb_table[2 * elem] != 0)
561 {
562 /* Compare the byte sequence but only if
563 this is not part of a range. */
564# if WIDE_CHAR_VERSION
565 int32_t *wextra;
566
567 idx += 1 + extra[idx];
568 /* Adjust for the alignment. */
569 idx = (idx + 3) & ~3;
570
571 wextra = (int32_t *) &extra[idx + 4];
572# endif
573
574 if (! is_range)
575 {
576# if WIDE_CHAR_VERSION
577 for (c1 = 0;
578 (int32_t) c1 < wextra[idx];
579 ++c1)
580 if (n[c1] != wextra[1 + c1])
581 break;
582
583 if ((int32_t) c1 == wextra[idx])
584 goto matched;
585# else
586 for (c1 = 0; c1 < extra[idx]; ++c1)
587 if (n[c1] != extra[1 + c1])
588 break;
589
590 if (c1 == extra[idx])
591 goto matched;
592# endif
593 }
594
595 /* Get the collation sequence value. */
596 is_seqval = 1;
597# if WIDE_CHAR_VERSION
598 cold = wextra[1 + wextra[idx]];
599# else
600 /* Adjust for the alignment. */
601 idx += 1 + extra[idx];
602 idx = (idx + 3) & ~4;
603 cold = *((int32_t *) &extra[idx]);
604# endif
605
606 c = *p++;
607 }
608 else if (c1 == 1)
609 {
610 /* No valid character. Match it as a
611 single byte. */
612 if (!is_range && *n == str[0])
613 goto matched;
614
615 cold = str[0];
616 c = *p++;
617 }
618 else
619 return FNM_NOMATCH;
620 }
621 }
622 else
623# undef str
624#endif
625 {
626 c = FOLD (c);
627 normal_bracket:
628
629 /* We have to handling the symbols differently in
630 ranges since then the collation sequence is
631 important. */
632 is_range = (*p == L('-') && p[1] != L('\0')
633 && p[1] != L(']'));
634
635 if (!is_range && c == fn)
636 goto matched;
637
638 /* This is needed if we goto normal_bracket; from
639 outside of is_seqval's scope. */
640 is_seqval = 0;
641 cold = c;
642 c = *p++;
643 }
644
645 if (c == L('-') && *p != L(']'))
646 {
647#if _LIBC
648 /* We have to find the collation sequence
649 value for C. Collation sequence is nothing
650 we can regularly access. The sequence
651 value is defined by the order in which the
652 definitions of the collation values for the
653 various characters appear in the source
654 file. A strange concept, nowhere
655 documented. */
656 uint32_t fcollseq;
657 uint32_t lcollseq;
658 UCHAR cend = *p++;
659
660# if WIDE_CHAR_VERSION
661 /* Search in the `names' array for the characters. */
662 fcollseq = __collseq_table_lookup (collseq, fn);
663 if (fcollseq == ~((uint32_t) 0))
664 /* XXX We don't know anything about the character
665 we are supposed to match. This means we are
666 failing. */
667 goto range_not_matched;
668
669 if (is_seqval)
670 lcollseq = cold;
671 else
672 lcollseq = __collseq_table_lookup (collseq, cold);
673# else
674 fcollseq = collseq[fn];
675 lcollseq = is_seqval ? cold : collseq[(UCHAR) cold];
676# endif
677
678 is_seqval = 0;
679 if (cend == L('[') && *p == L('.'))
680 {
681 uint32_t nrules =
682 _NL_CURRENT_WORD (LC_COLLATE,
683 _NL_COLLATE_NRULES);
684 const CHAR *startp = p;
685 size_t c1 = 0;
686
687 while (1)
688 {
689 c = *++p;
690 if (c == L('.') && p[1] == L(']'))
691 {
692 p += 2;
693 break;
694 }
695 if (c == '\0')
696 return FNM_NOMATCH;
697 ++c1;
698 }
699
700 if (nrules == 0)
701 {
702 /* There are no names defined in the
703 collation data. Therefore we only
704 accept the trivial names consisting
705 of the character itself. */
706 if (c1 != 1)
707 return FNM_NOMATCH;
708
709 cend = startp[1];
710 }
711 else
712 {
713 int32_t table_size;
714 const int32_t *symb_table;
715# if WIDE_CHAR_VERSION
716 char str[c1];
717 unsigned int strcnt;
718# else
719# define str (startp + 1)
720# endif
721 const unsigned char *extra;
722 int32_t idx;
723 int32_t elem;
724 int32_t second;
725 int32_t hash;
726
727# if WIDE_CHAR_VERSION
728 /* We have to convert the name to a single-byte
729 string. This is possible since the names
730 consist of ASCII characters and the internal
731 representation is UCS4. */
732 for (strcnt = 0; strcnt < c1; ++strcnt)
733 str[strcnt] = startp[1 + strcnt];
734# endif
735
736 table_size =
737 _NL_CURRENT_WORD (LC_COLLATE,
738 _NL_COLLATE_SYMB_HASH_SIZEMB);
739 symb_table = (const int32_t *)
740 _NL_CURRENT (LC_COLLATE,
741 _NL_COLLATE_SYMB_TABLEMB);
742 extra = (const unsigned char *)
743 _NL_CURRENT (LC_COLLATE,
744 _NL_COLLATE_SYMB_EXTRAMB);
745
746 /* Locate the character in the hashing
747 table. */
748 hash = elem_hash (str, c1);
749
750 idx = 0;
751 elem = hash % table_size;
752 if (symb_table[2 * elem] != 0)
753 {
754 second = hash % (table_size - 2) + 1;
755
756 do
757 {
758 /* First compare the hashing value. */
759 if (symb_table[2 * elem] == hash
760 && (c1
761 == extra[symb_table[2 * elem + 1]])
762 && memcmp (str,
763 &extra[symb_table[2 * elem + 1]
764 + 1], c1) == 0)
765 {
766 /* Yep, this is the entry. */
767 idx = symb_table[2 * elem + 1];
768 idx += 1 + extra[idx];
769 break;
770 }
771
772 /* Next entry. */
773 elem += second;
774 }
775 while (symb_table[2 * elem] != 0);
776 }
777
778 if (symb_table[2 * elem] != 0)
779 {
780 /* Compare the byte sequence but only if
781 this is not part of a range. */
782# if WIDE_CHAR_VERSION
783 int32_t *wextra;
784
785 idx += 1 + extra[idx];
786 /* Adjust for the alignment. */
787 idx = (idx + 3) & ~4;
788
789 wextra = (int32_t *) &extra[idx + 4];
790# endif
791 /* Get the collation sequence value. */
792 is_seqval = 1;
793# if WIDE_CHAR_VERSION
794 cend = wextra[1 + wextra[idx]];
795# else
796 /* Adjust for the alignment. */
797 idx += 1 + extra[idx];
798 idx = (idx + 3) & ~4;
799 cend = *((int32_t *) &extra[idx]);
800# endif
801 }
802 else if (symb_table[2 * elem] != 0 && c1 == 1)
803 {
804 cend = str[0];
805 c = *p++;
806 }
807 else
808 return FNM_NOMATCH;
809 }
810# undef str
811 }
812 else
813 {
814 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
815 cend = *p++;
816 if (cend == L('\0'))
817 return FNM_NOMATCH;
818 cend = FOLD (cend);
819 }
820
821 /* XXX It is not entirely clear to me how to handle
822 characters which are not mentioned in the
823 collation specification. */
824 if (
825# if WIDE_CHAR_VERSION
826 lcollseq == 0xffffffff ||
827# endif
828 lcollseq <= fcollseq)
829 {
830 /* We have to look at the upper bound. */
831 uint32_t hcollseq;
832
833 if (is_seqval)
834 hcollseq = cend;
835 else
836 {
837# if WIDE_CHAR_VERSION
838 hcollseq =
839 __collseq_table_lookup (collseq, cend);
840 if (hcollseq == ~((uint32_t) 0))
841 {
842 /* Hum, no information about the upper
843 bound. The matching succeeds if the
844 lower bound is matched exactly. */
845 if (lcollseq != fcollseq)
846 goto range_not_matched;
847
848 goto matched;
849 }
850# else
851 hcollseq = collseq[cend];
852# endif
853 }
854
855 if (lcollseq <= hcollseq && fcollseq <= hcollseq)
856 goto matched;
857 }
858# if WIDE_CHAR_VERSION
859 range_not_matched:
860# endif
861#else
862 /* We use a boring value comparison of the character
863 values. This is better than comparing using
864 `strcoll' since the latter would have surprising
865 and sometimes fatal consequences. */
866 UCHAR cend = *p++;
867
868 if (!(flags & FNM_NOESCAPE) && cend == L('\\'))
869 cend = *p++;
870 if (cend == L('\0'))
871 return FNM_NOMATCH;
872
873 /* It is a range. */
874 if (cold <= fn && fn <= cend)
875 goto matched;
876#endif
877
878 c = *p++;
879 }
880 }
881
882 if (c == L(']'))
883 break;
884 }
885
886 if (!not)
887 return FNM_NOMATCH;
888 break;
889
890 matched:
891 /* Skip the rest of the [...] that already matched. */
892 while ((c = *p++) != L (']'))
893 {
894 if (c == L('\0'))
895 /* [... (unterminated) loses. */
896 return FNM_NOMATCH;
897
898 if (!(flags & FNM_NOESCAPE) && c == L('\\'))
899 {
900 if (*p == L('\0'))
901 return FNM_NOMATCH;
902 /* XXX 1003.2d11 is unclear if this is right. */
903 ++p;
904 }
905 else if (c == L('[') && *p == L(':'))
906 {
907 int c1 = 0;
908 const CHAR *startp = p;
909
910 while (1)
911 {
912 c = *++p;
913 if (++c1 == CHAR_CLASS_MAX_LENGTH)
914 return FNM_NOMATCH;
915
916 if (*p == L(':') && p[1] == L(']'))
917 break;
918
919 if (c < L('a') || c >= L('z'))
920 {
921 p = startp - 2;
922 break;
923 }
924 }
925 p += 2;
926 }
927 else if (c == L('[') && *p == L('='))
928 {
929 c = *++p;
930 if (c == L('\0'))
931 return FNM_NOMATCH;
932 c = *++p;
933 if (c != L('=') || p[1] != L(']'))
934 return FNM_NOMATCH;
935 p += 2;
936 }
937 else if (c == L('[') && *p == L('.'))
938 {
939 while (1)
940 {
941 c = *++p;
942 if (c == L('\0'))
943 return FNM_NOMATCH;
944
945 if (c == L('.') && p[1] == L(']'))
946 break;
947 }
948 p += 2;
949 }
950 }
951 if (not)
952 return FNM_NOMATCH;
953 }
954 break;
955
956 case L('+'):
957 case L('@'):
958 case L('!'):
959 if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(')
960 {
961 int res = EXT (c, p, n, string_end, no_leading_period, flags,
962 alloca_used);
963 if (res != -1)
964 return res;
965 }
966 goto normal_match;
967
968 case L('/'):
969 if (NO_LEADING_PERIOD (flags))
970 {
971 if (n == string_end || c != (UCHAR) *n)
972 return FNM_NOMATCH;
973
974 new_no_leading_period = 1;
975 break;
976 }
977 /* FALLTHROUGH */
978 default:
979 normal_match:
980 if (n == string_end || c != FOLD ((UCHAR) *n))
981 return FNM_NOMATCH;
982 }
983
984 no_leading_period = new_no_leading_period;
985 ++n;
986 }
987
988 if (n == string_end)
989 return 0;
990
991 if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/'))
992 /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */
993 return 0;
994
995 return FNM_NOMATCH;
996}
997
998
999static const CHAR *
1000END (const CHAR *pattern)
1001{
1002 const CHAR *p = pattern;
1003
1004 while (1)
1005 if (*++p == L('\0'))
1006 /* This is an invalid pattern. */
1007 return pattern;
1008 else if (*p == L('['))
1009 {
1010 /* Handle brackets special. */
1011 if (posixly_correct == 0)
1012 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1013
1014 /* Skip the not sign. We have to recognize it because of a possibly
1015 following ']'. */
1016 if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1017 ++p;
1018 /* A leading ']' is recognized as such. */
1019 if (*p == L(']'))
1020 ++p;
1021 /* Skip over all characters of the list. */
1022 while (*p != L(']'))
1023 if (*p++ == L('\0'))
1024 /* This is no valid pattern. */
1025 return pattern;
1026 }
1027 else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1028 || *p == L('!')) && p[1] == L('('))
1029 {
1030 p = END (p + 1);
1031 if (*p == L('\0'))
1032 /* This is an invalid pattern. */
1033 return pattern;
1034 }
1035 else if (*p == L(')'))
1036 break;
1037
1038 return p + 1;
1039}
1040
1041
1042static int
1043EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end,
1044 int no_leading_period, int flags, size_t alloca_used)
1045{
1046 const CHAR *startp;
1047 int level;
1048 struct patternlist
1049 {
1050 struct patternlist *next;
1051 CHAR malloced;
1052 CHAR str[0];
1053 } *list = NULL;
1054 struct patternlist **lastp = &list;
1055 size_t pattern_len = STRLEN (pattern);
1056 int any_malloced = 0;
1057 const CHAR *p;
1058 const CHAR *rs;
1059 int retval = 0;
1060
1061 /* Parse the pattern. Store the individual parts in the list. */
1062 level = 0;
1063 for (startp = p = pattern + 1; level >= 0; ++p)
1064 if (*p == L('\0'))
1065 {
1066 /* This is an invalid pattern. */
1067 retval = -1;
1068 goto out;
1069 }
1070 else if (*p == L('['))
1071 {
1072 /* Handle brackets special. */
1073 if (posixly_correct == 0)
1074 posixly_correct = getenv ("POSIXLY_CORRECT") != NULL ? 1 : -1;
1075
1076 /* Skip the not sign. We have to recognize it because of a possibly
1077 following ']'. */
1078 if (*++p == L('!') || (posixly_correct < 0 && *p == L('^')))
1079 ++p;
1080 /* A leading ']' is recognized as such. */
1081 if (*p == L(']'))
1082 ++p;
1083 /* Skip over all characters of the list. */
1084 while (*p != L(']'))
1085 if (*p++ == L('\0'))
1086 {
1087 /* This is no valid pattern. */
1088 retval = -1;
1089 goto out;
1090 }
1091 }
1092 else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@')
1093 || *p == L('!')) && p[1] == L('('))
1094 /* Remember the nesting level. */
1095 ++level;
1096 else if (*p == L(')'))
1097 {
1098 if (level-- == 0)
1099 {
1100 /* This means we found the end of the pattern. */
1101#define NEW_PATTERN \
1102 struct patternlist *newp; \
1103 size_t slen = (opt == L('?') || opt == L('@') \
1104 ? pattern_len : (p - startp + 1)); \
1105 slen = sizeof (struct patternlist) + (slen * sizeof (CHAR)); \
1106 int malloced = ! __libc_use_alloca (alloca_used + slen); \
1107 if (__builtin_expect (malloced, 0)) \
1108 { \
1109 newp = malloc (slen); \
1110 if (newp == NULL) \
1111 { \
1112 retval = -2; \
1113 goto out; \
1114 } \
1115 any_malloced = 1; \
1116 } \
1117 else \
1118 newp = alloca_account (slen, alloca_used); \
1119 newp->next = NULL; \
1120 newp->malloced = malloced; \
1121 *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0'); \
1122 *lastp = newp; \
1123 lastp = &newp->next
1124 NEW_PATTERN;
1125 }
1126 }
1127 else if (*p == L('|'))
1128 {
1129 if (level == 0)
1130 {
1131 NEW_PATTERN;
1132 startp = p + 1;
1133 }
1134 }
1135 assert (list != NULL);
1136 assert (p[-1] == L(')'));
1137#undef NEW_PATTERN
1138
1139 switch (opt)
1140 {
1141 case L('*'):
1142 if (FCT (p, string, string_end, no_leading_period, flags, NULL,
1143 alloca_used) == 0)
1144 goto success;
1145 /* FALLTHROUGH */
1146
1147 case L('+'):
1148 do
1149 {
1150 for (rs = string; rs <= string_end; ++rs)
1151 /* First match the prefix with the current pattern with the
1152 current pattern. */
1153 if (FCT (list->str, string, rs, no_leading_period,
1154 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1155 NULL, alloca_used) == 0
1156 /* This was successful. Now match the rest with the rest
1157 of the pattern. */
1158 && (FCT (p, rs, string_end,
1159 rs == string
1160 ? no_leading_period
1161 : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1162 flags & FNM_FILE_NAME
1163 ? flags : flags & ~FNM_PERIOD, NULL, alloca_used) == 0
1164 /* This didn't work. Try the whole pattern. */
1165 || (rs != string
1166 && FCT (pattern - 1, rs, string_end,
1167 rs == string
1168 ? no_leading_period
1169 : (rs[-1] == '/' && NO_LEADING_PERIOD (flags)
1170 ? 1 : 0),
1171 flags & FNM_FILE_NAME
1172 ? flags : flags & ~FNM_PERIOD, NULL,
1173 alloca_used) == 0)))
1174 /* It worked. Signal success. */
1175 goto success;
1176 }
1177 while ((list = list->next) != NULL);
1178
1179 /* None of the patterns lead to a match. */
1180 retval = FNM_NOMATCH;
1181 break;
1182
1183 case L('?'):
1184 if (FCT (p, string, string_end, no_leading_period, flags, NULL,
1185 alloca_used) == 0)
1186 goto success;
1187 /* FALLTHROUGH */
1188
1189 case L('@'):
1190 do
1191 /* I cannot believe it but `strcat' is actually acceptable
1192 here. Match the entire string with the prefix from the
1193 pattern list and the rest of the pattern following the
1194 pattern list. */
1195 if (FCT (STRCAT (list->str, p), string, string_end,
1196 no_leading_period,
1197 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1198 NULL, alloca_used) == 0)
1199 /* It worked. Signal success. */
1200 goto success;
1201 while ((list = list->next) != NULL);
1202
1203 /* None of the patterns lead to a match. */
1204 retval = FNM_NOMATCH;
1205 break;
1206
1207 case L('!'):
1208 for (rs = string; rs <= string_end; ++rs)
1209 {
1210 struct patternlist *runp;
1211
1212 for (runp = list; runp != NULL; runp = runp->next)
1213 if (FCT (runp->str, string, rs, no_leading_period,
1214 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1215 NULL, alloca_used) == 0)
1216 break;
1217
1218 /* If none of the patterns matched see whether the rest does. */
1219 if (runp == NULL
1220 && (FCT (p, rs, string_end,
1221 rs == string
1222 ? no_leading_period
1223 : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0,
1224 flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD,
1225 NULL, alloca_used) == 0))
1226 /* This is successful. */
1227 goto success;
1228 }
1229
1230 /* None of the patterns together with the rest of the pattern
1231 lead to a match. */
1232 retval = FNM_NOMATCH;
1233 break;
1234
1235 default:
1236 assert (! "Invalid extended matching operator");
1237 retval = -1;
1238 break;
1239 }
1240
1241 success:
1242 out:
1243 if (any_malloced)
1244 while (list != NULL)
1245 {
1246 struct patternlist *old = list;
1247 list = list->next;
1248 if (old->malloced)
1249 free (old);
1250 }
1251
1252 return retval;
1253}
1254
1255
/* Drop every template parameter macro so that this file can be
   included again with a different set of definitions.  */

/* Character types and literal helpers.  */
#undef CHAR
#undef UCHAR
#undef INT
#undef L
#undef FOLD
#undef BTOWC
#undef WIDE_CHAR_VERSION

/* Names of the generated functions and of the state structure.  */
#undef FCT
#undef EXT
#undef END
#undef STRUCT

/* String and memory helpers.  */
#undef MEMPCPY
#undef MEMCHR
#undef STRCOLL
#undef STRLEN
#undef STRCAT
#undef FINDIDX
1273