1 | /* Copyright (C) 1991-2016 Free Software Foundation, Inc. |
2 | This file is part of the GNU C Library. |
3 | |
4 | The GNU C Library is free software; you can redistribute it and/or |
5 | modify it under the terms of the GNU Lesser General Public |
6 | License as published by the Free Software Foundation; either |
7 | version 2.1 of the License, or (at your option) any later version. |
8 | |
9 | The GNU C Library is distributed in the hope that it will be useful, |
10 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
11 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
12 | Lesser General Public License for more details. |
13 | |
14 | You should have received a copy of the GNU Lesser General Public |
15 | License along with the GNU C Library; if not, see |
16 | <http://www.gnu.org/licenses/>. */ |
17 | |
18 | #include <stdint.h> |
19 | |
/* Snapshot of the matcher state taken when FCT stops at a `*' wildcard
   on behalf of a caller that passed a non-NULL ENDS argument.  The
   caller uses it to resume matching from the recorded position.  */
struct STRUCT
{
  /* Points at the `*' in the pattern at which matching stopped.  */
  const CHAR *pattern;
  /* Position in the subject string reached at that point.  */
  const CHAR *string;
  /* Value of the no_leading_period state at that point.  */
  int no_leading_period;
};
26 | |
/* Match STRING against the filename pattern PATTERN, returning zero if
   it matches, nonzero if not.  STRING_END marks the end of the text to
   match.  If ENDS is non-NULL the match stops at the first plain `*'
   wildcard, records the current position in *ENDS and returns zero.  */
static int FCT (const CHAR *pattern, const CHAR *string,
                const CHAR *string_end, int no_leading_period, int flags,
                struct STRUCT *ends, size_t alloca_used)
     internal_function;
/* Handle the extended-match operator OPT whose parenthesised pattern
   list starts at PATTERN.  Returns zero on a match, FNM_NOMATCH on no
   match, -1 if PATTERN is not a well-formed extended pattern and -2 if
   memory could not be allocated.  */
static int EXT (INT opt, const CHAR *pattern, const CHAR *string,
                const CHAR *string_end, int no_leading_period, int flags,
                size_t alloca_used)
     internal_function;
/* Return a pointer just past the `)' that closes the pattern list
   starting at PATTERNP, or PATTERNP itself if the list is malformed.  */
static const CHAR *END (const CHAR *patternp) internal_function;
38 | |
39 | static int |
40 | internal_function |
41 | FCT (const CHAR *pattern, const CHAR *string, const CHAR *string_end, |
42 | int no_leading_period, int flags, struct STRUCT *ends, size_t alloca_used) |
43 | { |
44 | const CHAR *p = pattern, *n = string; |
45 | UCHAR c; |
46 | #ifdef _LIBC |
47 | # if WIDE_CHAR_VERSION |
48 | const char *collseq = (const char *) |
49 | _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC); |
50 | # else |
51 | const UCHAR *collseq = (const UCHAR *) |
52 | _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQMB); |
53 | # endif |
54 | #endif |
55 | |
56 | while ((c = *p++) != L('\0')) |
57 | { |
58 | int new_no_leading_period = 0; |
59 | c = FOLD (c); |
60 | |
61 | switch (c) |
62 | { |
63 | case L('?'): |
64 | if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(') |
65 | { |
66 | int res = EXT (c, p, n, string_end, no_leading_period, |
67 | flags, alloca_used); |
68 | if (res != -1) |
69 | return res; |
70 | } |
71 | |
72 | if (n == string_end) |
73 | return FNM_NOMATCH; |
74 | else if (*n == L('/') && (flags & FNM_FILE_NAME)) |
75 | return FNM_NOMATCH; |
76 | else if (*n == L('.') && no_leading_period) |
77 | return FNM_NOMATCH; |
78 | break; |
79 | |
80 | case L('\\'): |
81 | if (!(flags & FNM_NOESCAPE)) |
82 | { |
83 | c = *p++; |
84 | if (c == L('\0')) |
85 | /* Trailing \ loses. */ |
86 | return FNM_NOMATCH; |
87 | c = FOLD (c); |
88 | } |
89 | if (n == string_end || FOLD ((UCHAR) *n) != c) |
90 | return FNM_NOMATCH; |
91 | break; |
92 | |
93 | case L('*'): |
94 | if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(') |
95 | { |
96 | int res = EXT (c, p, n, string_end, no_leading_period, |
97 | flags, alloca_used); |
98 | if (res != -1) |
99 | return res; |
100 | } |
101 | else if (ends != NULL) |
102 | { |
103 | ends->pattern = p - 1; |
104 | ends->string = n; |
105 | ends->no_leading_period = no_leading_period; |
106 | return 0; |
107 | } |
108 | |
109 | if (n != string_end && *n == L('.') && no_leading_period) |
110 | return FNM_NOMATCH; |
111 | |
112 | for (c = *p++; c == L('?') || c == L('*'); c = *p++) |
113 | { |
114 | if (*p == L('(') && (flags & FNM_EXTMATCH) != 0) |
115 | { |
116 | const CHAR *endp = END (p); |
117 | if (endp != p) |
118 | { |
119 | /* This is a pattern. Skip over it. */ |
120 | p = endp; |
121 | continue; |
122 | } |
123 | } |
124 | |
125 | if (c == L('?')) |
126 | { |
127 | /* A ? needs to match one character. */ |
128 | if (n == string_end) |
129 | /* There isn't another character; no match. */ |
130 | return FNM_NOMATCH; |
131 | else if (*n == L('/') |
132 | && __builtin_expect (flags & FNM_FILE_NAME, 0)) |
133 | /* A slash does not match a wildcard under |
134 | FNM_FILE_NAME. */ |
135 | return FNM_NOMATCH; |
136 | else |
137 | /* One character of the string is consumed in matching |
138 | this ? wildcard, so *??? won't match if there are |
139 | less than three characters. */ |
140 | ++n; |
141 | } |
142 | } |
143 | |
144 | if (c == L('\0')) |
145 | /* The wildcard(s) is/are the last element of the pattern. |
146 | If the name is a file name and contains another slash |
147 | this means it cannot match, unless the FNM_LEADING_DIR |
148 | flag is set. */ |
149 | { |
150 | int result = (flags & FNM_FILE_NAME) == 0 ? 0 : FNM_NOMATCH; |
151 | |
152 | if (flags & FNM_FILE_NAME) |
153 | { |
154 | if (flags & FNM_LEADING_DIR) |
155 | result = 0; |
156 | else |
157 | { |
158 | if (MEMCHR (n, L('/'), string_end - n) == NULL) |
159 | result = 0; |
160 | } |
161 | } |
162 | |
163 | return result; |
164 | } |
165 | else |
166 | { |
167 | const CHAR *endp; |
168 | struct STRUCT end; |
169 | |
170 | end.pattern = NULL; |
171 | endp = MEMCHR (n, (flags & FNM_FILE_NAME) ? L('/') : L('\0'), |
172 | string_end - n); |
173 | if (endp == NULL) |
174 | endp = string_end; |
175 | |
176 | if (c == L('[') |
177 | || (__builtin_expect (flags & FNM_EXTMATCH, 0) != 0 |
178 | && (c == L('@') || c == L('+') || c == L('!')) |
179 | && *p == L('('))) |
180 | { |
181 | int flags2 = ((flags & FNM_FILE_NAME) |
182 | ? flags : (flags & ~FNM_PERIOD)); |
183 | |
184 | for (--p; n < endp; ++n, no_leading_period = 0) |
185 | if (FCT (p, n, string_end, no_leading_period, flags2, |
186 | &end, alloca_used) == 0) |
187 | goto found; |
188 | } |
189 | else if (c == L('/') && (flags & FNM_FILE_NAME)) |
190 | { |
191 | while (n < string_end && *n != L('/')) |
192 | ++n; |
193 | if (n < string_end && *n == L('/') |
194 | && (FCT (p, n + 1, string_end, flags & FNM_PERIOD, flags, |
195 | NULL, alloca_used) == 0)) |
196 | return 0; |
197 | } |
198 | else |
199 | { |
200 | int flags2 = ((flags & FNM_FILE_NAME) |
201 | ? flags : (flags & ~FNM_PERIOD)); |
202 | |
203 | if (c == L('\\') && !(flags & FNM_NOESCAPE)) |
204 | c = *p; |
205 | c = FOLD (c); |
206 | for (--p; n < endp; ++n, no_leading_period = 0) |
207 | if (FOLD ((UCHAR) *n) == c |
208 | && (FCT (p, n, string_end, no_leading_period, flags2, |
209 | &end, alloca_used) == 0)) |
210 | { |
211 | found: |
212 | if (end.pattern == NULL) |
213 | return 0; |
214 | break; |
215 | } |
216 | if (end.pattern != NULL) |
217 | { |
218 | p = end.pattern; |
219 | n = end.string; |
220 | no_leading_period = end.no_leading_period; |
221 | continue; |
222 | } |
223 | } |
224 | } |
225 | |
226 | /* If we come here no match is possible with the wildcard. */ |
227 | return FNM_NOMATCH; |
228 | |
229 | case L('['): |
230 | { |
231 | /* Nonzero if the sense of the character class is inverted. */ |
232 | const CHAR *p_init = p; |
233 | const CHAR *n_init = n; |
234 | int not; |
235 | CHAR cold; |
236 | UCHAR fn; |
237 | |
238 | if (posixly_correct == 0) |
239 | posixly_correct = getenv ("POSIXLY_CORRECT" ) != NULL ? 1 : -1; |
240 | |
241 | if (n == string_end) |
242 | return FNM_NOMATCH; |
243 | |
244 | if (*n == L('.') && no_leading_period) |
245 | return FNM_NOMATCH; |
246 | |
247 | if (*n == L('/') && (flags & FNM_FILE_NAME)) |
248 | /* `/' cannot be matched. */ |
249 | return FNM_NOMATCH; |
250 | |
251 | not = (*p == L('!') || (posixly_correct < 0 && *p == L('^'))); |
252 | if (not) |
253 | ++p; |
254 | |
255 | fn = FOLD ((UCHAR) *n); |
256 | |
257 | c = *p++; |
258 | for (;;) |
259 | { |
260 | if (!(flags & FNM_NOESCAPE) && c == L('\\')) |
261 | { |
262 | if (*p == L('\0')) |
263 | return FNM_NOMATCH; |
264 | c = FOLD ((UCHAR) *p); |
265 | ++p; |
266 | |
267 | goto normal_bracket; |
268 | } |
269 | else if (c == L('[') && *p == L(':')) |
270 | { |
271 | /* Leave room for the null. */ |
272 | CHAR str[CHAR_CLASS_MAX_LENGTH + 1]; |
273 | size_t c1 = 0; |
274 | #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) |
275 | wctype_t wt; |
276 | #endif |
277 | const CHAR *startp = p; |
278 | |
279 | for (;;) |
280 | { |
281 | if (c1 == CHAR_CLASS_MAX_LENGTH) |
282 | /* The name is too long and therefore the pattern |
283 | is ill-formed. */ |
284 | return FNM_NOMATCH; |
285 | |
286 | c = *++p; |
287 | if (c == L(':') && p[1] == L(']')) |
288 | { |
289 | p += 2; |
290 | break; |
291 | } |
292 | if (c < L('a') || c >= L('z')) |
293 | { |
294 | /* This cannot possibly be a character class name. |
295 | Match it as a normal range. */ |
296 | p = startp; |
297 | c = L('['); |
298 | goto normal_bracket; |
299 | } |
300 | str[c1++] = c; |
301 | } |
302 | str[c1] = L('\0'); |
303 | |
304 | #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H) |
305 | wt = IS_CHAR_CLASS (str); |
306 | if (wt == 0) |
307 | /* Invalid character class name. */ |
308 | return FNM_NOMATCH; |
309 | |
310 | # if defined _LIBC && ! WIDE_CHAR_VERSION |
311 | /* The following code is glibc specific but does |
312 | there a good job in speeding up the code since |
313 | we can avoid the btowc() call. */ |
314 | if (_ISCTYPE ((UCHAR) *n, wt)) |
315 | goto matched; |
316 | # else |
317 | if (ISWCTYPE (BTOWC ((UCHAR) *n), wt)) |
318 | goto matched; |
319 | # endif |
320 | #else |
321 | if ((STREQ (str, L("alnum" )) && ISALNUM ((UCHAR) *n)) |
322 | || (STREQ (str, L("alpha" )) && ISALPHA ((UCHAR) *n)) |
323 | || (STREQ (str, L("blank" )) && ISBLANK ((UCHAR) *n)) |
324 | || (STREQ (str, L("cntrl" )) && ISCNTRL ((UCHAR) *n)) |
325 | || (STREQ (str, L("digit" )) && ISDIGIT ((UCHAR) *n)) |
326 | || (STREQ (str, L("graph" )) && ISGRAPH ((UCHAR) *n)) |
327 | || (STREQ (str, L("lower" )) && ISLOWER ((UCHAR) *n)) |
328 | || (STREQ (str, L("print" )) && ISPRINT ((UCHAR) *n)) |
329 | || (STREQ (str, L("punct" )) && ISPUNCT ((UCHAR) *n)) |
330 | || (STREQ (str, L("space" )) && ISSPACE ((UCHAR) *n)) |
331 | || (STREQ (str, L("upper" )) && ISUPPER ((UCHAR) *n)) |
332 | || (STREQ (str, L("xdigit" )) && ISXDIGIT ((UCHAR) *n))) |
333 | goto matched; |
334 | #endif |
335 | c = *p++; |
336 | } |
337 | #ifdef _LIBC |
338 | else if (c == L('[') && *p == L('=')) |
339 | { |
340 | /* It's important that STR be a scalar variable rather |
341 | than a one-element array, because GCC (at least 4.9.2 |
342 | -O2 on x86-64) can be confused by the array and |
343 | diagnose a "used initialized" in a dead branch in the |
344 | findidx function. */ |
345 | UCHAR str; |
346 | uint32_t nrules = |
347 | _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); |
348 | const CHAR *startp = p; |
349 | |
350 | c = *++p; |
351 | if (c == L('\0')) |
352 | { |
353 | p = startp; |
354 | c = L('['); |
355 | goto normal_bracket; |
356 | } |
357 | str = c; |
358 | |
359 | c = *++p; |
360 | if (c != L('=') || p[1] != L(']')) |
361 | { |
362 | p = startp; |
363 | c = L('['); |
364 | goto normal_bracket; |
365 | } |
366 | p += 2; |
367 | |
368 | if (nrules == 0) |
369 | { |
370 | if ((UCHAR) *n == str) |
371 | goto matched; |
372 | } |
373 | else |
374 | { |
375 | const int32_t *table; |
376 | # if WIDE_CHAR_VERSION |
377 | const int32_t *weights; |
378 | const wint_t *extra; |
379 | # else |
380 | const unsigned char *weights; |
381 | const unsigned char *; |
382 | # endif |
383 | const int32_t *indirect; |
384 | int32_t idx; |
385 | const UCHAR *cp = (const UCHAR *) &str; |
386 | |
387 | # if WIDE_CHAR_VERSION |
388 | table = (const int32_t *) |
389 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC); |
390 | weights = (const int32_t *) |
391 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC); |
392 | extra = (const wint_t *) |
393 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC); |
394 | indirect = (const int32_t *) |
395 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC); |
396 | # else |
397 | table = (const int32_t *) |
398 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB); |
399 | weights = (const unsigned char *) |
400 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB); |
401 | extra = (const unsigned char *) |
402 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); |
403 | indirect = (const int32_t *) |
404 | _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); |
405 | # endif |
406 | |
407 | idx = FINDIDX (table, indirect, extra, &cp, 1); |
408 | if (idx != 0) |
409 | { |
410 | /* We found a table entry. Now see whether the |
411 | character we are currently at has the same |
412 | equivalance class value. */ |
413 | int len = weights[idx & 0xffffff]; |
414 | int32_t idx2; |
415 | const UCHAR *np = (const UCHAR *) n; |
416 | |
417 | idx2 = FINDIDX (table, indirect, extra, |
418 | &np, string_end - n); |
419 | if (idx2 != 0 |
420 | && (idx >> 24) == (idx2 >> 24) |
421 | && len == weights[idx2 & 0xffffff]) |
422 | { |
423 | int cnt = 0; |
424 | |
425 | idx &= 0xffffff; |
426 | idx2 &= 0xffffff; |
427 | |
428 | while (cnt < len |
429 | && (weights[idx + 1 + cnt] |
430 | == weights[idx2 + 1 + cnt])) |
431 | ++cnt; |
432 | |
433 | if (cnt == len) |
434 | goto matched; |
435 | } |
436 | } |
437 | } |
438 | |
439 | c = *p++; |
440 | } |
441 | #endif |
442 | else if (c == L('\0')) |
443 | { |
444 | /* [ unterminated, treat as normal character. */ |
445 | p = p_init; |
446 | n = n_init; |
447 | c = L('['); |
448 | goto normal_match; |
449 | } |
450 | else |
451 | { |
452 | int is_range = 0; |
453 | |
454 | #ifdef _LIBC |
455 | int is_seqval = 0; |
456 | |
457 | if (c == L('[') && *p == L('.')) |
458 | { |
459 | uint32_t nrules = |
460 | _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES); |
461 | const CHAR *startp = p; |
462 | size_t c1 = 0; |
463 | |
464 | while (1) |
465 | { |
466 | c = *++p; |
467 | if (c == L('.') && p[1] == L(']')) |
468 | { |
469 | p += 2; |
470 | break; |
471 | } |
472 | if (c == '\0') |
473 | return FNM_NOMATCH; |
474 | ++c1; |
475 | } |
476 | |
477 | /* We have to handling the symbols differently in |
478 | ranges since then the collation sequence is |
479 | important. */ |
480 | is_range = *p == L('-') && p[1] != L('\0'); |
481 | |
482 | if (nrules == 0) |
483 | { |
484 | /* There are no names defined in the collation |
485 | data. Therefore we only accept the trivial |
486 | names consisting of the character itself. */ |
487 | if (c1 != 1) |
488 | return FNM_NOMATCH; |
489 | |
490 | if (!is_range && *n == startp[1]) |
491 | goto matched; |
492 | |
493 | cold = startp[1]; |
494 | c = *p++; |
495 | } |
496 | else |
497 | { |
498 | int32_t table_size; |
499 | const int32_t *symb_table; |
500 | # if WIDE_CHAR_VERSION |
501 | char str[c1]; |
502 | unsigned int strcnt; |
503 | # else |
504 | # define str (startp + 1) |
505 | # endif |
506 | const unsigned char *; |
507 | int32_t idx; |
508 | int32_t elem; |
509 | int32_t second; |
510 | int32_t hash; |
511 | |
512 | # if WIDE_CHAR_VERSION |
513 | /* We have to convert the name to a single-byte |
514 | string. This is possible since the names |
515 | consist of ASCII characters and the internal |
516 | representation is UCS4. */ |
517 | for (strcnt = 0; strcnt < c1; ++strcnt) |
518 | str[strcnt] = startp[1 + strcnt]; |
519 | #endif |
520 | |
521 | table_size = |
522 | _NL_CURRENT_WORD (LC_COLLATE, |
523 | _NL_COLLATE_SYMB_HASH_SIZEMB); |
524 | symb_table = (const int32_t *) |
525 | _NL_CURRENT (LC_COLLATE, |
526 | _NL_COLLATE_SYMB_TABLEMB); |
527 | extra = (const unsigned char *) |
528 | _NL_CURRENT (LC_COLLATE, |
529 | _NL_COLLATE_SYMB_EXTRAMB); |
530 | |
531 | /* Locate the character in the hashing table. */ |
532 | hash = elem_hash (str, c1); |
533 | |
534 | idx = 0; |
535 | elem = hash % table_size; |
536 | if (symb_table[2 * elem] != 0) |
537 | { |
538 | second = hash % (table_size - 2) + 1; |
539 | |
540 | do |
541 | { |
542 | /* First compare the hashing value. */ |
543 | if (symb_table[2 * elem] == hash |
544 | && (c1 |
545 | == extra[symb_table[2 * elem + 1]]) |
546 | && memcmp (str, |
547 | &extra[symb_table[2 * elem |
548 | + 1] |
549 | + 1], c1) == 0) |
550 | { |
551 | /* Yep, this is the entry. */ |
552 | idx = symb_table[2 * elem + 1]; |
553 | idx += 1 + extra[idx]; |
554 | break; |
555 | } |
556 | |
557 | /* Next entry. */ |
558 | elem += second; |
559 | } |
560 | while (symb_table[2 * elem] != 0); |
561 | } |
562 | |
563 | if (symb_table[2 * elem] != 0) |
564 | { |
565 | /* Compare the byte sequence but only if |
566 | this is not part of a range. */ |
567 | # if WIDE_CHAR_VERSION |
568 | int32_t *wextra; |
569 | |
570 | idx += 1 + extra[idx]; |
571 | /* Adjust for the alignment. */ |
572 | idx = (idx + 3) & ~3; |
573 | |
574 | wextra = (int32_t *) &extra[idx + 4]; |
575 | # endif |
576 | |
577 | if (! is_range) |
578 | { |
579 | # if WIDE_CHAR_VERSION |
580 | for (c1 = 0; |
581 | (int32_t) c1 < wextra[idx]; |
582 | ++c1) |
583 | if (n[c1] != wextra[1 + c1]) |
584 | break; |
585 | |
586 | if ((int32_t) c1 == wextra[idx]) |
587 | goto matched; |
588 | # else |
589 | for (c1 = 0; c1 < extra[idx]; ++c1) |
590 | if (n[c1] != extra[1 + c1]) |
591 | break; |
592 | |
593 | if (c1 == extra[idx]) |
594 | goto matched; |
595 | # endif |
596 | } |
597 | |
598 | /* Get the collation sequence value. */ |
599 | is_seqval = 1; |
600 | # if WIDE_CHAR_VERSION |
601 | cold = wextra[1 + wextra[idx]]; |
602 | # else |
603 | /* Adjust for the alignment. */ |
604 | idx += 1 + extra[idx]; |
605 | idx = (idx + 3) & ~4; |
606 | cold = *((int32_t *) &extra[idx]); |
607 | # endif |
608 | |
609 | c = *p++; |
610 | } |
611 | else if (c1 == 1) |
612 | { |
613 | /* No valid character. Match it as a |
614 | single byte. */ |
615 | if (!is_range && *n == str[0]) |
616 | goto matched; |
617 | |
618 | cold = str[0]; |
619 | c = *p++; |
620 | } |
621 | else |
622 | return FNM_NOMATCH; |
623 | } |
624 | } |
625 | else |
626 | # undef str |
627 | #endif |
628 | { |
629 | c = FOLD (c); |
630 | normal_bracket: |
631 | |
632 | /* We have to handling the symbols differently in |
633 | ranges since then the collation sequence is |
634 | important. */ |
635 | is_range = (*p == L('-') && p[1] != L('\0') |
636 | && p[1] != L(']')); |
637 | |
638 | if (!is_range && c == fn) |
639 | goto matched; |
640 | |
641 | /* This is needed if we goto normal_bracket; from |
642 | outside of is_seqval's scope. */ |
643 | is_seqval = 0; |
644 | cold = c; |
645 | c = *p++; |
646 | } |
647 | |
648 | if (c == L('-') && *p != L(']')) |
649 | { |
650 | #if _LIBC |
651 | /* We have to find the collation sequence |
652 | value for C. Collation sequence is nothing |
653 | we can regularly access. The sequence |
654 | value is defined by the order in which the |
655 | definitions of the collation values for the |
656 | various characters appear in the source |
657 | file. A strange concept, nowhere |
658 | documented. */ |
659 | uint32_t fcollseq; |
660 | uint32_t lcollseq; |
661 | UCHAR cend = *p++; |
662 | |
663 | # if WIDE_CHAR_VERSION |
664 | /* Search in the `names' array for the characters. */ |
665 | fcollseq = __collseq_table_lookup (collseq, fn); |
666 | if (fcollseq == ~((uint32_t) 0)) |
667 | /* XXX We don't know anything about the character |
668 | we are supposed to match. This means we are |
669 | failing. */ |
670 | goto range_not_matched; |
671 | |
672 | if (is_seqval) |
673 | lcollseq = cold; |
674 | else |
675 | lcollseq = __collseq_table_lookup (collseq, cold); |
676 | # else |
677 | fcollseq = collseq[fn]; |
678 | lcollseq = is_seqval ? cold : collseq[(UCHAR) cold]; |
679 | # endif |
680 | |
681 | is_seqval = 0; |
682 | if (cend == L('[') && *p == L('.')) |
683 | { |
684 | uint32_t nrules = |
685 | _NL_CURRENT_WORD (LC_COLLATE, |
686 | _NL_COLLATE_NRULES); |
687 | const CHAR *startp = p; |
688 | size_t c1 = 0; |
689 | |
690 | while (1) |
691 | { |
692 | c = *++p; |
693 | if (c == L('.') && p[1] == L(']')) |
694 | { |
695 | p += 2; |
696 | break; |
697 | } |
698 | if (c == '\0') |
699 | return FNM_NOMATCH; |
700 | ++c1; |
701 | } |
702 | |
703 | if (nrules == 0) |
704 | { |
705 | /* There are no names defined in the |
706 | collation data. Therefore we only |
707 | accept the trivial names consisting |
708 | of the character itself. */ |
709 | if (c1 != 1) |
710 | return FNM_NOMATCH; |
711 | |
712 | cend = startp[1]; |
713 | } |
714 | else |
715 | { |
716 | int32_t table_size; |
717 | const int32_t *symb_table; |
718 | # if WIDE_CHAR_VERSION |
719 | char str[c1]; |
720 | unsigned int strcnt; |
721 | # else |
722 | # define str (startp + 1) |
723 | # endif |
724 | const unsigned char *; |
725 | int32_t idx; |
726 | int32_t elem; |
727 | int32_t second; |
728 | int32_t hash; |
729 | |
730 | # if WIDE_CHAR_VERSION |
731 | /* We have to convert the name to a single-byte |
732 | string. This is possible since the names |
733 | consist of ASCII characters and the internal |
734 | representation is UCS4. */ |
735 | for (strcnt = 0; strcnt < c1; ++strcnt) |
736 | str[strcnt] = startp[1 + strcnt]; |
737 | # endif |
738 | |
739 | table_size = |
740 | _NL_CURRENT_WORD (LC_COLLATE, |
741 | _NL_COLLATE_SYMB_HASH_SIZEMB); |
742 | symb_table = (const int32_t *) |
743 | _NL_CURRENT (LC_COLLATE, |
744 | _NL_COLLATE_SYMB_TABLEMB); |
745 | extra = (const unsigned char *) |
746 | _NL_CURRENT (LC_COLLATE, |
747 | _NL_COLLATE_SYMB_EXTRAMB); |
748 | |
749 | /* Locate the character in the hashing |
750 | table. */ |
751 | hash = elem_hash (str, c1); |
752 | |
753 | idx = 0; |
754 | elem = hash % table_size; |
755 | if (symb_table[2 * elem] != 0) |
756 | { |
757 | second = hash % (table_size - 2) + 1; |
758 | |
759 | do |
760 | { |
761 | /* First compare the hashing value. */ |
762 | if (symb_table[2 * elem] == hash |
763 | && (c1 |
764 | == extra[symb_table[2 * elem + 1]]) |
765 | && memcmp (str, |
766 | &extra[symb_table[2 * elem + 1] |
767 | + 1], c1) == 0) |
768 | { |
769 | /* Yep, this is the entry. */ |
770 | idx = symb_table[2 * elem + 1]; |
771 | idx += 1 + extra[idx]; |
772 | break; |
773 | } |
774 | |
775 | /* Next entry. */ |
776 | elem += second; |
777 | } |
778 | while (symb_table[2 * elem] != 0); |
779 | } |
780 | |
781 | if (symb_table[2 * elem] != 0) |
782 | { |
783 | /* Compare the byte sequence but only if |
784 | this is not part of a range. */ |
785 | # if WIDE_CHAR_VERSION |
786 | int32_t *wextra; |
787 | |
788 | idx += 1 + extra[idx]; |
789 | /* Adjust for the alignment. */ |
790 | idx = (idx + 3) & ~4; |
791 | |
792 | wextra = (int32_t *) &extra[idx + 4]; |
793 | # endif |
794 | /* Get the collation sequence value. */ |
795 | is_seqval = 1; |
796 | # if WIDE_CHAR_VERSION |
797 | cend = wextra[1 + wextra[idx]]; |
798 | # else |
799 | /* Adjust for the alignment. */ |
800 | idx += 1 + extra[idx]; |
801 | idx = (idx + 3) & ~4; |
802 | cend = *((int32_t *) &extra[idx]); |
803 | # endif |
804 | } |
805 | else if (symb_table[2 * elem] != 0 && c1 == 1) |
806 | { |
807 | cend = str[0]; |
808 | c = *p++; |
809 | } |
810 | else |
811 | return FNM_NOMATCH; |
812 | } |
813 | # undef str |
814 | } |
815 | else |
816 | { |
817 | if (!(flags & FNM_NOESCAPE) && cend == L('\\')) |
818 | cend = *p++; |
819 | if (cend == L('\0')) |
820 | return FNM_NOMATCH; |
821 | cend = FOLD (cend); |
822 | } |
823 | |
824 | /* XXX It is not entirely clear to me how to handle |
825 | characters which are not mentioned in the |
826 | collation specification. */ |
827 | if ( |
828 | # if WIDE_CHAR_VERSION |
829 | lcollseq == 0xffffffff || |
830 | # endif |
831 | lcollseq <= fcollseq) |
832 | { |
833 | /* We have to look at the upper bound. */ |
834 | uint32_t hcollseq; |
835 | |
836 | if (is_seqval) |
837 | hcollseq = cend; |
838 | else |
839 | { |
840 | # if WIDE_CHAR_VERSION |
841 | hcollseq = |
842 | __collseq_table_lookup (collseq, cend); |
843 | if (hcollseq == ~((uint32_t) 0)) |
844 | { |
845 | /* Hum, no information about the upper |
846 | bound. The matching succeeds if the |
847 | lower bound is matched exactly. */ |
848 | if (lcollseq != fcollseq) |
849 | goto range_not_matched; |
850 | |
851 | goto matched; |
852 | } |
853 | # else |
854 | hcollseq = collseq[cend]; |
855 | # endif |
856 | } |
857 | |
858 | if (lcollseq <= hcollseq && fcollseq <= hcollseq) |
859 | goto matched; |
860 | } |
861 | # if WIDE_CHAR_VERSION |
862 | range_not_matched: |
863 | # endif |
864 | #else |
865 | /* We use a boring value comparison of the character |
866 | values. This is better than comparing using |
867 | `strcoll' since the latter would have surprising |
868 | and sometimes fatal consequences. */ |
869 | UCHAR cend = *p++; |
870 | |
871 | if (!(flags & FNM_NOESCAPE) && cend == L('\\')) |
872 | cend = *p++; |
873 | if (cend == L('\0')) |
874 | return FNM_NOMATCH; |
875 | |
876 | /* It is a range. */ |
877 | if (cold <= fn && fn <= cend) |
878 | goto matched; |
879 | #endif |
880 | |
881 | c = *p++; |
882 | } |
883 | } |
884 | |
885 | if (c == L(']')) |
886 | break; |
887 | } |
888 | |
889 | if (!not) |
890 | return FNM_NOMATCH; |
891 | break; |
892 | |
893 | matched: |
894 | /* Skip the rest of the [...] that already matched. */ |
895 | while ((c = *p++) != L (']')) |
896 | { |
897 | if (c == L('\0')) |
898 | /* [... (unterminated) loses. */ |
899 | return FNM_NOMATCH; |
900 | |
901 | if (!(flags & FNM_NOESCAPE) && c == L('\\')) |
902 | { |
903 | if (*p == L('\0')) |
904 | return FNM_NOMATCH; |
905 | /* XXX 1003.2d11 is unclear if this is right. */ |
906 | ++p; |
907 | } |
908 | else if (c == L('[') && *p == L(':')) |
909 | { |
910 | int c1 = 0; |
911 | const CHAR *startp = p; |
912 | |
913 | while (1) |
914 | { |
915 | c = *++p; |
916 | if (++c1 == CHAR_CLASS_MAX_LENGTH) |
917 | return FNM_NOMATCH; |
918 | |
919 | if (*p == L(':') && p[1] == L(']')) |
920 | break; |
921 | |
922 | if (c < L('a') || c >= L('z')) |
923 | { |
924 | p = startp - 2; |
925 | break; |
926 | } |
927 | } |
928 | p += 2; |
929 | } |
930 | else if (c == L('[') && *p == L('=')) |
931 | { |
932 | c = *++p; |
933 | if (c == L('\0')) |
934 | return FNM_NOMATCH; |
935 | c = *++p; |
936 | if (c != L('=') || p[1] != L(']')) |
937 | return FNM_NOMATCH; |
938 | p += 2; |
939 | } |
940 | else if (c == L('[') && *p == L('.')) |
941 | { |
942 | while (1) |
943 | { |
944 | c = *++p; |
945 | if (c == L('\0')) |
946 | return FNM_NOMATCH; |
947 | |
948 | if (c == L('.') && p[1] == L(']')) |
949 | break; |
950 | } |
951 | p += 2; |
952 | } |
953 | } |
954 | if (not) |
955 | return FNM_NOMATCH; |
956 | } |
957 | break; |
958 | |
959 | case L('+'): |
960 | case L('@'): |
961 | case L('!'): |
962 | if (__builtin_expect (flags & FNM_EXTMATCH, 0) && *p == '(') |
963 | { |
964 | int res = EXT (c, p, n, string_end, no_leading_period, flags, |
965 | alloca_used); |
966 | if (res != -1) |
967 | return res; |
968 | } |
969 | goto normal_match; |
970 | |
971 | case L('/'): |
972 | if (NO_LEADING_PERIOD (flags)) |
973 | { |
974 | if (n == string_end || c != (UCHAR) *n) |
975 | return FNM_NOMATCH; |
976 | |
977 | new_no_leading_period = 1; |
978 | break; |
979 | } |
980 | /* FALLTHROUGH */ |
981 | default: |
982 | normal_match: |
983 | if (n == string_end || c != FOLD ((UCHAR) *n)) |
984 | return FNM_NOMATCH; |
985 | } |
986 | |
987 | no_leading_period = new_no_leading_period; |
988 | ++n; |
989 | } |
990 | |
991 | if (n == string_end) |
992 | return 0; |
993 | |
994 | if ((flags & FNM_LEADING_DIR) && n != string_end && *n == L('/')) |
995 | /* The FNM_LEADING_DIR flag says that "foo*" matches "foobar/frobozz". */ |
996 | return 0; |
997 | |
998 | return FNM_NOMATCH; |
999 | } |
1000 | |
1001 | |
1002 | static const CHAR * |
1003 | internal_function |
1004 | END (const CHAR *pattern) |
1005 | { |
1006 | const CHAR *p = pattern; |
1007 | |
1008 | while (1) |
1009 | if (*++p == L('\0')) |
1010 | /* This is an invalid pattern. */ |
1011 | return pattern; |
1012 | else if (*p == L('[')) |
1013 | { |
1014 | /* Handle brackets special. */ |
1015 | if (posixly_correct == 0) |
1016 | posixly_correct = getenv ("POSIXLY_CORRECT" ) != NULL ? 1 : -1; |
1017 | |
1018 | /* Skip the not sign. We have to recognize it because of a possibly |
1019 | following ']'. */ |
1020 | if (*++p == L('!') || (posixly_correct < 0 && *p == L('^'))) |
1021 | ++p; |
1022 | /* A leading ']' is recognized as such. */ |
1023 | if (*p == L(']')) |
1024 | ++p; |
1025 | /* Skip over all characters of the list. */ |
1026 | while (*p != L(']')) |
1027 | if (*p++ == L('\0')) |
1028 | /* This is no valid pattern. */ |
1029 | return pattern; |
1030 | } |
1031 | else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@') |
1032 | || *p == L('!')) && p[1] == L('(')) |
1033 | { |
1034 | p = END (p + 1); |
1035 | if (*p == L('\0')) |
1036 | /* This is an invalid pattern. */ |
1037 | return pattern; |
1038 | } |
1039 | else if (*p == L(')')) |
1040 | break; |
1041 | |
1042 | return p + 1; |
1043 | } |
1044 | |
1045 | |
1046 | static int |
1047 | internal_function |
1048 | EXT (INT opt, const CHAR *pattern, const CHAR *string, const CHAR *string_end, |
1049 | int no_leading_period, int flags, size_t alloca_used) |
1050 | { |
1051 | const CHAR *startp; |
1052 | int level; |
1053 | struct patternlist |
1054 | { |
1055 | struct patternlist *next; |
1056 | CHAR malloced; |
1057 | CHAR str[0]; |
1058 | } *list = NULL; |
1059 | struct patternlist **lastp = &list; |
1060 | size_t pattern_len = STRLEN (pattern); |
1061 | int any_malloced = 0; |
1062 | const CHAR *p; |
1063 | const CHAR *rs; |
1064 | int retval = 0; |
1065 | |
1066 | /* Parse the pattern. Store the individual parts in the list. */ |
1067 | level = 0; |
1068 | for (startp = p = pattern + 1; level >= 0; ++p) |
1069 | if (*p == L('\0')) |
1070 | { |
1071 | /* This is an invalid pattern. */ |
1072 | retval = -1; |
1073 | goto out; |
1074 | } |
1075 | else if (*p == L('[')) |
1076 | { |
1077 | /* Handle brackets special. */ |
1078 | if (posixly_correct == 0) |
1079 | posixly_correct = getenv ("POSIXLY_CORRECT" ) != NULL ? 1 : -1; |
1080 | |
1081 | /* Skip the not sign. We have to recognize it because of a possibly |
1082 | following ']'. */ |
1083 | if (*++p == L('!') || (posixly_correct < 0 && *p == L('^'))) |
1084 | ++p; |
1085 | /* A leading ']' is recognized as such. */ |
1086 | if (*p == L(']')) |
1087 | ++p; |
1088 | /* Skip over all characters of the list. */ |
1089 | while (*p != L(']')) |
1090 | if (*p++ == L('\0')) |
1091 | { |
1092 | /* This is no valid pattern. */ |
1093 | retval = -1; |
1094 | goto out; |
1095 | } |
1096 | } |
1097 | else if ((*p == L('?') || *p == L('*') || *p == L('+') || *p == L('@') |
1098 | || *p == L('!')) && p[1] == L('(')) |
1099 | /* Remember the nesting level. */ |
1100 | ++level; |
1101 | else if (*p == L(')')) |
1102 | { |
1103 | if (level-- == 0) |
1104 | { |
1105 | /* This means we found the end of the pattern. */ |
1106 | #define NEW_PATTERN \ |
1107 | struct patternlist *newp; \ |
1108 | size_t slen = (opt == L('?') || opt == L('@') \ |
1109 | ? pattern_len : (p - startp + 1)); \ |
1110 | slen = sizeof (struct patternlist) + (slen * sizeof (CHAR)); \ |
1111 | int malloced = ! __libc_use_alloca (alloca_used + slen); \ |
1112 | if (__builtin_expect (malloced, 0)) \ |
1113 | { \ |
1114 | newp = malloc (slen); \ |
1115 | if (newp == NULL) \ |
1116 | { \ |
1117 | retval = -2; \ |
1118 | goto out; \ |
1119 | } \ |
1120 | any_malloced = 1; \ |
1121 | } \ |
1122 | else \ |
1123 | newp = alloca_account (slen, alloca_used); \ |
1124 | newp->next = NULL; \ |
1125 | newp->malloced = malloced; \ |
1126 | *((CHAR *) MEMPCPY (newp->str, startp, p - startp)) = L('\0'); \ |
1127 | *lastp = newp; \ |
1128 | lastp = &newp->next |
1129 | NEW_PATTERN; |
1130 | } |
1131 | } |
1132 | else if (*p == L('|')) |
1133 | { |
1134 | if (level == 0) |
1135 | { |
1136 | NEW_PATTERN; |
1137 | startp = p + 1; |
1138 | } |
1139 | } |
1140 | assert (list != NULL); |
1141 | assert (p[-1] == L(')')); |
1142 | #undef NEW_PATTERN |
1143 | |
1144 | switch (opt) |
1145 | { |
1146 | case L('*'): |
1147 | if (FCT (p, string, string_end, no_leading_period, flags, NULL, |
1148 | alloca_used) == 0) |
1149 | goto success; |
1150 | /* FALLTHROUGH */ |
1151 | |
1152 | case L('+'): |
1153 | do |
1154 | { |
1155 | for (rs = string; rs <= string_end; ++rs) |
1156 | /* First match the prefix with the current pattern with the |
1157 | current pattern. */ |
1158 | if (FCT (list->str, string, rs, no_leading_period, |
1159 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, |
1160 | NULL, alloca_used) == 0 |
1161 | /* This was successful. Now match the rest with the rest |
1162 | of the pattern. */ |
1163 | && (FCT (p, rs, string_end, |
1164 | rs == string |
1165 | ? no_leading_period |
1166 | : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0, |
1167 | flags & FNM_FILE_NAME |
1168 | ? flags : flags & ~FNM_PERIOD, NULL, alloca_used) == 0 |
1169 | /* This didn't work. Try the whole pattern. */ |
1170 | || (rs != string |
1171 | && FCT (pattern - 1, rs, string_end, |
1172 | rs == string |
1173 | ? no_leading_period |
1174 | : (rs[-1] == '/' && NO_LEADING_PERIOD (flags) |
1175 | ? 1 : 0), |
1176 | flags & FNM_FILE_NAME |
1177 | ? flags : flags & ~FNM_PERIOD, NULL, |
1178 | alloca_used) == 0))) |
1179 | /* It worked. Signal success. */ |
1180 | goto success; |
1181 | } |
1182 | while ((list = list->next) != NULL); |
1183 | |
1184 | /* None of the patterns lead to a match. */ |
1185 | retval = FNM_NOMATCH; |
1186 | break; |
1187 | |
1188 | case L('?'): |
1189 | if (FCT (p, string, string_end, no_leading_period, flags, NULL, |
1190 | alloca_used) == 0) |
1191 | goto success; |
1192 | /* FALLTHROUGH */ |
1193 | |
1194 | case L('@'): |
1195 | do |
1196 | /* I cannot believe it but `strcat' is actually acceptable |
1197 | here. Match the entire string with the prefix from the |
1198 | pattern list and the rest of the pattern following the |
1199 | pattern list. */ |
1200 | if (FCT (STRCAT (list->str, p), string, string_end, |
1201 | no_leading_period, |
1202 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, |
1203 | NULL, alloca_used) == 0) |
1204 | /* It worked. Signal success. */ |
1205 | goto success; |
1206 | while ((list = list->next) != NULL); |
1207 | |
1208 | /* None of the patterns lead to a match. */ |
1209 | retval = FNM_NOMATCH; |
1210 | break; |
1211 | |
1212 | case L('!'): |
1213 | for (rs = string; rs <= string_end; ++rs) |
1214 | { |
1215 | struct patternlist *runp; |
1216 | |
1217 | for (runp = list; runp != NULL; runp = runp->next) |
1218 | if (FCT (runp->str, string, rs, no_leading_period, |
1219 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, |
1220 | NULL, alloca_used) == 0) |
1221 | break; |
1222 | |
1223 | /* If none of the patterns matched see whether the rest does. */ |
1224 | if (runp == NULL |
1225 | && (FCT (p, rs, string_end, |
1226 | rs == string |
1227 | ? no_leading_period |
1228 | : rs[-1] == '/' && NO_LEADING_PERIOD (flags) ? 1 : 0, |
1229 | flags & FNM_FILE_NAME ? flags : flags & ~FNM_PERIOD, |
1230 | NULL, alloca_used) == 0)) |
1231 | /* This is successful. */ |
1232 | goto success; |
1233 | } |
1234 | |
1235 | /* None of the patterns together with the rest of the pattern |
1236 | lead to a match. */ |
1237 | retval = FNM_NOMATCH; |
1238 | break; |
1239 | |
1240 | default: |
1241 | assert (! "Invalid extended matching operator" ); |
1242 | retval = -1; |
1243 | break; |
1244 | } |
1245 | |
1246 | success: |
1247 | out: |
1248 | if (any_malloced) |
1249 | while (list != NULL) |
1250 | { |
1251 | struct patternlist *old = list; |
1252 | list = list->next; |
1253 | if (old->malloced) |
1254 | free (old); |
1255 | } |
1256 | |
1257 | return retval; |
1258 | } |
1259 | |
1260 | |
/* Remove the macros that parameterise the code above.  Presumably the
   including file defines them again before including this file another
   time (e.g. for a wide-character variant) -- confirm against the
   includer.  */
#undef FOLD
#undef CHAR
#undef UCHAR
#undef INT
#undef FCT
#undef EXT
#undef END
#undef STRUCT
#undef MEMPCPY
#undef MEMCHR
#undef STRCOLL
#undef STRLEN
#undef STRCAT
#undef L
#undef BTOWC
#undef WIDE_CHAR_VERSION
#undef FINDIDX
1278 | |