regcomp.c source code [glibc_src_2.28/posix/regcomp.c]

1	/ Extended regular expression matching and search library.*
2	Copyright (C) 2002-2018 Free Software Foundation, Inc.
3	This file is part of the GNU C Library.
4	Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6	The GNU C Library is free software; you can redistribute it and/or
7	modify it under the terms of the GNU Lesser General Public
8	License as published by the Free Software Foundation; either
9	version 2.1 of the License, or (at your option) any later version.
10
11	The GNU C Library is distributed in the hope that it will be useful,
12	but WITHOUT ANY WARRANTY; without even the implied warranty of
13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	Lesser General Public License for more details.
15
16	You should have received a copy of the GNU Lesser General Public
17	License along with the GNU C Library; if not, see
18	<https://www.gnu.org/licenses/>. /*
19
20	#ifdef _LIBC
21	# include <locale/weight.h>
22	#endif
23
24	static reg_errcode_t re_compile_internal (regex_t preg, const* char * pattern,
25	size_t length, reg_syntax_t syntax);
26	static void re_compile_fastmap_iter (regex_t *bufp,
27	const re_dfastate_t *init_state,
28	char *fastmap);
29	static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
30	#ifdef RE_ENABLE_I18N
31	static void free_charset (re_charset_t *cset);
32	#endif /* RE_ENABLE_I18N */
33	static void free_workarea_compile (regex_t *preg);
34	static reg_errcode_t create_initial_state (re_dfa_t *dfa);
35	#ifdef RE_ENABLE_I18N
36	static void optimize_utf8 (re_dfa_t *dfa);
37	#endif
38	static reg_errcode_t analyze (regex_t *preg);
39	static reg_errcode_t preorder (bin_tree_t *root,
40	reg_errcode_t (fn (void , bin_tree_t )),
41	void *extra);
42	static reg_errcode_t postorder (bin_tree_t *root,
43	reg_errcode_t (fn (void , bin_tree_t )),
44	void *extra);
45	static reg_errcode_t optimize_subexps (void extra, bin_tree_t node);
46	static reg_errcode_t lower_subexps (void extra, bin_tree_t node);
47	static bin_tree_t lower_subexp (reg_errcode_t err, regex_t *preg,
48	bin_tree_t *node);
49	static reg_errcode_t calc_first (void extra, bin_tree_t node);
50	static reg_errcode_t calc_next (void extra, bin_tree_t node);
51	static reg_errcode_t link_nfa_nodes (void extra, bin_tree_t node);
52	static Idx duplicate_node (re_dfa_t dfa, Idx org_idx, unsigned* int constraint);
53	static Idx search_duplicated_node (const re_dfa_t *dfa, Idx org_node,
54	unsigned int constraint);
55	static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
56	static reg_errcode_t calc_eclosure_iter (re_node_set new_set, re_dfa_t dfa,
57	Idx node, bool root);
58	static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
59	static Idx fetch_number (re_string_t input, re_token_t token,
60	reg_syntax_t syntax);
61	static int peek_token (re_token_t token, re_string_t input,
62	reg_syntax_t syntax);
63	static bin_tree_t parse (re_string_t regexp, regex_t *preg,
64	reg_syntax_t syntax, reg_errcode_t *err);
65	static bin_tree_t parse_reg_exp (re_string_t regexp, regex_t *preg,
66	re_token_t *token, reg_syntax_t syntax,
67	Idx nest, reg_errcode_t *err);
68	static bin_tree_t parse_branch (re_string_t regexp, regex_t *preg,
69	re_token_t *token, reg_syntax_t syntax,
70	Idx nest, reg_errcode_t *err);
71	static bin_tree_t parse_expression (re_string_t regexp, regex_t *preg,
72	re_token_t *token, reg_syntax_t syntax,
73	Idx nest, reg_errcode_t *err);
74	static bin_tree_t parse_sub_exp (re_string_t regexp, regex_t *preg,
75	re_token_t *token, reg_syntax_t syntax,
76	Idx nest, reg_errcode_t *err);
77	static bin_tree_t parse_dup_op (bin_tree_t dup_elem, re_string_t *regexp,
78	re_dfa_t dfa, re_token_t token,
79	reg_syntax_t syntax, reg_errcode_t *err);
80	static bin_tree_t parse_bracket_exp (re_string_t regexp, re_dfa_t *dfa,
81	re_token_t *token, reg_syntax_t syntax,
82	reg_errcode_t *err);
83	static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
84	re_string_t *regexp,
85	re_token_t token, int* token_len,
86	re_dfa_t *dfa,
87	reg_syntax_t syntax,
88	bool accept_hyphen);
89	static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
90	re_string_t *regexp,
91	re_token_t *token);
92	#ifdef RE_ENABLE_I18N
93	static reg_errcode_t build_equiv_class (bitset_t sbcset,
94	re_charset_t *mbcset,
95	Idx *equiv_class_alloc,
96	const unsigned char *name);
97	static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
98	bitset_t sbcset,
99	re_charset_t *mbcset,
100	Idx *char_class_alloc,
101	const char *class_name,
102	reg_syntax_t syntax);
103	#else /* not RE_ENABLE_I18N */
104	static reg_errcode_t build_equiv_class (bitset_t sbcset,
105	const unsigned char *name);
106	static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
107	bitset_t sbcset,
108	const char *class_name,
109	reg_syntax_t syntax);
110	#endif /* not RE_ENABLE_I18N */
111	static bin_tree_t build_charclass_op (re_dfa_t dfa,
112	RE_TRANSLATE_TYPE trans,
113	const char *class_name,
114	const char *extra,
115	bool non_match, reg_errcode_t *err);
116	static bin_tree_t create_tree (re_dfa_t dfa,
117	bin_tree_t left, bin_tree_t right,
118	re_token_type_t type);
119	static bin_tree_t create_token_tree (re_dfa_t dfa,
120	bin_tree_t left, bin_tree_t right,
121	const re_token_t *token);
122	static bin_tree_t duplicate_tree (const* bin_tree_t src, re_dfa_t dfa);
123	static void free_token (re_token_t *node);
124	static reg_errcode_t free_tree (void extra, bin_tree_t node);
125	static reg_errcode_t mark_opt_subexp (void extra, bin_tree_t node);
126
127	/ This table gives an error message for each of the error codes listed*
128	in regex.h. Obviously the order here has to be same as there.
129	POSIX doesn't require that we do anything for REG_NOERROR,
130	but why not be nice? /*
131
132	static const char __re_error_msgid[] =
133	{
134	#define REG_NOERROR_IDX 0
135	gettext_noop ("Success") / REG_NOERROR /
136	"\0"
137	#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
138	gettext_noop ("No match") / REG_NOMATCH /
139	"\0"
140	#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
141	gettext_noop ("Invalid regular expression") / REG_BADPAT /
142	"\0"
143	#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
144	gettext_noop ("Invalid collation character") / REG_ECOLLATE /
145	"\0"
146	#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
147	gettext_noop ("Invalid character class name") / REG_ECTYPE /
148	"\0"
149	#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name")
150	gettext_noop ("Trailing backslash") / REG_EESCAPE /
151	"\0"
152	#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash")
153	gettext_noop ("Invalid back reference") / REG_ESUBREG /
154	"\0"
155	#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
156	gettext_noop ("Unmatched [, [^, [:, [., or [=") / REG_EBRACK /
157	"\0"
158	#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [, [^, [:, [., or [=")
159	gettext_noop ("Unmatched ( or \\(") / REG_EPAREN /
160	"\0"
161	#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
162	gettext_noop ("Unmatched \\{") / REG_EBRACE /
163	"\0"
164	#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{")
165	gettext_noop ("Invalid content of \\{\\}") / REG_BADBR /
166	"\0"
167	#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
168	gettext_noop ("Invalid range end") / REG_ERANGE /
169	"\0"
170	#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
171	gettext_noop ("Memory exhausted") / REG_ESPACE /
172	"\0"
173	#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
174	gettext_noop ("Invalid preceding regular expression") / REG_BADRPT /
175	"\0"
176	#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
177	gettext_noop ("Premature end of regular expression") / REG_EEND /
178	"\0"
179	#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression")
180	gettext_noop ("Regular expression too big") / REG_ESIZE /
181	"\0"
182	#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big")
183	gettext_noop ("Unmatched ) or \\)") / REG_ERPAREN /
184	};
185
186	static const size_t __re_error_msgid_idx[] =
187	{
188	REG_NOERROR_IDX,
189	REG_NOMATCH_IDX,
190	REG_BADPAT_IDX,
191	REG_ECOLLATE_IDX,
192	REG_ECTYPE_IDX,
193	REG_EESCAPE_IDX,
194	REG_ESUBREG_IDX,
195	REG_EBRACK_IDX,
196	REG_EPAREN_IDX,
197	REG_EBRACE_IDX,
198	REG_BADBR_IDX,
199	REG_ERANGE_IDX,
200	REG_ESPACE_IDX,
201	REG_BADRPT_IDX,
202	REG_EEND_IDX,
203	REG_ESIZE_IDX,
204	REG_ERPAREN_IDX
205	};
206
207	/ Entry points for GNU code. /
208
209	/ re_compile_pattern is the GNU regular expression compiler: it*
210	compiles PATTERN (of length LENGTH) and puts the result in BUFP.
211	Returns 0 if the pattern was valid, otherwise an error string.
212
213	Assumes the 'allocated' (and perhaps 'buffer') and 'translate' fields
214	are set in BUFP on entry. /*
215
216	const char *
217	re_compile_pattern (const char *pattern, size_t length,
218	struct re_pattern_buffer *bufp)
219	{
220	reg_errcode_t ret;
221
222	/ And GNU code determines whether or not to get register information*
223	by passing null for the REGS argument to re_match, etc., not by
224	setting no_sub, unless RE_NO_SUB is set. /*
225	bufp->no_sub = !!(re_syntax_options & RE_NO_SUB);
226
227	/ Match anchors at newline. /
228	bufp->newline_anchor = `1`;
229
230	ret = re_compile_internal (bufp, pattern, length, re_syntax_options);
231
232	if (!ret)
233	return NULL;
234	return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
235	}
236	#ifdef _LIBC
237	weak_alias (__re_compile_pattern, re_compile_pattern)
238	#endif
239
240	/ Set by 're_set_syntax' to the current regexp syntax to recognize. Can*
241	also be assigned to arbitrarily: each pattern buffer stores its own
242	syntax, so it can be changed between regex compilations. /*
243	/ This has no initializer because initialized variables in Emacs*
244	become read-only after dumping. /*
245	reg_syntax_t re_syntax_options;
246
247
248	/ Specify the precise syntax of regexps for compilation. This provides*
249	for compatibility for various utilities which historically have
250	different, incompatible syntaxes.
251
252	The argument SYNTAX is a bit mask comprised of the various bits
253	defined in regex.h. We return the old syntax. /*
254
255	reg_syntax_t
256	re_set_syntax (reg_syntax_t syntax)
257	{
258	reg_syntax_t ret = re_syntax_options;
259
260	re_syntax_options = syntax;
261	return ret;
262	}
263	#ifdef _LIBC
264	weak_alias (__re_set_syntax, re_set_syntax)
265	#endif
266
267	int
268	re_compile_fastmap (struct re_pattern_buffer *bufp)
269	{
270	re_dfa_t *dfa = bufp->buffer;
271	char *fastmap = bufp->fastmap;
272
273	memset (fastmap, `'\0'`, sizeof (char) * SBC_MAX);
274	re_compile_fastmap_iter (bufp, dfa->init_state, fastmap);
275	if (dfa->init_state != dfa->init_state_word)
276	re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap);
277	if (dfa->init_state != dfa->init_state_nl)
278	re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap);
279	if (dfa->init_state != dfa->init_state_begbuf)
280	re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap);
281	bufp->fastmap_accurate = `1`;
282	return `0`;
283	}
284	#ifdef _LIBC
285	weak_alias (__re_compile_fastmap, re_compile_fastmap)
286	#endif
287
288	static inline void
289	__attribute__ ((always_inline))
290	re_set_fastmap (char fastmap, bool icase, int* ch)
291	{
292	fastmap[ch] = `1`;
293	if (icase)
294	fastmap[tolower (ch)] = `1`;
295	}
296
297	/ Helper function for re_compile_fastmap.*
298	Compile fastmap for the initial_state INIT_STATE. /*
299
300	static void
301	re_compile_fastmap_iter (regex_t bufp, const* re_dfastate_t *init_state,
302	char *fastmap)
303	{
304	re_dfa_t *dfa = bufp->buffer;
305	Idx node_cnt;
306	bool icase = (dfa->mb_cur_max == `1` && (bufp->syntax & RE_ICASE));
307	for (node_cnt = `0`; node_cnt < init_state->nodes.nelem; ++node_cnt)
308	{
309	Idx node = init_state->nodes.elems[node_cnt];
310	re_token_type_t type = dfa->nodes[node].type;
311
312	if (type == CHARACTER)
313	{
314	re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
315	#ifdef RE_ENABLE_I18N
316	if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > `1`)
317	{
318	unsigned char buf[MB_LEN_MAX];
319	unsigned char *p;
320	wchar_t wc;
321	mbstate_t state;
322
323	p = buf;
324	*p++ = dfa->nodes[node].opr.c;
325	while (++node < dfa->nodes_len
326	&& dfa->nodes[node].type == CHARACTER
327	&& dfa->nodes[node].mb_partial)
328	*p++ = dfa->nodes[node].opr.c;
329	memset (&state, `'\0'`, sizeof (state));
330	if (__mbrtowc (&wc, (const char *) buf, p - buf,
331	&state) == p - buf
332	&& (__wcrtomb ((char *) buf, __towlower (wc), &state)
333	!= (size_t) -`1`))
334	re_set_fastmap (fastmap, false, buf[`0`]);
335	}
336	#endif
337	}
338	else if (type == SIMPLE_BRACKET)
339	{
340	int i, ch;
341	for (i = `0`, ch = `0`; i < BITSET_WORDS; ++i)
342	{
343	int j;
344	bitset_word_t w = dfa->nodes[node].opr.sbcset[i];
345	for (j = `0`; j < BITSET_WORD_BITS; ++j, ++ch)
346	if (w & ((bitset_word_t) `1` << j))
347	re_set_fastmap (fastmap, icase, ch);
348	}
349	}
350	#ifdef RE_ENABLE_I18N
351	else if (type == COMPLEX_BRACKET)
352	{
353	re_charset_t *cset = dfa->nodes[node].opr.mbcset;
354	Idx i;
355
356	# ifdef _LIBC
357	/ See if we have to try all bytes which start multiple collation*
358	elements.
359	e.g. In da_DK, we want to catch 'a' since "aa" is a valid
360	collation element, and don't catch 'b' since 'b' is
361	the only collation element which starts from 'b' (and
362	it is caught by SIMPLE_BRACKET). /*
363	if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != `0`
364	&& (cset->ncoll_syms \|\| cset->nranges))
365	{
366	const int32_t table = (const* int32_t *)
367	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
368	for (i = `0`; i < SBC_MAX; ++i)
369	if (table[i] < `0`)
370	re_set_fastmap (fastmap, icase, i);
371	}
372	# endif /* _LIBC */
373
374	/ See if we have to start the match at all multibyte characters,*
375	i.e. where we would not find an invalid sequence. This only
376	applies to multibyte character sets; for single byte character
377	sets, the SIMPLE_BRACKET again suffices. /*
378	if (dfa->mb_cur_max > `1`
379	&& (cset->nchar_classes \|\| cset->non_match \|\| cset->nranges
380	# ifdef _LIBC
381	\|\| cset->nequiv_classes
382	# endif /* _LIBC */
383	))
384	{
385	unsigned char c = `0`;
386	do
387	{
388	mbstate_t mbs;
389	memset (&mbs, `0`, sizeof (mbs));
390	if (__mbrtowc (NULL, (char *) &c, `1`, &mbs) == (size_t) -`2`)
391	re_set_fastmap (fastmap, false, (int) c);
392	}
393	while (++c != `0`);
394	}
395
396	else
397	{
398	/ ... Else catch all bytes which can start the mbchars. /
399	for (i = `0`; i < cset->nmbchars; ++i)
400	{
401	char buf[`256`];
402	mbstate_t state;
403	memset (&state, `'\0'`, sizeof (state));
404	if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -`1`)
405	re_set_fastmap (fastmap, icase, (unsigned* char *) buf);
406	if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > `1`)
407	{
408	if (__wcrtomb (buf, __towlower (cset->mbchars[i]), &state)
409	!= (size_t) -`1`)
410	re_set_fastmap (fastmap, false, (unsigned* char *) buf);
411	}
412	}
413	}
414	}
415	#endif /* RE_ENABLE_I18N */
416	else if (type == OP_PERIOD
417	#ifdef RE_ENABLE_I18N
418	\|\| type == OP_UTF8_PERIOD
419	#endif /* RE_ENABLE_I18N */
420	\|\| type == END_OF_RE)
421	{
422	memset (fastmap, `'\1'`, sizeof (char) * SBC_MAX);
423	if (type == END_OF_RE)
424	bufp->can_be_null = `1`;
425	return;
426	}
427	}
428	}
429
430	/ Entry point for POSIX code. /
431	/ regcomp takes a regular expression as a string and compiles it.*
432
433	PREG is a regex_t . We do not expect any fields to be initialized,*
434	since POSIX says we shouldn't. Thus, we set
435
436	'buffer' to the compiled pattern;
437	'used' to the length of the compiled pattern;
438	'syntax' to RE_SYNTAX_POSIX_EXTENDED if the
439	REG_EXTENDED bit in CFLAGS is set; otherwise, to
440	RE_SYNTAX_POSIX_BASIC;
441	'newline_anchor' to REG_NEWLINE being set in CFLAGS;
442	'fastmap' to an allocated space for the fastmap;
443	'fastmap_accurate' to zero;
444	're_nsub' to the number of subexpressions in PATTERN.
445
446	PATTERN is the address of the pattern string.
447
448	CFLAGS is a series of bits which affect compilation.
449
450	If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
451	use POSIX basic syntax.
452
453	If REG_NEWLINE is set, then . and [^...] don't match newline.
454	Also, regexec will try a match beginning after every newline.
455
456	If REG_ICASE is set, then we considers upper- and lowercase
457	versions of letters to be equivalent when matching.
458
459	If REG_NOSUB is set, then when PREG is passed to regexec, that
460	routine will report only success or failure, and nothing about the
461	registers.
462
463	It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
464	the return codes and their meanings.) /*
465
466	int
467	regcomp (regex_t _Restrict_ preg, const* char _Restrict_ pattern, int* cflags)
468	{
469	reg_errcode_t ret;
470	reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED
471	: RE_SYNTAX_POSIX_BASIC);
472
473	preg->buffer = NULL;
474	preg->allocated = `0`;
475	preg->used = `0`;
476
477	/ Try to allocate space for the fastmap. /
478	preg->fastmap = re_malloc (char, SBC_MAX);
479	if (BE (preg->fastmap == NULL, `0`))
480	return REG_ESPACE;
481
482	syntax \|= (cflags & REG_ICASE) ? RE_ICASE : `0`;
483
484	/ If REG_NEWLINE is set, newlines are treated differently. /
485	if (cflags & REG_NEWLINE)
486	{ / REG_NEWLINE implies neither . nor [^...] match newline. /
487	syntax &= ~RE_DOT_NEWLINE;
488	syntax \|= RE_HAT_LISTS_NOT_NEWLINE;
489	/ It also changes the matching behavior. /
490	preg->newline_anchor = `1`;
491	}
492	else
493	preg->newline_anchor = `0`;
494	preg->no_sub = !!(cflags & REG_NOSUB);
495	preg->translate = NULL;
496
497	ret = re_compile_internal (preg, pattern, strlen (pattern), syntax);
498
499	/ POSIX doesn't distinguish between an unmatched open-group and an*
500	unmatched close-group: both are REG_EPAREN. /*
501	if (ret == REG_ERPAREN)
502	ret = REG_EPAREN;
503
504	/ We have already checked preg->fastmap != NULL. /
505	if (BE (ret == REG_NOERROR, `1`))
506	/ Compute the fastmap now, since regexec cannot modify the pattern*
507	buffer. This function never fails in this implementation. /*
508	(void) re_compile_fastmap (preg);
509	else
510	{
511	/ Some error occurred while compiling the expression. /
512	re_free (preg->fastmap);
513	preg->fastmap = NULL;
514	}
515
516	return (int) ret;
517	}
518	#ifdef _LIBC
519	libc_hidden_def (__regcomp)
520	weak_alias (__regcomp, regcomp)
521	#endif
522
523	/ Returns a message corresponding to an error code, ERRCODE, returned*
524	from either regcomp or regexec. We don't use PREG here. /*
525
526	size_t
527	regerror (int errcode, const regex_t _Restrict_ preg, char* *_Restrict_ errbuf,
528	size_t errbuf_size)
529	{
530	const char *msg;
531	size_t msg_size;
532
533	if (BE (errcode < `0`
534	\|\| errcode >= (int) (sizeof (__re_error_msgid_idx)
535	/ sizeof (__re_error_msgid_idx[`0`])), `0`))
536	/ Only error codes returned by the rest of the code should be passed*
537	to this routine. If we are given anything else, or if other regex
538	code generates an invalid error code, then the program has a bug.
539	Dump core so we can fix it. /*
540	abort ();
541
542	msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
543
544	msg_size = strlen (msg) + `1`; / Includes the null. /
545
546	if (BE (errbuf_size != `0`, `1`))
547	{
548	size_t cpy_size = msg_size;
549	if (BE (msg_size > errbuf_size, `0`))
550	{
551	cpy_size = errbuf_size - `1`;
552	errbuf[cpy_size] = `'\0'`;
553	}
554	memcpy (errbuf, msg, cpy_size);
555	}
556
557	return msg_size;
558	}
559	#ifdef _LIBC
560	weak_alias (__regerror, regerror)
561	#endif
562
563
564	#ifdef RE_ENABLE_I18N
565	/ This static array is used for the map to single-byte characters when*
566	UTF-8 is used. Otherwise we would allocate memory just to initialize
567	it the same all the time. UTF-8 is the preferred encoding so this is
568	a worthwhile optimization. /*
569	static const bitset_t utf8_sb_map =
570	{
571	/ Set the first 128 bits. /
572	# if defined __GNUC__ && !defined __STRICT_ANSI__
573	[`0` ... `0x80` / BITSET_WORD_BITS - `1`] = BITSET_WORD_MAX
574	# else
575	# if 4 * BITSET_WORD_BITS < ASCII_CHARS
576	# error "bitset_word_t is narrower than 32 bits"
577	# elif 3 * BITSET_WORD_BITS < ASCII_CHARS
578	BITSET_WORD_MAX, BITSET_WORD_MAX, BITSET_WORD_MAX,
579	# elif 2 * BITSET_WORD_BITS < ASCII_CHARS
580	BITSET_WORD_MAX, BITSET_WORD_MAX,
581	# elif 1 * BITSET_WORD_BITS < ASCII_CHARS
582	BITSET_WORD_MAX,
583	# endif
584	(BITSET_WORD_MAX
585	>> (SBC_MAX % BITSET_WORD_BITS == `0`
586	? `0`
587	: BITSET_WORD_BITS - SBC_MAX % BITSET_WORD_BITS))
588	# endif
589	};
590	#endif
591
592
593	static void
594	free_dfa_content (re_dfa_t *dfa)
595	{
596	Idx i, j;
597
598	if (dfa->nodes)
599	for (i = `0`; i < dfa->nodes_len; ++i)
600	free_token (dfa->nodes + i);
601	re_free (dfa->nexts);
602	for (i = `0`; i < dfa->nodes_len; ++i)
603	{
604	if (dfa->eclosures != NULL)
605	re_node_set_free (dfa->eclosures + i);
606	if (dfa->inveclosures != NULL)
607	re_node_set_free (dfa->inveclosures + i);
608	if (dfa->edests != NULL)
609	re_node_set_free (dfa->edests + i);
610	}
611	re_free (dfa->edests);
612	re_free (dfa->eclosures);
613	re_free (dfa->inveclosures);
614	re_free (dfa->nodes);
615
616	if (dfa->state_table)
617	for (i = `0`; i <= dfa->state_hash_mask; ++i)
618	{
619	struct re_state_table_entry *entry = dfa->state_table + i;
620	for (j = `0`; j < entry->num; ++j)
621	{
622	re_dfastate_t *state = entry->array[j];
623	free_state (state);
624	}
625	re_free (entry->array);
626	}
627	re_free (dfa->state_table);
628	#ifdef RE_ENABLE_I18N
629	if (dfa->sb_char != utf8_sb_map)
630	re_free (dfa->sb_char);
631	#endif
632	re_free (dfa->subexp_map);
633	#ifdef DEBUG
634	re_free (dfa->re_str);
635	#endif
636
637	re_free (dfa);
638	}
639
640
641	/ Free dynamically allocated space used by PREG. /
642
643	void
644	regfree (regex_t *preg)
645	{
646	re_dfa_t *dfa = preg->buffer;
647	if (BE (dfa != NULL, `1`))
648	{
649	lock_fini (dfa->lock);
650	free_dfa_content (dfa);
651	}
652	preg->buffer = NULL;
653	preg->allocated = `0`;
654
655	re_free (preg->fastmap);
656	preg->fastmap = NULL;
657
658	re_free (preg->translate);
659	preg->translate = NULL;
660	}
661	#ifdef _LIBC
662	libc_hidden_def (__regfree)
663	weak_alias (__regfree, regfree)
664	#endif
665
666	/ Entry points compatible with 4.2 BSD regex library. We don't define*
667	them unless specifically requested. /*
668
669	#if defined _REGEX_RE_COMP \|\| defined _LIBC
670
671	/ BSD has one and only one pattern buffer. /
672	static struct re_pattern_buffer re_comp_buf;
673
674	char *
675	# ifdef _LIBC
676	/ Make these definitions weak in libc, so POSIX programs can redefine*
677	these names if they don't use our functions, and still use
678	regcomp/regexec above without link errors. /*
679	weak_function
680	# endif
681	re_comp (const char *s)
682	{
683	reg_errcode_t ret;
684	char *fastmap;
685
686	if (!s)
687	{
688	if (!re_comp_buf.buffer)
689	return gettext ("No previous regular expression");
690	return `0`;
691	}
692
693	if (re_comp_buf.buffer)
694	{
695	fastmap = re_comp_buf.fastmap;
696	re_comp_buf.fastmap = NULL;
697	__regfree (&re_comp_buf);
698	memset (&re_comp_buf, `'\0'`, sizeof (re_comp_buf));
699	re_comp_buf.fastmap = fastmap;
700	}
701
702	if (re_comp_buf.fastmap == NULL)
703	{
704	re_comp_buf.fastmap = re_malloc (char, SBC_MAX);
705	if (re_comp_buf.fastmap == NULL)
706	return (char *) gettext (__re_error_msgid
707	+ __re_error_msgid_idx[(int) REG_ESPACE]);
708	}
709
710	/ Since 're_exec' always passes NULL for the 'regs' argument, we*
711	don't need to initialize the pattern buffer fields which affect it. /*
712
713	/ Match anchors at newlines. /
714	re_comp_buf.newline_anchor = `1`;
715
716	ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options);
717
718	if (!ret)
719	return NULL;
720
721	/ Yes, we're discarding 'const' here if !HAVE_LIBINTL. /
722	return (char ) gettext (__re_error_msgid + __re_error_msgid_idx[(int*) ret]);
723	}
724
725	#ifdef _LIBC
726	libc_freeres_fn (free_mem)
727	{
728	__regfree (&re_comp_buf);
729	}
730	#endif
731
732	#endif /* _REGEX_RE_COMP */
733
734	/ Internal entry point.*
735	Compile the regular expression PATTERN, whose length is LENGTH.
736	SYNTAX indicate regular expression's syntax. /*
737
738	static reg_errcode_t
739	re_compile_internal (regex_t preg, const* char * pattern, size_t length,
740	reg_syntax_t syntax)
741	{
742	reg_errcode_t err = REG_NOERROR;
743	re_dfa_t *dfa;
744	re_string_t regexp;
745
746	/ Initialize the pattern buffer. /
747	preg->fastmap_accurate = `0`;
748	preg->syntax = syntax;
749	preg->not_bol = preg->not_eol = `0`;
750	preg->used = `0`;
751	preg->re_nsub = `0`;
752	preg->can_be_null = `0`;
753	preg->regs_allocated = REGS_UNALLOCATED;
754
755	/ Initialize the dfa. /
756	dfa = preg->buffer;
757	if (BE (preg->allocated < sizeof (re_dfa_t), `0`))
758	{
759	/ If zero allocated, but buffer is non-null, try to realloc*
760	enough space. This loses if buffer's address is bogus, but
761	that is the user's responsibility. If ->buffer is NULL this
762	is a simple allocation. /*
763	dfa = re_realloc (preg->buffer, re_dfa_t, `1`);
764	if (dfa == NULL)
765	return REG_ESPACE;
766	preg->allocated = sizeof (re_dfa_t);
767	preg->buffer = dfa;
768	}
769	preg->used = sizeof (re_dfa_t);
770
771	err = init_dfa (dfa, length);
772	if (BE (err == REG_NOERROR && lock_init (dfa->lock) != `0`, `0`))
773	err = REG_ESPACE;
774	if (BE (err != REG_NOERROR, `0`))
775	{
776	free_dfa_content (dfa);
777	preg->buffer = NULL;
778	preg->allocated = `0`;
779	return err;
780	}
781	#ifdef DEBUG
782	/ Note: length+1 will not overflow since it is checked in init_dfa. /
783	dfa->re_str = re_malloc (char, length + `1`);
784	strncpy (dfa->re_str, pattern, length + `1`);
785	#endif
786
787	err = re_string_construct (&regexp, pattern, length, preg->translate,
788	(syntax & RE_ICASE) != `0`, dfa);
789	if (BE (err != REG_NOERROR, `0`))
790	{
791	re_compile_internal_free_return:
792	free_workarea_compile (preg);
793	re_string_destruct (&regexp);
794	lock_fini (dfa->lock);
795	free_dfa_content (dfa);
796	preg->buffer = NULL;
797	preg->allocated = `0`;
798	return err;
799	}
800
801	/ Parse the regular expression, and build a structure tree. /
802	preg->re_nsub = `0`;
803	dfa->str_tree = parse (&regexp, preg, syntax, &err);
804	if (BE (dfa->str_tree == NULL, `0`))
805	goto re_compile_internal_free_return;
806
807	/ Analyze the tree and create the nfa. /
808	err = analyze (preg);
809	if (BE (err != REG_NOERROR, `0`))
810	goto re_compile_internal_free_return;
811
812	#ifdef RE_ENABLE_I18N
813	/ If possible, do searching in single byte encoding to speed things up. /
814	if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)
815	optimize_utf8 (dfa);
816	#endif
817
818	/ Then create the initial state of the dfa. /
819	err = create_initial_state (dfa);
820
821	/ Release work areas. /
822	free_workarea_compile (preg);
823	re_string_destruct (&regexp);
824
825	if (BE (err != REG_NOERROR, `0`))
826	{
827	lock_fini (dfa->lock);
828	free_dfa_content (dfa);
829	preg->buffer = NULL;
830	preg->allocated = `0`;
831	}
832
833	return err;
834	}
835
836	/ Initialize DFA. We use the length of the regular expression PAT_LEN*
837	as the initial length of some arrays. /*
838
839	static reg_errcode_t
840	init_dfa (re_dfa_t *dfa, size_t pat_len)
841	{
842	__re_size_t table_size;
843	#ifndef _LIBC
844	const char *codeset_name;
845	#endif
846	#ifdef RE_ENABLE_I18N
847	size_t max_i18n_object_size = MAX (sizeof (wchar_t), sizeof (wctype_t));
848	#else
849	size_t max_i18n_object_size = `0`;
850	#endif
851	size_t max_object_size =
852	MAX (sizeof (struct re_state_table_entry),
853	MAX (sizeof (re_token_t),
854	MAX (sizeof (re_node_set),
855	MAX (sizeof (regmatch_t),
856	max_i18n_object_size))));
857
858	memset (dfa, `'\0'`, sizeof (re_dfa_t));
859
860	/ Force allocation of str_tree_storage the first time. /
861	dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
862
863	/ Avoid overflows. The extra "/ 2" is for the table_size doubling*
864	calculation below, and for similar doubling calculations
865	elsewhere. And it's <= rather than <, because some of the
866	doubling calculations add 1 afterwards. /*
867	if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) / `2` <= pat_len, `0`))
868	return REG_ESPACE;
869
870	dfa->nodes_alloc = pat_len + `1`;
871	dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
872
873	/ table_size = 2 ^ ceil(log pat_len) /
874	for (table_size = `1`; ; table_size <<= `1`)
875	if (table_size > pat_len)
876	break;
877
878	dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size);
879	dfa->state_hash_mask = table_size - `1`;
880
881	dfa->mb_cur_max = MB_CUR_MAX;
882	#ifdef _LIBC
883	if (dfa->mb_cur_max == `6`
884	&& strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == `0`)
885	dfa->is_utf8 = `1`;
886	dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
887	!= `0`);
888	#else
889	codeset_name = nl_langinfo (CODESET);
890	if ((codeset_name[`0`] == `'U'` \|\| codeset_name[`0`] == `'u'`)
891	&& (codeset_name[`1`] == `'T'` \|\| codeset_name[`1`] == `'t'`)
892	&& (codeset_name[`2`] == `'F'` \|\| codeset_name[`2`] == `'f'`)
893	&& strcmp (codeset_name + `3` + (codeset_name[`3`] == `'-'`), "8") == `0`)
894	dfa->is_utf8 = `1`;
895
896	/ We check exhaustively in the loop below if this charset is a*
897	superset of ASCII. /*
898	dfa->map_notascii = `0`;
899	#endif
900
901	#ifdef RE_ENABLE_I18N
902	if (dfa->mb_cur_max > `1`)
903	{
904	if (dfa->is_utf8)
905	dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
906	else
907	{
908	int i, j, ch;
909
910	dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), `1`);
911	if (BE (dfa->sb_char == NULL, `0`))
912	return REG_ESPACE;
913
914	/ Set the bits corresponding to single byte chars. /
915	for (i = `0`, ch = `0`; i < BITSET_WORDS; ++i)
916	for (j = `0`; j < BITSET_WORD_BITS; ++j, ++ch)
917	{
918	wint_t wch = __btowc (ch);
919	if (wch != WEOF)
920	dfa->sb_char[i] \|= (bitset_word_t) `1` << j;
921	# ifndef _LIBC
922	if (isascii (ch) && wch != ch)
923	dfa->map_notascii = `1`;
924	# endif
925	}
926	}
927	}
928	#endif
929
930	if (BE (dfa->nodes == NULL \|\| dfa->state_table == NULL, `0`))
931	return REG_ESPACE;
932	return REG_NOERROR;
933	}
934
935	/ Initialize WORD_CHAR table, which indicate which character is*
936	"word". In this case "word" means that it is the word construction
937	character used by some operators like "\<", "\>", etc. /*
938
939	static void
940	init_word_char (re_dfa_t *dfa)
941	{
942	int i = `0`;
943	int j;
944	int ch = `0`;
945	dfa->word_ops_used = `1`;
946	if (BE (dfa->map_notascii == `0`, `1`))
947	{
948	/ Avoid uint32_t and uint64_t as some non-GCC platforms lack*
949	them, an issue when this code is used in Gnulib. /*
950	bitset_word_t bits0 = `0x00000000`;
951	bitset_word_t bits1 = `0x03ff0000`;
952	bitset_word_t bits2 = `0x87fffffe`;
953	bitset_word_t bits3 = `0x07fffffe`;
954	if (BITSET_WORD_BITS == `64`)
955	{
956	/ Pacify gcc -Woverflow on 32-bit platformns. /
957	dfa->word_char[`0`] = bits1 << `31` << `1` \| bits0;
958	dfa->word_char[`1`] = bits3 << `31` << `1` \| bits2;
959	i = `2`;
960	}
961	else if (BITSET_WORD_BITS == `32`)
962	{
963	dfa->word_char[`0`] = bits0;
964	dfa->word_char[`1`] = bits1;
965	dfa->word_char[`2`] = bits2;
966	dfa->word_char[`3`] = bits3;
967	i = `4`;
968	}
969	else
970	goto general_case;
971	ch = `128`;
972
973	if (BE (dfa->is_utf8, `1`))
974	{
975	memset (&dfa->word_char[i], `'\0'`, (SBC_MAX - ch) / `8`);
976	return;
977	}
978	}
979
980	general_case:
981	for (; i < BITSET_WORDS; ++i)
982	for (j = `0`; j < BITSET_WORD_BITS; ++j, ++ch)
983	if (isalnum (ch) \|\| ch == `'_'`)
984	dfa->word_char[i] \|= (bitset_word_t) `1` << j;
985	}
986
987	/ Free the work area which are only used while compiling. /
988
989	static void
990	free_workarea_compile (regex_t *preg)
991	{
992	re_dfa_t *dfa = preg->buffer;
993	bin_tree_storage_t storage, next;
994	for (storage = dfa->str_tree_storage; storage; storage = next)
995	{
996	next = storage->next;
997	re_free (storage);
998	}
999	dfa->str_tree_storage = NULL;
1000	dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
1001	dfa->str_tree = NULL;
1002	re_free (dfa->org_indices);
1003	dfa->org_indices = NULL;
1004	}
1005
1006	/ Create initial states for all contexts. /
1007
1008	static reg_errcode_t
1009	create_initial_state (re_dfa_t *dfa)
1010	{
1011	Idx first, i;
1012	reg_errcode_t err;
1013	re_node_set init_nodes;
1014
1015	/ Initial states have the epsilon closure of the node which is*
1016	the first node of the regular expression. /*
1017	first = dfa->str_tree->first->node_idx;
1018	dfa->init_node = first;
1019	err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
1020	if (BE (err != REG_NOERROR, `0`))
1021	return err;
1022
1023	/ The back-references which are in initial states can epsilon transit,*
1024	since in this case all of the subexpressions can be null.
1025	Then we add epsilon closures of the nodes which are the next nodes of
1026	the back-references. /*
1027	if (dfa->nbackref > `0`)
1028	for (i = `0`; i < init_nodes.nelem; ++i)
1029	{
1030	Idx node_idx = init_nodes.elems[i];
1031	re_token_type_t type = dfa->nodes[node_idx].type;
1032
1033	Idx clexp_idx;
1034	if (type != OP_BACK_REF)
1035	continue;
1036	for (clexp_idx = `0`; clexp_idx < init_nodes.nelem; ++clexp_idx)
1037	{
1038	re_token_t *clexp_node;
1039	clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
1040	if (clexp_node->type == OP_CLOSE_SUBEXP
1041	&& clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx)
1042	break;
1043	}
1044	if (clexp_idx == init_nodes.nelem)
1045	continue;
1046
1047	if (type == OP_BACK_REF)
1048	{
1049	Idx dest_idx = dfa->edests[node_idx].elems[`0`];
1050	if (!re_node_set_contains (&init_nodes, dest_idx))
1051	{
1052	reg_errcode_t merge_err
1053	= re_node_set_merge (&init_nodes, dfa->eclosures + dest_idx);
1054	if (merge_err != REG_NOERROR)
1055	return merge_err;
1056	i = `0`;
1057	}
1058	}
1059	}
1060
1061	/ It must be the first time to invoke acquire_state. /
1062	dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, `0`);
1063	/ We don't check ERR here, since the initial state must not be NULL. /
1064	if (BE (dfa->init_state == NULL, `0`))
1065	return err;
1066	if (dfa->init_state->has_constraint)
1067	{
1068	dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
1069	CONTEXT_WORD);
1070	dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
1071	CONTEXT_NEWLINE);
1072	dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
1073	&init_nodes,
1074	CONTEXT_NEWLINE
1075	\| CONTEXT_BEGBUF);
1076	if (BE (dfa->init_state_word == NULL \|\| dfa->init_state_nl == NULL
1077	\|\| dfa->init_state_begbuf == NULL, `0`))
1078	return err;
1079	}
1080	else
1081	dfa->init_state_word = dfa->init_state_nl
1082	= dfa->init_state_begbuf = dfa->init_state;
1083
1084	re_node_set_free (&init_nodes);
1085	return REG_NOERROR;
1086	}
1087
1088	#ifdef RE_ENABLE_I18N
1089	/ If it is possible to do searching in single byte encoding instead of UTF-8*
1090	to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change
1091	DFA nodes where needed. /*
1092
1093	static void
1094	optimize_utf8 (re_dfa_t *dfa)
1095	{
1096	Idx node;
1097	int i;
1098	bool mb_chars = false;
1099	bool has_period = false;
1100
1101	for (node = `0`; node < dfa->nodes_len; ++node)
1102	switch (dfa->nodes[node].type)
1103	{
1104	case CHARACTER:
1105	if (dfa->nodes[node].opr.c >= ASCII_CHARS)
1106	mb_chars = true;
1107	break;
1108	case ANCHOR:
1109	switch (dfa->nodes[node].opr.ctx_type)
1110	{
1111	case LINE_FIRST:
1112	case LINE_LAST:
1113	case BUF_FIRST:
1114	case BUF_LAST:
1115	break;
1116	default:
1117	/ Word anchors etc. cannot be handled. It's okay to test*
1118	opr.ctx_type since constraints (for all DFA nodes) are
1119	created by ORing one or more opr.ctx_type values. /*
1120	return;
1121	}
1122	break;
1123	case OP_PERIOD:
1124	has_period = true;
1125	break;
1126	case OP_BACK_REF:
1127	case OP_ALT:
1128	case END_OF_RE:
1129	case OP_DUP_ASTERISK:
1130	case OP_OPEN_SUBEXP:
1131	case OP_CLOSE_SUBEXP:
1132	break;
1133	case COMPLEX_BRACKET:
1134	return;
1135	case SIMPLE_BRACKET:
1136	/ Just double check. /
1137	{
1138	int rshift = (ASCII_CHARS % BITSET_WORD_BITS == `0`
1139	? `0`
1140	: BITSET_WORD_BITS - ASCII_CHARS % BITSET_WORD_BITS);
1141	for (i = ASCII_CHARS / BITSET_WORD_BITS; i < BITSET_WORDS; ++i)
1142	{
1143	if (dfa->nodes[node].opr.sbcset[i] >> rshift != `0`)
1144	return;
1145	rshift = `0`;
1146	}
1147	}
1148	break;
1149	default:
1150	abort ();
1151	}
1152
1153	if (mb_chars \|\| has_period)
1154	for (node = `0`; node < dfa->nodes_len; ++node)
1155	{
1156	if (dfa->nodes[node].type == CHARACTER
1157	&& dfa->nodes[node].opr.c >= ASCII_CHARS)
1158	dfa->nodes[node].mb_partial = `0`;
1159	else if (dfa->nodes[node].type == OP_PERIOD)
1160	dfa->nodes[node].type = OP_UTF8_PERIOD;
1161	}
1162
1163	/ The search can be in single byte locale. /
1164	dfa->mb_cur_max = `1`;
1165	dfa->is_utf8 = `0`;
1166	dfa->has_mb_node = dfa->nbackref > `0` \|\| has_period;
1167	}
1168	#endif
1169
1170	/ Analyze the structure tree, and calculate "first", "next", "edest",*
1171	"eclosure", and "inveclosure". /*
1172
1173	static reg_errcode_t
1174	analyze (regex_t *preg)
1175	{
1176	re_dfa_t *dfa = preg->buffer;
1177	reg_errcode_t ret;
1178
1179	/ Allocate arrays. /
1180	dfa->nexts = re_malloc (Idx, dfa->nodes_alloc);
1181	dfa->org_indices = re_malloc (Idx, dfa->nodes_alloc);
1182	dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
1183	dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
1184	if (BE (dfa->nexts == NULL \|\| dfa->org_indices == NULL \|\| dfa->edests == NULL
1185	\|\| dfa->eclosures == NULL, `0`))
1186	return REG_ESPACE;
1187
1188	dfa->subexp_map = re_malloc (Idx, preg->re_nsub);
1189	if (dfa->subexp_map != NULL)
1190	{
1191	Idx i;
1192	for (i = `0`; i < preg->re_nsub; i++)
1193	dfa->subexp_map[i] = i;
1194	preorder (dfa->str_tree, optimize_subexps, dfa);
1195	for (i = `0`; i < preg->re_nsub; i++)
1196	if (dfa->subexp_map[i] != i)
1197	break;
1198	if (i == preg->re_nsub)
1199	{
1200	re_free (dfa->subexp_map);
1201	dfa->subexp_map = NULL;
1202	}
1203	}
1204
1205	ret = postorder (dfa->str_tree, lower_subexps, preg);
1206	if (BE (ret != REG_NOERROR, `0`))
1207	return ret;
1208	ret = postorder (dfa->str_tree, calc_first, dfa);
1209	if (BE (ret != REG_NOERROR, `0`))
1210	return ret;
1211	preorder (dfa->str_tree, calc_next, dfa);
1212	ret = preorder (dfa->str_tree, link_nfa_nodes, dfa);
1213	if (BE (ret != REG_NOERROR, `0`))
1214	return ret;
1215	ret = calc_eclosure (dfa);
1216	if (BE (ret != REG_NOERROR, `0`))
1217	return ret;
1218
1219	/ We only need this during the prune_impossible_nodes pass in regexec.c;*
1220	skip it if p_i_n will not run, as calc_inveclosure can be quadratic. /*
1221	if ((!preg->no_sub && preg->re_nsub > `0` && dfa->has_plural_match)
1222	\|\| dfa->nbackref)
1223	{
1224	dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
1225	if (BE (dfa->inveclosures == NULL, `0`))
1226	return REG_ESPACE;
1227	ret = calc_inveclosure (dfa);
1228	}
1229
1230	return ret;
1231	}
1232
1233	/ Our parse trees are very unbalanced, so we cannot use a stack to*
1234	implement parse tree visits. Instead, we use parent pointers and
1235	some hairy code in these two functions. /*
1236	static reg_errcode_t
1237	postorder (bin_tree_t root, reg_errcode_t (fn (void* , bin_tree_t )),
1238	void *extra)
1239	{
1240	bin_tree_t node, prev;
1241
1242	for (node = root; ; )
1243	{
1244	/ Descend down the tree, preferably to the left (or to the right*
1245	if that's the only child). /*
1246	while (node->left \|\| node->right)
1247	if (node->left)
1248	node = node->left;
1249	else
1250	node = node->right;
1251
1252	do
1253	{
1254	reg_errcode_t err = fn (extra, node);
1255	if (BE (err != REG_NOERROR, `0`))
1256	return err;
1257	if (node->parent == NULL)
1258	return REG_NOERROR;
1259	prev = node;
1260	node = node->parent;
1261	}
1262	/ Go up while we have a node that is reached from the right. /
1263	while (node->right == prev \|\| node->right == NULL);
1264	node = node->right;
1265	}
1266	}
1267
1268	static reg_errcode_t
1269	preorder (bin_tree_t root, reg_errcode_t (fn (void* , bin_tree_t )),
1270	void *extra)
1271	{
1272	bin_tree_t *node;
1273
1274	for (node = root; ; )
1275	{
1276	reg_errcode_t err = fn (extra, node);
1277	if (BE (err != REG_NOERROR, `0`))
1278	return err;
1279
1280	/ Go to the left node, or up and to the right. /
1281	if (node->left)
1282	node = node->left;
1283	else
1284	{
1285	bin_tree_t *prev = NULL;
1286	while (node->right == prev \|\| node->right == NULL)
1287	{
1288	prev = node;
1289	node = node->parent;
1290	if (!node)
1291	return REG_NOERROR;
1292	}
1293	node = node->right;
1294	}
1295	}
1296	}
1297
1298	/ Optimization pass: if a SUBEXP is entirely contained, strip it and tell*
1299	re_search_internal to map the inner one's opr.idx to this one's. Adjust
1300	backreferences as well. Requires a preorder visit. /*
1301	static reg_errcode_t
1302	optimize_subexps (void extra, bin_tree_t node)
1303	{
1304	re_dfa_t dfa = (re_dfa_t ) extra;
1305
1306	if (node->token.type == OP_BACK_REF && dfa->subexp_map)
1307	{
1308	int idx = node->token.opr.idx;
1309	node->token.opr.idx = dfa->subexp_map[idx];
1310	dfa->used_bkref_map \|= `1` << node->token.opr.idx;
1311	}
1312
1313	else if (node->token.type == SUBEXP
1314	&& node->left && node->left->token.type == SUBEXP)
1315	{
1316	Idx other_idx = node->left->token.opr.idx;
1317
1318	node->left = node->left->left;
1319	if (node->left)
1320	node->left->parent = node;
1321
1322	dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
1323	if (other_idx < BITSET_WORD_BITS)
1324	dfa->used_bkref_map &= ~((bitset_word_t) `1` << other_idx);
1325	}
1326
1327	return REG_NOERROR;
1328	}
1329
1330	/ Lowering pass: Turn each SUBEXP node into the appropriate concatenation*
1331	of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP. /*
1332	static reg_errcode_t
1333	lower_subexps (void extra, bin_tree_t node)
1334	{
1335	regex_t preg = (regex_t ) extra;
1336	reg_errcode_t err = REG_NOERROR;
1337
1338	if (node->left && node->left->token.type == SUBEXP)
1339	{
1340	node->left = lower_subexp (&err, preg, node->left);
1341	if (node->left)
1342	node->left->parent = node;
1343	}
1344	if (node->right && node->right->token.type == SUBEXP)
1345	{
1346	node->right = lower_subexp (&err, preg, node->right);
1347	if (node->right)
1348	node->right->parent = node;
1349	}
1350
1351	return err;
1352	}
1353
1354	static bin_tree_t *
1355	lower_subexp (reg_errcode_t err, regex_t preg, bin_tree_t *node)
1356	{
1357	re_dfa_t *dfa = preg->buffer;
1358	bin_tree_t *body = node->left;
1359	bin_tree_t op, cls, tree1, tree;
1360
1361	if (preg->no_sub
1362	/ We do not optimize empty subexpressions, because otherwise we may*
1363	have bad CONCAT nodes with NULL children. This is obviously not
1364	very common, so we do not lose much. An example that triggers
1365	this case is the sed "script" //x. /*
1366	&& node->left != NULL
1367	&& (node->token.opr.idx >= BITSET_WORD_BITS
1368	\|\| !(dfa->used_bkref_map
1369	& ((bitset_word_t) `1` << node->token.opr.idx))))
1370	return node->left;
1371
1372	/ Convert the SUBEXP node to the concatenation of an*
1373	OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP. /*
1374	op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP);
1375	cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP);
1376	tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls;
1377	tree = create_tree (dfa, op, tree1, CONCAT);
1378	if (BE (tree == NULL \|\| tree1 == NULL \|\| op == NULL \|\| cls == NULL, `0`))
1379	{
1380	*err = REG_ESPACE;
1381	return NULL;
1382	}
1383
1384	op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx;
1385	op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp;
1386	return tree;
1387	}
1388
1389	/ Pass 1 in building the NFA: compute FIRST and create unlinked automaton*
1390	nodes. Requires a postorder visit. /*
1391	static reg_errcode_t
1392	calc_first (void extra, bin_tree_t node)
1393	{
1394	re_dfa_t dfa = (re_dfa_t ) extra;
1395	if (node->token.type == CONCAT)
1396	{
1397	node->first = node->left->first;
1398	node->node_idx = node->left->node_idx;
1399	}
1400	else
1401	{
1402	node->first = node;
1403	node->node_idx = re_dfa_add_node (dfa, node->token);
1404	if (BE (node->node_idx == -`1`, `0`))
1405	return REG_ESPACE;
1406	if (node->token.type == ANCHOR)
1407	dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type;
1408	}
1409	return REG_NOERROR;
1410	}
1411
1412	/ Pass 2: compute NEXT on the tree. Preorder visit. /
1413	static reg_errcode_t
1414	calc_next (void extra, bin_tree_t node)
1415	{
1416	switch (node->token.type)
1417	{
1418	case OP_DUP_ASTERISK:
1419	node->left->next = node;
1420	break;
1421	case CONCAT:
1422	node->left->next = node->right->first;
1423	node->right->next = node->next;
1424	break;
1425	default:
1426	if (node->left)
1427	node->left->next = node->next;
1428	if (node->right)
1429	node->right->next = node->next;
1430	break;
1431	}
1432	return REG_NOERROR;
1433	}
1434
1435	/ Pass 3: link all DFA nodes to their NEXT node (any order will do). /
1436	static reg_errcode_t
1437	link_nfa_nodes (void extra, bin_tree_t node)
1438	{
1439	re_dfa_t dfa = (re_dfa_t ) extra;
1440	Idx idx = node->node_idx;
1441	reg_errcode_t err = REG_NOERROR;
1442
1443	switch (node->token.type)
1444	{
1445	case CONCAT:
1446	break;
1447
1448	case END_OF_RE:
1449	assert (node->next == NULL);
1450	break;
1451
1452	case OP_DUP_ASTERISK:
1453	case OP_ALT:
1454	{
1455	Idx left, right;
1456	dfa->has_plural_match = `1`;
1457	if (node->left != NULL)
1458	left = node->left->first->node_idx;
1459	else
1460	left = node->next->node_idx;
1461	if (node->right != NULL)
1462	right = node->right->first->node_idx;
1463	else
1464	right = node->next->node_idx;
1465	assert (left > -`1`);
1466	assert (right > -`1`);
1467	err = re_node_set_init_2 (dfa->edests + idx, left, right);
1468	}
1469	break;
1470
1471	case ANCHOR:
1472	case OP_OPEN_SUBEXP:
1473	case OP_CLOSE_SUBEXP:
1474	err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx);
1475	break;
1476
1477	case OP_BACK_REF:
1478	dfa->nexts[idx] = node->next->node_idx;
1479	if (node->token.type == OP_BACK_REF)
1480	err = re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
1481	break;
1482
1483	default:
1484	assert (!IS_EPSILON_NODE (node->token.type));
1485	dfa->nexts[idx] = node->next->node_idx;
1486	break;
1487	}
1488
1489	return err;
1490	}
1491
1492	/ Duplicate the epsilon closure of the node ROOT_NODE.*
1493	Note that duplicated nodes have constraint INIT_CONSTRAINT in addition
1494	to their own constraint. /*
1495
1496	static reg_errcode_t
1497	duplicate_node_closure (re_dfa_t *dfa, Idx top_org_node, Idx top_clone_node,
1498	Idx root_node, unsigned int init_constraint)
1499	{
1500	Idx org_node, clone_node;
1501	bool ok;
1502	unsigned int constraint = init_constraint;
1503	for (org_node = top_org_node, clone_node = top_clone_node;;)
1504	{
1505	Idx org_dest, clone_dest;
1506	if (dfa->nodes[org_node].type == OP_BACK_REF)
1507	{
1508	/ If the back reference epsilon-transit, its destination must*
1509	also have the constraint. Then duplicate the epsilon closure
1510	of the destination of the back reference, and store it in
1511	edests of the back reference. /*
1512	org_dest = dfa->nexts[org_node];
1513	re_node_set_empty (dfa->edests + clone_node);
1514	clone_dest = duplicate_node (dfa, org_dest, constraint);
1515	if (BE (clone_dest == -`1`, `0`))
1516	return REG_ESPACE;
1517	dfa->nexts[clone_node] = dfa->nexts[org_node];
1518	ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1519	if (BE (! ok, `0`))
1520	return REG_ESPACE;
1521	}
1522	else if (dfa->edests[org_node].nelem == `0`)
1523	{
1524	/ In case of the node can't epsilon-transit, don't duplicate the*
1525	destination and store the original destination as the
1526	destination of the node. /*
1527	dfa->nexts[clone_node] = dfa->nexts[org_node];
1528	break;
1529	}
1530	else if (dfa->edests[org_node].nelem == `1`)
1531	{
1532	/ In case of the node can epsilon-transit, and it has only one*
1533	destination. /*
1534	org_dest = dfa->edests[org_node].elems[`0`];
1535	re_node_set_empty (dfa->edests + clone_node);
1536	/ If the node is root_node itself, it means the epsilon closure*
1537	has a loop. Then tie it to the destination of the root_node. /*
1538	if (org_node == root_node && clone_node != org_node)
1539	{
1540	ok = re_node_set_insert (dfa->edests + clone_node, org_dest);
1541	if (BE (! ok, `0`))
1542	return REG_ESPACE;
1543	break;
1544	}
1545	/ In case the node has another constraint, append it. /
1546	constraint \|= dfa->nodes[org_node].constraint;
1547	clone_dest = duplicate_node (dfa, org_dest, constraint);
1548	if (BE (clone_dest == -`1`, `0`))
1549	return REG_ESPACE;
1550	ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1551	if (BE (! ok, `0`))
1552	return REG_ESPACE;
1553	}
1554	else / dfa->edests[org_node].nelem == 2 /
1555	{
1556	/ In case of the node can epsilon-transit, and it has two*
1557	destinations. In the bin_tree_t and DFA, that's '\|' and ''. /
1558	org_dest = dfa->edests[org_node].elems[`0`];
1559	re_node_set_empty (dfa->edests + clone_node);
1560	/ Search for a duplicated node which satisfies the constraint. /
1561	clone_dest = search_duplicated_node (dfa, org_dest, constraint);
1562	if (clone_dest == -`1`)
1563	{
1564	/ There is no such duplicated node, create a new one. /
1565	reg_errcode_t err;
1566	clone_dest = duplicate_node (dfa, org_dest, constraint);
1567	if (BE (clone_dest == -`1`, `0`))
1568	return REG_ESPACE;
1569	ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1570	if (BE (! ok, `0`))
1571	return REG_ESPACE;
1572	err = duplicate_node_closure (dfa, org_dest, clone_dest,
1573	root_node, constraint);
1574	if (BE (err != REG_NOERROR, `0`))
1575	return err;
1576	}
1577	else
1578	{
1579	/ There is a duplicated node which satisfies the constraint,*
1580	use it to avoid infinite loop. /*
1581	ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1582	if (BE (! ok, `0`))
1583	return REG_ESPACE;
1584	}
1585
1586	org_dest = dfa->edests[org_node].elems[`1`];
1587	clone_dest = duplicate_node (dfa, org_dest, constraint);
1588	if (BE (clone_dest == -`1`, `0`))
1589	return REG_ESPACE;
1590	ok = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1591	if (BE (! ok, `0`))
1592	return REG_ESPACE;
1593	}
1594	org_node = org_dest;
1595	clone_node = clone_dest;
1596	}
1597	return REG_NOERROR;
1598	}
1599
1600	/ Search for a node which is duplicated from the node ORG_NODE, and*
1601	satisfies the constraint CONSTRAINT. /*
1602
1603	static Idx
1604	search_duplicated_node (const re_dfa_t *dfa, Idx org_node,
1605	unsigned int constraint)
1606	{
1607	Idx idx;
1608	for (idx = dfa->nodes_len - `1`; dfa->nodes[idx].duplicated && idx > `0`; --idx)
1609	{
1610	if (org_node == dfa->org_indices[idx]
1611	&& constraint == dfa->nodes[idx].constraint)
1612	return idx; / Found. /
1613	}
1614	return -`1`; / Not found. /
1615	}
1616
1617	/ Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.*
1618	Return the index of the new node, or -1 if insufficient storage is
1619	available. /*
1620
1621	static Idx
1622	duplicate_node (re_dfa_t dfa, Idx org_idx, unsigned* int constraint)
1623	{
1624	Idx dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
1625	if (BE (dup_idx != -`1`, `1`))
1626	{
1627	dfa->nodes[dup_idx].constraint = constraint;
1628	dfa->nodes[dup_idx].constraint \|= dfa->nodes[org_idx].constraint;
1629	dfa->nodes[dup_idx].duplicated = `1`;
1630
1631	/ Store the index of the original node. /
1632	dfa->org_indices[dup_idx] = org_idx;
1633	}
1634	return dup_idx;
1635	}
1636
1637	static reg_errcode_t
1638	calc_inveclosure (re_dfa_t *dfa)
1639	{
1640	Idx src, idx;
1641	bool ok;
1642	for (idx = `0`; idx < dfa->nodes_len; ++idx)
1643	re_node_set_init_empty (dfa->inveclosures + idx);
1644
1645	for (src = `0`; src < dfa->nodes_len; ++src)
1646	{
1647	Idx *elems = dfa->eclosures[src].elems;
1648	for (idx = `0`; idx < dfa->eclosures[src].nelem; ++idx)
1649	{
1650	ok = re_node_set_insert_last (dfa->inveclosures + elems[idx], src);
1651	if (BE (! ok, `0`))
1652	return REG_ESPACE;
1653	}
1654	}
1655
1656	return REG_NOERROR;
1657	}
1658
1659	/ Calculate "eclosure" for all the node in DFA. /
1660
1661	static reg_errcode_t
1662	calc_eclosure (re_dfa_t *dfa)
1663	{
1664	Idx node_idx;
1665	bool incomplete;
1666	#ifdef DEBUG
1667	assert (dfa->nodes_len > `0`);
1668	#endif
1669	incomplete = false;
1670	/ For each nodes, calculate epsilon closure. /
1671	for (node_idx = `0`; ; ++node_idx)
1672	{
1673	reg_errcode_t err;
1674	re_node_set eclosure_elem;
1675	if (node_idx == dfa->nodes_len)
1676	{
1677	if (!incomplete)
1678	break;
1679	incomplete = false;
1680	node_idx = `0`;
1681	}
1682
1683	#ifdef DEBUG
1684	assert (dfa->eclosures[node_idx].nelem != -`1`);
1685	#endif
1686
1687	/ If we have already calculated, skip it. /
1688	if (dfa->eclosures[node_idx].nelem != `0`)
1689	continue;
1690	/ Calculate epsilon closure of 'node_idx'. /
1691	err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, true);
1692	if (BE (err != REG_NOERROR, `0`))
1693	return err;
1694
1695	if (dfa->eclosures[node_idx].nelem == `0`)
1696	{
1697	incomplete = true;
1698	re_node_set_free (&eclosure_elem);
1699	}
1700	}
1701	return REG_NOERROR;
1702	}
1703
1704	/ Calculate epsilon closure of NODE. /
1705
1706	static reg_errcode_t
1707	calc_eclosure_iter (re_node_set new_set, re_dfa_t dfa, Idx node, bool root)
1708	{
1709	reg_errcode_t err;
1710	Idx i;
1711	re_node_set eclosure;
1712	bool ok;
1713	bool incomplete = false;
1714	err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + `1`);
1715	if (BE (err != REG_NOERROR, `0`))
1716	return err;
1717
1718	/ This indicates that we are calculating this node now.*
1719	We reference this value to avoid infinite loop. /*
1720	dfa->eclosures[node].nelem = -`1`;
1721
1722	/ If the current node has constraints, duplicate all nodes*
1723	since they must inherit the constraints. /*
1724	if (dfa->nodes[node].constraint
1725	&& dfa->edests[node].nelem
1726	&& !dfa->nodes[dfa->edests[node].elems[`0`]].duplicated)
1727	{
1728	err = duplicate_node_closure (dfa, node, node, node,
1729	dfa->nodes[node].constraint);
1730	if (BE (err != REG_NOERROR, `0`))
1731	return err;
1732	}
1733
1734	/ Expand each epsilon destination nodes. /
1735	if (IS_EPSILON_NODE(dfa->nodes[node].type))
1736	for (i = `0`; i < dfa->edests[node].nelem; ++i)
1737	{
1738	re_node_set eclosure_elem;
1739	Idx edest = dfa->edests[node].elems[i];
1740	/ If calculating the epsilon closure of 'edest' is in progress,*
1741	return intermediate result. /*
1742	if (dfa->eclosures[edest].nelem == -`1`)
1743	{
1744	incomplete = true;
1745	continue;
1746	}
1747	/ If we haven't calculated the epsilon closure of 'edest' yet,*
1748	calculate now. Otherwise use calculated epsilon closure. /*
1749	if (dfa->eclosures[edest].nelem == `0`)
1750	{
1751	err = calc_eclosure_iter (&eclosure_elem, dfa, edest, false);
1752	if (BE (err != REG_NOERROR, `0`))
1753	return err;
1754	}
1755	else
1756	eclosure_elem = dfa->eclosures[edest];
1757	/ Merge the epsilon closure of 'edest'. /
1758	err = re_node_set_merge (&eclosure, &eclosure_elem);
1759	if (BE (err != REG_NOERROR, `0`))
1760	return err;
1761	/ If the epsilon closure of 'edest' is incomplete,*
1762	the epsilon closure of this node is also incomplete. /*
1763	if (dfa->eclosures[edest].nelem == `0`)
1764	{
1765	incomplete = true;
1766	re_node_set_free (&eclosure_elem);
1767	}
1768	}
1769
1770	/ An epsilon closure includes itself. /
1771	ok = re_node_set_insert (&eclosure, node);
1772	if (BE (! ok, `0`))
1773	return REG_ESPACE;
1774	if (incomplete && !root)
1775	dfa->eclosures[node].nelem = `0`;
1776	else
1777	dfa->eclosures[node] = eclosure;
1778	*new_set = eclosure;
1779	return REG_NOERROR;
1780	}
1781
1782	/ Functions for token which are used in the parser. /
1783
1784	/ Fetch a token from INPUT.*
1785	We must not use this function inside bracket expressions. /*
1786
1787	static void
1788	fetch_token (re_token_t result, re_string_t input, reg_syntax_t syntax)
1789	{
1790	re_string_skip_bytes (input, peek_token (result, input, syntax));
1791	}
1792
1793	/ Peek a token from INPUT, and return the length of the token.*
1794	We must not use this function inside bracket expressions. /*
1795
1796	static int
1797	peek_token (re_token_t token, re_string_t input, reg_syntax_t syntax)
1798	{
1799	unsigned char c;
1800
1801	if (re_string_eoi (input))
1802	{
1803	token->type = END_OF_RE;
1804	return `0`;
1805	}
1806
1807	c = re_string_peek_byte (input, `0`);
1808	token->opr.c = c;
1809
1810	token->word_char = `0`;
1811	#ifdef RE_ENABLE_I18N
1812	token->mb_partial = `0`;
1813	if (input->mb_cur_max > `1` &&
1814	!re_string_first_byte (input, re_string_cur_idx (input)))
1815	{
1816	token->type = CHARACTER;
1817	token->mb_partial = `1`;
1818	return `1`;
1819	}
1820	#endif
1821	if (c == `'\\'`)
1822	{
1823	unsigned char c2;
1824	if (re_string_cur_idx (input) + `1` >= re_string_length (input))
1825	{
1826	token->type = BACK_SLASH;
1827	return `1`;
1828	}
1829
1830	c2 = re_string_peek_byte_case (input, `1`);
1831	token->opr.c = c2;
1832	token->type = CHARACTER;
1833	#ifdef RE_ENABLE_I18N
1834	if (input->mb_cur_max > `1`)
1835	{
1836	wint_t wc = re_string_wchar_at (input,
1837	re_string_cur_idx (input) + `1`);
1838	token->word_char = IS_WIDE_WORD_CHAR (wc) != `0`;
1839	}
1840	else
1841	#endif
1842	token->word_char = IS_WORD_CHAR (c2) != `0`;
1843
1844	switch (c2)
1845	{
1846	case `'\|'`:
1847	if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR))
1848	token->type = OP_ALT;
1849	break;
1850	case `'1'`: case `'2'`: case `'3'`: case `'4'`: case `'5'`:
1851	case `'6'`: case `'7'`: case `'8'`: case `'9'`:
1852	if (!(syntax & RE_NO_BK_REFS))
1853	{
1854	token->type = OP_BACK_REF;
1855	token->opr.idx = c2 - `'1'`;
1856	}
1857	break;
1858	case `'<'`:
1859	if (!(syntax & RE_NO_GNU_OPS))
1860	{
1861	token->type = ANCHOR;
1862	token->opr.ctx_type = WORD_FIRST;
1863	}
1864	break;
1865	case `'>'`:
1866	if (!(syntax & RE_NO_GNU_OPS))
1867	{
1868	token->type = ANCHOR;
1869	token->opr.ctx_type = WORD_LAST;
1870	}
1871	break;
1872	case `'b'`:
1873	if (!(syntax & RE_NO_GNU_OPS))
1874	{
1875	token->type = ANCHOR;
1876	token->opr.ctx_type = WORD_DELIM;
1877	}
1878	break;
1879	case `'B'`:
1880	if (!(syntax & RE_NO_GNU_OPS))
1881	{
1882	token->type = ANCHOR;
1883	token->opr.ctx_type = NOT_WORD_DELIM;
1884	}
1885	break;
1886	case `'w'`:
1887	if (!(syntax & RE_NO_GNU_OPS))
1888	token->type = OP_WORD;
1889	break;
1890	case `'W'`:
1891	if (!(syntax & RE_NO_GNU_OPS))
1892	token->type = OP_NOTWORD;
1893	break;
1894	case `'s'`:
1895	if (!(syntax & RE_NO_GNU_OPS))
1896	token->type = OP_SPACE;
1897	break;
1898	case `'S'`:
1899	if (!(syntax & RE_NO_GNU_OPS))
1900	token->type = OP_NOTSPACE;
1901	break;
1902	case '`':
1903	if (!(syntax & RE_NO_GNU_OPS))
1904	{
1905	token->type = ANCHOR;
1906	token->opr.ctx_type = BUF_FIRST;
1907	}
1908	break;
1909	case `'\''`:
1910	if (!(syntax & RE_NO_GNU_OPS))
1911	{
1912	token->type = ANCHOR;
1913	token->opr.ctx_type = BUF_LAST;
1914	}
1915	break;
1916	case `'('`:
1917	if (!(syntax & RE_NO_BK_PARENS))
1918	token->type = OP_OPEN_SUBEXP;
1919	break;
1920	case `')'`:
1921	if (!(syntax & RE_NO_BK_PARENS))
1922	token->type = OP_CLOSE_SUBEXP;
1923	break;
1924	case `'+'`:
1925	if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
1926	token->type = OP_DUP_PLUS;
1927	break;
1928	case `'?'`:
1929	if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
1930	token->type = OP_DUP_QUESTION;
1931	break;
1932	case `'{'`:
1933	if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
1934	token->type = OP_OPEN_DUP_NUM;
1935	break;
1936	case `'}'`:
1937	if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
1938	token->type = OP_CLOSE_DUP_NUM;
1939	break;
1940	default:
1941	break;
1942	}
1943	return `2`;
1944	}
1945
1946	token->type = CHARACTER;
1947	#ifdef RE_ENABLE_I18N
1948	if (input->mb_cur_max > `1`)
1949	{
1950	wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input));
1951	token->word_char = IS_WIDE_WORD_CHAR (wc) != `0`;
1952	}
1953	else
1954	#endif
1955	token->word_char = IS_WORD_CHAR (token->opr.c);
1956
1957	switch (c)
1958	{
1959	case `'\n'`:
1960	if (syntax & RE_NEWLINE_ALT)
1961	token->type = OP_ALT;
1962	break;
1963	case `'\|'`:
1964	if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR))
1965	token->type = OP_ALT;
1966	break;
1967	case `'*'`:
1968	token->type = OP_DUP_ASTERISK;
1969	break;
1970	case `'+'`:
1971	if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
1972	token->type = OP_DUP_PLUS;
1973	break;
1974	case `'?'`:
1975	if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
1976	token->type = OP_DUP_QUESTION;
1977	break;
1978	case `'{'`:
1979	if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1980	token->type = OP_OPEN_DUP_NUM;
1981	break;
1982	case `'}'`:
1983	if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1984	token->type = OP_CLOSE_DUP_NUM;
1985	break;
1986	case `'('`:
1987	if (syntax & RE_NO_BK_PARENS)
1988	token->type = OP_OPEN_SUBEXP;
1989	break;
1990	case `')'`:
1991	if (syntax & RE_NO_BK_PARENS)
1992	token->type = OP_CLOSE_SUBEXP;
1993	break;
1994	case `'['`:
1995	token->type = OP_OPEN_BRACKET;
1996	break;
1997	case `'.'`:
1998	token->type = OP_PERIOD;
1999	break;
2000	case `'^'`:
2001	if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS \| RE_CARET_ANCHORS_HERE)) &&
2002	re_string_cur_idx (input) != `0`)
2003	{
2004	char prev = re_string_peek_byte (input, -`1`);
2005	if (!(syntax & RE_NEWLINE_ALT) \|\| prev != `'\n'`)
2006	break;
2007	}
2008	token->type = ANCHOR;
2009	token->opr.ctx_type = LINE_FIRST;
2010	break;
2011	case `'$'`:
2012	if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
2013	re_string_cur_idx (input) + `1` != re_string_length (input))
2014	{
2015	re_token_t next;
2016	re_string_skip_bytes (input, `1`);
2017	peek_token (&next, input, syntax);
2018	re_string_skip_bytes (input, -`1`);
2019	if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
2020	break;
2021	}
2022	token->type = ANCHOR;
2023	token->opr.ctx_type = LINE_LAST;
2024	break;
2025	default:
2026	break;
2027	}
2028	return `1`;
2029	}
2030
2031	/ Peek a token from INPUT, and return the length of the token.*
2032	We must not use this function out of bracket expressions. /*
2033
2034	static int
2035	peek_token_bracket (re_token_t token, re_string_t input, reg_syntax_t syntax)
2036	{
2037	unsigned char c;
2038	if (re_string_eoi (input))
2039	{
2040	token->type = END_OF_RE;
2041	return `0`;
2042	}
2043	c = re_string_peek_byte (input, `0`);
2044	token->opr.c = c;
2045
2046	#ifdef RE_ENABLE_I18N
2047	if (input->mb_cur_max > `1` &&
2048	!re_string_first_byte (input, re_string_cur_idx (input)))
2049	{
2050	token->type = CHARACTER;
2051	return `1`;
2052	}
2053	#endif /* RE_ENABLE_I18N */
2054
2055	if (c == `'\\'` && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)
2056	&& re_string_cur_idx (input) + `1` < re_string_length (input))
2057	{
2058	/ In this case, '\' escape a character. /
2059	unsigned char c2;
2060	re_string_skip_bytes (input, `1`);
2061	c2 = re_string_peek_byte (input, `0`);
2062	token->opr.c = c2;
2063	token->type = CHARACTER;
2064	return `1`;
2065	}
2066	if (c == `'['`) / '[' is a special char in a bracket exps. /
2067	{
2068	unsigned char c2;
2069	int token_len;
2070	if (re_string_cur_idx (input) + `1` < re_string_length (input))
2071	c2 = re_string_peek_byte (input, `1`);
2072	else
2073	c2 = `0`;
2074	token->opr.c = c2;
2075	token_len = `2`;
2076	switch (c2)
2077	{
2078	case `'.'`:
2079	token->type = OP_OPEN_COLL_ELEM;
2080	break;
2081
2082	case `'='`:
2083	token->type = OP_OPEN_EQUIV_CLASS;
2084	break;
2085
2086	case `':'`:
2087	if (syntax & RE_CHAR_CLASSES)
2088	{
2089	token->type = OP_OPEN_CHAR_CLASS;
2090	break;
2091	}
2092	FALLTHROUGH;
2093	default:
2094	token->type = CHARACTER;
2095	token->opr.c = c;
2096	token_len = `1`;
2097	break;
2098	}
2099	return token_len;
2100	}
2101	switch (c)
2102	{
2103	case `'-'`:
2104	token->type = OP_CHARSET_RANGE;
2105	break;
2106	case `']'`:
2107	token->type = OP_CLOSE_BRACKET;
2108	break;
2109	case `'^'`:
2110	token->type = OP_NON_MATCH_LIST;
2111	break;
2112	default:
2113	token->type = CHARACTER;
2114	}
2115	return `1`;
2116	}
2117
2118	/ Functions for parser. /
2119
2120	/ Entry point of the parser.*
2121	Parse the regular expression REGEXP and return the structure tree.
2122	If an error occurs, ERR is set by error code, and return NULL.
2123	This function build the following tree, from regular expression <reg_exp>:
2124	CAT
2125	/ \
2126	/ \
2127	<reg_exp> EOR
2128
2129	CAT means concatenation.
2130	EOR means end of regular expression. /*
2131
2132	static bin_tree_t *
2133	parse (re_string_t regexp, regex_t preg, reg_syntax_t syntax,
2134	reg_errcode_t *err)
2135	{
2136	re_dfa_t *dfa = preg->buffer;
2137	bin_tree_t tree, eor, *root;
2138	re_token_t current_token;
2139	dfa->syntax = syntax;
2140	fetch_token (&current_token, regexp, syntax \| RE_CARET_ANCHORS_HERE);
2141	tree = parse_reg_exp (regexp, preg, &current_token, syntax, `0`, err);
2142	if (BE (*err != REG_NOERROR && tree == NULL, `0`))
2143	return NULL;
2144	eor = create_tree (dfa, NULL, NULL, END_OF_RE);
2145	if (tree != NULL)
2146	root = create_tree (dfa, tree, eor, CONCAT);
2147	else
2148	root = eor;
2149	if (BE (eor == NULL \|\| root == NULL, `0`))
2150	{
2151	*err = REG_ESPACE;
2152	return NULL;
2153	}
2154	return root;
2155	}
2156
2157	/ This function build the following tree, from regular expression*
2158	<branch1>\|<branch2>:
2159	ALT
2160	/ \
2161	/ \
2162	<branch1> <branch2>
2163
2164	ALT means alternative, which represents the operator '\|'. /*
2165
2166	static bin_tree_t *
2167	parse_reg_exp (re_string_t regexp, regex_t preg, re_token_t *token,
2168	reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
2169	{
2170	re_dfa_t *dfa = preg->buffer;
2171	bin_tree_t tree, branch = NULL;
2172	bitset_word_t initial_bkref_map = dfa->completed_bkref_map;
2173	tree = parse_branch (regexp, preg, token, syntax, nest, err);
2174	if (BE (*err != REG_NOERROR && tree == NULL, `0`))
2175	return NULL;
2176
2177	while (token->type == OP_ALT)
2178	{
2179	fetch_token (token, regexp, syntax \| RE_CARET_ANCHORS_HERE);
2180	if (token->type != OP_ALT && token->type != END_OF_RE
2181	&& (nest == `0` \|\| token->type != OP_CLOSE_SUBEXP))
2182	{
2183	bitset_word_t accumulated_bkref_map = dfa->completed_bkref_map;
2184	dfa->completed_bkref_map = initial_bkref_map;
2185	branch = parse_branch (regexp, preg, token, syntax, nest, err);
2186	if (BE (*err != REG_NOERROR && branch == NULL, `0`))
2187	{
2188	if (tree != NULL)
2189	postorder (tree, free_tree, NULL);
2190	return NULL;
2191	}
2192	dfa->completed_bkref_map \|= accumulated_bkref_map;
2193	}
2194	else
2195	branch = NULL;
2196	tree = create_tree (dfa, tree, branch, OP_ALT);
2197	if (BE (tree == NULL, `0`))
2198	{
2199	*err = REG_ESPACE;
2200	return NULL;
2201	}
2202	}
2203	return tree;
2204	}
2205
2206	/ This function build the following tree, from regular expression*
2207	<exp1><exp2>:
2208	CAT
2209	/ \
2210	/ \
2211	<exp1> <exp2>
2212
2213	CAT means concatenation. /*
2214
2215	static bin_tree_t *
2216	parse_branch (re_string_t regexp, regex_t preg, re_token_t *token,
2217	reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
2218	{
2219	bin_tree_t tree, expr;
2220	re_dfa_t *dfa = preg->buffer;
2221	tree = parse_expression (regexp, preg, token, syntax, nest, err);
2222	if (BE (*err != REG_NOERROR && tree == NULL, `0`))
2223	return NULL;
2224
2225	while (token->type != OP_ALT && token->type != END_OF_RE
2226	&& (nest == `0` \|\| token->type != OP_CLOSE_SUBEXP))
2227	{
2228	expr = parse_expression (regexp, preg, token, syntax, nest, err);
2229	if (BE (*err != REG_NOERROR && expr == NULL, `0`))
2230	{
2231	if (tree != NULL)
2232	postorder (tree, free_tree, NULL);
2233	return NULL;
2234	}
2235	if (tree != NULL && expr != NULL)
2236	{
2237	bin_tree_t *newtree = create_tree (dfa, tree, expr, CONCAT);
2238	if (newtree == NULL)
2239	{
2240	postorder (expr, free_tree, NULL);
2241	postorder (tree, free_tree, NULL);
2242	*err = REG_ESPACE;
2243	return NULL;
2244	}
2245	tree = newtree;
2246	}
2247	else if (tree == NULL)
2248	tree = expr;
2249	/ Otherwise expr == NULL, we don't need to create new tree. /
2250	}
2251	return tree;
2252	}
2253
2254	/ This function build the following tree, from regular expression a:
2255	*
2256	\|
2257	a
2258	*/
2259
2260	static bin_tree_t *
2261	parse_expression (re_string_t regexp, regex_t preg, re_token_t *token,
2262	reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
2263	{
2264	re_dfa_t *dfa = preg->buffer;
2265	bin_tree_t *tree;
2266	switch (token->type)
2267	{
2268	case CHARACTER:
2269	tree = create_token_tree (dfa, NULL, NULL, token);
2270	if (BE (tree == NULL, `0`))
2271	{
2272	*err = REG_ESPACE;
2273	return NULL;
2274	}
2275	#ifdef RE_ENABLE_I18N
2276	if (dfa->mb_cur_max > `1`)
2277	{
2278	while (!re_string_eoi (regexp)
2279	&& !re_string_first_byte (regexp, re_string_cur_idx (regexp)))
2280	{
2281	bin_tree_t *mbc_remain;
2282	fetch_token (token, regexp, syntax);
2283	mbc_remain = create_token_tree (dfa, NULL, NULL, token);
2284	tree = create_tree (dfa, tree, mbc_remain, CONCAT);
2285	if (BE (mbc_remain == NULL \|\| tree == NULL, `0`))
2286	{
2287	*err = REG_ESPACE;
2288	return NULL;
2289	}
2290	}
2291	}
2292	#endif
2293	break;
2294
2295	case OP_OPEN_SUBEXP:
2296	tree = parse_sub_exp (regexp, preg, token, syntax, nest + `1`, err);
2297	if (BE (*err != REG_NOERROR && tree == NULL, `0`))
2298	return NULL;
2299	break;
2300
2301	case OP_OPEN_BRACKET:
2302	tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
2303	if (BE (*err != REG_NOERROR && tree == NULL, `0`))
2304	return NULL;
2305	break;
2306
2307	case OP_BACK_REF:
2308	if (!BE (dfa->completed_bkref_map & (`1` << token->opr.idx), `1`))
2309	{
2310	*err = REG_ESUBREG;
2311	return NULL;
2312	}
2313	dfa->used_bkref_map \|= `1` << token->opr.idx;
2314	tree = create_token_tree (dfa, NULL, NULL, token);
2315	if (BE (tree == NULL, `0`))
2316	{
2317	*err = REG_ESPACE;
2318	return NULL;
2319	}
2320	++dfa->nbackref;
2321	dfa->has_mb_node = `1`;
2322	break;
2323
2324	case OP_OPEN_DUP_NUM:
2325	if (syntax & RE_CONTEXT_INVALID_DUP)
2326	{
2327	*err = REG_BADRPT;
2328	return NULL;
2329	}
2330	FALLTHROUGH;
2331	case OP_DUP_ASTERISK:
2332	case OP_DUP_PLUS:
2333	case OP_DUP_QUESTION:
2334	if (syntax & RE_CONTEXT_INVALID_OPS)
2335	{
2336	*err = REG_BADRPT;
2337	return NULL;
2338	}
2339	else if (syntax & RE_CONTEXT_INDEP_OPS)
2340	{
2341	fetch_token (token, regexp, syntax);
2342	return parse_expression (regexp, preg, token, syntax, nest, err);
2343	}
2344	FALLTHROUGH;
2345	case OP_CLOSE_SUBEXP:
2346	if ((token->type == OP_CLOSE_SUBEXP) &&
2347	!(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))
2348	{
2349	*err = REG_ERPAREN;
2350	return NULL;
2351	}
2352	FALLTHROUGH;
2353	case OP_CLOSE_DUP_NUM:
2354	/ We treat it as a normal character. /
2355
2356	/ Then we can these characters as normal characters. /
2357	token->type = CHARACTER;
2358	/ mb_partial and word_char bits should be initialized already*
2359	by peek_token. /*
2360	tree = create_token_tree (dfa, NULL, NULL, token);
2361	if (BE (tree == NULL, `0`))
2362	{
2363	*err = REG_ESPACE;
2364	return NULL;
2365	}
2366	break;
2367
2368	case ANCHOR:
2369	if ((token->opr.ctx_type
2370	& (WORD_DELIM \| NOT_WORD_DELIM \| WORD_FIRST \| WORD_LAST))
2371	&& dfa->word_ops_used == `0`)
2372	init_word_char (dfa);
2373	if (token->opr.ctx_type == WORD_DELIM
2374	\|\| token->opr.ctx_type == NOT_WORD_DELIM)
2375	{
2376	bin_tree_t tree_first, tree_last;
2377	if (token->opr.ctx_type == WORD_DELIM)
2378	{
2379	token->opr.ctx_type = WORD_FIRST;
2380	tree_first = create_token_tree (dfa, NULL, NULL, token);
2381	token->opr.ctx_type = WORD_LAST;
2382	}
2383	else
2384	{
2385	token->opr.ctx_type = INSIDE_WORD;
2386	tree_first = create_token_tree (dfa, NULL, NULL, token);
2387	token->opr.ctx_type = INSIDE_NOTWORD;
2388	}
2389	tree_last = create_token_tree (dfa, NULL, NULL, token);
2390	tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
2391	if (BE (tree_first == NULL \|\| tree_last == NULL \|\| tree == NULL, `0`))
2392	{
2393	*err = REG_ESPACE;
2394	return NULL;
2395	}
2396	}
2397	else
2398	{
2399	tree = create_token_tree (dfa, NULL, NULL, token);
2400	if (BE (tree == NULL, `0`))
2401	{
2402	*err = REG_ESPACE;
2403	return NULL;
2404	}
2405	}
2406	/ We must return here, since ANCHORs can't be followed*
2407	by repetition operators.
2408	eg. RE"^" is invalid or "<ANCHOR(^)><CHAR()>",
2409	it must not be "<ANCHOR(^)><REPEAT()>". /
2410	fetch_token (token, regexp, syntax);
2411	return tree;
2412
2413	case OP_PERIOD:
2414	tree = create_token_tree (dfa, NULL, NULL, token);
2415	if (BE (tree == NULL, `0`))
2416	{
2417	*err = REG_ESPACE;
2418	return NULL;
2419	}
2420	if (dfa->mb_cur_max > `1`)
2421	dfa->has_mb_node = `1`;
2422	break;
2423
2424	case OP_WORD:
2425	case OP_NOTWORD:
2426	tree = build_charclass_op (dfa, regexp->trans,
2427	"alnum",
2428	"_",
2429	token->type == OP_NOTWORD, err);
2430	if (BE (*err != REG_NOERROR && tree == NULL, `0`))
2431	return NULL;
2432	break;
2433
2434	case OP_SPACE:
2435	case OP_NOTSPACE:
2436	tree = build_charclass_op (dfa, regexp->trans,
2437	"space",
2438	"",
2439	token->type == OP_NOTSPACE, err);
2440	if (BE (*err != REG_NOERROR && tree == NULL, `0`))
2441	return NULL;
2442	break;
2443
2444	case OP_ALT:
2445	case END_OF_RE:
2446	return NULL;
2447
2448	case BACK_SLASH:
2449	*err = REG_EESCAPE;
2450	return NULL;
2451
2452	default:
2453	/ Must not happen? /
2454	#ifdef DEBUG
2455	assert (`0`);
2456	#endif
2457	return NULL;
2458	}
2459	fetch_token (token, regexp, syntax);
2460
2461	while (token->type == OP_DUP_ASTERISK \|\| token->type == OP_DUP_PLUS
2462	\|\| token->type == OP_DUP_QUESTION \|\| token->type == OP_OPEN_DUP_NUM)
2463	{
2464	bin_tree_t *dup_tree = parse_dup_op (tree, regexp, dfa, token,
2465	syntax, err);
2466	if (BE (*err != REG_NOERROR && dup_tree == NULL, `0`))
2467	{
2468	if (tree != NULL)
2469	postorder (tree, free_tree, NULL);
2470	return NULL;
2471	}
2472	tree = dup_tree;
2473	/ In BRE consecutive duplications are not allowed. /
2474	if ((syntax & RE_CONTEXT_INVALID_DUP)
2475	&& (token->type == OP_DUP_ASTERISK
2476	\|\| token->type == OP_OPEN_DUP_NUM))
2477	{
2478	if (tree != NULL)
2479	postorder (tree, free_tree, NULL);
2480	*err = REG_BADRPT;
2481	return NULL;
2482	}
2483	}
2484
2485	return tree;
2486	}
2487
2488	/ This function build the following tree, from regular expression*
2489	(<reg_exp>):
2490	SUBEXP
2491	\|
2492	<reg_exp>
2493	*/
2494
2495	static bin_tree_t *
2496	parse_sub_exp (re_string_t regexp, regex_t preg, re_token_t *token,
2497	reg_syntax_t syntax, Idx nest, reg_errcode_t *err)
2498	{
2499	re_dfa_t *dfa = preg->buffer;
2500	bin_tree_t *tree;
2501	size_t cur_nsub;
2502	cur_nsub = preg->re_nsub++;
2503
2504	fetch_token (token, regexp, syntax \| RE_CARET_ANCHORS_HERE);
2505
2506	/ The subexpression may be a null string. /
2507	if (token->type == OP_CLOSE_SUBEXP)
2508	tree = NULL;
2509	else
2510	{
2511	tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
2512	if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, `0`))
2513	{
2514	if (tree != NULL)
2515	postorder (tree, free_tree, NULL);
2516	*err = REG_EPAREN;
2517	}
2518	if (BE (*err != REG_NOERROR, `0`))
2519	return NULL;
2520	}
2521
2522	if (cur_nsub <= `'9'` - `'1'`)
2523	dfa->completed_bkref_map \|= `1` << cur_nsub;
2524
2525	tree = create_tree (dfa, tree, NULL, SUBEXP);
2526	if (BE (tree == NULL, `0`))
2527	{
2528	*err = REG_ESPACE;
2529	return NULL;
2530	}
2531	tree->token.opr.idx = cur_nsub;
2532	return tree;
2533	}
2534
2535	/ This function parse repetition operators like "", "+", "{1,3}" etc. /*
2536
2537	static bin_tree_t *
2538	parse_dup_op (bin_tree_t elem, re_string_t regexp, re_dfa_t *dfa,
2539	re_token_t token, reg_syntax_t syntax, reg_errcode_t err)
2540	{
2541	bin_tree_t tree = NULL, old_tree = NULL;
2542	Idx i, start, end, start_idx = re_string_cur_idx (regexp);
2543	re_token_t start_token = *token;
2544
2545	if (token->type == OP_OPEN_DUP_NUM)
2546	{
2547	end = `0`;
2548	start = fetch_number (regexp, token, syntax);
2549	if (start == -`1`)
2550	{
2551	if (token->type == CHARACTER && token->opr.c == `','`)
2552	start = `0`; / We treat "{,m}" as "{0,m}". /
2553	else
2554	{
2555	err = REG_BADBR; /* <re>{} is invalid. /
2556	return NULL;
2557	}
2558	}
2559	if (BE (start != -`2`, `1`))
2560	{
2561	/ We treat "{n}" as "{n,n}". /
2562	end = ((token->type == OP_CLOSE_DUP_NUM) ? start
2563	: ((token->type == CHARACTER && token->opr.c == `','`)
2564	? fetch_number (regexp, token, syntax) : -`2`));
2565	}
2566	if (BE (start == -`2` \|\| end == -`2`, `0`))
2567	{
2568	/ Invalid sequence. /
2569	if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), `0`))
2570	{
2571	if (token->type == END_OF_RE)
2572	*err = REG_EBRACE;
2573	else
2574	*err = REG_BADBR;
2575
2576	return NULL;
2577	}
2578
2579	/ If the syntax bit is set, rollback. /
2580	re_string_set_index (regexp, start_idx);
2581	*token = start_token;
2582	token->type = CHARACTER;
2583	/ mb_partial and word_char bits should be already initialized by*
2584	peek_token. /*
2585	return elem;
2586	}
2587
2588	if (BE ((end != -`1` && start > end)
2589	\|\| token->type != OP_CLOSE_DUP_NUM, `0`))
2590	{
2591	/ First number greater than second. /
2592	*err = REG_BADBR;
2593	return NULL;
2594	}
2595
2596	if (BE (RE_DUP_MAX < (end == -`1` ? start : end), `0`))
2597	{
2598	*err = REG_ESIZE;
2599	return NULL;
2600	}
2601	}
2602	else
2603	{
2604	start = (token->type == OP_DUP_PLUS) ? `1` : `0`;
2605	end = (token->type == OP_DUP_QUESTION) ? `1` : -`1`;
2606	}
2607
2608	fetch_token (token, regexp, syntax);
2609
2610	if (BE (elem == NULL, `0`))
2611	return NULL;
2612	if (BE (start == `0` && end == `0`, `0`))
2613	{
2614	postorder (elem, free_tree, NULL);
2615	return NULL;
2616	}
2617
2618	/ Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". /
2619	if (BE (start > `0`, `0`))
2620	{
2621	tree = elem;
2622	for (i = `2`; i <= start; ++i)
2623	{
2624	elem = duplicate_tree (elem, dfa);
2625	tree = create_tree (dfa, tree, elem, CONCAT);
2626	if (BE (elem == NULL \|\| tree == NULL, `0`))
2627	goto parse_dup_op_espace;
2628	}
2629
2630	if (start == end)
2631	return tree;
2632
2633	/ Duplicate ELEM before it is marked optional. /
2634	elem = duplicate_tree (elem, dfa);
2635	if (BE (elem == NULL, `0`))
2636	goto parse_dup_op_espace;
2637	old_tree = tree;
2638	}
2639	else
2640	old_tree = NULL;
2641
2642	if (elem->token.type == SUBEXP)
2643	{
2644	uintptr_t subidx = elem->token.opr.idx;
2645	postorder (elem, mark_opt_subexp, (void *) subidx);
2646	}
2647
2648	tree = create_tree (dfa, elem, NULL,
2649	(end == -`1` ? OP_DUP_ASTERISK : OP_ALT));
2650	if (BE (tree == NULL, `0`))
2651	goto parse_dup_op_espace;
2652
2653	/ This loop is actually executed only when end != -1,*
2654	to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have
2655	already created the start+1-th copy. /*
2656	if (TYPE_SIGNED (Idx) \|\| end != -`1`)
2657	for (i = start + `2`; i <= end; ++i)
2658	{
2659	elem = duplicate_tree (elem, dfa);
2660	tree = create_tree (dfa, tree, elem, CONCAT);
2661	if (BE (elem == NULL \|\| tree == NULL, `0`))
2662	goto parse_dup_op_espace;
2663
2664	tree = create_tree (dfa, tree, NULL, OP_ALT);
2665	if (BE (tree == NULL, `0`))
2666	goto parse_dup_op_espace;
2667	}
2668
2669	if (old_tree)
2670	tree = create_tree (dfa, old_tree, tree, CONCAT);
2671
2672	return tree;
2673
2674	parse_dup_op_espace:
2675	*err = REG_ESPACE;
2676	return NULL;
2677	}
2678
2679	/ Size of the names for collating symbol/equivalence_class/character_class.*
2680	I'm not sure, but maybe enough. /*
2681	#define BRACKET_NAME_BUF_SIZE 32
2682
2683	#ifndef _LIBC
2684
2685	# ifdef RE_ENABLE_I18N
2686	/ Convert the byte B to the corresponding wide character. In a*
2687	unibyte locale, treat B as itself if it is an encoding error.
2688	In a multibyte locale, return WEOF if B is an encoding error. /*
2689	static wint_t
2690	parse_byte (unsigned char b, re_charset_t *mbcset)
2691	{
2692	wint_t wc = __btowc (b);
2693	return wc == WEOF && !mbcset ? b : wc;
2694	}
2695	#endif
2696
2697	/ Local function for parse_bracket_exp only used in case of NOT _LIBC.*
2698	Build the range expression which starts from START_ELEM, and ends
2699	at END_ELEM. The result are written to MBCSET and SBCSET.
2700	RANGE_ALLOC is the allocated size of mbcset->range_starts, and
2701	mbcset->range_ends, is a pointer argument since we may
2702	update it. /*
2703
2704	static reg_errcode_t
2705	# ifdef RE_ENABLE_I18N
2706	build_range_exp (const reg_syntax_t syntax,
2707	bitset_t sbcset,
2708	re_charset_t *mbcset,
2709	Idx *range_alloc,
2710	const bracket_elem_t *start_elem,
2711	const bracket_elem_t *end_elem)
2712	# else /* not RE_ENABLE_I18N */
2713	build_range_exp (const reg_syntax_t syntax,
2714	bitset_t sbcset,
2715	const bracket_elem_t *start_elem,
2716	const bracket_elem_t *end_elem)
2717	# endif /* not RE_ENABLE_I18N */
2718	{
2719	unsigned int start_ch, end_ch;
2720	/ Equivalence Classes and Character Classes can't be a range start/end. /
2721	if (BE (start_elem->type == EQUIV_CLASS \|\| start_elem->type == CHAR_CLASS
2722	\|\| end_elem->type == EQUIV_CLASS \|\| end_elem->type == CHAR_CLASS,
2723	`0`))
2724	return REG_ERANGE;
2725
2726	/ We can handle no multi character collating elements without libc*
2727	support. /*
2728	if (BE ((start_elem->type == COLL_SYM
2729	&& strlen ((char *) start_elem->opr.name) > `1`)
2730	\|\| (end_elem->type == COLL_SYM
2731	&& strlen ((char *) end_elem->opr.name) > `1`), `0`))
2732	return REG_ECOLLATE;
2733
2734	# ifdef RE_ENABLE_I18N
2735	{
2736	wchar_t wc;
2737	wint_t start_wc;
2738	wint_t end_wc;
2739
2740	start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
2741	: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[`0`]
2742	: `0`));
2743	end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
2744	: ((end_elem->type == COLL_SYM) ? end_elem->opr.name[`0`]
2745	: `0`));
2746	start_wc = ((start_elem->type == SB_CHAR \|\| start_elem->type == COLL_SYM)
2747	? parse_byte (start_ch, mbcset) : start_elem->opr.wch);
2748	end_wc = ((end_elem->type == SB_CHAR \|\| end_elem->type == COLL_SYM)
2749	? parse_byte (end_ch, mbcset) : end_elem->opr.wch);
2750	if (start_wc == WEOF \|\| end_wc == WEOF)
2751	return REG_ECOLLATE;
2752	else if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_wc > end_wc, `0`))
2753	return REG_ERANGE;
2754
2755	/ Got valid collation sequence values, add them as a new entry.*
2756	However, for !_LIBC we have no collation elements: if the
2757	character set is single byte, the single byte character set
2758	that we build below suffices. parse_bracket_exp passes
2759	no MBCSET if dfa->mb_cur_max == 1. /*
2760	if (mbcset)
2761	{
2762	/ Check the space of the arrays. /
2763	if (BE (*range_alloc == mbcset->nranges, `0`))
2764	{
2765	/ There is not enough space, need realloc. /
2766	wchar_t new_array_start, new_array_end;
2767	Idx new_nranges;
2768
2769	/ +1 in case of mbcset->nranges is 0. /
2770	new_nranges = `2` * mbcset->nranges + `1`;
2771	/ Use realloc since mbcset->range_starts and mbcset->range_ends*
2772	are NULL if range_alloc == 0. /
2773	new_array_start = re_realloc (mbcset->range_starts, wchar_t,
2774	new_nranges);
2775	new_array_end = re_realloc (mbcset->range_ends, wchar_t,
2776	new_nranges);
2777
2778	if (BE (new_array_start == NULL \|\| new_array_end == NULL, `0`))
2779	{
2780	re_free (new_array_start);
2781	re_free (new_array_end);
2782	return REG_ESPACE;
2783	}
2784
2785	mbcset->range_starts = new_array_start;
2786	mbcset->range_ends = new_array_end;
2787	*range_alloc = new_nranges;
2788	}
2789
2790	mbcset->range_starts[mbcset->nranges] = start_wc;
2791	mbcset->range_ends[mbcset->nranges++] = end_wc;
2792	}
2793
2794	/ Build the table for single byte characters. /
2795	for (wc = `0`; wc < SBC_MAX; ++wc)
2796	{
2797	if (start_wc <= wc && wc <= end_wc)
2798	bitset_set (sbcset, wc);
2799	}
2800	}
2801	# else /* not RE_ENABLE_I18N */
2802	{
2803	unsigned int ch;
2804	start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
2805	: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[`0`]
2806	: `0`));
2807	end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
2808	: ((end_elem->type == COLL_SYM) ? end_elem->opr.name[`0`]
2809	: `0`));
2810	if (start_ch > end_ch)
2811	return REG_ERANGE;
2812	/ Build the table for single byte characters. /
2813	for (ch = `0`; ch < SBC_MAX; ++ch)
2814	if (start_ch <= ch && ch <= end_ch)
2815	bitset_set (sbcset, ch);
2816	}
2817	# endif /* not RE_ENABLE_I18N */
2818	return REG_NOERROR;
2819	}
2820	#endif /* not _LIBC */
2821
2822	#ifndef _LIBC
2823	/ Helper function for parse_bracket_exp only used in case of NOT _LIBC..*
2824	Build the collating element which is represented by NAME.
2825	The result are written to MBCSET and SBCSET.
2826	COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
2827	pointer argument since we may update it. /*
2828
2829	static reg_errcode_t
2830	# ifdef RE_ENABLE_I18N
2831	build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
2832	Idx coll_sym_alloc, const* unsigned char *name)
2833	# else /* not RE_ENABLE_I18N */
2834	build_collating_symbol (bitset_t sbcset, const unsigned char *name)
2835	# endif /* not RE_ENABLE_I18N */
2836	{
2837	size_t name_len = strlen ((const char *) name);
2838	if (BE (name_len != `1`, `0`))
2839	return REG_ECOLLATE;
2840	else
2841	{
2842	bitset_set (sbcset, name[`0`]);
2843	return REG_NOERROR;
2844	}
2845	}
2846	#endif /* not _LIBC */
2847
2848	/ This function parse bracket expression like "[abc]", "[a-c]",*
2849	"[[.a-a.]]" etc. /*
2850
2851	static bin_tree_t *
2852	parse_bracket_exp (re_string_t regexp, re_dfa_t dfa, re_token_t *token,
2853	reg_syntax_t syntax, reg_errcode_t *err)
2854	{
2855	#ifdef _LIBC
2856	const unsigned char *collseqmb;
2857	const char *collseqwc;
2858	uint32_t nrules;
2859	int32_t table_size;
2860	const int32_t *symb_table;
2861	const unsigned char *extra;
2862
2863	/ Local function for parse_bracket_exp used in _LIBC environment.*
2864	Seek the collating symbol entry corresponding to NAME.
2865	Return the index of the symbol in the SYMB_TABLE,
2866	or -1 if not found. /*
2867
2868	auto inline int32_t
2869	__attribute__ ((always_inline))
2870	seek_collating_symbol_entry (const unsigned char *name, size_t name_len)
2871	{
2872	int32_t elem;
2873
2874	for (elem = `0`; elem < table_size; elem++)
2875	if (symb_table[`2` * elem] != `0`)
2876	{
2877	int32_t idx = symb_table[`2` * elem + `1`];
2878	/ Skip the name of collating element name. /
2879	idx += `1` + extra[idx];
2880	if (/ Compare the length of the name. /
2881	name_len == extra[idx]
2882	/ Compare the name. /
2883	&& memcmp (name, &extra[idx + `1`], name_len) == `0`)
2884	/ Yep, this is the entry. /
2885	return elem;
2886	}
2887	return -`1`;
2888	}
2889
2890	/ Local function for parse_bracket_exp used in _LIBC environment.*
2891	Look up the collation sequence value of BR_ELEM.
2892	Return the value if succeeded, UINT_MAX otherwise. /*
2893
2894	auto inline unsigned int
2895	__attribute__ ((always_inline))
2896	lookup_collation_sequence_value (bracket_elem_t *br_elem)
2897	{
2898	if (br_elem->type == SB_CHAR)
2899	{
2900	/*
2901	if (MB_CUR_MAX == 1)
2902	*/
2903	if (nrules == `0`)
2904	return collseqmb[br_elem->opr.ch];
2905	else
2906	{
2907	wint_t wc = __btowc (br_elem->opr.ch);
2908	return __collseq_table_lookup (collseqwc, wc);
2909	}
2910	}
2911	else if (br_elem->type == MB_CHAR)
2912	{
2913	if (nrules != `0`)
2914	return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
2915	}
2916	else if (br_elem->type == COLL_SYM)
2917	{
2918	size_t sym_name_len = strlen ((char *) br_elem->opr.name);
2919	if (nrules != `0`)
2920	{
2921	int32_t elem, idx;
2922	elem = seek_collating_symbol_entry (br_elem->opr.name,
2923	sym_name_len);
2924	if (elem != -`1`)
2925	{
2926	/ We found the entry. /
2927	idx = symb_table[`2` * elem + `1`];
2928	/ Skip the name of collating element name. /
2929	idx += `1` + extra[idx];
2930	/ Skip the byte sequence of the collating element. /
2931	idx += `1` + extra[idx];
2932	/ Adjust for the alignment. /
2933	idx = (idx + `3`) & ~`3`;
2934	/ Skip the multibyte collation sequence value. /
2935	idx += sizeof (unsigned int);
2936	/ Skip the wide char sequence of the collating element. /
2937	idx += sizeof (unsigned int) *
2938	(`1` + (unsigned* int *) (extra + idx));
2939	/ Return the collation sequence value. /
2940	return (unsigned* int *) (extra + idx);
2941	}
2942	else if (sym_name_len == `1`)
2943	{
2944	/ No valid character. Match it as a single byte*
2945	character. /*
2946	return collseqmb[br_elem->opr.name[`0`]];
2947	}
2948	}
2949	else if (sym_name_len == `1`)
2950	return collseqmb[br_elem->opr.name[`0`]];
2951	}
2952	return UINT_MAX;
2953	}
2954
2955	/ Local function for parse_bracket_exp used in _LIBC environment.*
2956	Build the range expression which starts from START_ELEM, and ends
2957	at END_ELEM. The result are written to MBCSET and SBCSET.
2958	RANGE_ALLOC is the allocated size of mbcset->range_starts, and
2959	mbcset->range_ends, is a pointer argument since we may
2960	update it. /*
2961
2962	auto inline reg_errcode_t
2963	__attribute__ ((always_inline))
2964	build_range_exp (bitset_t sbcset, re_charset_t mbcset, int* *range_alloc,
2965	bracket_elem_t start_elem, bracket_elem_t end_elem)
2966	{
2967	unsigned int ch;
2968	uint32_t start_collseq;
2969	uint32_t end_collseq;
2970
2971	/ Equivalence Classes and Character Classes can't be a range*
2972	start/end. /*
2973	if (BE (start_elem->type == EQUIV_CLASS \|\| start_elem->type == CHAR_CLASS
2974	\|\| end_elem->type == EQUIV_CLASS \|\| end_elem->type == CHAR_CLASS,
2975	`0`))
2976	return REG_ERANGE;
2977
2978	/ FIXME: Implement rational ranges here, too. /
2979	start_collseq = lookup_collation_sequence_value (start_elem);
2980	end_collseq = lookup_collation_sequence_value (end_elem);
2981	/ Check start/end collation sequence values. /
2982	if (BE (start_collseq == UINT_MAX \|\| end_collseq == UINT_MAX, `0`))
2983	return REG_ECOLLATE;
2984	if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, `0`))
2985	return REG_ERANGE;
2986
2987	/ Got valid collation sequence values, add them as a new entry.*
2988	However, if we have no collation elements, and the character set
2989	is single byte, the single byte character set that we
2990	build below suffices. /*
2991	if (nrules > `0` \|\| dfa->mb_cur_max > `1`)
2992	{
2993	/ Check the space of the arrays. /
2994	if (BE (*range_alloc == mbcset->nranges, `0`))
2995	{
2996	/ There is not enough space, need realloc. /
2997	uint32_t *new_array_start;
2998	uint32_t *new_array_end;
2999	Idx new_nranges;
3000
3001	/ +1 in case of mbcset->nranges is 0. /
3002	new_nranges = `2` * mbcset->nranges + `1`;
3003	new_array_start = re_realloc (mbcset->range_starts, uint32_t,
3004	new_nranges);
3005	new_array_end = re_realloc (mbcset->range_ends, uint32_t,
3006	new_nranges);
3007
3008	if (BE (new_array_start == NULL \|\| new_array_end == NULL, `0`))
3009	return REG_ESPACE;
3010
3011	mbcset->range_starts = new_array_start;
3012	mbcset->range_ends = new_array_end;
3013	*range_alloc = new_nranges;
3014	}
3015
3016	mbcset->range_starts[mbcset->nranges] = start_collseq;
3017	mbcset->range_ends[mbcset->nranges++] = end_collseq;
3018	}
3019
3020	/ Build the table for single byte characters. /
3021	for (ch = `0`; ch < SBC_MAX; ch++)
3022	{
3023	uint32_t ch_collseq;
3024	/*
3025	if (MB_CUR_MAX == 1)
3026	*/
3027	if (nrules == `0`)
3028	ch_collseq = collseqmb[ch];
3029	else
3030	ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
3031	if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
3032	bitset_set (sbcset, ch);
3033	}
3034	return REG_NOERROR;
3035	}
3036
3037	/ Local function for parse_bracket_exp used in _LIBC environment.*
3038	Build the collating element which is represented by NAME.
3039	The result are written to MBCSET and SBCSET.
3040	COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
3041	pointer argument since we may update it. /*
3042
3043	auto inline reg_errcode_t
3044	__attribute__ ((always_inline))
3045	build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
3046	Idx coll_sym_alloc, const* unsigned char *name)
3047	{
3048	int32_t elem, idx;
3049	size_t name_len = strlen ((const char *) name);
3050	if (nrules != `0`)
3051	{
3052	elem = seek_collating_symbol_entry (name, name_len);
3053	if (elem != -`1`)
3054	{
3055	/ We found the entry. /
3056	idx = symb_table[`2` * elem + `1`];
3057	/ Skip the name of collating element name. /
3058	idx += `1` + extra[idx];
3059	}
3060	else if (name_len == `1`)
3061	{
3062	/ No valid character, treat it as a normal*
3063	character. /*
3064	bitset_set (sbcset, name[`0`]);
3065	return REG_NOERROR;
3066	}
3067	else
3068	return REG_ECOLLATE;
3069
3070	/ Got valid collation sequence, add it as a new entry. /
3071	/ Check the space of the arrays. /
3072	if (BE (*coll_sym_alloc == mbcset->ncoll_syms, `0`))
3073	{
3074	/ Not enough, realloc it. /
3075	/ +1 in case of mbcset->ncoll_syms is 0. /
3076	Idx new_coll_sym_alloc = `2` * mbcset->ncoll_syms + `1`;
3077	/ Use realloc since mbcset->coll_syms is NULL*
3078	if alloc == 0. /
3079	int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
3080	new_coll_sym_alloc);
3081	if (BE (new_coll_syms == NULL, `0`))
3082	return REG_ESPACE;
3083	mbcset->coll_syms = new_coll_syms;
3084	*coll_sym_alloc = new_coll_sym_alloc;
3085	}
3086	mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
3087	return REG_NOERROR;
3088	}
3089	else
3090	{
3091	if (BE (name_len != `1`, `0`))
3092	return REG_ECOLLATE;
3093	else
3094	{
3095	bitset_set (sbcset, name[`0`]);
3096	return REG_NOERROR;
3097	}
3098	}
3099	}
3100	#endif
3101
3102	re_token_t br_token;
3103	re_bitset_ptr_t sbcset;
3104	#ifdef RE_ENABLE_I18N
3105	re_charset_t *mbcset;
3106	Idx coll_sym_alloc = `0`, range_alloc = `0`, mbchar_alloc = `0`;
3107	Idx equiv_class_alloc = `0`, char_class_alloc = `0`;
3108	#endif /* not RE_ENABLE_I18N */
3109	bool non_match = false;
3110	bin_tree_t *work_tree;
3111	int token_len;
3112	bool first_round = true;
3113	#ifdef _LIBC
3114	collseqmb = (const unsigned char *)
3115	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3116	nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3117	if (nrules)
3118	{
3119	/*
3120	if (MB_CUR_MAX > 1)
3121	*/
3122	collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3123	table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
3124	symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
3125	_NL_COLLATE_SYMB_TABLEMB);
3126	extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3127	_NL_COLLATE_SYMB_EXTRAMB);
3128	}
3129	#endif
3130	sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), `1`);
3131	#ifdef RE_ENABLE_I18N
3132	mbcset = (re_charset_t ) calloc (sizeof* (re_charset_t), `1`);
3133	#endif /* RE_ENABLE_I18N */
3134	#ifdef RE_ENABLE_I18N
3135	if (BE (sbcset == NULL \|\| mbcset == NULL, `0`))
3136	#else
3137	if (BE (sbcset == NULL, `0`))
3138	#endif /* RE_ENABLE_I18N */
3139	{
3140	re_free (sbcset);
3141	#ifdef RE_ENABLE_I18N
3142	re_free (mbcset);
3143	#endif
3144	*err = REG_ESPACE;
3145	return NULL;
3146	}
3147
3148	token_len = peek_token_bracket (token, regexp, syntax);
3149	if (BE (token->type == END_OF_RE, `0`))
3150	{
3151	*err = REG_BADPAT;
3152	goto parse_bracket_exp_free_return;
3153	}
3154	if (token->type == OP_NON_MATCH_LIST)
3155	{
3156	#ifdef RE_ENABLE_I18N
3157	mbcset->non_match = `1`;
3158	#endif /* not RE_ENABLE_I18N */
3159	non_match = true;
3160	if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
3161	bitset_set (sbcset, `'\n'`);
3162	re_string_skip_bytes (regexp, token_len); / Skip a token. /
3163	token_len = peek_token_bracket (token, regexp, syntax);
3164	if (BE (token->type == END_OF_RE, `0`))
3165	{
3166	*err = REG_BADPAT;
3167	goto parse_bracket_exp_free_return;
3168	}
3169	}
3170
3171	/ We treat the first ']' as a normal character. /
3172	if (token->type == OP_CLOSE_BRACKET)
3173	token->type = CHARACTER;
3174
3175	while (`1`)
3176	{
3177	bracket_elem_t start_elem, end_elem;
3178	unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE];
3179	unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE];
3180	reg_errcode_t ret;
3181	int token_len2 = `0`;
3182	bool is_range_exp = false;
3183	re_token_t token2;
3184
3185	start_elem.opr.name = start_name_buf;
3186	start_elem.type = COLL_SYM;
3187	ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
3188	syntax, first_round);
3189	if (BE (ret != REG_NOERROR, `0`))
3190	{
3191	*err = ret;
3192	goto parse_bracket_exp_free_return;
3193	}
3194	first_round = false;
3195
3196	/ Get information about the next token. We need it in any case. /
3197	token_len = peek_token_bracket (token, regexp, syntax);
3198
3199	/ Do not check for ranges if we know they are not allowed. /
3200	if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS)
3201	{
3202	if (BE (token->type == END_OF_RE, `0`))
3203	{
3204	*err = REG_EBRACK;
3205	goto parse_bracket_exp_free_return;
3206	}
3207	if (token->type == OP_CHARSET_RANGE)
3208	{
3209	re_string_skip_bytes (regexp, token_len); / Skip '-'. /
3210	token_len2 = peek_token_bracket (&token2, regexp, syntax);
3211	if (BE (token2.type == END_OF_RE, `0`))
3212	{
3213	*err = REG_EBRACK;
3214	goto parse_bracket_exp_free_return;
3215	}
3216	if (token2.type == OP_CLOSE_BRACKET)
3217	{
3218	/ We treat the last '-' as a normal character. /
3219	re_string_skip_bytes (regexp, -token_len);
3220	token->type = CHARACTER;
3221	}
3222	else
3223	is_range_exp = true;
3224	}
3225	}
3226
3227	if (is_range_exp == true)
3228	{
3229	end_elem.opr.name = end_name_buf;
3230	end_elem.type = COLL_SYM;
3231	ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
3232	dfa, syntax, true);
3233	if (BE (ret != REG_NOERROR, `0`))
3234	{
3235	*err = ret;
3236	goto parse_bracket_exp_free_return;
3237	}
3238
3239	token_len = peek_token_bracket (token, regexp, syntax);
3240
3241	#ifdef _LIBC
3242	*err = build_range_exp (sbcset, mbcset, &range_alloc,
3243	&start_elem, &end_elem);
3244	#else
3245	# ifdef RE_ENABLE_I18N
3246	*err = build_range_exp (syntax, sbcset,
3247	dfa->mb_cur_max > `1` ? mbcset : NULL,
3248	&range_alloc, &start_elem, &end_elem);
3249	# else
3250	*err = build_range_exp (syntax, sbcset, &start_elem, &end_elem);
3251	# endif
3252	#endif /* RE_ENABLE_I18N */
3253	if (BE (*err != REG_NOERROR, `0`))
3254	goto parse_bracket_exp_free_return;
3255	}
3256	else
3257	{
3258	switch (start_elem.type)
3259	{
3260	case SB_CHAR:
3261	bitset_set (sbcset, start_elem.opr.ch);
3262	break;
3263	#ifdef RE_ENABLE_I18N
3264	case MB_CHAR:
3265	/ Check whether the array has enough space. /
3266	if (BE (mbchar_alloc == mbcset->nmbchars, `0`))
3267	{
3268	wchar_t *new_mbchars;
3269	/ Not enough, realloc it. /
3270	/ +1 in case of mbcset->nmbchars is 0. /
3271	mbchar_alloc = `2` * mbcset->nmbchars + `1`;
3272	/ Use realloc since array is NULL if alloc == 0. /*
3273	new_mbchars = re_realloc (mbcset->mbchars, wchar_t,
3274	mbchar_alloc);
3275	if (BE (new_mbchars == NULL, `0`))
3276	goto parse_bracket_exp_espace;
3277	mbcset->mbchars = new_mbchars;
3278	}
3279	mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
3280	break;
3281	#endif /* RE_ENABLE_I18N */
3282	case EQUIV_CLASS:
3283	*err = build_equiv_class (sbcset,
3284	#ifdef RE_ENABLE_I18N
3285	mbcset, &equiv_class_alloc,
3286	#endif /* RE_ENABLE_I18N */
3287	start_elem.opr.name);
3288	if (BE (*err != REG_NOERROR, `0`))
3289	goto parse_bracket_exp_free_return;
3290	break;
3291	case COLL_SYM:
3292	*err = build_collating_symbol (sbcset,
3293	#ifdef RE_ENABLE_I18N
3294	mbcset, &coll_sym_alloc,
3295	#endif /* RE_ENABLE_I18N */
3296	start_elem.opr.name);
3297	if (BE (*err != REG_NOERROR, `0`))
3298	goto parse_bracket_exp_free_return;
3299	break;
3300	case CHAR_CLASS:
3301	*err = build_charclass (regexp->trans, sbcset,
3302	#ifdef RE_ENABLE_I18N
3303	mbcset, &char_class_alloc,
3304	#endif /* RE_ENABLE_I18N */
3305	(const char *) start_elem.opr.name,
3306	syntax);
3307	if (BE (*err != REG_NOERROR, `0`))
3308	goto parse_bracket_exp_free_return;
3309	break;
3310	default:
3311	assert (`0`);
3312	break;
3313	}
3314	}
3315	if (BE (token->type == END_OF_RE, `0`))
3316	{
3317	*err = REG_EBRACK;
3318	goto parse_bracket_exp_free_return;
3319	}
3320	if (token->type == OP_CLOSE_BRACKET)
3321	break;
3322	}
3323
3324	re_string_skip_bytes (regexp, token_len); / Skip a token. /
3325
3326	/ If it is non-matching list. /
3327	if (non_match)
3328	bitset_not (sbcset);
3329
3330	#ifdef RE_ENABLE_I18N
3331	/ Ensure only single byte characters are set. /
3332	if (dfa->mb_cur_max > `1`)
3333	bitset_mask (sbcset, dfa->sb_char);
3334
3335	if (mbcset->nmbchars \|\| mbcset->ncoll_syms \|\| mbcset->nequiv_classes
3336	\|\| mbcset->nranges \|\| (dfa->mb_cur_max > `1` && (mbcset->nchar_classes
3337	\|\| mbcset->non_match)))
3338	{
3339	bin_tree_t *mbc_tree;
3340	int sbc_idx;
3341	/ Build a tree for complex bracket. /
3342	dfa->has_mb_node = `1`;
3343	br_token.type = COMPLEX_BRACKET;
3344	br_token.opr.mbcset = mbcset;
3345	mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3346	if (BE (mbc_tree == NULL, `0`))
3347	goto parse_bracket_exp_espace;
3348	for (sbc_idx = `0`; sbc_idx < BITSET_WORDS; ++sbc_idx)
3349	if (sbcset[sbc_idx])
3350	break;
3351	/ If there are no bits set in sbcset, there is no point*
3352	of having both SIMPLE_BRACKET and COMPLEX_BRACKET. /*
3353	if (sbc_idx < BITSET_WORDS)
3354	{
3355	/ Build a tree for simple bracket. /
3356	br_token.type = SIMPLE_BRACKET;
3357	br_token.opr.sbcset = sbcset;
3358	work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3359	if (BE (work_tree == NULL, `0`))
3360	goto parse_bracket_exp_espace;
3361
3362	/ Then join them by ALT node. /
3363	work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
3364	if (BE (work_tree == NULL, `0`))
3365	goto parse_bracket_exp_espace;
3366	}
3367	else
3368	{
3369	re_free (sbcset);
3370	work_tree = mbc_tree;
3371	}
3372	}
3373	else
3374	#endif /* not RE_ENABLE_I18N */
3375	{
3376	#ifdef RE_ENABLE_I18N
3377	free_charset (mbcset);
3378	#endif
3379	/ Build a tree for simple bracket. /
3380	br_token.type = SIMPLE_BRACKET;
3381	br_token.opr.sbcset = sbcset;
3382	work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3383	if (BE (work_tree == NULL, `0`))
3384	goto parse_bracket_exp_espace;
3385	}
3386	return work_tree;
3387
3388	parse_bracket_exp_espace:
3389	*err = REG_ESPACE;
3390	parse_bracket_exp_free_return:
3391	re_free (sbcset);
3392	#ifdef RE_ENABLE_I18N
3393	free_charset (mbcset);
3394	#endif /* RE_ENABLE_I18N */
3395	return NULL;
3396	}
3397
3398	/ Parse an element in the bracket expression. /
3399
3400	static reg_errcode_t
3401	parse_bracket_element (bracket_elem_t elem, re_string_t regexp,
3402	re_token_t token, int* token_len, re_dfa_t *dfa,
3403	reg_syntax_t syntax, bool accept_hyphen)
3404	{
3405	#ifdef RE_ENABLE_I18N
3406	int cur_char_size;
3407	cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
3408	if (cur_char_size > `1`)
3409	{
3410	elem->type = MB_CHAR;
3411	elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp));
3412	re_string_skip_bytes (regexp, cur_char_size);
3413	return REG_NOERROR;
3414	}
3415	#endif /* RE_ENABLE_I18N */
3416	re_string_skip_bytes (regexp, token_len); / Skip a token. /
3417	if (token->type == OP_OPEN_COLL_ELEM \|\| token->type == OP_OPEN_CHAR_CLASS
3418	\|\| token->type == OP_OPEN_EQUIV_CLASS)
3419	return parse_bracket_symbol (elem, regexp, token);
3420	if (BE (token->type == OP_CHARSET_RANGE, `0`) && !accept_hyphen)
3421	{
3422	/ A '-' must only appear as anything but a range indicator before*
3423	the closing bracket. Everything else is an error. /*
3424	re_token_t token2;
3425	(void) peek_token_bracket (&token2, regexp, syntax);
3426	if (token2.type != OP_CLOSE_BRACKET)
3427	/ The actual error value is not standardized since this whole*
3428	case is undefined. But ERANGE makes good sense. /*
3429	return REG_ERANGE;
3430	}
3431	elem->type = SB_CHAR;
3432	elem->opr.ch = token->opr.c;
3433	return REG_NOERROR;
3434	}
3435
3436	/ Parse a bracket symbol in the bracket expression. Bracket symbols are*
3437	such as [:<character_class>:], [.<collating_element>.], and
3438	[=<equivalent_class>=]. /*
3439
3440	static reg_errcode_t
3441	parse_bracket_symbol (bracket_elem_t elem, re_string_t regexp,
3442	re_token_t *token)
3443	{
3444	unsigned char ch, delim = token->opr.c;
3445	int i = `0`;
3446	if (re_string_eoi(regexp))
3447	return REG_EBRACK;
3448	for (;; ++i)
3449	{
3450	if (i >= BRACKET_NAME_BUF_SIZE)
3451	return REG_EBRACK;
3452	if (token->type == OP_OPEN_CHAR_CLASS)
3453	ch = re_string_fetch_byte_case (regexp);
3454	else
3455	ch = re_string_fetch_byte (regexp);
3456	if (re_string_eoi(regexp))
3457	return REG_EBRACK;
3458	if (ch == delim && re_string_peek_byte (regexp, `0`) == `']'`)
3459	break;
3460	elem->opr.name[i] = ch;
3461	}
3462	re_string_skip_bytes (regexp, `1`);
3463	elem->opr.name[i] = `'\0'`;
3464	switch (token->type)
3465	{
3466	case OP_OPEN_COLL_ELEM:
3467	elem->type = COLL_SYM;
3468	break;
3469	case OP_OPEN_EQUIV_CLASS:
3470	elem->type = EQUIV_CLASS;
3471	break;
3472	case OP_OPEN_CHAR_CLASS:
3473	elem->type = CHAR_CLASS;
3474	break;
3475	default:
3476	break;
3477	}
3478	return REG_NOERROR;
3479	}
3480
3481	/ Helper function for parse_bracket_exp.*
3482	Build the equivalence class which is represented by NAME.
3483	The result are written to MBCSET and SBCSET.
3484	EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes,
3485	is a pointer argument since we may update it. /*
3486
3487	static reg_errcode_t
3488	#ifdef RE_ENABLE_I18N
3489	build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,
3490	Idx equiv_class_alloc, const* unsigned char *name)
3491	#else /* not RE_ENABLE_I18N */
3492	build_equiv_class (bitset_t sbcset, const unsigned char *name)
3493	#endif /* not RE_ENABLE_I18N */
3494	{
3495	#ifdef _LIBC
3496	uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3497	if (nrules != `0`)
3498	{
3499	const int32_t table, indirect;
3500	const unsigned char weights, extra, *cp;
3501	unsigned char char_buf[`2`];
3502	int32_t idx1, idx2;
3503	unsigned int ch;
3504	size_t len;
3505	/ Calculate the index for equivalence class. /
3506	cp = name;
3507	table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3508	weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3509	_NL_COLLATE_WEIGHTMB);
3510	extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3511	_NL_COLLATE_EXTRAMB);
3512	indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
3513	_NL_COLLATE_INDIRECTMB);
3514	idx1 = findidx (table, indirect, extra, &cp, -`1`);
3515	if (BE (idx1 == `0` \|\| *cp != `'\0'`, `0`))
3516	/ This isn't a valid character. /
3517	return REG_ECOLLATE;
3518
3519	/ Build single byte matching table for this equivalence class. /
3520	len = weights[idx1 & `0xffffff`];
3521	for (ch = `0`; ch < SBC_MAX; ++ch)
3522	{
3523	char_buf[`0`] = ch;
3524	cp = char_buf;
3525	idx2 = findidx (table, indirect, extra, &cp, `1`);
3526	/*
3527	idx2 = table[ch];
3528	*/
3529	if (idx2 == `0`)
3530	/ This isn't a valid character. /
3531	continue;
3532	/ Compare only if the length matches and the collation rule*
3533	index is the same. /*
3534	if (len == weights[idx2 & `0xffffff`] && (idx1 >> `24`) == (idx2 >> `24`)
3535	&& memcmp (weights + (idx1 & `0xffffff`) + `1`,
3536	weights + (idx2 & `0xffffff`) + `1`, len) == `0`)
3537	bitset_set (sbcset, ch);
3538	}
3539	/ Check whether the array has enough space. /
3540	if (BE (*equiv_class_alloc == mbcset->nequiv_classes, `0`))
3541	{
3542	/ Not enough, realloc it. /
3543	/ +1 in case of mbcset->nequiv_classes is 0. /
3544	Idx new_equiv_class_alloc = `2` * mbcset->nequiv_classes + `1`;
3545	/ Use realloc since the array is NULL if alloc == 0. /*
3546	int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes,
3547	int32_t,
3548	new_equiv_class_alloc);
3549	if (BE (new_equiv_classes == NULL, `0`))
3550	return REG_ESPACE;
3551	mbcset->equiv_classes = new_equiv_classes;
3552	*equiv_class_alloc = new_equiv_class_alloc;
3553	}
3554	mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
3555	}
3556	else
3557	#endif /* _LIBC */
3558	{
3559	if (BE (strlen ((const char *) name) != `1`, `0`))
3560	return REG_ECOLLATE;
3561	bitset_set (sbcset, *name);
3562	}
3563	return REG_NOERROR;
3564	}
3565
3566	/ Helper function for parse_bracket_exp.*
3567	Build the character class which is represented by NAME.
3568	The result are written to MBCSET and SBCSET.
3569	CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes,
3570	is a pointer argument since we may update it. /*
3571
3572	static reg_errcode_t
3573	#ifdef RE_ENABLE_I18N
3574	build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
3575	re_charset_t mbcset, Idx char_class_alloc,
3576	const char *class_name, reg_syntax_t syntax)
3577	#else /* not RE_ENABLE_I18N */
3578	build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
3579	const char *class_name, reg_syntax_t syntax)
3580	#endif /* not RE_ENABLE_I18N */
3581	{
3582	int i;
3583	const char *name = class_name;
3584
3585	/ In case of REG_ICASE "upper" and "lower" match the both of*
3586	upper and lower cases. /*
3587	if ((syntax & RE_ICASE)
3588	&& (strcmp (name, "upper") == `0` \|\| strcmp (name, "lower") == `0`))
3589	name = "alpha";
3590
3591	#ifdef RE_ENABLE_I18N
3592	/ Check the space of the arrays. /
3593	if (BE (*char_class_alloc == mbcset->nchar_classes, `0`))
3594	{
3595	/ Not enough, realloc it. /
3596	/ +1 in case of mbcset->nchar_classes is 0. /
3597	Idx new_char_class_alloc = `2` * mbcset->nchar_classes + `1`;
3598	/ Use realloc since array is NULL if alloc == 0. /*
3599	wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t,
3600	new_char_class_alloc);
3601	if (BE (new_char_classes == NULL, `0`))
3602	return REG_ESPACE;
3603	mbcset->char_classes = new_char_classes;
3604	*char_class_alloc = new_char_class_alloc;
3605	}
3606	mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
3607	#endif /* RE_ENABLE_I18N */
3608
3609	#define BUILD_CHARCLASS_LOOP(ctype_func) \
3610	do { \
3611	if (BE (trans != NULL, 0)) \
3612	{ \
3613	for (i = 0; i < SBC_MAX; ++i) \
3614	if (ctype_func (i)) \
3615	bitset_set (sbcset, trans[i]); \
3616	} \
3617	else \
3618	{ \
3619	for (i = 0; i < SBC_MAX; ++i) \
3620	if (ctype_func (i)) \
3621	bitset_set (sbcset, i); \
3622	} \
3623	} while (0)
3624
3625	if (strcmp (name, "alnum") == `0`)
3626	BUILD_CHARCLASS_LOOP (isalnum);
3627	else if (strcmp (name, "cntrl") == `0`)
3628	BUILD_CHARCLASS_LOOP (iscntrl);
3629	else if (strcmp (name, "lower") == `0`)
3630	BUILD_CHARCLASS_LOOP (islower);
3631	else if (strcmp (name, "space") == `0`)
3632	BUILD_CHARCLASS_LOOP (isspace);
3633	else if (strcmp (name, "alpha") == `0`)
3634	BUILD_CHARCLASS_LOOP (isalpha);
3635	else if (strcmp (name, "digit") == `0`)
3636	BUILD_CHARCLASS_LOOP (isdigit);
3637	else if (strcmp (name, "print") == `0`)
3638	BUILD_CHARCLASS_LOOP (isprint);
3639	else if (strcmp (name, "upper") == `0`)
3640	BUILD_CHARCLASS_LOOP (isupper);
3641	else if (strcmp (name, "blank") == `0`)
3642	BUILD_CHARCLASS_LOOP (isblank);
3643	else if (strcmp (name, "graph") == `0`)
3644	BUILD_CHARCLASS_LOOP (isgraph);
3645	else if (strcmp (name, "punct") == `0`)
3646	BUILD_CHARCLASS_LOOP (ispunct);
3647	else if (strcmp (name, "xdigit") == `0`)
3648	BUILD_CHARCLASS_LOOP (isxdigit);
3649	else
3650	return REG_ECTYPE;
3651
3652	return REG_NOERROR;
3653	}
3654
3655	static bin_tree_t *
3656	build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
3657	const char *class_name,
3658	const char *extra, bool non_match,
3659	reg_errcode_t *err)
3660	{
3661	re_bitset_ptr_t sbcset;
3662	#ifdef RE_ENABLE_I18N
3663	re_charset_t *mbcset;
3664	Idx alloc = `0`;
3665	#endif /* not RE_ENABLE_I18N */
3666	reg_errcode_t ret;
3667	re_token_t br_token;
3668	bin_tree_t *tree;
3669
3670	sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), `1`);
3671	if (BE (sbcset == NULL, `0`))
3672	{
3673	*err = REG_ESPACE;
3674	return NULL;
3675	}
3676	#ifdef RE_ENABLE_I18N
3677	mbcset = (re_charset_t ) calloc (sizeof* (re_charset_t), `1`);
3678	if (BE (mbcset == NULL, `0`))
3679	{
3680	re_free (sbcset);
3681	*err = REG_ESPACE;
3682	return NULL;
3683	}
3684	mbcset->non_match = non_match;
3685	#endif /* RE_ENABLE_I18N */
3686
3687	/ We don't care the syntax in this case. /
3688	ret = build_charclass (trans, sbcset,
3689	#ifdef RE_ENABLE_I18N
3690	mbcset, &alloc,
3691	#endif /* RE_ENABLE_I18N */
3692	class_name, `0`);
3693
3694	if (BE (ret != REG_NOERROR, `0`))
3695	{
3696	re_free (sbcset);
3697	#ifdef RE_ENABLE_I18N
3698	free_charset (mbcset);
3699	#endif /* RE_ENABLE_I18N */
3700	*err = ret;
3701	return NULL;
3702	}
3703	/ \w match '_' also. /
3704	for (; *extra; extra++)
3705	bitset_set (sbcset, *extra);
3706
3707	/ If it is non-matching list. /
3708	if (non_match)
3709	bitset_not (sbcset);
3710
3711	#ifdef RE_ENABLE_I18N
3712	/ Ensure only single byte characters are set. /
3713	if (dfa->mb_cur_max > `1`)
3714	bitset_mask (sbcset, dfa->sb_char);
3715	#endif
3716
3717	/ Build a tree for simple bracket. /
3718	#if defined GCC_LINT \|\| defined lint
3719	memset (&br_token, `0`, sizeof br_token);
3720	#endif
3721	br_token.type = SIMPLE_BRACKET;
3722	br_token.opr.sbcset = sbcset;
3723	tree = create_token_tree (dfa, NULL, NULL, &br_token);
3724	if (BE (tree == NULL, `0`))
3725	goto build_word_op_espace;
3726
3727	#ifdef RE_ENABLE_I18N
3728	if (dfa->mb_cur_max > `1`)
3729	{
3730	bin_tree_t *mbc_tree;
3731	/ Build a tree for complex bracket. /
3732	br_token.type = COMPLEX_BRACKET;
3733	br_token.opr.mbcset = mbcset;
3734	dfa->has_mb_node = `1`;
3735	mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3736	if (BE (mbc_tree == NULL, `0`))
3737	goto build_word_op_espace;
3738	/ Then join them by ALT node. /
3739	tree = create_tree (dfa, tree, mbc_tree, OP_ALT);
3740	if (BE (mbc_tree != NULL, `1`))
3741	return tree;
3742	}
3743	else
3744	{
3745	free_charset (mbcset);
3746	return tree;
3747	}
3748	#else /* not RE_ENABLE_I18N */
3749	return tree;
3750	#endif /* not RE_ENABLE_I18N */
3751
3752	build_word_op_espace:
3753	re_free (sbcset);
3754	#ifdef RE_ENABLE_I18N
3755	free_charset (mbcset);
3756	#endif /* RE_ENABLE_I18N */
3757	*err = REG_ESPACE;
3758	return NULL;
3759	}
3760
3761	/ This is intended for the expressions like "a{1,3}".*
3762	Fetch a number from 'input', and return the number.
3763	Return -1 if the number field is empty like "{,1}".
3764	Return RE_DUP_MAX + 1 if the number field is too large.
3765	Return -2 if an error occurred. /*
3766
3767	static Idx
3768	fetch_number (re_string_t input, re_token_t token, reg_syntax_t syntax)
3769	{
3770	Idx num = -`1`;
3771	unsigned char c;
3772	while (`1`)
3773	{
3774	fetch_token (token, input, syntax);
3775	c = token->opr.c;
3776	if (BE (token->type == END_OF_RE, `0`))
3777	return -`2`;
3778	if (token->type == OP_CLOSE_DUP_NUM \|\| c == `','`)
3779	break;
3780	num = ((token->type != CHARACTER \|\| c < `'0'` \|\| `'9'` < c \|\| num == -`2`)
3781	? -`2`
3782	: num == -`1`
3783	? c - `'0'`
3784	: MIN (RE_DUP_MAX + `1`, num * `10` + c - `'0'`));
3785	}
3786	return num;
3787	}
3788
3789	#ifdef RE_ENABLE_I18N
3790	static void
3791	free_charset (re_charset_t *cset)
3792	{
3793	re_free (cset->mbchars);
3794	# ifdef _LIBC
3795	re_free (cset->coll_syms);
3796	re_free (cset->equiv_classes);
3797	re_free (cset->range_starts);
3798	re_free (cset->range_ends);
3799	# endif
3800	re_free (cset->char_classes);
3801	re_free (cset);
3802	}
3803	#endif /* RE_ENABLE_I18N */
3804
3805	/ Functions for binary tree operation. /
3806
3807	/ Create a tree node. /
3808
3809	static bin_tree_t *
3810	create_tree (re_dfa_t dfa, bin_tree_t left, bin_tree_t *right,
3811	re_token_type_t type)
3812	{
3813	re_token_t t;
3814	#if defined GCC_LINT \|\| defined lint
3815	memset (&t, `0`, sizeof t);
3816	#endif
3817	t.type = type;
3818	return create_token_tree (dfa, left, right, &t);
3819	}
3820
3821	static bin_tree_t *
3822	create_token_tree (re_dfa_t dfa, bin_tree_t left, bin_tree_t *right,
3823	const re_token_t *token)
3824	{
3825	bin_tree_t *tree;
3826	if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, `0`))
3827	{
3828	bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, `1`);
3829
3830	if (storage == NULL)
3831	return NULL;
3832	storage->next = dfa->str_tree_storage;
3833	dfa->str_tree_storage = storage;
3834	dfa->str_tree_storage_idx = `0`;
3835	}
3836	tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++];
3837
3838	tree->parent = NULL;
3839	tree->left = left;
3840	tree->right = right;
3841	tree->token = *token;
3842	tree->token.duplicated = `0`;
3843	tree->token.opt_subexp = `0`;
3844	tree->first = NULL;
3845	tree->next = NULL;
3846	tree->node_idx = -`1`;
3847
3848	if (left != NULL)
3849	left->parent = tree;
3850	if (right != NULL)
3851	right->parent = tree;
3852	return tree;
3853	}
3854
3855	/ Mark the tree SRC as an optional subexpression.*
3856	To be called from preorder or postorder. /*
3857
3858	static reg_errcode_t
3859	mark_opt_subexp (void extra, bin_tree_t node)
3860	{
3861	Idx idx = (uintptr_t) extra;
3862	if (node->token.type == SUBEXP && node->token.opr.idx == idx)
3863	node->token.opt_subexp = `1`;
3864
3865	return REG_NOERROR;
3866	}
3867
3868	/ Free the allocated memory inside NODE. /
3869
3870	static void
3871	free_token (re_token_t *node)
3872	{
3873	#ifdef RE_ENABLE_I18N
3874	if (node->type == COMPLEX_BRACKET && node->duplicated == `0`)
3875	free_charset (node->opr.mbcset);
3876	else
3877	#endif /* RE_ENABLE_I18N */
3878	if (node->type == SIMPLE_BRACKET && node->duplicated == `0`)
3879	re_free (node->opr.sbcset);
3880	}
3881
3882	/ Worker function for tree walking. Free the allocated memory inside NODE*
3883	and its children. /*
3884
3885	static reg_errcode_t
3886	free_tree (void extra, bin_tree_t node)
3887	{
3888	free_token (&node->token);
3889	return REG_NOERROR;
3890	}
3891
3892
3893	/ Duplicate the node SRC, and return new node. This is a preorder*
3894	visit similar to the one implemented by the generic visitor, but
3895	we need more infrastructure to maintain two parallel trees --- so,
3896	it's easier to duplicate. /*
3897
3898	static bin_tree_t *
3899	duplicate_tree (const bin_tree_t root, re_dfa_t dfa)
3900	{
3901	const bin_tree_t *node;
3902	bin_tree_t *dup_root;
3903	bin_tree_t *p_new = &dup_root, dup_node = root->parent;
3904
3905	for (node = root; ; )
3906	{
3907	/ Create a new tree and link it back to the current parent. /
3908	*p_new = create_token_tree (dfa, NULL, NULL, &node->token);
3909	if (*p_new == NULL)
3910	return NULL;
3911	(*p_new)->parent = dup_node;
3912	(*p_new)->token.duplicated = `1`;
3913	dup_node = *p_new;
3914
3915	/ Go to the left node, or up and to the right. /
3916	if (node->left)
3917	{
3918	node = node->left;
3919	p_new = &dup_node->left;
3920	}
3921	else
3922	{
3923	const bin_tree_t *prev = NULL;
3924	while (node->right == prev \|\| node->right == NULL)
3925	{
3926	prev = node;
3927	node = node->parent;
3928	dup_node = dup_node->parent;
3929	if (!node)
3930	return dup_root;
3931	}
3932	node = node->right;
3933	p_new = &dup_node->right;
3934	}
3935	}
3936	}
3937

Browse the source code of glibc_src_2.28/posix/regcomp.c