regcomp.c source code [glibc_src_2.27/posix/regcomp.c]

1	/ Extended regular expression matching and search library.*
2	Copyright (C) 2002-2018 Free Software Foundation, Inc.
3	This file is part of the GNU C Library.
4	Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6	The GNU C Library is free software; you can redistribute it and/or
7	modify it under the terms of the GNU Lesser General Public
8	License as published by the Free Software Foundation; either
9	version 2.1 of the License, or (at your option) any later version.
10
11	The GNU C Library is distributed in the hope that it will be useful,
12	but WITHOUT ANY WARRANTY; without even the implied warranty of
13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	Lesser General Public License for more details.
15
16	You should have received a copy of the GNU Lesser General Public
17	License along with the GNU C Library; if not, see
18	<http://www.gnu.org/licenses/>. /*
19
20	#include <stdint.h>
21
22	#ifdef _LIBC
23	# include <locale/weight.h>
24	#endif
25
26	static reg_errcode_t re_compile_internal (regex_t preg, const* char * pattern,
27	size_t length, reg_syntax_t syntax);
28	static void re_compile_fastmap_iter (regex_t *bufp,
29	const re_dfastate_t *init_state,
30	char *fastmap);
31	static reg_errcode_t init_dfa (re_dfa_t *dfa, size_t pat_len);
32	#ifdef RE_ENABLE_I18N
33	static void free_charset (re_charset_t *cset);
34	#endif /* RE_ENABLE_I18N */
35	static void free_workarea_compile (regex_t *preg);
36	static reg_errcode_t create_initial_state (re_dfa_t *dfa);
37	#ifdef RE_ENABLE_I18N
38	static void optimize_utf8 (re_dfa_t *dfa);
39	#endif
40	static reg_errcode_t analyze (regex_t *preg);
41	static reg_errcode_t preorder (bin_tree_t *root,
42	reg_errcode_t (fn (void , bin_tree_t )),
43	void *extra);
44	static reg_errcode_t postorder (bin_tree_t *root,
45	reg_errcode_t (fn (void , bin_tree_t )),
46	void *extra);
47	static reg_errcode_t optimize_subexps (void extra, bin_tree_t node);
48	static reg_errcode_t lower_subexps (void extra, bin_tree_t node);
49	static bin_tree_t lower_subexp (reg_errcode_t err, regex_t *preg,
50	bin_tree_t *node);
51	static reg_errcode_t calc_first (void extra, bin_tree_t node);
52	static reg_errcode_t calc_next (void extra, bin_tree_t node);
53	static reg_errcode_t link_nfa_nodes (void extra, bin_tree_t node);
54	static int duplicate_node (re_dfa_t dfa, int* org_idx, unsigned int constraint);
55	static int search_duplicated_node (const re_dfa_t dfa, int* org_node,
56	unsigned int constraint);
57	static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
58	static reg_errcode_t calc_eclosure_iter (re_node_set new_set, re_dfa_t dfa,
59	int node, int root);
60	static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
61	static int fetch_number (re_string_t input, re_token_t token,
62	reg_syntax_t syntax);
63	static int peek_token (re_token_t token, re_string_t input,
64	reg_syntax_t syntax);
65	static bin_tree_t parse (re_string_t regexp, regex_t *preg,
66	reg_syntax_t syntax, reg_errcode_t *err);
67	static bin_tree_t parse_reg_exp (re_string_t regexp, regex_t *preg,
68	re_token_t *token, reg_syntax_t syntax,
69	int nest, reg_errcode_t *err);
70	static bin_tree_t parse_branch (re_string_t regexp, regex_t *preg,
71	re_token_t *token, reg_syntax_t syntax,
72	int nest, reg_errcode_t *err);
73	static bin_tree_t parse_expression (re_string_t regexp, regex_t *preg,
74	re_token_t *token, reg_syntax_t syntax,
75	int nest, reg_errcode_t *err);
76	static bin_tree_t parse_sub_exp (re_string_t regexp, regex_t *preg,
77	re_token_t *token, reg_syntax_t syntax,
78	int nest, reg_errcode_t *err);
79	static bin_tree_t parse_dup_op (bin_tree_t dup_elem, re_string_t *regexp,
80	re_dfa_t dfa, re_token_t token,
81	reg_syntax_t syntax, reg_errcode_t *err);
82	static bin_tree_t parse_bracket_exp (re_string_t regexp, re_dfa_t *dfa,
83	re_token_t *token, reg_syntax_t syntax,
84	reg_errcode_t *err);
85	static reg_errcode_t parse_bracket_element (bracket_elem_t *elem,
86	re_string_t *regexp,
87	re_token_t token, int* token_len,
88	re_dfa_t *dfa,
89	reg_syntax_t syntax,
90	int accept_hyphen);
91	static reg_errcode_t parse_bracket_symbol (bracket_elem_t *elem,
92	re_string_t *regexp,
93	re_token_t *token);
94	#ifdef RE_ENABLE_I18N
95	static reg_errcode_t build_equiv_class (bitset_t sbcset,
96	re_charset_t *mbcset,
97	int *equiv_class_alloc,
98	const unsigned char *name);
99	static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
100	bitset_t sbcset,
101	re_charset_t *mbcset,
102	int *char_class_alloc,
103	const unsigned char *class_name,
104	reg_syntax_t syntax);
105	#else /* not RE_ENABLE_I18N */
106	static reg_errcode_t build_equiv_class (bitset_t sbcset,
107	const unsigned char *name);
108	static reg_errcode_t build_charclass (RE_TRANSLATE_TYPE trans,
109	bitset_t sbcset,
110	const unsigned char *class_name,
111	reg_syntax_t syntax);
112	#endif /* not RE_ENABLE_I18N */
113	static bin_tree_t build_charclass_op (re_dfa_t dfa,
114	RE_TRANSLATE_TYPE trans,
115	const unsigned char *class_name,
116	const unsigned char *extra,
117	int non_match, reg_errcode_t *err);
118	static bin_tree_t create_tree (re_dfa_t dfa,
119	bin_tree_t left, bin_tree_t right,
120	re_token_type_t type);
121	static bin_tree_t create_token_tree (re_dfa_t dfa,
122	bin_tree_t left, bin_tree_t right,
123	const re_token_t *token);
124	static bin_tree_t duplicate_tree (const* bin_tree_t src, re_dfa_t dfa);
125	static void free_token (re_token_t *node);
126	static reg_errcode_t free_tree (void extra, bin_tree_t node);
127	static reg_errcode_t mark_opt_subexp (void extra, bin_tree_t node);
128
129	/ This table gives an error message for each of the error codes listed*
130	in regex.h. Obviously the order here has to be same as there.
131	POSIX doesn't require that we do anything for REG_NOERROR,
132	but why not be nice? /*
133
134	const char __re_error_msgid[] attribute_hidden =
135	{
136	#define REG_NOERROR_IDX 0
137	gettext_noop ("Success") / REG_NOERROR /
138	"\0"
139	#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success")
140	gettext_noop ("No match") / REG_NOMATCH /
141	"\0"
142	#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match")
143	gettext_noop ("Invalid regular expression") / REG_BADPAT /
144	"\0"
145	#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression")
146	gettext_noop ("Invalid collation character") / REG_ECOLLATE /
147	"\0"
148	#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character")
149	gettext_noop ("Invalid character class name") / REG_ECTYPE /
150	"\0"
151	#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name")
152	gettext_noop ("Trailing backslash") / REG_EESCAPE /
153	"\0"
154	#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash")
155	gettext_noop ("Invalid back reference") / REG_ESUBREG /
156	"\0"
157	#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
158	gettext_noop ("Unmatched [ or [^") / REG_EBRACK /
159	"\0"
160	#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
161	gettext_noop ("Unmatched ( or \\(") / REG_EPAREN /
162	"\0"
163	#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
164	gettext_noop ("Unmatched \\{") / REG_EBRACE /
165	"\0"
166	#define REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{")
167	gettext_noop ("Invalid content of \\{\\}") / REG_BADBR /
168	"\0"
169	#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}")
170	gettext_noop ("Invalid range end") / REG_ERANGE /
171	"\0"
172	#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end")
173	gettext_noop ("Memory exhausted") / REG_ESPACE /
174	"\0"
175	#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted")
176	gettext_noop ("Invalid preceding regular expression") / REG_BADRPT /
177	"\0"
178	#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression")
179	gettext_noop ("Premature end of regular expression") / REG_EEND /
180	"\0"
181	#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression")
182	gettext_noop ("Regular expression too big") / REG_ESIZE /
183	"\0"
184	#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big")
185	gettext_noop ("Unmatched ) or \\)") / REG_ERPAREN /
186	};
187
188	const size_t __re_error_msgid_idx[] attribute_hidden =
189	{
190	REG_NOERROR_IDX,
191	REG_NOMATCH_IDX,
192	REG_BADPAT_IDX,
193	REG_ECOLLATE_IDX,
194	REG_ECTYPE_IDX,
195	REG_EESCAPE_IDX,
196	REG_ESUBREG_IDX,
197	REG_EBRACK_IDX,
198	REG_EPAREN_IDX,
199	REG_EBRACE_IDX,
200	REG_BADBR_IDX,
201	REG_ERANGE_IDX,
202	REG_ESPACE_IDX,
203	REG_BADRPT_IDX,
204	REG_EEND_IDX,
205	REG_ESIZE_IDX,
206	REG_ERPAREN_IDX
207	};
208
209	/ Entry points for GNU code. /
210
211	/ re_compile_pattern is the GNU regular expression compiler: it*
212	compiles PATTERN (of length LENGTH) and puts the result in BUFP.
213	Returns 0 if the pattern was valid, otherwise an error string.
214
215	Assumes the 'allocated' (and perhaps 'buffer') and 'translate' fields
216	are set in BUFP on entry. /*
217
218	const char *
219	re_compile_pattern (const char *pattern, size_t length,
220	struct re_pattern_buffer *bufp)
221	{
222	reg_errcode_t ret;
223
224	/ And GNU code determines whether or not to get register information*
225	by passing null for the REGS argument to re_match, etc., not by
226	setting no_sub, unless RE_NO_SUB is set. /*
227	bufp->no_sub = !!(re_syntax_options & RE_NO_SUB);
228
229	/ Match anchors at newline. /
230	bufp->newline_anchor = `1`;
231
232	ret = re_compile_internal (bufp, pattern, length, re_syntax_options);
233
234	if (!ret)
235	return NULL;
236	return gettext (__re_error_msgid + __re_error_msgid_idx[(int) ret]);
237	}
238	#ifdef _LIBC
239	weak_alias (__re_compile_pattern, re_compile_pattern)
240	#endif
241
242	/ Set by 're_set_syntax' to the current regexp syntax to recognize. Can*
243	also be assigned to arbitrarily: each pattern buffer stores its own
244	syntax, so it can be changed between regex compilations. /*
245	/ This has no initializer because initialized variables in Emacs*
246	become read-only after dumping. /*
247	reg_syntax_t re_syntax_options;
248
249
250	/ Specify the precise syntax of regexps for compilation. This provides*
251	for compatibility for various utilities which historically have
252	different, incompatible syntaxes.
253
254	The argument SYNTAX is a bit mask comprised of the various bits
255	defined in regex.h. We return the old syntax. /*
256
257	reg_syntax_t
258	re_set_syntax (reg_syntax_t syntax)
259	{
260	reg_syntax_t ret = re_syntax_options;
261
262	re_syntax_options = syntax;
263	return ret;
264	}
265	#ifdef _LIBC
266	weak_alias (__re_set_syntax, re_set_syntax)
267	#endif
268
269	int
270	re_compile_fastmap (struct re_pattern_buffer *bufp)
271	{
272	re_dfa_t dfa = (re_dfa_t ) bufp->buffer;
273	char *fastmap = bufp->fastmap;
274
275	memset (fastmap, `'\0'`, sizeof (char) * SBC_MAX);
276	re_compile_fastmap_iter (bufp, dfa->init_state, fastmap);
277	if (dfa->init_state != dfa->init_state_word)
278	re_compile_fastmap_iter (bufp, dfa->init_state_word, fastmap);
279	if (dfa->init_state != dfa->init_state_nl)
280	re_compile_fastmap_iter (bufp, dfa->init_state_nl, fastmap);
281	if (dfa->init_state != dfa->init_state_begbuf)
282	re_compile_fastmap_iter (bufp, dfa->init_state_begbuf, fastmap);
283	bufp->fastmap_accurate = `1`;
284	return `0`;
285	}
286	#ifdef _LIBC
287	weak_alias (__re_compile_fastmap, re_compile_fastmap)
288	#endif
289
290	static inline void
291	__attribute__ ((always_inline))
292	re_set_fastmap (char fastmap, bool icase, int* ch)
293	{
294	fastmap[ch] = `1`;
295	if (icase)
296	fastmap[tolower (ch)] = `1`;
297	}
298
299	/ Helper function for re_compile_fastmap.*
300	Compile fastmap for the initial_state INIT_STATE. /*
301
302	static void
303	re_compile_fastmap_iter (regex_t bufp, const* re_dfastate_t *init_state,
304	char *fastmap)
305	{
306	re_dfa_t dfa = (re_dfa_t ) bufp->buffer;
307	int node_cnt;
308	int icase = (dfa->mb_cur_max == `1` && (bufp->syntax & RE_ICASE));
309	for (node_cnt = `0`; node_cnt < init_state->nodes.nelem; ++node_cnt)
310	{
311	int node = init_state->nodes.elems[node_cnt];
312	re_token_type_t type = dfa->nodes[node].type;
313
314	if (type == CHARACTER)
315	{
316	re_set_fastmap (fastmap, icase, dfa->nodes[node].opr.c);
317	#ifdef RE_ENABLE_I18N
318	if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > `1`)
319	{
320	unsigned char buf = alloca (dfa->mb_cur_max), p;
321	wchar_t wc;
322	mbstate_t state;
323
324	p = buf;
325	*p++ = dfa->nodes[node].opr.c;
326	while (++node < dfa->nodes_len
327	&& dfa->nodes[node].type == CHARACTER
328	&& dfa->nodes[node].mb_partial)
329	*p++ = dfa->nodes[node].opr.c;
330	memset (&state, `'\0'`, sizeof (state));
331	if (__mbrtowc (&wc, (const char *) buf, p - buf,
332	&state) == p - buf
333	&& (__wcrtomb ((char *) buf, __towlower (wc), &state)
334	!= (size_t) -`1`))
335	re_set_fastmap (fastmap, `0`, buf[`0`]);
336	}
337	#endif
338	}
339	else if (type == SIMPLE_BRACKET)
340	{
341	int i, ch;
342	for (i = `0`, ch = `0`; i < BITSET_WORDS; ++i)
343	{
344	int j;
345	bitset_word_t w = dfa->nodes[node].opr.sbcset[i];
346	for (j = `0`; j < BITSET_WORD_BITS; ++j, ++ch)
347	if (w & ((bitset_word_t) `1` << j))
348	re_set_fastmap (fastmap, icase, ch);
349	}
350	}
351	#ifdef RE_ENABLE_I18N
352	else if (type == COMPLEX_BRACKET)
353	{
354	re_charset_t *cset = dfa->nodes[node].opr.mbcset;
355	int i;
356
357	# ifdef _LIBC
358	/ See if we have to try all bytes which start multiple collation*
359	elements.
360	e.g. In da_DK, we want to catch 'a' since "aa" is a valid
361	collation element, and don't catch 'b' since 'b' is
362	the only collation element which starts from 'b' (and
363	it is caught by SIMPLE_BRACKET). /*
364	if (_NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES) != `0`
365	&& (cset->ncoll_syms \|\| cset->nranges))
366	{
367	const int32_t table = (const* int32_t *)
368	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
369	for (i = `0`; i < SBC_MAX; ++i)
370	if (table[i] < `0`)
371	re_set_fastmap (fastmap, icase, i);
372	}
373	# endif /* _LIBC */
374
375	/ See if we have to start the match at all multibyte characters,*
376	i.e. where we would not find an invalid sequence. This only
377	applies to multibyte character sets; for single byte character
378	sets, the SIMPLE_BRACKET again suffices. /*
379	if (dfa->mb_cur_max > `1`
380	&& (cset->nchar_classes \|\| cset->non_match \|\| cset->nranges
381	# ifdef _LIBC
382	\|\| cset->nequiv_classes
383	# endif /* _LIBC */
384	))
385	{
386	unsigned char c = `0`;
387	do
388	{
389	mbstate_t mbs;
390	memset (&mbs, `0`, sizeof (mbs));
391	if (__mbrtowc (NULL, (char *) &c, `1`, &mbs) == (size_t) -`2`)
392	re_set_fastmap (fastmap, false, (int) c);
393	}
394	while (++c != `0`);
395	}
396
397	else
398	{
399	/ ... Else catch all bytes which can start the mbchars. /
400	for (i = `0`; i < cset->nmbchars; ++i)
401	{
402	char buf[`256`];
403	mbstate_t state;
404	memset (&state, `'\0'`, sizeof (state));
405	if (__wcrtomb (buf, cset->mbchars[i], &state) != (size_t) -`1`)
406	re_set_fastmap (fastmap, icase, (unsigned* char *) buf);
407	if ((bufp->syntax & RE_ICASE) && dfa->mb_cur_max > `1`)
408	{
409	if (__wcrtomb (buf, __towlower (cset->mbchars[i]), &state)
410	!= (size_t) -`1`)
411	re_set_fastmap (fastmap, false, (unsigned* char *) buf);
412	}
413	}
414	}
415	}
416	#endif /* RE_ENABLE_I18N */
417	else if (type == OP_PERIOD
418	#ifdef RE_ENABLE_I18N
419	\|\| type == OP_UTF8_PERIOD
420	#endif /* RE_ENABLE_I18N */
421	\|\| type == END_OF_RE)
422	{
423	memset (fastmap, `'\1'`, sizeof (char) * SBC_MAX);
424	if (type == END_OF_RE)
425	bufp->can_be_null = `1`;
426	return;
427	}
428	}
429	}
430
431	/ Entry point for POSIX code. /
432	/ regcomp takes a regular expression as a string and compiles it.*
433
434	PREG is a regex_t . We do not expect any fields to be initialized,*
435	since POSIX says we shouldn't. Thus, we set
436
437	'buffer' to the compiled pattern;
438	'used' to the length of the compiled pattern;
439	'syntax' to RE_SYNTAX_POSIX_EXTENDED if the
440	REG_EXTENDED bit in CFLAGS is set; otherwise, to
441	RE_SYNTAX_POSIX_BASIC;
442	'newline_anchor' to REG_NEWLINE being set in CFLAGS;
443	'fastmap' to an allocated space for the fastmap;
444	'fastmap_accurate' to zero;
445	're_nsub' to the number of subexpressions in PATTERN.
446
447	PATTERN is the address of the pattern string.
448
449	CFLAGS is a series of bits which affect compilation.
450
451	If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
452	use POSIX basic syntax.
453
454	If REG_NEWLINE is set, then . and [^...] don't match newline.
455	Also, regexec will try a match beginning after every newline.
456
457	If REG_ICASE is set, then we considers upper- and lowercase
458	versions of letters to be equivalent when matching.
459
460	If REG_NOSUB is set, then when PREG is passed to regexec, that
461	routine will report only success or failure, and nothing about the
462	registers.
463
464	It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
465	the return codes and their meanings.) /*
466
467	int
468	regcomp (regex_t *__restrict preg, const char *__restrict pattern, int cflags)
469	{
470	reg_errcode_t ret;
471	reg_syntax_t syntax = ((cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED
472	: RE_SYNTAX_POSIX_BASIC);
473
474	preg->buffer = NULL;
475	preg->allocated = `0`;
476	preg->used = `0`;
477
478	/ Try to allocate space for the fastmap. /
479	preg->fastmap = re_malloc (char, SBC_MAX);
480	if (BE (preg->fastmap == NULL, `0`))
481	return REG_ESPACE;
482
483	syntax \|= (cflags & REG_ICASE) ? RE_ICASE : `0`;
484
485	/ If REG_NEWLINE is set, newlines are treated differently. /
486	if (cflags & REG_NEWLINE)
487	{ / REG_NEWLINE implies neither . nor [^...] match newline. /
488	syntax &= ~RE_DOT_NEWLINE;
489	syntax \|= RE_HAT_LISTS_NOT_NEWLINE;
490	/ It also changes the matching behavior. /
491	preg->newline_anchor = `1`;
492	}
493	else
494	preg->newline_anchor = `0`;
495	preg->no_sub = !!(cflags & REG_NOSUB);
496	preg->translate = NULL;
497
498	ret = re_compile_internal (preg, pattern, strlen (pattern), syntax);
499
500	/ POSIX doesn't distinguish between an unmatched open-group and an*
501	unmatched close-group: both are REG_EPAREN. /*
502	if (ret == REG_ERPAREN)
503	ret = REG_EPAREN;
504
505	/ We have already checked preg->fastmap != NULL. /
506	if (BE (ret == REG_NOERROR, `1`))
507	/ Compute the fastmap now, since regexec cannot modify the pattern*
508	buffer. This function never fails in this implementation. /*
509	(void) re_compile_fastmap (preg);
510	else
511	{
512	/ Some error occurred while compiling the expression. /
513	re_free (preg->fastmap);
514	preg->fastmap = NULL;
515	}
516
517	return (int) ret;
518	}
519	#ifdef _LIBC
520	libc_hidden_def (__regcomp)
521	weak_alias (__regcomp, regcomp)
522	#endif
523
524	/ Returns a message corresponding to an error code, ERRCODE, returned*
525	from either regcomp or regexec. We don't use PREG here. /*
526
527	size_t
528	regerror (int errcode, const regex_t *__restrict preg, char *__restrict errbuf,
529	size_t errbuf_size)
530	{
531	const char *msg;
532	size_t msg_size;
533
534	if (BE (errcode < `0`
535	\|\| errcode >= (int) (sizeof (__re_error_msgid_idx)
536	/ sizeof (__re_error_msgid_idx[`0`])), `0`))
537	/ Only error codes returned by the rest of the code should be passed*
538	to this routine. If we are given anything else, or if other regex
539	code generates an invalid error code, then the program has a bug.
540	Dump core so we can fix it. /*
541	abort ();
542
543	msg = gettext (__re_error_msgid + __re_error_msgid_idx[errcode]);
544
545	msg_size = strlen (msg) + `1`; / Includes the null. /
546
547	if (BE (errbuf_size != `0`, `1`))
548	{
549	if (BE (msg_size > errbuf_size, `0`))
550	{
551	#if defined HAVE_MEMPCPY \|\| defined _LIBC
552	((char* *) __mempcpy (errbuf, msg, errbuf_size - `1`)) = `'\0'`;
553	#else
554	memcpy (errbuf, msg, errbuf_size - `1`);
555	errbuf[errbuf_size - `1`] = `0`;
556	#endif
557	}
558	else
559	memcpy (errbuf, msg, msg_size);
560	}
561
562	return msg_size;
563	}
564	#ifdef _LIBC
565	weak_alias (__regerror, regerror)
566	#endif
567
568
569	#ifdef RE_ENABLE_I18N
570	/ This static array is used for the map to single-byte characters when*
571	UTF-8 is used. Otherwise we would allocate memory just to initialize
572	it the same all the time. UTF-8 is the preferred encoding so this is
573	a worthwhile optimization. /*
574	static const bitset_t utf8_sb_map =
575	{
576	/ Set the first 128 bits. /
577	[`0` ... `0x80` / BITSET_WORD_BITS - `1`] = BITSET_WORD_MAX
578	};
579	#endif
580
581
582	static void
583	free_dfa_content (re_dfa_t *dfa)
584	{
585	int i, j;
586
587	if (dfa->nodes)
588	for (i = `0`; i < dfa->nodes_len; ++i)
589	free_token (dfa->nodes + i);
590	re_free (dfa->nexts);
591	for (i = `0`; i < dfa->nodes_len; ++i)
592	{
593	if (dfa->eclosures != NULL)
594	re_node_set_free (dfa->eclosures + i);
595	if (dfa->inveclosures != NULL)
596	re_node_set_free (dfa->inveclosures + i);
597	if (dfa->edests != NULL)
598	re_node_set_free (dfa->edests + i);
599	}
600	re_free (dfa->edests);
601	re_free (dfa->eclosures);
602	re_free (dfa->inveclosures);
603	re_free (dfa->nodes);
604
605	if (dfa->state_table)
606	for (i = `0`; i <= dfa->state_hash_mask; ++i)
607	{
608	struct re_state_table_entry *entry = dfa->state_table + i;
609	for (j = `0`; j < entry->num; ++j)
610	{
611	re_dfastate_t *state = entry->array[j];
612	free_state (state);
613	}
614	re_free (entry->array);
615	}
616	re_free (dfa->state_table);
617	#ifdef RE_ENABLE_I18N
618	if (dfa->sb_char != utf8_sb_map)
619	re_free (dfa->sb_char);
620	#endif
621	re_free (dfa->subexp_map);
622	#ifdef DEBUG
623	re_free (dfa->re_str);
624	#endif
625
626	re_free (dfa);
627	}
628
629
630	/ Free dynamically allocated space used by PREG. /
631
632	void
633	regfree (regex_t *preg)
634	{
635	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
636	if (BE (dfa != NULL, `1`))
637	free_dfa_content (dfa);
638	preg->buffer = NULL;
639	preg->allocated = `0`;
640
641	re_free (preg->fastmap);
642	preg->fastmap = NULL;
643
644	re_free (preg->translate);
645	preg->translate = NULL;
646	}
647	#ifdef _LIBC
648	libc_hidden_def (__regfree)
649	weak_alias (__regfree, regfree)
650	#endif
651
652	/ Entry points compatible with 4.2 BSD regex library. We don't define*
653	them unless specifically requested. /*
654
655	#if defined _REGEX_RE_COMP \|\| defined _LIBC
656
657	/ BSD has one and only one pattern buffer. /
658	static struct re_pattern_buffer re_comp_buf;
659
660	char *
661	# ifdef _LIBC
662	/ Make these definitions weak in libc, so POSIX programs can redefine*
663	these names if they don't use our functions, and still use
664	regcomp/regexec above without link errors. /*
665	weak_function
666	# endif
667	re_comp (const char *s)
668	{
669	reg_errcode_t ret;
670	char *fastmap;
671
672	if (!s)
673	{
674	if (!re_comp_buf.buffer)
675	return gettext ("No previous regular expression");
676	return `0`;
677	}
678
679	if (re_comp_buf.buffer)
680	{
681	fastmap = re_comp_buf.fastmap;
682	re_comp_buf.fastmap = NULL;
683	__regfree (&re_comp_buf);
684	memset (&re_comp_buf, `'\0'`, sizeof (re_comp_buf));
685	re_comp_buf.fastmap = fastmap;
686	}
687
688	if (re_comp_buf.fastmap == NULL)
689	{
690	re_comp_buf.fastmap = (char *) malloc (SBC_MAX);
691	if (re_comp_buf.fastmap == NULL)
692	return (char *) gettext (__re_error_msgid
693	+ __re_error_msgid_idx[(int) REG_ESPACE]);
694	}
695
696	/ Since 're_exec' always passes NULL for the 'regs' argument, we*
697	don't need to initialize the pattern buffer fields which affect it. /*
698
699	/ Match anchors at newlines. /
700	re_comp_buf.newline_anchor = `1`;
701
702	ret = re_compile_internal (&re_comp_buf, s, strlen (s), re_syntax_options);
703
704	if (!ret)
705	return NULL;
706
707	/ Yes, we're discarding `const' here if !HAVE_LIBINTL. /
708	return (char ) gettext (__re_error_msgid + __re_error_msgid_idx[(int*) ret]);
709	}
710
711	#ifdef _LIBC
712	libc_freeres_fn (free_mem)
713	{
714	__regfree (&re_comp_buf);
715	}
716	#endif
717
718	#endif /* _REGEX_RE_COMP */
719
720	/ Internal entry point.*
721	Compile the regular expression PATTERN, whose length is LENGTH.
722	SYNTAX indicate regular expression's syntax. /*
723
724	static reg_errcode_t
725	re_compile_internal (regex_t preg, const* char * pattern, size_t length,
726	reg_syntax_t syntax)
727	{
728	reg_errcode_t err = REG_NOERROR;
729	re_dfa_t *dfa;
730	re_string_t regexp;
731
732	/ Initialize the pattern buffer. /
733	preg->fastmap_accurate = `0`;
734	preg->syntax = syntax;
735	preg->not_bol = preg->not_eol = `0`;
736	preg->used = `0`;
737	preg->re_nsub = `0`;
738	preg->can_be_null = `0`;
739	preg->regs_allocated = REGS_UNALLOCATED;
740
741	/ Initialize the dfa. /
742	dfa = (re_dfa_t *) preg->buffer;
743	if (BE (preg->allocated < sizeof (re_dfa_t), `0`))
744	{
745	/ If zero allocated, but buffer is non-null, try to realloc*
746	enough space. This loses if buffer's address is bogus, but
747	that is the user's responsibility. If ->buffer is NULL this
748	is a simple allocation. /*
749	dfa = re_realloc (preg->buffer, re_dfa_t, `1`);
750	if (dfa == NULL)
751	return REG_ESPACE;
752	preg->allocated = sizeof (re_dfa_t);
753	preg->buffer = (unsigned char *) dfa;
754	}
755	preg->used = sizeof (re_dfa_t);
756
757	err = init_dfa (dfa, length);
758	if (BE (err != REG_NOERROR, `0`))
759	{
760	free_dfa_content (dfa);
761	preg->buffer = NULL;
762	preg->allocated = `0`;
763	return err;
764	}
765	#ifdef DEBUG
766	/ Note: length+1 will not overflow since it is checked in init_dfa. /
767	dfa->re_str = re_malloc (char, length + `1`);
768	strncpy (dfa->re_str, pattern, length + `1`);
769	#endif
770
771	__libc_lock_init (dfa->lock);
772
773	err = re_string_construct (&regexp, pattern, length, preg->translate,
774	syntax & RE_ICASE, dfa);
775	if (BE (err != REG_NOERROR, `0`))
776	{
777	re_compile_internal_free_return:
778	free_workarea_compile (preg);
779	re_string_destruct (&regexp);
780	free_dfa_content (dfa);
781	preg->buffer = NULL;
782	preg->allocated = `0`;
783	return err;
784	}
785
786	/ Parse the regular expression, and build a structure tree. /
787	preg->re_nsub = `0`;
788	dfa->str_tree = parse (&regexp, preg, syntax, &err);
789	if (BE (dfa->str_tree == NULL, `0`))
790	goto re_compile_internal_free_return;
791
792	/ Analyze the tree and create the nfa. /
793	err = analyze (preg);
794	if (BE (err != REG_NOERROR, `0`))
795	goto re_compile_internal_free_return;
796
797	#ifdef RE_ENABLE_I18N
798	/ If possible, do searching in single byte encoding to speed things up. /
799	if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)
800	optimize_utf8 (dfa);
801	#endif
802
803	/ Then create the initial state of the dfa. /
804	err = create_initial_state (dfa);
805
806	/ Release work areas. /
807	free_workarea_compile (preg);
808	re_string_destruct (&regexp);
809
810	if (BE (err != REG_NOERROR, `0`))
811	{
812	free_dfa_content (dfa);
813	preg->buffer = NULL;
814	preg->allocated = `0`;
815	}
816
817	return err;
818	}
819
820	/ Initialize DFA. We use the length of the regular expression PAT_LEN*
821	as the initial length of some arrays. /*
822
823	static reg_errcode_t
824	init_dfa (re_dfa_t *dfa, size_t pat_len)
825	{
826	unsigned int table_size;
827	#ifndef _LIBC
828	char *codeset_name;
829	#endif
830
831	memset (dfa, `'\0'`, sizeof (re_dfa_t));
832
833	/ Force allocation of str_tree_storage the first time. /
834	dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
835
836	/ Avoid overflows. /
837	if (pat_len == SIZE_MAX)
838	return REG_ESPACE;
839
840	dfa->nodes_alloc = pat_len + `1`;
841	dfa->nodes = re_malloc (re_token_t, dfa->nodes_alloc);
842
843	/ table_size = 2 ^ ceil(log pat_len) /
844	for (table_size = `1`; ; table_size <<= `1`)
845	if (table_size > pat_len)
846	break;
847
848	dfa->state_table = calloc (sizeof (struct re_state_table_entry), table_size);
849	dfa->state_hash_mask = table_size - `1`;
850
851	dfa->mb_cur_max = MB_CUR_MAX;
852	#ifdef _LIBC
853	if (dfa->mb_cur_max == `6`
854	&& strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == `0`)
855	dfa->is_utf8 = `1`;
856	dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
857	!= `0`);
858	#else
859	# ifdef HAVE_LANGINFO_CODESET
860	codeset_name = nl_langinfo (CODESET);
861	# else
862	codeset_name = getenv ("LC_ALL");
863	if (codeset_name == NULL \|\| codeset_name[`0`] == `'\0'`)
864	codeset_name = getenv ("LC_CTYPE");
865	if (codeset_name == NULL \|\| codeset_name[`0`] == `'\0'`)
866	codeset_name = getenv ("LANG");
867	if (codeset_name == NULL)
868	codeset_name = "";
869	else if (strchr (codeset_name, `'.'`) != NULL)
870	codeset_name = strchr (codeset_name, `'.'`) + `1`;
871	# endif
872
873	if (strcasecmp (codeset_name, "UTF-8") == `0`
874	\|\| strcasecmp (codeset_name, "UTF8") == `0`)
875	dfa->is_utf8 = `1`;
876
877	/ We check exhaustively in the loop below if this charset is a*
878	superset of ASCII. /*
879	dfa->map_notascii = `0`;
880	#endif
881
882	#ifdef RE_ENABLE_I18N
883	if (dfa->mb_cur_max > `1`)
884	{
885	if (dfa->is_utf8)
886	dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
887	else
888	{
889	int i, j, ch;
890
891	dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset_t), `1`);
892	if (BE (dfa->sb_char == NULL, `0`))
893	return REG_ESPACE;
894
895	/ Set the bits corresponding to single byte chars. /
896	for (i = `0`, ch = `0`; i < BITSET_WORDS; ++i)
897	for (j = `0`; j < BITSET_WORD_BITS; ++j, ++ch)
898	{
899	wint_t wch = __btowc (ch);
900	if (wch != WEOF)
901	dfa->sb_char[i] \|= (bitset_word_t) `1` << j;
902	# ifndef _LIBC
903	if (isascii (ch) && wch != ch)
904	dfa->map_notascii = `1`;
905	# endif
906	}
907	}
908	}
909	#endif
910
911	if (BE (dfa->nodes == NULL \|\| dfa->state_table == NULL, `0`))
912	return REG_ESPACE;
913	return REG_NOERROR;
914	}
915
916	/ Initialize WORD_CHAR table, which indicate which character is*
917	"word". In this case "word" means that it is the word construction
918	character used by some operators like "\<", "\>", etc. /*
919
920	static void
921	init_word_char (re_dfa_t *dfa)
922	{
923	dfa->word_ops_used = `1`;
924	int i = `0`;
925	int ch = `0`;
926	if (BE (dfa->map_notascii == `0`, `1`))
927	{
928	/ Avoid uint32_t and uint64_t as some non-GCC platforms lack*
929	them, an issue when this code is used in Gnulib. /*
930	bitset_word_t bits0 = `0x00000000`;
931	bitset_word_t bits1 = `0x03ff0000`;
932	bitset_word_t bits2 = `0x87fffffe`;
933	bitset_word_t bits3 = `0x07fffffe`;
934	if (BITSET_WORD_BITS == `64`)
935	{
936	/ Pacify gcc -Woverflow on 32-bit platformns. /
937	dfa->word_char[`0`] = bits1 << `31` << `1` \| bits0;
938	dfa->word_char[`1`] = bits3 << `31` << `1` \| bits2;
939	i = `2`;
940	}
941	else if (BITSET_WORD_BITS == `32`)
942	{
943	dfa->word_char[`0`] = bits0;
944	dfa->word_char[`1`] = bits1;
945	dfa->word_char[`2`] = bits2;
946	dfa->word_char[`3`] = bits3;
947	i = `4`;
948	}
949	else
950	goto general_case;
951	ch = `128`;
952
953	if (BE (dfa->is_utf8, `1`))
954	{
955	memset (&dfa->word_char[i], `'\0'`, (SBC_MAX - ch) / `8`);
956	return;
957	}
958	}
959
960	general_case:
961	for (; i < BITSET_WORDS; ++i)
962	for (int j = `0`; j < BITSET_WORD_BITS; ++j, ++ch)
963	if (isalnum (ch) \|\| ch == `'_'`)
964	dfa->word_char[i] \|= (bitset_word_t) `1` << j;
965	}
966
967	/ Free the work area which are only used while compiling. /
968
969	static void
970	free_workarea_compile (regex_t *preg)
971	{
972	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
973	bin_tree_storage_t storage, next;
974	for (storage = dfa->str_tree_storage; storage; storage = next)
975	{
976	next = storage->next;
977	re_free (storage);
978	}
979	dfa->str_tree_storage = NULL;
980	dfa->str_tree_storage_idx = BIN_TREE_STORAGE_SIZE;
981	dfa->str_tree = NULL;
982	re_free (dfa->org_indices);
983	dfa->org_indices = NULL;
984	}
985
986	/ Create initial states for all contexts. /
987
988	static reg_errcode_t
989	create_initial_state (re_dfa_t *dfa)
990	{
991	int first, i;
992	reg_errcode_t err;
993	re_node_set init_nodes;
994
995	/ Initial states have the epsilon closure of the node which is*
996	the first node of the regular expression. /*
997	first = dfa->str_tree->first->node_idx;
998	dfa->init_node = first;
999	err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
1000	if (BE (err != REG_NOERROR, `0`))
1001	return err;
1002
1003	/ The back-references which are in initial states can epsilon transit,*
1004	since in this case all of the subexpressions can be null.
1005	Then we add epsilon closures of the nodes which are the next nodes of
1006	the back-references. /*
1007	if (dfa->nbackref > `0`)
1008	for (i = `0`; i < init_nodes.nelem; ++i)
1009	{
1010	int node_idx = init_nodes.elems[i];
1011	re_token_type_t type = dfa->nodes[node_idx].type;
1012
1013	int clexp_idx;
1014	if (type != OP_BACK_REF)
1015	continue;
1016	for (clexp_idx = `0`; clexp_idx < init_nodes.nelem; ++clexp_idx)
1017	{
1018	re_token_t *clexp_node;
1019	clexp_node = dfa->nodes + init_nodes.elems[clexp_idx];
1020	if (clexp_node->type == OP_CLOSE_SUBEXP
1021	&& clexp_node->opr.idx == dfa->nodes[node_idx].opr.idx)
1022	break;
1023	}
1024	if (clexp_idx == init_nodes.nelem)
1025	continue;
1026
1027	if (type == OP_BACK_REF)
1028	{
1029	int dest_idx = dfa->edests[node_idx].elems[`0`];
1030	if (!re_node_set_contains (&init_nodes, dest_idx))
1031	{
1032	reg_errcode_t err = re_node_set_merge (&init_nodes,
1033	dfa->eclosures
1034	+ dest_idx);
1035	if (err != REG_NOERROR)
1036	return err;
1037	i = `0`;
1038	}
1039	}
1040	}
1041
1042	/ It must be the first time to invoke acquire_state. /
1043	dfa->init_state = re_acquire_state_context (&err, dfa, &init_nodes, `0`);
1044	/ We don't check ERR here, since the initial state must not be NULL. /
1045	if (BE (dfa->init_state == NULL, `0`))
1046	return err;
1047	if (dfa->init_state->has_constraint)
1048	{
1049	dfa->init_state_word = re_acquire_state_context (&err, dfa, &init_nodes,
1050	CONTEXT_WORD);
1051	dfa->init_state_nl = re_acquire_state_context (&err, dfa, &init_nodes,
1052	CONTEXT_NEWLINE);
1053	dfa->init_state_begbuf = re_acquire_state_context (&err, dfa,
1054	&init_nodes,
1055	CONTEXT_NEWLINE
1056	\| CONTEXT_BEGBUF);
1057	if (BE (dfa->init_state_word == NULL \|\| dfa->init_state_nl == NULL
1058	\|\| dfa->init_state_begbuf == NULL, `0`))
1059	return err;
1060	}
1061	else
1062	dfa->init_state_word = dfa->init_state_nl
1063	= dfa->init_state_begbuf = dfa->init_state;
1064
1065	re_node_set_free (&init_nodes);
1066	return REG_NOERROR;
1067	}
1068
1069	#ifdef RE_ENABLE_I18N
1070	/ If it is possible to do searching in single byte encoding instead of UTF-8*
1071	to speed things up, set dfa->mb_cur_max to 1, clear is_utf8 and change
1072	DFA nodes where needed. /*
1073
1074	static void
1075	optimize_utf8 (re_dfa_t *dfa)
1076	{
1077	int node, i, mb_chars = `0`, has_period = `0`;
1078
1079	for (node = `0`; node < dfa->nodes_len; ++node)
1080	switch (dfa->nodes[node].type)
1081	{
1082	case CHARACTER:
1083	if (dfa->nodes[node].opr.c >= `0x80`)
1084	mb_chars = `1`;
1085	break;
1086	case ANCHOR:
1087	switch (dfa->nodes[node].opr.ctx_type)
1088	{
1089	case LINE_FIRST:
1090	case LINE_LAST:
1091	case BUF_FIRST:
1092	case BUF_LAST:
1093	break;
1094	default:
1095	/ Word anchors etc. cannot be handled. It's okay to test*
1096	opr.ctx_type since constraints (for all DFA nodes) are
1097	created by ORing one or more opr.ctx_type values. /*
1098	return;
1099	}
1100	break;
1101	case OP_PERIOD:
1102	has_period = `1`;
1103	break;
1104	case OP_BACK_REF:
1105	case OP_ALT:
1106	case END_OF_RE:
1107	case OP_DUP_ASTERISK:
1108	case OP_OPEN_SUBEXP:
1109	case OP_CLOSE_SUBEXP:
1110	break;
1111	case COMPLEX_BRACKET:
1112	return;
1113	case SIMPLE_BRACKET:
1114	/ Just double check. The non-ASCII range starts at 0x80. /
1115	assert (`0x80` % BITSET_WORD_BITS == `0`);
1116	for (i = `0x80` / BITSET_WORD_BITS; i < BITSET_WORDS; ++i)
1117	if (dfa->nodes[node].opr.sbcset[i])
1118	return;
1119	break;
1120	default:
1121	abort ();
1122	}
1123
1124	if (mb_chars \|\| has_period)
1125	for (node = `0`; node < dfa->nodes_len; ++node)
1126	{
1127	if (dfa->nodes[node].type == CHARACTER
1128	&& dfa->nodes[node].opr.c >= `0x80`)
1129	dfa->nodes[node].mb_partial = `0`;
1130	else if (dfa->nodes[node].type == OP_PERIOD)
1131	dfa->nodes[node].type = OP_UTF8_PERIOD;
1132	}
1133
1134	/ The search can be in single byte locale. /
1135	dfa->mb_cur_max = `1`;
1136	dfa->is_utf8 = `0`;
1137	dfa->has_mb_node = dfa->nbackref > `0` \|\| has_period;
1138	}
1139	#endif
1140
1141	/ Analyze the structure tree, and calculate "first", "next", "edest",*
1142	"eclosure", and "inveclosure". /*
1143
1144	static reg_errcode_t
1145	analyze (regex_t *preg)
1146	{
1147	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
1148	reg_errcode_t ret;
1149
1150	/ Allocate arrays. /
1151	dfa->nexts = re_malloc (int, dfa->nodes_alloc);
1152	dfa->org_indices = re_malloc (int, dfa->nodes_alloc);
1153	dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
1154	dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
1155	if (BE (dfa->nexts == NULL \|\| dfa->org_indices == NULL \|\| dfa->edests == NULL
1156	\|\| dfa->eclosures == NULL, `0`))
1157	return REG_ESPACE;
1158
1159	dfa->subexp_map = re_malloc (int, preg->re_nsub);
1160	if (dfa->subexp_map != NULL)
1161	{
1162	int i;
1163	for (i = `0`; i < preg->re_nsub; i++)
1164	dfa->subexp_map[i] = i;
1165	preorder (dfa->str_tree, optimize_subexps, dfa);
1166	for (i = `0`; i < preg->re_nsub; i++)
1167	if (dfa->subexp_map[i] != i)
1168	break;
1169	if (i == preg->re_nsub)
1170	{
1171	free (dfa->subexp_map);
1172	dfa->subexp_map = NULL;
1173	}
1174	}
1175
1176	ret = postorder (dfa->str_tree, lower_subexps, preg);
1177	if (BE (ret != REG_NOERROR, `0`))
1178	return ret;
1179	ret = postorder (dfa->str_tree, calc_first, dfa);
1180	if (BE (ret != REG_NOERROR, `0`))
1181	return ret;
1182	preorder (dfa->str_tree, calc_next, dfa);
1183	ret = preorder (dfa->str_tree, link_nfa_nodes, dfa);
1184	if (BE (ret != REG_NOERROR, `0`))
1185	return ret;
1186	ret = calc_eclosure (dfa);
1187	if (BE (ret != REG_NOERROR, `0`))
1188	return ret;
1189
1190	/ We only need this during the prune_impossible_nodes pass in regexec.c;*
1191	skip it if p_i_n will not run, as calc_inveclosure can be quadratic. /*
1192	if ((!preg->no_sub && preg->re_nsub > `0` && dfa->has_plural_match)
1193	\|\| dfa->nbackref)
1194	{
1195	dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
1196	if (BE (dfa->inveclosures == NULL, `0`))
1197	return REG_ESPACE;
1198	ret = calc_inveclosure (dfa);
1199	}
1200
1201	return ret;
1202	}
1203
1204	/ Our parse trees are very unbalanced, so we cannot use a stack to*
1205	implement parse tree visits. Instead, we use parent pointers and
1206	some hairy code in these two functions. /*
1207	static reg_errcode_t
1208	postorder (bin_tree_t root, reg_errcode_t (fn (void* , bin_tree_t )),
1209	void *extra)
1210	{
1211	bin_tree_t node, prev;
1212
1213	for (node = root; ; )
1214	{
1215	/ Descend down the tree, preferably to the left (or to the right*
1216	if that's the only child). /*
1217	while (node->left \|\| node->right)
1218	if (node->left)
1219	node = node->left;
1220	else
1221	node = node->right;
1222
1223	do
1224	{
1225	reg_errcode_t err = fn (extra, node);
1226	if (BE (err != REG_NOERROR, `0`))
1227	return err;
1228	if (node->parent == NULL)
1229	return REG_NOERROR;
1230	prev = node;
1231	node = node->parent;
1232	}
1233	/ Go up while we have a node that is reached from the right. /
1234	while (node->right == prev \|\| node->right == NULL);
1235	node = node->right;
1236	}
1237	}
1238
1239	static reg_errcode_t
1240	preorder (bin_tree_t root, reg_errcode_t (fn (void* , bin_tree_t )),
1241	void *extra)
1242	{
1243	bin_tree_t *node;
1244
1245	for (node = root; ; )
1246	{
1247	reg_errcode_t err = fn (extra, node);
1248	if (BE (err != REG_NOERROR, `0`))
1249	return err;
1250
1251	/ Go to the left node, or up and to the right. /
1252	if (node->left)
1253	node = node->left;
1254	else
1255	{
1256	bin_tree_t *prev = NULL;
1257	while (node->right == prev \|\| node->right == NULL)
1258	{
1259	prev = node;
1260	node = node->parent;
1261	if (!node)
1262	return REG_NOERROR;
1263	}
1264	node = node->right;
1265	}
1266	}
1267	}
1268
1269	/ Optimization pass: if a SUBEXP is entirely contained, strip it and tell*
1270	re_search_internal to map the inner one's opr.idx to this one's. Adjust
1271	backreferences as well. Requires a preorder visit. /*
1272	static reg_errcode_t
1273	optimize_subexps (void extra, bin_tree_t node)
1274	{
1275	re_dfa_t dfa = (re_dfa_t ) extra;
1276
1277	if (node->token.type == OP_BACK_REF && dfa->subexp_map)
1278	{
1279	int idx = node->token.opr.idx;
1280	node->token.opr.idx = dfa->subexp_map[idx];
1281	dfa->used_bkref_map \|= `1` << node->token.opr.idx;
1282	}
1283
1284	else if (node->token.type == SUBEXP
1285	&& node->left && node->left->token.type == SUBEXP)
1286	{
1287	int other_idx = node->left->token.opr.idx;
1288
1289	node->left = node->left->left;
1290	if (node->left)
1291	node->left->parent = node;
1292
1293	dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
1294	if (other_idx < BITSET_WORD_BITS)
1295	dfa->used_bkref_map &= ~((bitset_word_t) `1` << other_idx);
1296	}
1297
1298	return REG_NOERROR;
1299	}
1300
1301	/ Lowering pass: Turn each SUBEXP node into the appropriate concatenation*
1302	of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP. /*
1303	static reg_errcode_t
1304	lower_subexps (void extra, bin_tree_t node)
1305	{
1306	regex_t preg = (regex_t ) extra;
1307	reg_errcode_t err = REG_NOERROR;
1308
1309	if (node->left && node->left->token.type == SUBEXP)
1310	{
1311	node->left = lower_subexp (&err, preg, node->left);
1312	if (node->left)
1313	node->left->parent = node;
1314	}
1315	if (node->right && node->right->token.type == SUBEXP)
1316	{
1317	node->right = lower_subexp (&err, preg, node->right);
1318	if (node->right)
1319	node->right->parent = node;
1320	}
1321
1322	return err;
1323	}
1324
1325	static bin_tree_t *
1326	lower_subexp (reg_errcode_t err, regex_t preg, bin_tree_t *node)
1327	{
1328	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
1329	bin_tree_t *body = node->left;
1330	bin_tree_t op, cls, tree1, tree;
1331
1332	if (preg->no_sub
1333	/ We do not optimize empty subexpressions, because otherwise we may*
1334	have bad CONCAT nodes with NULL children. This is obviously not
1335	very common, so we do not lose much. An example that triggers
1336	this case is the sed "script" //x. /*
1337	&& node->left != NULL
1338	&& (node->token.opr.idx >= BITSET_WORD_BITS
1339	\|\| !(dfa->used_bkref_map
1340	& ((bitset_word_t) `1` << node->token.opr.idx))))
1341	return node->left;
1342
1343	/ Convert the SUBEXP node to the concatenation of an*
1344	OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP. /*
1345	op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP);
1346	cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP);
1347	tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls;
1348	tree = create_tree (dfa, op, tree1, CONCAT);
1349	if (BE (tree == NULL \|\| tree1 == NULL \|\| op == NULL \|\| cls == NULL, `0`))
1350	{
1351	*err = REG_ESPACE;
1352	return NULL;
1353	}
1354
1355	op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx;
1356	op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp;
1357	return tree;
1358	}
1359
1360	/ Pass 1 in building the NFA: compute FIRST and create unlinked automaton*
1361	nodes. Requires a postorder visit. /*
1362	static reg_errcode_t
1363	calc_first (void extra, bin_tree_t node)
1364	{
1365	re_dfa_t dfa = (re_dfa_t ) extra;
1366	if (node->token.type == CONCAT)
1367	{
1368	node->first = node->left->first;
1369	node->node_idx = node->left->node_idx;
1370	}
1371	else
1372	{
1373	node->first = node;
1374	node->node_idx = re_dfa_add_node (dfa, node->token);
1375	if (BE (node->node_idx == -`1`, `0`))
1376	return REG_ESPACE;
1377	if (node->token.type == ANCHOR)
1378	dfa->nodes[node->node_idx].constraint = node->token.opr.ctx_type;
1379	}
1380	return REG_NOERROR;
1381	}
1382
1383	/ Pass 2: compute NEXT on the tree. Preorder visit. /
1384	static reg_errcode_t
1385	calc_next (void extra, bin_tree_t node)
1386	{
1387	switch (node->token.type)
1388	{
1389	case OP_DUP_ASTERISK:
1390	node->left->next = node;
1391	break;
1392	case CONCAT:
1393	node->left->next = node->right->first;
1394	node->right->next = node->next;
1395	break;
1396	default:
1397	if (node->left)
1398	node->left->next = node->next;
1399	if (node->right)
1400	node->right->next = node->next;
1401	break;
1402	}
1403	return REG_NOERROR;
1404	}
1405
1406	/ Pass 3: link all DFA nodes to their NEXT node (any order will do). /
1407	static reg_errcode_t
1408	link_nfa_nodes (void extra, bin_tree_t node)
1409	{
1410	re_dfa_t dfa = (re_dfa_t ) extra;
1411	int idx = node->node_idx;
1412	reg_errcode_t err = REG_NOERROR;
1413
1414	switch (node->token.type)
1415	{
1416	case CONCAT:
1417	break;
1418
1419	case END_OF_RE:
1420	assert (node->next == NULL);
1421	break;
1422
1423	case OP_DUP_ASTERISK:
1424	case OP_ALT:
1425	{
1426	int left, right;
1427	dfa->has_plural_match = `1`;
1428	if (node->left != NULL)
1429	left = node->left->first->node_idx;
1430	else
1431	left = node->next->node_idx;
1432	if (node->right != NULL)
1433	right = node->right->first->node_idx;
1434	else
1435	right = node->next->node_idx;
1436	assert (left > -`1`);
1437	assert (right > -`1`);
1438	err = re_node_set_init_2 (dfa->edests + idx, left, right);
1439	}
1440	break;
1441
1442	case ANCHOR:
1443	case OP_OPEN_SUBEXP:
1444	case OP_CLOSE_SUBEXP:
1445	err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx);
1446	break;
1447
1448	case OP_BACK_REF:
1449	dfa->nexts[idx] = node->next->node_idx;
1450	if (node->token.type == OP_BACK_REF)
1451	err = re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
1452	break;
1453
1454	default:
1455	assert (!IS_EPSILON_NODE (node->token.type));
1456	dfa->nexts[idx] = node->next->node_idx;
1457	break;
1458	}
1459
1460	return err;
1461	}
1462
1463	/ Duplicate the epsilon closure of the node ROOT_NODE.*
1464	Note that duplicated nodes have constraint INIT_CONSTRAINT in addition
1465	to their own constraint. /*
1466
1467	static reg_errcode_t
1468	duplicate_node_closure (re_dfa_t dfa, int* top_org_node, int top_clone_node,
1469	int root_node, unsigned int init_constraint)
1470	{
1471	int org_node, clone_node, ret;
1472	unsigned int constraint = init_constraint;
1473	for (org_node = top_org_node, clone_node = top_clone_node;;)
1474	{
1475	int org_dest, clone_dest;
1476	if (dfa->nodes[org_node].type == OP_BACK_REF)
1477	{
1478	/ If the back reference epsilon-transit, its destination must*
1479	also have the constraint. Then duplicate the epsilon closure
1480	of the destination of the back reference, and store it in
1481	edests of the back reference. /*
1482	org_dest = dfa->nexts[org_node];
1483	re_node_set_empty (dfa->edests + clone_node);
1484	clone_dest = duplicate_node (dfa, org_dest, constraint);
1485	if (BE (clone_dest == -`1`, `0`))
1486	return REG_ESPACE;
1487	dfa->nexts[clone_node] = dfa->nexts[org_node];
1488	ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1489	if (BE (ret < `0`, `0`))
1490	return REG_ESPACE;
1491	}
1492	else if (dfa->edests[org_node].nelem == `0`)
1493	{
1494	/ In case of the node can't epsilon-transit, don't duplicate the*
1495	destination and store the original destination as the
1496	destination of the node. /*
1497	dfa->nexts[clone_node] = dfa->nexts[org_node];
1498	break;
1499	}
1500	else if (dfa->edests[org_node].nelem == `1`)
1501	{
1502	/ In case of the node can epsilon-transit, and it has only one*
1503	destination. /*
1504	org_dest = dfa->edests[org_node].elems[`0`];
1505	re_node_set_empty (dfa->edests + clone_node);
1506	/ If the node is root_node itself, it means the epsilon closure*
1507	has a loop. Then tie it to the destination of the root_node. /*
1508	if (org_node == root_node && clone_node != org_node)
1509	{
1510	ret = re_node_set_insert (dfa->edests + clone_node, org_dest);
1511	if (BE (ret < `0`, `0`))
1512	return REG_ESPACE;
1513	break;
1514	}
1515	/ In case the node has another constraint, append it. /
1516	constraint \|= dfa->nodes[org_node].constraint;
1517	clone_dest = duplicate_node (dfa, org_dest, constraint);
1518	if (BE (clone_dest == -`1`, `0`))
1519	return REG_ESPACE;
1520	ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1521	if (BE (ret < `0`, `0`))
1522	return REG_ESPACE;
1523	}
1524	else / dfa->edests[org_node].nelem == 2 /
1525	{
1526	/ In case of the node can epsilon-transit, and it has two*
1527	destinations. In the bin_tree_t and DFA, that's '\|' and ''. /
1528	org_dest = dfa->edests[org_node].elems[`0`];
1529	re_node_set_empty (dfa->edests + clone_node);
1530	/ Search for a duplicated node which satisfies the constraint. /
1531	clone_dest = search_duplicated_node (dfa, org_dest, constraint);
1532	if (clone_dest == -`1`)
1533	{
1534	/ There is no such duplicated node, create a new one. /
1535	reg_errcode_t err;
1536	clone_dest = duplicate_node (dfa, org_dest, constraint);
1537	if (BE (clone_dest == -`1`, `0`))
1538	return REG_ESPACE;
1539	ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1540	if (BE (ret < `0`, `0`))
1541	return REG_ESPACE;
1542	err = duplicate_node_closure (dfa, org_dest, clone_dest,
1543	root_node, constraint);
1544	if (BE (err != REG_NOERROR, `0`))
1545	return err;
1546	}
1547	else
1548	{
1549	/ There is a duplicated node which satisfies the constraint,*
1550	use it to avoid infinite loop. /*
1551	ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1552	if (BE (ret < `0`, `0`))
1553	return REG_ESPACE;
1554	}
1555
1556	org_dest = dfa->edests[org_node].elems[`1`];
1557	clone_dest = duplicate_node (dfa, org_dest, constraint);
1558	if (BE (clone_dest == -`1`, `0`))
1559	return REG_ESPACE;
1560	ret = re_node_set_insert (dfa->edests + clone_node, clone_dest);
1561	if (BE (ret < `0`, `0`))
1562	return REG_ESPACE;
1563	}
1564	org_node = org_dest;
1565	clone_node = clone_dest;
1566	}
1567	return REG_NOERROR;
1568	}
1569
1570	/ Search for a node which is duplicated from the node ORG_NODE, and*
1571	satisfies the constraint CONSTRAINT. /*
1572
1573	static int
1574	search_duplicated_node (const re_dfa_t dfa, int* org_node,
1575	unsigned int constraint)
1576	{
1577	int idx;
1578	for (idx = dfa->nodes_len - `1`; dfa->nodes[idx].duplicated && idx > `0`; --idx)
1579	{
1580	if (org_node == dfa->org_indices[idx]
1581	&& constraint == dfa->nodes[idx].constraint)
1582	return idx; / Found. /
1583	}
1584	return -`1`; / Not found. /
1585	}
1586
1587	/ Duplicate the node whose index is ORG_IDX and set the constraint CONSTRAINT.*
1588	Return the index of the new node, or -1 if insufficient storage is
1589	available. /*
1590
1591	static int
1592	duplicate_node (re_dfa_t dfa, int* org_idx, unsigned int constraint)
1593	{
1594	int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
1595	if (BE (dup_idx != -`1`, `1`))
1596	{
1597	dfa->nodes[dup_idx].constraint = constraint;
1598	dfa->nodes[dup_idx].constraint \|= dfa->nodes[org_idx].constraint;
1599	dfa->nodes[dup_idx].duplicated = `1`;
1600
1601	/ Store the index of the original node. /
1602	dfa->org_indices[dup_idx] = org_idx;
1603	}
1604	return dup_idx;
1605	}
1606
1607	static reg_errcode_t
1608	calc_inveclosure (re_dfa_t *dfa)
1609	{
1610	int src, idx, ret;
1611	for (idx = `0`; idx < dfa->nodes_len; ++idx)
1612	re_node_set_init_empty (dfa->inveclosures + idx);
1613
1614	for (src = `0`; src < dfa->nodes_len; ++src)
1615	{
1616	int *elems = dfa->eclosures[src].elems;
1617	for (idx = `0`; idx < dfa->eclosures[src].nelem; ++idx)
1618	{
1619	ret = re_node_set_insert_last (dfa->inveclosures + elems[idx], src);
1620	if (BE (ret == -`1`, `0`))
1621	return REG_ESPACE;
1622	}
1623	}
1624
1625	return REG_NOERROR;
1626	}
1627
1628	/ Calculate "eclosure" for all the node in DFA. /
1629
1630	static reg_errcode_t
1631	calc_eclosure (re_dfa_t *dfa)
1632	{
1633	int node_idx, incomplete;
1634	#ifdef DEBUG
1635	assert (dfa->nodes_len > `0`);
1636	#endif
1637	incomplete = `0`;
1638	/ For each nodes, calculate epsilon closure. /
1639	for (node_idx = `0`; ; ++node_idx)
1640	{
1641	reg_errcode_t err;
1642	re_node_set eclosure_elem;
1643	if (node_idx == dfa->nodes_len)
1644	{
1645	if (!incomplete)
1646	break;
1647	incomplete = `0`;
1648	node_idx = `0`;
1649	}
1650
1651	#ifdef DEBUG
1652	assert (dfa->eclosures[node_idx].nelem != -`1`);
1653	#endif
1654
1655	/ If we have already calculated, skip it. /
1656	if (dfa->eclosures[node_idx].nelem != `0`)
1657	continue;
1658	/ Calculate epsilon closure of 'node_idx'. /
1659	err = calc_eclosure_iter (&eclosure_elem, dfa, node_idx, `1`);
1660	if (BE (err != REG_NOERROR, `0`))
1661	return err;
1662
1663	if (dfa->eclosures[node_idx].nelem == `0`)
1664	{
1665	incomplete = `1`;
1666	re_node_set_free (&eclosure_elem);
1667	}
1668	}
1669	return REG_NOERROR;
1670	}
1671
1672	/ Calculate epsilon closure of NODE. /
1673
1674	static reg_errcode_t
1675	calc_eclosure_iter (re_node_set new_set, re_dfa_t dfa, int node, int root)
1676	{
1677	reg_errcode_t err;
1678	int i;
1679	re_node_set eclosure;
1680	int ret;
1681	int incomplete = `0`;
1682	err = re_node_set_alloc (&eclosure, dfa->edests[node].nelem + `1`);
1683	if (BE (err != REG_NOERROR, `0`))
1684	return err;
1685
1686	/ This indicates that we are calculating this node now.*
1687	We reference this value to avoid infinite loop. /*
1688	dfa->eclosures[node].nelem = -`1`;
1689
1690	/ If the current node has constraints, duplicate all nodes*
1691	since they must inherit the constraints. /*
1692	if (dfa->nodes[node].constraint
1693	&& dfa->edests[node].nelem
1694	&& !dfa->nodes[dfa->edests[node].elems[`0`]].duplicated)
1695	{
1696	err = duplicate_node_closure (dfa, node, node, node,
1697	dfa->nodes[node].constraint);
1698	if (BE (err != REG_NOERROR, `0`))
1699	return err;
1700	}
1701
1702	/ Expand each epsilon destination nodes. /
1703	if (IS_EPSILON_NODE(dfa->nodes[node].type))
1704	for (i = `0`; i < dfa->edests[node].nelem; ++i)
1705	{
1706	re_node_set eclosure_elem;
1707	int edest = dfa->edests[node].elems[i];
1708	/ If calculating the epsilon closure of `edest' is in progress,*
1709	return intermediate result. /*
1710	if (dfa->eclosures[edest].nelem == -`1`)
1711	{
1712	incomplete = `1`;
1713	continue;
1714	}
1715	/ If we haven't calculated the epsilon closure of `edest' yet,*
1716	calculate now. Otherwise use calculated epsilon closure. /*
1717	if (dfa->eclosures[edest].nelem == `0`)
1718	{
1719	err = calc_eclosure_iter (&eclosure_elem, dfa, edest, `0`);
1720	if (BE (err != REG_NOERROR, `0`))
1721	return err;
1722	}
1723	else
1724	eclosure_elem = dfa->eclosures[edest];
1725	/ Merge the epsilon closure of 'edest'. /
1726	err = re_node_set_merge (&eclosure, &eclosure_elem);
1727	if (BE (err != REG_NOERROR, `0`))
1728	return err;
1729	/ If the epsilon closure of 'edest' is incomplete,*
1730	the epsilon closure of this node is also incomplete. /*
1731	if (dfa->eclosures[edest].nelem == `0`)
1732	{
1733	incomplete = `1`;
1734	re_node_set_free (&eclosure_elem);
1735	}
1736	}
1737
1738	/ An epsilon closure includes itself. /
1739	ret = re_node_set_insert (&eclosure, node);
1740	if (BE (ret < `0`, `0`))
1741	return REG_ESPACE;
1742	if (incomplete && !root)
1743	dfa->eclosures[node].nelem = `0`;
1744	else
1745	dfa->eclosures[node] = eclosure;
1746	*new_set = eclosure;
1747	return REG_NOERROR;
1748	}
1749
1750	/ Functions for token which are used in the parser. /
1751
1752	/ Fetch a token from INPUT.*
1753	We must not use this function inside bracket expressions. /*
1754
1755	static void
1756	fetch_token (re_token_t result, re_string_t input, reg_syntax_t syntax)
1757	{
1758	re_string_skip_bytes (input, peek_token (result, input, syntax));
1759	}
1760
1761	/ Peek a token from INPUT, and return the length of the token.*
1762	We must not use this function inside bracket expressions. /*
1763
1764	static int
1765	peek_token (re_token_t token, re_string_t input, reg_syntax_t syntax)
1766	{
1767	unsigned char c;
1768
1769	if (re_string_eoi (input))
1770	{
1771	token->type = END_OF_RE;
1772	return `0`;
1773	}
1774
1775	c = re_string_peek_byte (input, `0`);
1776	token->opr.c = c;
1777
1778	token->word_char = `0`;
1779	#ifdef RE_ENABLE_I18N
1780	token->mb_partial = `0`;
1781	if (input->mb_cur_max > `1` &&
1782	!re_string_first_byte (input, re_string_cur_idx (input)))
1783	{
1784	token->type = CHARACTER;
1785	token->mb_partial = `1`;
1786	return `1`;
1787	}
1788	#endif
1789	if (c == `'\\'`)
1790	{
1791	unsigned char c2;
1792	if (re_string_cur_idx (input) + `1` >= re_string_length (input))
1793	{
1794	token->type = BACK_SLASH;
1795	return `1`;
1796	}
1797
1798	c2 = re_string_peek_byte_case (input, `1`);
1799	token->opr.c = c2;
1800	token->type = CHARACTER;
1801	#ifdef RE_ENABLE_I18N
1802	if (input->mb_cur_max > `1`)
1803	{
1804	wint_t wc = re_string_wchar_at (input,
1805	re_string_cur_idx (input) + `1`);
1806	token->word_char = IS_WIDE_WORD_CHAR (wc) != `0`;
1807	}
1808	else
1809	#endif
1810	token->word_char = IS_WORD_CHAR (c2) != `0`;
1811
1812	switch (c2)
1813	{
1814	case `'\|'`:
1815	if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_NO_BK_VBAR))
1816	token->type = OP_ALT;
1817	break;
1818	case `'1'`: case `'2'`: case `'3'`: case `'4'`: case `'5'`:
1819	case `'6'`: case `'7'`: case `'8'`: case `'9'`:
1820	if (!(syntax & RE_NO_BK_REFS))
1821	{
1822	token->type = OP_BACK_REF;
1823	token->opr.idx = c2 - `'1'`;
1824	}
1825	break;
1826	case `'<'`:
1827	if (!(syntax & RE_NO_GNU_OPS))
1828	{
1829	token->type = ANCHOR;
1830	token->opr.ctx_type = WORD_FIRST;
1831	}
1832	break;
1833	case `'>'`:
1834	if (!(syntax & RE_NO_GNU_OPS))
1835	{
1836	token->type = ANCHOR;
1837	token->opr.ctx_type = WORD_LAST;
1838	}
1839	break;
1840	case `'b'`:
1841	if (!(syntax & RE_NO_GNU_OPS))
1842	{
1843	token->type = ANCHOR;
1844	token->opr.ctx_type = WORD_DELIM;
1845	}
1846	break;
1847	case `'B'`:
1848	if (!(syntax & RE_NO_GNU_OPS))
1849	{
1850	token->type = ANCHOR;
1851	token->opr.ctx_type = NOT_WORD_DELIM;
1852	}
1853	break;
1854	case `'w'`:
1855	if (!(syntax & RE_NO_GNU_OPS))
1856	token->type = OP_WORD;
1857	break;
1858	case `'W'`:
1859	if (!(syntax & RE_NO_GNU_OPS))
1860	token->type = OP_NOTWORD;
1861	break;
1862	case `'s'`:
1863	if (!(syntax & RE_NO_GNU_OPS))
1864	token->type = OP_SPACE;
1865	break;
1866	case `'S'`:
1867	if (!(syntax & RE_NO_GNU_OPS))
1868	token->type = OP_NOTSPACE;
1869	break;
1870	case '`':
1871	if (!(syntax & RE_NO_GNU_OPS))
1872	{
1873	token->type = ANCHOR;
1874	token->opr.ctx_type = BUF_FIRST;
1875	}
1876	break;
1877	case `'\''`:
1878	if (!(syntax & RE_NO_GNU_OPS))
1879	{
1880	token->type = ANCHOR;
1881	token->opr.ctx_type = BUF_LAST;
1882	}
1883	break;
1884	case `'('`:
1885	if (!(syntax & RE_NO_BK_PARENS))
1886	token->type = OP_OPEN_SUBEXP;
1887	break;
1888	case `')'`:
1889	if (!(syntax & RE_NO_BK_PARENS))
1890	token->type = OP_CLOSE_SUBEXP;
1891	break;
1892	case `'+'`:
1893	if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
1894	token->type = OP_DUP_PLUS;
1895	break;
1896	case `'?'`:
1897	if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_BK_PLUS_QM))
1898	token->type = OP_DUP_QUESTION;
1899	break;
1900	case `'{'`:
1901	if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
1902	token->type = OP_OPEN_DUP_NUM;
1903	break;
1904	case `'}'`:
1905	if ((syntax & RE_INTERVALS) && (!(syntax & RE_NO_BK_BRACES)))
1906	token->type = OP_CLOSE_DUP_NUM;
1907	break;
1908	default:
1909	break;
1910	}
1911	return `2`;
1912	}
1913
1914	token->type = CHARACTER;
1915	#ifdef RE_ENABLE_I18N
1916	if (input->mb_cur_max > `1`)
1917	{
1918	wint_t wc = re_string_wchar_at (input, re_string_cur_idx (input));
1919	token->word_char = IS_WIDE_WORD_CHAR (wc) != `0`;
1920	}
1921	else
1922	#endif
1923	token->word_char = IS_WORD_CHAR (token->opr.c);
1924
1925	switch (c)
1926	{
1927	case `'\n'`:
1928	if (syntax & RE_NEWLINE_ALT)
1929	token->type = OP_ALT;
1930	break;
1931	case `'\|'`:
1932	if (!(syntax & RE_LIMITED_OPS) && (syntax & RE_NO_BK_VBAR))
1933	token->type = OP_ALT;
1934	break;
1935	case `'*'`:
1936	token->type = OP_DUP_ASTERISK;
1937	break;
1938	case `'+'`:
1939	if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
1940	token->type = OP_DUP_PLUS;
1941	break;
1942	case `'?'`:
1943	if (!(syntax & RE_LIMITED_OPS) && !(syntax & RE_BK_PLUS_QM))
1944	token->type = OP_DUP_QUESTION;
1945	break;
1946	case `'{'`:
1947	if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1948	token->type = OP_OPEN_DUP_NUM;
1949	break;
1950	case `'}'`:
1951	if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1952	token->type = OP_CLOSE_DUP_NUM;
1953	break;
1954	case `'('`:
1955	if (syntax & RE_NO_BK_PARENS)
1956	token->type = OP_OPEN_SUBEXP;
1957	break;
1958	case `')'`:
1959	if (syntax & RE_NO_BK_PARENS)
1960	token->type = OP_CLOSE_SUBEXP;
1961	break;
1962	case `'['`:
1963	token->type = OP_OPEN_BRACKET;
1964	break;
1965	case `'.'`:
1966	token->type = OP_PERIOD;
1967	break;
1968	case `'^'`:
1969	if (!(syntax & (RE_CONTEXT_INDEP_ANCHORS \| RE_CARET_ANCHORS_HERE)) &&
1970	re_string_cur_idx (input) != `0`)
1971	{
1972	char prev = re_string_peek_byte (input, -`1`);
1973	if (!(syntax & RE_NEWLINE_ALT) \|\| prev != `'\n'`)
1974	break;
1975	}
1976	token->type = ANCHOR;
1977	token->opr.ctx_type = LINE_FIRST;
1978	break;
1979	case `'$'`:
1980	if (!(syntax & RE_CONTEXT_INDEP_ANCHORS) &&
1981	re_string_cur_idx (input) + `1` != re_string_length (input))
1982	{
1983	re_token_t next;
1984	re_string_skip_bytes (input, `1`);
1985	peek_token (&next, input, syntax);
1986	re_string_skip_bytes (input, -`1`);
1987	if (next.type != OP_ALT && next.type != OP_CLOSE_SUBEXP)
1988	break;
1989	}
1990	token->type = ANCHOR;
1991	token->opr.ctx_type = LINE_LAST;
1992	break;
1993	default:
1994	break;
1995	}
1996	return `1`;
1997	}
1998
1999	/ Peek a token from INPUT, and return the length of the token.*
2000	We must not use this function out of bracket expressions. /*
2001
2002	static int
2003	peek_token_bracket (re_token_t token, re_string_t input, reg_syntax_t syntax)
2004	{
2005	unsigned char c;
2006	if (re_string_eoi (input))
2007	{
2008	token->type = END_OF_RE;
2009	return `0`;
2010	}
2011	c = re_string_peek_byte (input, `0`);
2012	token->opr.c = c;
2013
2014	#ifdef RE_ENABLE_I18N
2015	if (input->mb_cur_max > `1` &&
2016	!re_string_first_byte (input, re_string_cur_idx (input)))
2017	{
2018	token->type = CHARACTER;
2019	return `1`;
2020	}
2021	#endif /* RE_ENABLE_I18N */
2022
2023	if (c == `'\\'` && (syntax & RE_BACKSLASH_ESCAPE_IN_LISTS)
2024	&& re_string_cur_idx (input) + `1` < re_string_length (input))
2025	{
2026	/ In this case, '\' escape a character. /
2027	unsigned char c2;
2028	re_string_skip_bytes (input, `1`);
2029	c2 = re_string_peek_byte (input, `0`);
2030	token->opr.c = c2;
2031	token->type = CHARACTER;
2032	return `1`;
2033	}
2034	if (c == `'['`) / '[' is a special char in a bracket exps. /
2035	{
2036	unsigned char c2;
2037	int token_len;
2038	if (re_string_cur_idx (input) + `1` < re_string_length (input))
2039	c2 = re_string_peek_byte (input, `1`);
2040	else
2041	c2 = `0`;
2042	token->opr.c = c2;
2043	token_len = `2`;
2044	switch (c2)
2045	{
2046	case `'.'`:
2047	token->type = OP_OPEN_COLL_ELEM;
2048	break;
2049	case `'='`:
2050	token->type = OP_OPEN_EQUIV_CLASS;
2051	break;
2052	case `':'`:
2053	if (syntax & RE_CHAR_CLASSES)
2054	{
2055	token->type = OP_OPEN_CHAR_CLASS;
2056	break;
2057	}
2058	/ else fall through. /
2059	default:
2060	token->type = CHARACTER;
2061	token->opr.c = c;
2062	token_len = `1`;
2063	break;
2064	}
2065	return token_len;
2066	}
2067	switch (c)
2068	{
2069	case `'-'`:
2070	token->type = OP_CHARSET_RANGE;
2071	break;
2072	case `']'`:
2073	token->type = OP_CLOSE_BRACKET;
2074	break;
2075	case `'^'`:
2076	token->type = OP_NON_MATCH_LIST;
2077	break;
2078	default:
2079	token->type = CHARACTER;
2080	}
2081	return `1`;
2082	}
2083
2084	/ Functions for parser. /
2085
2086	/ Entry point of the parser.*
2087	Parse the regular expression REGEXP and return the structure tree.
2088	If an error occurs, ERR is set by error code, and return NULL.
2089	This function build the following tree, from regular expression <reg_exp>:
2090	CAT
2091	/ \
2092	/ \
2093	<reg_exp> EOR
2094
2095	CAT means concatenation.
2096	EOR means end of regular expression. /*
2097
2098	static bin_tree_t *
2099	parse (re_string_t regexp, regex_t preg, reg_syntax_t syntax,
2100	reg_errcode_t *err)
2101	{
2102	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
2103	bin_tree_t tree, eor, *root;
2104	re_token_t current_token;
2105	dfa->syntax = syntax;
2106	fetch_token (&current_token, regexp, syntax \| RE_CARET_ANCHORS_HERE);
2107	tree = parse_reg_exp (regexp, preg, &current_token, syntax, `0`, err);
2108	if (BE (*err != REG_NOERROR && tree == NULL, `0`))
2109	return NULL;
2110	eor = create_tree (dfa, NULL, NULL, END_OF_RE);
2111	if (tree != NULL)
2112	root = create_tree (dfa, tree, eor, CONCAT);
2113	else
2114	root = eor;
2115	if (BE (eor == NULL \|\| root == NULL, `0`))
2116	{
2117	*err = REG_ESPACE;
2118	return NULL;
2119	}
2120	return root;
2121	}
2122
2123	/ This function build the following tree, from regular expression*
2124	<branch1>\|<branch2>:
2125	ALT
2126	/ \
2127	/ \
2128	<branch1> <branch2>
2129
2130	ALT means alternative, which represents the operator '\|'. /*
2131
2132	static bin_tree_t *
2133	parse_reg_exp (re_string_t regexp, regex_t preg, re_token_t *token,
2134	reg_syntax_t syntax, int nest, reg_errcode_t *err)
2135	{
2136	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
2137	bin_tree_t tree, branch = NULL;
2138	tree = parse_branch (regexp, preg, token, syntax, nest, err);
2139	if (BE (*err != REG_NOERROR && tree == NULL, `0`))
2140	return NULL;
2141
2142	while (token->type == OP_ALT)
2143	{
2144	fetch_token (token, regexp, syntax \| RE_CARET_ANCHORS_HERE);
2145	if (token->type != OP_ALT && token->type != END_OF_RE
2146	&& (nest == `0` \|\| token->type != OP_CLOSE_SUBEXP))
2147	{
2148	branch = parse_branch (regexp, preg, token, syntax, nest, err);
2149	if (BE (*err != REG_NOERROR && branch == NULL, `0`))
2150	{
2151	if (tree != NULL)
2152	postorder (tree, free_tree, NULL);
2153	return NULL;
2154	}
2155	}
2156	else
2157	branch = NULL;
2158	tree = create_tree (dfa, tree, branch, OP_ALT);
2159	if (BE (tree == NULL, `0`))
2160	{
2161	*err = REG_ESPACE;
2162	return NULL;
2163	}
2164	}
2165	return tree;
2166	}
2167
2168	/ This function build the following tree, from regular expression*
2169	<exp1><exp2>:
2170	CAT
2171	/ \
2172	/ \
2173	<exp1> <exp2>
2174
2175	CAT means concatenation. /*
2176
2177	static bin_tree_t *
2178	parse_branch (re_string_t regexp, regex_t preg, re_token_t *token,
2179	reg_syntax_t syntax, int nest, reg_errcode_t *err)
2180	{
2181	bin_tree_t tree, exp;
2182	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
2183	tree = parse_expression (regexp, preg, token, syntax, nest, err);
2184	if (BE (*err != REG_NOERROR && tree == NULL, `0`))
2185	return NULL;
2186
2187	while (token->type != OP_ALT && token->type != END_OF_RE
2188	&& (nest == `0` \|\| token->type != OP_CLOSE_SUBEXP))
2189	{
2190	exp = parse_expression (regexp, preg, token, syntax, nest, err);
2191	if (BE (*err != REG_NOERROR && exp == NULL, `0`))
2192	{
2193	if (tree != NULL)
2194	postorder (tree, free_tree, NULL);
2195	return NULL;
2196	}
2197	if (tree != NULL && exp != NULL)
2198	{
2199	bin_tree_t *newtree = create_tree (dfa, tree, exp, CONCAT);
2200	if (newtree == NULL)
2201	{
2202	postorder (exp, free_tree, NULL);
2203	postorder (tree, free_tree, NULL);
2204	*err = REG_ESPACE;
2205	return NULL;
2206	}
2207	tree = newtree;
2208	}
2209	else if (tree == NULL)
2210	tree = exp;
2211	/ Otherwise exp == NULL, we don't need to create new tree. /
2212	}
2213	return tree;
2214	}
2215
2216	/ This function build the following tree, from regular expression a:
2217	*
2218	\|
2219	a
2220	*/
2221
2222	static bin_tree_t *
2223	parse_expression (re_string_t regexp, regex_t preg, re_token_t *token,
2224	reg_syntax_t syntax, int nest, reg_errcode_t *err)
2225	{
2226	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
2227	bin_tree_t *tree;
2228	switch (token->type)
2229	{
2230	case CHARACTER:
2231	tree = create_token_tree (dfa, NULL, NULL, token);
2232	if (BE (tree == NULL, `0`))
2233	{
2234	*err = REG_ESPACE;
2235	return NULL;
2236	}
2237	#ifdef RE_ENABLE_I18N
2238	if (dfa->mb_cur_max > `1`)
2239	{
2240	while (!re_string_eoi (regexp)
2241	&& !re_string_first_byte (regexp, re_string_cur_idx (regexp)))
2242	{
2243	bin_tree_t *mbc_remain;
2244	fetch_token (token, regexp, syntax);
2245	mbc_remain = create_token_tree (dfa, NULL, NULL, token);
2246	tree = create_tree (dfa, tree, mbc_remain, CONCAT);
2247	if (BE (mbc_remain == NULL \|\| tree == NULL, `0`))
2248	{
2249	*err = REG_ESPACE;
2250	return NULL;
2251	}
2252	}
2253	}
2254	#endif
2255	break;
2256	case OP_OPEN_SUBEXP:
2257	tree = parse_sub_exp (regexp, preg, token, syntax, nest + `1`, err);
2258	if (BE (*err != REG_NOERROR && tree == NULL, `0`))
2259	return NULL;
2260	break;
2261	case OP_OPEN_BRACKET:
2262	tree = parse_bracket_exp (regexp, dfa, token, syntax, err);
2263	if (BE (*err != REG_NOERROR && tree == NULL, `0`))
2264	return NULL;
2265	break;
2266	case OP_BACK_REF:
2267	if (!BE (dfa->completed_bkref_map & (`1` << token->opr.idx), `1`))
2268	{
2269	*err = REG_ESUBREG;
2270	return NULL;
2271	}
2272	dfa->used_bkref_map \|= `1` << token->opr.idx;
2273	tree = create_token_tree (dfa, NULL, NULL, token);
2274	if (BE (tree == NULL, `0`))
2275	{
2276	*err = REG_ESPACE;
2277	return NULL;
2278	}
2279	++dfa->nbackref;
2280	dfa->has_mb_node = `1`;
2281	break;
2282	case OP_OPEN_DUP_NUM:
2283	if (syntax & RE_CONTEXT_INVALID_DUP)
2284	{
2285	*err = REG_BADRPT;
2286	return NULL;
2287	}
2288	/ FALLTHROUGH /
2289	case OP_DUP_ASTERISK:
2290	case OP_DUP_PLUS:
2291	case OP_DUP_QUESTION:
2292	if (syntax & RE_CONTEXT_INVALID_OPS)
2293	{
2294	*err = REG_BADRPT;
2295	return NULL;
2296	}
2297	else if (syntax & RE_CONTEXT_INDEP_OPS)
2298	{
2299	fetch_token (token, regexp, syntax);
2300	return parse_expression (regexp, preg, token, syntax, nest, err);
2301	}
2302	/ else fall through /
2303	case OP_CLOSE_SUBEXP:
2304	if ((token->type == OP_CLOSE_SUBEXP) &&
2305	!(syntax & RE_UNMATCHED_RIGHT_PAREN_ORD))
2306	{
2307	*err = REG_ERPAREN;
2308	return NULL;
2309	}
2310	/ else fall through /
2311	case OP_CLOSE_DUP_NUM:
2312	/ We treat it as a normal character. /
2313
2314	/ Then we can these characters as normal characters. /
2315	token->type = CHARACTER;
2316	/ mb_partial and word_char bits should be initialized already*
2317	by peek_token. /*
2318	tree = create_token_tree (dfa, NULL, NULL, token);
2319	if (BE (tree == NULL, `0`))
2320	{
2321	*err = REG_ESPACE;
2322	return NULL;
2323	}
2324	break;
2325	case ANCHOR:
2326	if ((token->opr.ctx_type
2327	& (WORD_DELIM \| NOT_WORD_DELIM \| WORD_FIRST \| WORD_LAST))
2328	&& dfa->word_ops_used == `0`)
2329	init_word_char (dfa);
2330	if (token->opr.ctx_type == WORD_DELIM
2331	\|\| token->opr.ctx_type == NOT_WORD_DELIM)
2332	{
2333	bin_tree_t tree_first, tree_last;
2334	if (token->opr.ctx_type == WORD_DELIM)
2335	{
2336	token->opr.ctx_type = WORD_FIRST;
2337	tree_first = create_token_tree (dfa, NULL, NULL, token);
2338	token->opr.ctx_type = WORD_LAST;
2339	}
2340	else
2341	{
2342	token->opr.ctx_type = INSIDE_WORD;
2343	tree_first = create_token_tree (dfa, NULL, NULL, token);
2344	token->opr.ctx_type = INSIDE_NOTWORD;
2345	}
2346	tree_last = create_token_tree (dfa, NULL, NULL, token);
2347	tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
2348	if (BE (tree_first == NULL \|\| tree_last == NULL \|\| tree == NULL, `0`))
2349	{
2350	*err = REG_ESPACE;
2351	return NULL;
2352	}
2353	}
2354	else
2355	{
2356	tree = create_token_tree (dfa, NULL, NULL, token);
2357	if (BE (tree == NULL, `0`))
2358	{
2359	*err = REG_ESPACE;
2360	return NULL;
2361	}
2362	}
2363	/ We must return here, since ANCHORs can't be followed*
2364	by repetition operators.
2365	eg. RE"^" is invalid or "<ANCHOR(^)><CHAR()>",
2366	it must not be "<ANCHOR(^)><REPEAT()>". /
2367	fetch_token (token, regexp, syntax);
2368	return tree;
2369	case OP_PERIOD:
2370	tree = create_token_tree (dfa, NULL, NULL, token);
2371	if (BE (tree == NULL, `0`))
2372	{
2373	*err = REG_ESPACE;
2374	return NULL;
2375	}
2376	if (dfa->mb_cur_max > `1`)
2377	dfa->has_mb_node = `1`;
2378	break;
2379	case OP_WORD:
2380	case OP_NOTWORD:
2381	tree = build_charclass_op (dfa, regexp->trans,
2382	(const unsigned char *) "alnum",
2383	(const unsigned char *) "_",
2384	token->type == OP_NOTWORD, err);
2385	if (BE (*err != REG_NOERROR && tree == NULL, `0`))
2386	return NULL;
2387	break;
2388	case OP_SPACE:
2389	case OP_NOTSPACE:
2390	tree = build_charclass_op (dfa, regexp->trans,
2391	(const unsigned char *) "space",
2392	(const unsigned char *) "",
2393	token->type == OP_NOTSPACE, err);
2394	if (BE (*err != REG_NOERROR && tree == NULL, `0`))
2395	return NULL;
2396	break;
2397	case OP_ALT:
2398	case END_OF_RE:
2399	return NULL;
2400	case BACK_SLASH:
2401	*err = REG_EESCAPE;
2402	return NULL;
2403	default:
2404	/ Must not happen? /
2405	#ifdef DEBUG
2406	assert (`0`);
2407	#endif
2408	return NULL;
2409	}
2410	fetch_token (token, regexp, syntax);
2411
2412	while (token->type == OP_DUP_ASTERISK \|\| token->type == OP_DUP_PLUS
2413	\|\| token->type == OP_DUP_QUESTION \|\| token->type == OP_OPEN_DUP_NUM)
2414	{
2415	bin_tree_t *dup_tree = parse_dup_op (tree, regexp, dfa, token, syntax, err);
2416	if (BE (*err != REG_NOERROR && dup_tree == NULL, `0`))
2417	{
2418	if (tree != NULL)
2419	postorder (tree, free_tree, NULL);
2420	return NULL;
2421	}
2422	tree = dup_tree;
2423	/ In BRE consecutive duplications are not allowed. /
2424	if ((syntax & RE_CONTEXT_INVALID_DUP)
2425	&& (token->type == OP_DUP_ASTERISK
2426	\|\| token->type == OP_OPEN_DUP_NUM))
2427	{
2428	if (tree != NULL)
2429	postorder (tree, free_tree, NULL);
2430	*err = REG_BADRPT;
2431	return NULL;
2432	}
2433	}
2434
2435	return tree;
2436	}
2437
2438	/ This function build the following tree, from regular expression*
2439	(<reg_exp>):
2440	SUBEXP
2441	\|
2442	<reg_exp>
2443	*/
2444
2445	static bin_tree_t *
2446	parse_sub_exp (re_string_t regexp, regex_t preg, re_token_t *token,
2447	reg_syntax_t syntax, int nest, reg_errcode_t *err)
2448	{
2449	re_dfa_t dfa = (re_dfa_t ) preg->buffer;
2450	bin_tree_t *tree;
2451	size_t cur_nsub;
2452	cur_nsub = preg->re_nsub++;
2453
2454	fetch_token (token, regexp, syntax \| RE_CARET_ANCHORS_HERE);
2455
2456	/ The subexpression may be a null string. /
2457	if (token->type == OP_CLOSE_SUBEXP)
2458	tree = NULL;
2459	else
2460	{
2461	tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
2462	if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, `0`))
2463	{
2464	if (tree != NULL)
2465	postorder (tree, free_tree, NULL);
2466	*err = REG_EPAREN;
2467	}
2468	if (BE (*err != REG_NOERROR, `0`))
2469	return NULL;
2470	}
2471
2472	if (cur_nsub <= `'9'` - `'1'`)
2473	dfa->completed_bkref_map \|= `1` << cur_nsub;
2474
2475	tree = create_tree (dfa, tree, NULL, SUBEXP);
2476	if (BE (tree == NULL, `0`))
2477	{
2478	*err = REG_ESPACE;
2479	return NULL;
2480	}
2481	tree->token.opr.idx = cur_nsub;
2482	return tree;
2483	}
2484
2485	/ This function parse repetition operators like "", "+", "{1,3}" etc. /*
2486
2487	static bin_tree_t *
2488	parse_dup_op (bin_tree_t elem, re_string_t regexp, re_dfa_t *dfa,
2489	re_token_t token, reg_syntax_t syntax, reg_errcode_t err)
2490	{
2491	bin_tree_t tree = NULL, old_tree = NULL;
2492	int i, start, end, start_idx = re_string_cur_idx (regexp);
2493	re_token_t start_token = *token;
2494
2495	if (token->type == OP_OPEN_DUP_NUM)
2496	{
2497	end = `0`;
2498	start = fetch_number (regexp, token, syntax);
2499	if (start == -`1`)
2500	{
2501	if (token->type == CHARACTER && token->opr.c == `','`)
2502	start = `0`; / We treat "{,m}" as "{0,m}". /
2503	else
2504	{
2505	err = REG_BADBR; /* <re>{} is invalid. /
2506	return NULL;
2507	}
2508	}
2509	if (BE (start != -`2`, `1`))
2510	{
2511	/ We treat "{n}" as "{n,n}". /
2512	end = ((token->type == OP_CLOSE_DUP_NUM) ? start
2513	: ((token->type == CHARACTER && token->opr.c == `','`)
2514	? fetch_number (regexp, token, syntax) : -`2`));
2515	}
2516	if (BE (start == -`2` \|\| end == -`2`, `0`))
2517	{
2518	/ Invalid sequence. /
2519	if (BE (!(syntax & RE_INVALID_INTERVAL_ORD), `0`))
2520	{
2521	if (token->type == END_OF_RE)
2522	*err = REG_EBRACE;
2523	else
2524	*err = REG_BADBR;
2525
2526	return NULL;
2527	}
2528
2529	/ If the syntax bit is set, rollback. /
2530	re_string_set_index (regexp, start_idx);
2531	*token = start_token;
2532	token->type = CHARACTER;
2533	/ mb_partial and word_char bits should be already initialized by*
2534	peek_token. /*
2535	return elem;
2536	}
2537
2538	if (BE ((end != -`1` && start > end) \|\| token->type != OP_CLOSE_DUP_NUM, `0`))
2539	{
2540	/ First number greater than second. /
2541	*err = REG_BADBR;
2542	return NULL;
2543	}
2544	}
2545	else
2546	{
2547	start = (token->type == OP_DUP_PLUS) ? `1` : `0`;
2548	end = (token->type == OP_DUP_QUESTION) ? `1` : -`1`;
2549	}
2550
2551	fetch_token (token, regexp, syntax);
2552
2553	if (BE (elem == NULL, `0`))
2554	return NULL;
2555	if (BE (start == `0` && end == `0`, `0`))
2556	{
2557	postorder (elem, free_tree, NULL);
2558	return NULL;
2559	}
2560
2561	/ Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". /
2562	if (BE (start > `0`, `0`))
2563	{
2564	tree = elem;
2565	for (i = `2`; i <= start; ++i)
2566	{
2567	elem = duplicate_tree (elem, dfa);
2568	tree = create_tree (dfa, tree, elem, CONCAT);
2569	if (BE (elem == NULL \|\| tree == NULL, `0`))
2570	goto parse_dup_op_espace;
2571	}
2572
2573	if (start == end)
2574	return tree;
2575
2576	/ Duplicate ELEM before it is marked optional. /
2577	elem = duplicate_tree (elem, dfa);
2578	if (BE (elem == NULL, `0`))
2579	goto parse_dup_op_espace;
2580	old_tree = tree;
2581	}
2582	else
2583	old_tree = NULL;
2584
2585	if (elem->token.type == SUBEXP)
2586	postorder (elem, mark_opt_subexp, (void ) (long*) elem->token.opr.idx);
2587
2588	tree = create_tree (dfa, elem, NULL, (end == -`1` ? OP_DUP_ASTERISK : OP_ALT));
2589	if (BE (tree == NULL, `0`))
2590	goto parse_dup_op_espace;
2591
2592	/ This loop is actually executed only when end != -1,*
2593	to rewrite <re>{0,n} as (<re>(<re>...<re>?)?)?... We have
2594	already created the start+1-th copy. /*
2595	for (i = start + `2`; i <= end; ++i)
2596	{
2597	elem = duplicate_tree (elem, dfa);
2598	tree = create_tree (dfa, tree, elem, CONCAT);
2599	if (BE (elem == NULL \|\| tree == NULL, `0`))
2600	goto parse_dup_op_espace;
2601
2602	tree = create_tree (dfa, tree, NULL, OP_ALT);
2603	if (BE (tree == NULL, `0`))
2604	goto parse_dup_op_espace;
2605	}
2606
2607	if (old_tree)
2608	tree = create_tree (dfa, old_tree, tree, CONCAT);
2609
2610	return tree;
2611
2612	parse_dup_op_espace:
2613	*err = REG_ESPACE;
2614	return NULL;
2615	}
2616
2617	/ Size of the names for collating symbol/equivalence_class/character_class.*
2618	I'm not sure, but maybe enough. /*
2619	#define BRACKET_NAME_BUF_SIZE 32
2620
2621	#ifndef _LIBC
2622	/ Local function for parse_bracket_exp only used in case of NOT _LIBC.*
2623	Build the range expression which starts from START_ELEM, and ends
2624	at END_ELEM. The result are written to MBCSET and SBCSET.
2625	RANGE_ALLOC is the allocated size of mbcset->range_starts, and
2626	mbcset->range_ends, is a pointer argument since we may
2627	update it. /*
2628
2629	static reg_errcode_t
2630	# ifdef RE_ENABLE_I18N
2631	build_range_exp (bitset_t sbcset, re_charset_t mbcset, int* *range_alloc,
2632	bracket_elem_t start_elem, bracket_elem_t end_elem)
2633	# else /* not RE_ENABLE_I18N */
2634	build_range_exp (bitset_t sbcset, bracket_elem_t *start_elem,
2635	bracket_elem_t *end_elem)
2636	# endif /* not RE_ENABLE_I18N */
2637	{
2638	unsigned int start_ch, end_ch;
2639	/ Equivalence Classes and Character Classes can't be a range start/end. /
2640	if (BE (start_elem->type == EQUIV_CLASS \|\| start_elem->type == CHAR_CLASS
2641	\|\| end_elem->type == EQUIV_CLASS \|\| end_elem->type == CHAR_CLASS,
2642	`0`))
2643	return REG_ERANGE;
2644
2645	/ We can handle no multi character collating elements without libc*
2646	support. /*
2647	if (BE ((start_elem->type == COLL_SYM
2648	&& strlen ((char *) start_elem->opr.name) > `1`)
2649	\|\| (end_elem->type == COLL_SYM
2650	&& strlen ((char *) end_elem->opr.name) > `1`), `0`))
2651	return REG_ECOLLATE;
2652
2653	# ifdef RE_ENABLE_I18N
2654	{
2655	wchar_t wc;
2656	wint_t start_wc;
2657	wint_t end_wc;
2658	wchar_t cmp_buf[`6`] = {L`'\0'`, L`'\0'`, L`'\0'`, L`'\0'`, L`'\0'`, L`'\0'`};
2659
2660	start_ch = ((start_elem->type == SB_CHAR) ? start_elem->opr.ch
2661	: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[`0`]
2662	: `0`));
2663	end_ch = ((end_elem->type == SB_CHAR) ? end_elem->opr.ch
2664	: ((end_elem->type == COLL_SYM) ? end_elem->opr.name[`0`]
2665	: `0`));
2666	start_wc = ((start_elem->type == SB_CHAR \|\| start_elem->type == COLL_SYM)
2667	? __btowc (start_ch) : start_elem->opr.wch);
2668	end_wc = ((end_elem->type == SB_CHAR \|\| end_elem->type == COLL_SYM)
2669	? __btowc (end_ch) : end_elem->opr.wch);
2670	if (start_wc == WEOF \|\| end_wc == WEOF)
2671	return REG_ECOLLATE;
2672	cmp_buf[`0`] = start_wc;
2673	cmp_buf[`4`] = end_wc;
2674	if (__wcscoll (cmp_buf, cmp_buf + `4`) > `0`)
2675	return REG_ERANGE;
2676
2677	/ Got valid collation sequence values, add them as a new entry.*
2678	However, for !_LIBC we have no collation elements: if the
2679	character set is single byte, the single byte character set
2680	that we build below suffices. parse_bracket_exp passes
2681	no MBCSET if dfa->mb_cur_max == 1. /*
2682	if (mbcset)
2683	{
2684	/ Check the space of the arrays. /
2685	if (BE (*range_alloc == mbcset->nranges, `0`))
2686	{
2687	/ There is not enough space, need realloc. /
2688	wchar_t new_array_start, new_array_end;
2689	int new_nranges;
2690
2691	/ +1 in case of mbcset->nranges is 0. /
2692	new_nranges = `2` * mbcset->nranges + `1`;
2693	/ Use realloc since mbcset->range_starts and mbcset->range_ends*
2694	are NULL if range_alloc == 0. /
2695	new_array_start = re_realloc (mbcset->range_starts, wchar_t,
2696	new_nranges);
2697	new_array_end = re_realloc (mbcset->range_ends, wchar_t,
2698	new_nranges);
2699
2700	if (BE (new_array_start == NULL \|\| new_array_end == NULL, `0`))
2701	return REG_ESPACE;
2702
2703	mbcset->range_starts = new_array_start;
2704	mbcset->range_ends = new_array_end;
2705	*range_alloc = new_nranges;
2706	}
2707
2708	mbcset->range_starts[mbcset->nranges] = start_wc;
2709	mbcset->range_ends[mbcset->nranges++] = end_wc;
2710	}
2711
2712	/ Build the table for single byte characters. /
2713	for (wc = `0`; wc < SBC_MAX; ++wc)
2714	{
2715	cmp_buf[`2`] = wc;
2716	if (__wcscoll (cmp_buf, cmp_buf + `2`) <= `0`
2717	&& __wcscoll (cmp_buf + `2`, cmp_buf + `4`) <= `0`)
2718	bitset_set (sbcset, wc);
2719	}
2720	}
2721	# else /* not RE_ENABLE_I18N */
2722	{
2723	unsigned int ch;
2724	start_ch = ((start_elem->type == SB_CHAR ) ? start_elem->opr.ch
2725	: ((start_elem->type == COLL_SYM) ? start_elem->opr.name[`0`]
2726	: `0`));
2727	end_ch = ((end_elem->type == SB_CHAR ) ? end_elem->opr.ch
2728	: ((end_elem->type == COLL_SYM) ? end_elem->opr.name[`0`]
2729	: `0`));
2730	if (start_ch > end_ch)
2731	return REG_ERANGE;
2732	/ Build the table for single byte characters. /
2733	for (ch = `0`; ch < SBC_MAX; ++ch)
2734	if (start_ch <= ch && ch <= end_ch)
2735	bitset_set (sbcset, ch);
2736	}
2737	# endif /* not RE_ENABLE_I18N */
2738	return REG_NOERROR;
2739	}
2740	#endif /* not _LIBC */
2741
2742	#ifndef _LIBC
2743	/ Helper function for parse_bracket_exp only used in case of NOT _LIBC..*
2744	Build the collating element which is represented by NAME.
2745	The result are written to MBCSET and SBCSET.
2746	COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
2747	pointer argument since we may update it. /*
2748
2749	static reg_errcode_t
2750	# ifdef RE_ENABLE_I18N
2751	build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
2752	int coll_sym_alloc, const* unsigned char *name)
2753	# else /* not RE_ENABLE_I18N */
2754	build_collating_symbol (bitset_t sbcset, const unsigned char *name)
2755	# endif /* not RE_ENABLE_I18N */
2756	{
2757	size_t name_len = strlen ((const char *) name);
2758	if (BE (name_len != `1`, `0`))
2759	return REG_ECOLLATE;
2760	else
2761	{
2762	bitset_set (sbcset, name[`0`]);
2763	return REG_NOERROR;
2764	}
2765	}
2766	#endif /* not _LIBC */
2767
2768	/ This function parse bracket expression like "[abc]", "[a-c]",*
2769	"[[.a-a.]]" etc. /*
2770
2771	static bin_tree_t *
2772	parse_bracket_exp (re_string_t regexp, re_dfa_t dfa, re_token_t *token,
2773	reg_syntax_t syntax, reg_errcode_t *err)
2774	{
2775	#ifdef _LIBC
2776	const unsigned char *collseqmb;
2777	const char *collseqwc;
2778	uint32_t nrules;
2779	int32_t table_size;
2780	const int32_t *symb_table;
2781	const unsigned char *extra;
2782
2783	/ Local function for parse_bracket_exp used in _LIBC environment.*
2784	Seek the collating symbol entry corresponding to NAME.
2785	Return the index of the symbol in the SYMB_TABLE,
2786	or -1 if not found. /*
2787
2788	auto inline int32_t
2789	__attribute__ ((always_inline))
2790	seek_collating_symbol_entry (const unsigned char *name, size_t name_len)
2791	{
2792	int32_t elem;
2793
2794	for (elem = `0`; elem < table_size; elem++)
2795	if (symb_table[`2` * elem] != `0`)
2796	{
2797	int32_t idx = symb_table[`2` * elem + `1`];
2798	/ Skip the name of collating element name. /
2799	idx += `1` + extra[idx];
2800	if (/ Compare the length of the name. /
2801	name_len == extra[idx]
2802	/ Compare the name. /
2803	&& memcmp (name, &extra[idx + `1`], name_len) == `0`)
2804	/ Yep, this is the entry. /
2805	return elem;
2806	}
2807	return -`1`;
2808	}
2809
2810	/ Local function for parse_bracket_exp used in _LIBC environment.*
2811	Look up the collation sequence value of BR_ELEM.
2812	Return the value if succeeded, UINT_MAX otherwise. /*
2813
2814	auto inline unsigned int
2815	__attribute__ ((always_inline))
2816	lookup_collation_sequence_value (bracket_elem_t *br_elem)
2817	{
2818	if (br_elem->type == SB_CHAR)
2819	{
2820	/*
2821	if (MB_CUR_MAX == 1)
2822	*/
2823	if (nrules == `0`)
2824	return collseqmb[br_elem->opr.ch];
2825	else
2826	{
2827	wint_t wc = __btowc (br_elem->opr.ch);
2828	return __collseq_table_lookup (collseqwc, wc);
2829	}
2830	}
2831	else if (br_elem->type == MB_CHAR)
2832	{
2833	if (nrules != `0`)
2834	return __collseq_table_lookup (collseqwc, br_elem->opr.wch);
2835	}
2836	else if (br_elem->type == COLL_SYM)
2837	{
2838	size_t sym_name_len = strlen ((char *) br_elem->opr.name);
2839	if (nrules != `0`)
2840	{
2841	int32_t elem, idx;
2842	elem = seek_collating_symbol_entry (br_elem->opr.name,
2843	sym_name_len);
2844	if (elem != -`1`)
2845	{
2846	/ We found the entry. /
2847	idx = symb_table[`2` * elem + `1`];
2848	/ Skip the name of collating element name. /
2849	idx += `1` + extra[idx];
2850	/ Skip the byte sequence of the collating element. /
2851	idx += `1` + extra[idx];
2852	/ Adjust for the alignment. /
2853	idx = (idx + `3`) & ~`3`;
2854	/ Skip the multibyte collation sequence value. /
2855	idx += sizeof (unsigned int);
2856	/ Skip the wide char sequence of the collating element. /
2857	idx += sizeof (unsigned int) *
2858	(`1` + (unsigned* int *) (extra + idx));
2859	/ Return the collation sequence value. /
2860	return (unsigned* int *) (extra + idx);
2861	}
2862	else if (sym_name_len == `1`)
2863	{
2864	/ No valid character. Match it as a single byte*
2865	character. /*
2866	return collseqmb[br_elem->opr.name[`0`]];
2867	}
2868	}
2869	else if (sym_name_len == `1`)
2870	return collseqmb[br_elem->opr.name[`0`]];
2871	}
2872	return UINT_MAX;
2873	}
2874
2875	/ Local function for parse_bracket_exp used in _LIBC environment.*
2876	Build the range expression which starts from START_ELEM, and ends
2877	at END_ELEM. The result are written to MBCSET and SBCSET.
2878	RANGE_ALLOC is the allocated size of mbcset->range_starts, and
2879	mbcset->range_ends, is a pointer argument since we may
2880	update it. /*
2881
2882	auto inline reg_errcode_t
2883	__attribute__ ((always_inline))
2884	build_range_exp (bitset_t sbcset, re_charset_t mbcset, int* *range_alloc,
2885	bracket_elem_t start_elem, bracket_elem_t end_elem)
2886	{
2887	unsigned int ch;
2888	uint32_t start_collseq;
2889	uint32_t end_collseq;
2890
2891	/ Equivalence Classes and Character Classes can't be a range*
2892	start/end. /*
2893	if (BE (start_elem->type == EQUIV_CLASS \|\| start_elem->type == CHAR_CLASS
2894	\|\| end_elem->type == EQUIV_CLASS \|\| end_elem->type == CHAR_CLASS,
2895	`0`))
2896	return REG_ERANGE;
2897
2898	start_collseq = lookup_collation_sequence_value (start_elem);
2899	end_collseq = lookup_collation_sequence_value (end_elem);
2900	/ Check start/end collation sequence values. /
2901	if (BE (start_collseq == UINT_MAX \|\| end_collseq == UINT_MAX, `0`))
2902	return REG_ECOLLATE;
2903	if (BE ((syntax & RE_NO_EMPTY_RANGES) && start_collseq > end_collseq, `0`))
2904	return REG_ERANGE;
2905
2906	/ Got valid collation sequence values, add them as a new entry.*
2907	However, if we have no collation elements, and the character set
2908	is single byte, the single byte character set that we
2909	build below suffices. /*
2910	if (nrules > `0` \|\| dfa->mb_cur_max > `1`)
2911	{
2912	/ Check the space of the arrays. /
2913	if (BE (*range_alloc == mbcset->nranges, `0`))
2914	{
2915	/ There is not enough space, need realloc. /
2916	uint32_t *new_array_start;
2917	uint32_t *new_array_end;
2918	int new_nranges;
2919
2920	/ +1 in case of mbcset->nranges is 0. /
2921	new_nranges = `2` * mbcset->nranges + `1`;
2922	new_array_start = re_realloc (mbcset->range_starts, uint32_t,
2923	new_nranges);
2924	new_array_end = re_realloc (mbcset->range_ends, uint32_t,
2925	new_nranges);
2926
2927	if (BE (new_array_start == NULL \|\| new_array_end == NULL, `0`))
2928	return REG_ESPACE;
2929
2930	mbcset->range_starts = new_array_start;
2931	mbcset->range_ends = new_array_end;
2932	*range_alloc = new_nranges;
2933	}
2934
2935	mbcset->range_starts[mbcset->nranges] = start_collseq;
2936	mbcset->range_ends[mbcset->nranges++] = end_collseq;
2937	}
2938
2939	/ Build the table for single byte characters. /
2940	for (ch = `0`; ch < SBC_MAX; ch++)
2941	{
2942	uint32_t ch_collseq;
2943	/*
2944	if (MB_CUR_MAX == 1)
2945	*/
2946	if (nrules == `0`)
2947	ch_collseq = collseqmb[ch];
2948	else
2949	ch_collseq = __collseq_table_lookup (collseqwc, __btowc (ch));
2950	if (start_collseq <= ch_collseq && ch_collseq <= end_collseq)
2951	bitset_set (sbcset, ch);
2952	}
2953	return REG_NOERROR;
2954	}
2955
2956	/ Local function for parse_bracket_exp used in _LIBC environment.*
2957	Build the collating element which is represented by NAME.
2958	The result are written to MBCSET and SBCSET.
2959	COLL_SYM_ALLOC is the allocated size of mbcset->coll_sym, is a
2960	pointer argument since we may update it. /*
2961
2962	auto inline reg_errcode_t
2963	__attribute__ ((always_inline))
2964	build_collating_symbol (bitset_t sbcset, re_charset_t *mbcset,
2965	int coll_sym_alloc, const* unsigned char *name)
2966	{
2967	int32_t elem, idx;
2968	size_t name_len = strlen ((const char *) name);
2969	if (nrules != `0`)
2970	{
2971	elem = seek_collating_symbol_entry (name, name_len);
2972	if (elem != -`1`)
2973	{
2974	/ We found the entry. /
2975	idx = symb_table[`2` * elem + `1`];
2976	/ Skip the name of collating element name. /
2977	idx += `1` + extra[idx];
2978	}
2979	else if (name_len == `1`)
2980	{
2981	/ No valid character, treat it as a normal*
2982	character. /*
2983	bitset_set (sbcset, name[`0`]);
2984	return REG_NOERROR;
2985	}
2986	else
2987	return REG_ECOLLATE;
2988
2989	/ Got valid collation sequence, add it as a new entry. /
2990	/ Check the space of the arrays. /
2991	if (BE (*coll_sym_alloc == mbcset->ncoll_syms, `0`))
2992	{
2993	/ Not enough, realloc it. /
2994	/ +1 in case of mbcset->ncoll_syms is 0. /
2995	int new_coll_sym_alloc = `2` * mbcset->ncoll_syms + `1`;
2996	/ Use realloc since mbcset->coll_syms is NULL*
2997	if alloc == 0. /
2998	int32_t *new_coll_syms = re_realloc (mbcset->coll_syms, int32_t,
2999	new_coll_sym_alloc);
3000	if (BE (new_coll_syms == NULL, `0`))
3001	return REG_ESPACE;
3002	mbcset->coll_syms = new_coll_syms;
3003	*coll_sym_alloc = new_coll_sym_alloc;
3004	}
3005	mbcset->coll_syms[mbcset->ncoll_syms++] = idx;
3006	return REG_NOERROR;
3007	}
3008	else
3009	{
3010	if (BE (name_len != `1`, `0`))
3011	return REG_ECOLLATE;
3012	else
3013	{
3014	bitset_set (sbcset, name[`0`]);
3015	return REG_NOERROR;
3016	}
3017	}
3018	}
3019	#endif
3020
3021	re_token_t br_token;
3022	re_bitset_ptr_t sbcset;
3023	#ifdef RE_ENABLE_I18N
3024	re_charset_t *mbcset;
3025	int coll_sym_alloc = `0`, range_alloc = `0`, mbchar_alloc = `0`;
3026	int equiv_class_alloc = `0`, char_class_alloc = `0`;
3027	#endif /* not RE_ENABLE_I18N */
3028	int non_match = `0`;
3029	bin_tree_t *work_tree;
3030	int token_len;
3031	int first_round = `1`;
3032	#ifdef _LIBC
3033	collseqmb = (const unsigned char *)
3034	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3035	nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3036	if (nrules)
3037	{
3038	/*
3039	if (MB_CUR_MAX > 1)
3040	*/
3041	collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3042	table_size = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_SYMB_HASH_SIZEMB);
3043	symb_table = (const int32_t *) _NL_CURRENT (LC_COLLATE,
3044	_NL_COLLATE_SYMB_TABLEMB);
3045	extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3046	_NL_COLLATE_SYMB_EXTRAMB);
3047	}
3048	#endif
3049	sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), `1`);
3050	#ifdef RE_ENABLE_I18N
3051	mbcset = (re_charset_t ) calloc (sizeof* (re_charset_t), `1`);
3052	#endif /* RE_ENABLE_I18N */
3053	#ifdef RE_ENABLE_I18N
3054	if (BE (sbcset == NULL \|\| mbcset == NULL, `0`))
3055	#else
3056	if (BE (sbcset == NULL, `0`))
3057	#endif /* RE_ENABLE_I18N */
3058	{
3059	re_free (sbcset);
3060	#ifdef RE_ENABLE_I18N
3061	re_free (mbcset);
3062	#endif
3063	*err = REG_ESPACE;
3064	return NULL;
3065	}
3066
3067	token_len = peek_token_bracket (token, regexp, syntax);
3068	if (BE (token->type == END_OF_RE, `0`))
3069	{
3070	*err = REG_BADPAT;
3071	goto parse_bracket_exp_free_return;
3072	}
3073	if (token->type == OP_NON_MATCH_LIST)
3074	{
3075	#ifdef RE_ENABLE_I18N
3076	mbcset->non_match = `1`;
3077	#endif /* not RE_ENABLE_I18N */
3078	non_match = `1`;
3079	if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
3080	bitset_set (sbcset, `'\n'`);
3081	re_string_skip_bytes (regexp, token_len); / Skip a token. /
3082	token_len = peek_token_bracket (token, regexp, syntax);
3083	if (BE (token->type == END_OF_RE, `0`))
3084	{
3085	*err = REG_BADPAT;
3086	goto parse_bracket_exp_free_return;
3087	}
3088	}
3089
3090	/ We treat the first ']' as a normal character. /
3091	if (token->type == OP_CLOSE_BRACKET)
3092	token->type = CHARACTER;
3093
3094	while (`1`)
3095	{
3096	bracket_elem_t start_elem, end_elem;
3097	unsigned char start_name_buf[BRACKET_NAME_BUF_SIZE];
3098	unsigned char end_name_buf[BRACKET_NAME_BUF_SIZE];
3099	reg_errcode_t ret;
3100	int token_len2 = `0`, is_range_exp = `0`;
3101	re_token_t token2;
3102
3103	start_elem.opr.name = start_name_buf;
3104	start_elem.type = COLL_SYM;
3105	ret = parse_bracket_element (&start_elem, regexp, token, token_len, dfa,
3106	syntax, first_round);
3107	if (BE (ret != REG_NOERROR, `0`))
3108	{
3109	*err = ret;
3110	goto parse_bracket_exp_free_return;
3111	}
3112	first_round = `0`;
3113
3114	/ Get information about the next token. We need it in any case. /
3115	token_len = peek_token_bracket (token, regexp, syntax);
3116
3117	/ Do not check for ranges if we know they are not allowed. /
3118	if (start_elem.type != CHAR_CLASS && start_elem.type != EQUIV_CLASS)
3119	{
3120	if (BE (token->type == END_OF_RE, `0`))
3121	{
3122	*err = REG_EBRACK;
3123	goto parse_bracket_exp_free_return;
3124	}
3125	if (token->type == OP_CHARSET_RANGE)
3126	{
3127	re_string_skip_bytes (regexp, token_len); / Skip '-'. /
3128	token_len2 = peek_token_bracket (&token2, regexp, syntax);
3129	if (BE (token2.type == END_OF_RE, `0`))
3130	{
3131	*err = REG_EBRACK;
3132	goto parse_bracket_exp_free_return;
3133	}
3134	if (token2.type == OP_CLOSE_BRACKET)
3135	{
3136	/ We treat the last '-' as a normal character. /
3137	re_string_skip_bytes (regexp, -token_len);
3138	token->type = CHARACTER;
3139	}
3140	else
3141	is_range_exp = `1`;
3142	}
3143	}
3144
3145	if (is_range_exp == `1`)
3146	{
3147	end_elem.opr.name = end_name_buf;
3148	end_elem.type = COLL_SYM;
3149	ret = parse_bracket_element (&end_elem, regexp, &token2, token_len2,
3150	dfa, syntax, `1`);
3151	if (BE (ret != REG_NOERROR, `0`))
3152	{
3153	*err = ret;
3154	goto parse_bracket_exp_free_return;
3155	}
3156
3157	token_len = peek_token_bracket (token, regexp, syntax);
3158
3159	#ifdef _LIBC
3160	*err = build_range_exp (sbcset, mbcset, &range_alloc,
3161	&start_elem, &end_elem);
3162	#else
3163	# ifdef RE_ENABLE_I18N
3164	*err = build_range_exp (sbcset,
3165	dfa->mb_cur_max > `1` ? mbcset : NULL,
3166	&range_alloc, &start_elem, &end_elem);
3167	# else
3168	*err = build_range_exp (sbcset, &start_elem, &end_elem);
3169	# endif
3170	#endif /* RE_ENABLE_I18N */
3171	if (BE (*err != REG_NOERROR, `0`))
3172	goto parse_bracket_exp_free_return;
3173	}
3174	else
3175	{
3176	switch (start_elem.type)
3177	{
3178	case SB_CHAR:
3179	bitset_set (sbcset, start_elem.opr.ch);
3180	break;
3181	#ifdef RE_ENABLE_I18N
3182	case MB_CHAR:
3183	/ Check whether the array has enough space. /
3184	if (BE (mbchar_alloc == mbcset->nmbchars, `0`))
3185	{
3186	wchar_t *new_mbchars;
3187	/ Not enough, realloc it. /
3188	/ +1 in case of mbcset->nmbchars is 0. /
3189	mbchar_alloc = `2` * mbcset->nmbchars + `1`;
3190	/ Use realloc since array is NULL if alloc == 0. /*
3191	new_mbchars = re_realloc (mbcset->mbchars, wchar_t,
3192	mbchar_alloc);
3193	if (BE (new_mbchars == NULL, `0`))
3194	goto parse_bracket_exp_espace;
3195	mbcset->mbchars = new_mbchars;
3196	}
3197	mbcset->mbchars[mbcset->nmbchars++] = start_elem.opr.wch;
3198	break;
3199	#endif /* RE_ENABLE_I18N */
3200	case EQUIV_CLASS:
3201	*err = build_equiv_class (sbcset,
3202	#ifdef RE_ENABLE_I18N
3203	mbcset, &equiv_class_alloc,
3204	#endif /* RE_ENABLE_I18N */
3205	start_elem.opr.name);
3206	if (BE (*err != REG_NOERROR, `0`))
3207	goto parse_bracket_exp_free_return;
3208	break;
3209	case COLL_SYM:
3210	*err = build_collating_symbol (sbcset,
3211	#ifdef RE_ENABLE_I18N
3212	mbcset, &coll_sym_alloc,
3213	#endif /* RE_ENABLE_I18N */
3214	start_elem.opr.name);
3215	if (BE (*err != REG_NOERROR, `0`))
3216	goto parse_bracket_exp_free_return;
3217	break;
3218	case CHAR_CLASS:
3219	*err = build_charclass (regexp->trans, sbcset,
3220	#ifdef RE_ENABLE_I18N
3221	mbcset, &char_class_alloc,
3222	#endif /* RE_ENABLE_I18N */
3223	start_elem.opr.name, syntax);
3224	if (BE (*err != REG_NOERROR, `0`))
3225	goto parse_bracket_exp_free_return;
3226	break;
3227	default:
3228	assert (`0`);
3229	break;
3230	}
3231	}
3232	if (BE (token->type == END_OF_RE, `0`))
3233	{
3234	*err = REG_EBRACK;
3235	goto parse_bracket_exp_free_return;
3236	}
3237	if (token->type == OP_CLOSE_BRACKET)
3238	break;
3239	}
3240
3241	re_string_skip_bytes (regexp, token_len); / Skip a token. /
3242
3243	/ If it is non-matching list. /
3244	if (non_match)
3245	bitset_not (sbcset);
3246
3247	#ifdef RE_ENABLE_I18N
3248	/ Ensure only single byte characters are set. /
3249	if (dfa->mb_cur_max > `1`)
3250	bitset_mask (sbcset, dfa->sb_char);
3251
3252	if (mbcset->nmbchars \|\| mbcset->ncoll_syms \|\| mbcset->nequiv_classes
3253	\|\| mbcset->nranges \|\| (dfa->mb_cur_max > `1` && (mbcset->nchar_classes
3254	\|\| mbcset->non_match)))
3255	{
3256	bin_tree_t *mbc_tree;
3257	int sbc_idx;
3258	/ Build a tree for complex bracket. /
3259	dfa->has_mb_node = `1`;
3260	br_token.type = COMPLEX_BRACKET;
3261	br_token.opr.mbcset = mbcset;
3262	mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3263	if (BE (mbc_tree == NULL, `0`))
3264	goto parse_bracket_exp_espace;
3265	for (sbc_idx = `0`; sbc_idx < BITSET_WORDS; ++sbc_idx)
3266	if (sbcset[sbc_idx])
3267	break;
3268	/ If there are no bits set in sbcset, there is no point*
3269	of having both SIMPLE_BRACKET and COMPLEX_BRACKET. /*
3270	if (sbc_idx < BITSET_WORDS)
3271	{
3272	/ Build a tree for simple bracket. /
3273	br_token.type = SIMPLE_BRACKET;
3274	br_token.opr.sbcset = sbcset;
3275	work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3276	if (BE (work_tree == NULL, `0`))
3277	goto parse_bracket_exp_espace;
3278
3279	/ Then join them by ALT node. /
3280	work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
3281	if (BE (work_tree == NULL, `0`))
3282	goto parse_bracket_exp_espace;
3283	}
3284	else
3285	{
3286	re_free (sbcset);
3287	work_tree = mbc_tree;
3288	}
3289	}
3290	else
3291	#endif /* not RE_ENABLE_I18N */
3292	{
3293	#ifdef RE_ENABLE_I18N
3294	free_charset (mbcset);
3295	#endif
3296	/ Build a tree for simple bracket. /
3297	br_token.type = SIMPLE_BRACKET;
3298	br_token.opr.sbcset = sbcset;
3299	work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3300	if (BE (work_tree == NULL, `0`))
3301	goto parse_bracket_exp_espace;
3302	}
3303	return work_tree;
3304
3305	parse_bracket_exp_espace:
3306	*err = REG_ESPACE;
3307	parse_bracket_exp_free_return:
3308	re_free (sbcset);
3309	#ifdef RE_ENABLE_I18N
3310	free_charset (mbcset);
3311	#endif /* RE_ENABLE_I18N */
3312	return NULL;
3313	}
3314
3315	/ Parse an element in the bracket expression. /
3316
3317	static reg_errcode_t
3318	parse_bracket_element (bracket_elem_t elem, re_string_t regexp,
3319	re_token_t token, int* token_len, re_dfa_t *dfa,
3320	reg_syntax_t syntax, int accept_hyphen)
3321	{
3322	#ifdef RE_ENABLE_I18N
3323	int cur_char_size;
3324	cur_char_size = re_string_char_size_at (regexp, re_string_cur_idx (regexp));
3325	if (cur_char_size > `1`)
3326	{
3327	elem->type = MB_CHAR;
3328	elem->opr.wch = re_string_wchar_at (regexp, re_string_cur_idx (regexp));
3329	re_string_skip_bytes (regexp, cur_char_size);
3330	return REG_NOERROR;
3331	}
3332	#endif /* RE_ENABLE_I18N */
3333	re_string_skip_bytes (regexp, token_len); / Skip a token. /
3334	if (token->type == OP_OPEN_COLL_ELEM \|\| token->type == OP_OPEN_CHAR_CLASS
3335	\|\| token->type == OP_OPEN_EQUIV_CLASS)
3336	return parse_bracket_symbol (elem, regexp, token);
3337	if (BE (token->type == OP_CHARSET_RANGE, `0`) && !accept_hyphen)
3338	{
3339	/ A '-' must only appear as anything but a range indicator before*
3340	the closing bracket. Everything else is an error. /*
3341	re_token_t token2;
3342	(void) peek_token_bracket (&token2, regexp, syntax);
3343	if (token2.type != OP_CLOSE_BRACKET)
3344	/ The actual error value is not standardized since this whole*
3345	case is undefined. But ERANGE makes good sense. /*
3346	return REG_ERANGE;
3347	}
3348	elem->type = SB_CHAR;
3349	elem->opr.ch = token->opr.c;
3350	return REG_NOERROR;
3351	}
3352
3353	/ Parse a bracket symbol in the bracket expression. Bracket symbols are*
3354	such as [:<character_class>:], [.<collating_element>.], and
3355	[=<equivalent_class>=]. /*
3356
3357	static reg_errcode_t
3358	parse_bracket_symbol (bracket_elem_t elem, re_string_t regexp,
3359	re_token_t *token)
3360	{
3361	unsigned char ch, delim = token->opr.c;
3362	int i = `0`;
3363	if (re_string_eoi(regexp))
3364	return REG_EBRACK;
3365	for (;; ++i)
3366	{
3367	if (i >= BRACKET_NAME_BUF_SIZE)
3368	return REG_EBRACK;
3369	if (token->type == OP_OPEN_CHAR_CLASS)
3370	ch = re_string_fetch_byte_case (regexp);
3371	else
3372	ch = re_string_fetch_byte (regexp);
3373	if (re_string_eoi(regexp))
3374	return REG_EBRACK;
3375	if (ch == delim && re_string_peek_byte (regexp, `0`) == `']'`)
3376	break;
3377	elem->opr.name[i] = ch;
3378	}
3379	re_string_skip_bytes (regexp, `1`);
3380	elem->opr.name[i] = `'\0'`;
3381	switch (token->type)
3382	{
3383	case OP_OPEN_COLL_ELEM:
3384	elem->type = COLL_SYM;
3385	break;
3386	case OP_OPEN_EQUIV_CLASS:
3387	elem->type = EQUIV_CLASS;
3388	break;
3389	case OP_OPEN_CHAR_CLASS:
3390	elem->type = CHAR_CLASS;
3391	break;
3392	default:
3393	break;
3394	}
3395	return REG_NOERROR;
3396	}
3397
3398	/ Helper function for parse_bracket_exp.*
3399	Build the equivalence class which is represented by NAME.
3400	The result are written to MBCSET and SBCSET.
3401	EQUIV_CLASS_ALLOC is the allocated size of mbcset->equiv_classes,
3402	is a pointer argument since we may update it. /*
3403
3404	static reg_errcode_t
3405	#ifdef RE_ENABLE_I18N
3406	build_equiv_class (bitset_t sbcset, re_charset_t *mbcset,
3407	int equiv_class_alloc, const* unsigned char *name)
3408	#else /* not RE_ENABLE_I18N */
3409	build_equiv_class (bitset_t sbcset, const unsigned char *name)
3410	#endif /* not RE_ENABLE_I18N */
3411	{
3412	#ifdef _LIBC
3413	uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3414	if (nrules != `0`)
3415	{
3416	const int32_t table, indirect;
3417	const unsigned char weights, extra, *cp;
3418	unsigned char char_buf[`2`];
3419	int32_t idx1, idx2;
3420	unsigned int ch;
3421	size_t len;
3422	/ Calculate the index for equivalence class. /
3423	cp = name;
3424	table = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3425	weights = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3426	_NL_COLLATE_WEIGHTMB);
3427	extra = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
3428	_NL_COLLATE_EXTRAMB);
3429	indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE,
3430	_NL_COLLATE_INDIRECTMB);
3431	idx1 = findidx (table, indirect, extra, &cp, -`1`);
3432	if (BE (idx1 == `0` \|\| *cp != `'\0'`, `0`))
3433	/ This isn't a valid character. /
3434	return REG_ECOLLATE;
3435
3436	/ Build single byte matching table for this equivalence class. /
3437	len = weights[idx1 & `0xffffff`];
3438	for (ch = `0`; ch < SBC_MAX; ++ch)
3439	{
3440	char_buf[`0`] = ch;
3441	cp = char_buf;
3442	idx2 = findidx (table, indirect, extra, &cp, `1`);
3443	/*
3444	idx2 = table[ch];
3445	*/
3446	if (idx2 == `0`)
3447	/ This isn't a valid character. /
3448	continue;
3449	/ Compare only if the length matches and the collation rule*
3450	index is the same. /*
3451	if (len == weights[idx2 & `0xffffff`] && (idx1 >> `24`) == (idx2 >> `24`))
3452	{
3453	int cnt = `0`;
3454
3455	while (cnt <= len &&
3456	weights[(idx1 & `0xffffff`) + `1` + cnt]
3457	== weights[(idx2 & `0xffffff`) + `1` + cnt])
3458	++cnt;
3459
3460	if (cnt > len)
3461	bitset_set (sbcset, ch);
3462	}
3463	}
3464	/ Check whether the array has enough space. /
3465	if (BE (*equiv_class_alloc == mbcset->nequiv_classes, `0`))
3466	{
3467	/ Not enough, realloc it. /
3468	/ +1 in case of mbcset->nequiv_classes is 0. /
3469	int new_equiv_class_alloc = `2` * mbcset->nequiv_classes + `1`;
3470	/ Use realloc since the array is NULL if alloc == 0. /*
3471	int32_t *new_equiv_classes = re_realloc (mbcset->equiv_classes,
3472	int32_t,
3473	new_equiv_class_alloc);
3474	if (BE (new_equiv_classes == NULL, `0`))
3475	return REG_ESPACE;
3476	mbcset->equiv_classes = new_equiv_classes;
3477	*equiv_class_alloc = new_equiv_class_alloc;
3478	}
3479	mbcset->equiv_classes[mbcset->nequiv_classes++] = idx1;
3480	}
3481	else
3482	#endif /* _LIBC */
3483	{
3484	if (BE (strlen ((const char *) name) != `1`, `0`))
3485	return REG_ECOLLATE;
3486	bitset_set (sbcset, *name);
3487	}
3488	return REG_NOERROR;
3489	}
3490
3491	/ Helper function for parse_bracket_exp.*
3492	Build the character class which is represented by NAME.
3493	The result are written to MBCSET and SBCSET.
3494	CHAR_CLASS_ALLOC is the allocated size of mbcset->char_classes,
3495	is a pointer argument since we may update it. /*
3496
3497	static reg_errcode_t
3498	#ifdef RE_ENABLE_I18N
3499	build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
3500	re_charset_t mbcset, int* *char_class_alloc,
3501	const unsigned char *class_name, reg_syntax_t syntax)
3502	#else /* not RE_ENABLE_I18N */
3503	build_charclass (RE_TRANSLATE_TYPE trans, bitset_t sbcset,
3504	const unsigned char *class_name, reg_syntax_t syntax)
3505	#endif /* not RE_ENABLE_I18N */
3506	{
3507	int i;
3508	const char name = (const* char *) class_name;
3509
3510	/ In case of REG_ICASE "upper" and "lower" match the both of*
3511	upper and lower cases. /*
3512	if ((syntax & RE_ICASE)
3513	&& (strcmp (name, "upper") == `0` \|\| strcmp (name, "lower") == `0`))
3514	name = "alpha";
3515
3516	#ifdef RE_ENABLE_I18N
3517	/ Check the space of the arrays. /
3518	if (BE (*char_class_alloc == mbcset->nchar_classes, `0`))
3519	{
3520	/ Not enough, realloc it. /
3521	/ +1 in case of mbcset->nchar_classes is 0. /
3522	int new_char_class_alloc = `2` * mbcset->nchar_classes + `1`;
3523	/ Use realloc since array is NULL if alloc == 0. /*
3524	wctype_t *new_char_classes = re_realloc (mbcset->char_classes, wctype_t,
3525	new_char_class_alloc);
3526	if (BE (new_char_classes == NULL, `0`))
3527	return REG_ESPACE;
3528	mbcset->char_classes = new_char_classes;
3529	*char_class_alloc = new_char_class_alloc;
3530	}
3531	mbcset->char_classes[mbcset->nchar_classes++] = __wctype (name);
3532	#endif /* RE_ENABLE_I18N */
3533
3534	#define BUILD_CHARCLASS_LOOP(ctype_func) \
3535	do { \
3536	if (BE (trans != NULL, 0)) \
3537	{ \
3538	for (i = 0; i < SBC_MAX; ++i) \
3539	if (ctype_func (i)) \
3540	bitset_set (sbcset, trans[i]); \
3541	} \
3542	else \
3543	{ \
3544	for (i = 0; i < SBC_MAX; ++i) \
3545	if (ctype_func (i)) \
3546	bitset_set (sbcset, i); \
3547	} \
3548	} while (0)
3549
3550	if (strcmp (name, "alnum") == `0`)
3551	BUILD_CHARCLASS_LOOP (isalnum);
3552	else if (strcmp (name, "cntrl") == `0`)
3553	BUILD_CHARCLASS_LOOP (iscntrl);
3554	else if (strcmp (name, "lower") == `0`)
3555	BUILD_CHARCLASS_LOOP (islower);
3556	else if (strcmp (name, "space") == `0`)
3557	BUILD_CHARCLASS_LOOP (isspace);
3558	else if (strcmp (name, "alpha") == `0`)
3559	BUILD_CHARCLASS_LOOP (isalpha);
3560	else if (strcmp (name, "digit") == `0`)
3561	BUILD_CHARCLASS_LOOP (isdigit);
3562	else if (strcmp (name, "print") == `0`)
3563	BUILD_CHARCLASS_LOOP (isprint);
3564	else if (strcmp (name, "upper") == `0`)
3565	BUILD_CHARCLASS_LOOP (isupper);
3566	else if (strcmp (name, "blank") == `0`)
3567	BUILD_CHARCLASS_LOOP (isblank);
3568	else if (strcmp (name, "graph") == `0`)
3569	BUILD_CHARCLASS_LOOP (isgraph);
3570	else if (strcmp (name, "punct") == `0`)
3571	BUILD_CHARCLASS_LOOP (ispunct);
3572	else if (strcmp (name, "xdigit") == `0`)
3573	BUILD_CHARCLASS_LOOP (isxdigit);
3574	else
3575	return REG_ECTYPE;
3576
3577	return REG_NOERROR;
3578	}
3579
3580	static bin_tree_t *
3581	build_charclass_op (re_dfa_t *dfa, RE_TRANSLATE_TYPE trans,
3582	const unsigned char *class_name,
3583	const unsigned char extra, int* non_match,
3584	reg_errcode_t *err)
3585	{
3586	re_bitset_ptr_t sbcset;
3587	#ifdef RE_ENABLE_I18N
3588	re_charset_t *mbcset;
3589	int alloc = `0`;
3590	#endif /* not RE_ENABLE_I18N */
3591	reg_errcode_t ret;
3592	re_token_t br_token;
3593	bin_tree_t *tree;
3594
3595	sbcset = (re_bitset_ptr_t) calloc (sizeof (bitset_t), `1`);
3596	#ifdef RE_ENABLE_I18N
3597	mbcset = (re_charset_t ) calloc (sizeof* (re_charset_t), `1`);
3598	#endif /* RE_ENABLE_I18N */
3599
3600	#ifdef RE_ENABLE_I18N
3601	if (BE (sbcset == NULL \|\| mbcset == NULL, `0`))
3602	#else /* not RE_ENABLE_I18N */
3603	if (BE (sbcset == NULL, `0`))
3604	#endif /* not RE_ENABLE_I18N */
3605	{
3606	*err = REG_ESPACE;
3607	return NULL;
3608	}
3609
3610	if (non_match)
3611	{
3612	#ifdef RE_ENABLE_I18N
3613	mbcset->non_match = `1`;
3614	#endif /* not RE_ENABLE_I18N */
3615	}
3616
3617	/ We don't care the syntax in this case. /
3618	ret = build_charclass (trans, sbcset,
3619	#ifdef RE_ENABLE_I18N
3620	mbcset, &alloc,
3621	#endif /* RE_ENABLE_I18N */
3622	class_name, `0`);
3623
3624	if (BE (ret != REG_NOERROR, `0`))
3625	{
3626	re_free (sbcset);
3627	#ifdef RE_ENABLE_I18N
3628	free_charset (mbcset);
3629	#endif /* RE_ENABLE_I18N */
3630	*err = ret;
3631	return NULL;
3632	}
3633	/ \w match '_' also. /
3634	for (; *extra; extra++)
3635	bitset_set (sbcset, *extra);
3636
3637	/ If it is non-matching list. /
3638	if (non_match)
3639	bitset_not (sbcset);
3640
3641	#ifdef RE_ENABLE_I18N
3642	/ Ensure only single byte characters are set. /
3643	if (dfa->mb_cur_max > `1`)
3644	bitset_mask (sbcset, dfa->sb_char);
3645	#endif
3646
3647	/ Build a tree for simple bracket. /
3648	br_token.type = SIMPLE_BRACKET;
3649	br_token.opr.sbcset = sbcset;
3650	tree = create_token_tree (dfa, NULL, NULL, &br_token);
3651	if (BE (tree == NULL, `0`))
3652	goto build_word_op_espace;
3653
3654	#ifdef RE_ENABLE_I18N
3655	if (dfa->mb_cur_max > `1`)
3656	{
3657	bin_tree_t *mbc_tree;
3658	/ Build a tree for complex bracket. /
3659	br_token.type = COMPLEX_BRACKET;
3660	br_token.opr.mbcset = mbcset;
3661	dfa->has_mb_node = `1`;
3662	mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
3663	if (BE (mbc_tree == NULL, `0`))
3664	goto build_word_op_espace;
3665	/ Then join them by ALT node. /
3666	tree = create_tree (dfa, tree, mbc_tree, OP_ALT);
3667	if (BE (mbc_tree != NULL, `1`))
3668	return tree;
3669	}
3670	else
3671	{
3672	free_charset (mbcset);
3673	return tree;
3674	}
3675	#else /* not RE_ENABLE_I18N */
3676	return tree;
3677	#endif /* not RE_ENABLE_I18N */
3678
3679	build_word_op_espace:
3680	re_free (sbcset);
3681	#ifdef RE_ENABLE_I18N
3682	free_charset (mbcset);
3683	#endif /* RE_ENABLE_I18N */
3684	*err = REG_ESPACE;
3685	return NULL;
3686	}
3687
3688	/ This is intended for the expressions like "a{1,3}".*
3689	Fetch a number from `input', and return the number.
3690	Return -1, if the number field is empty like "{,1}".
3691	Return -2, If an error is occured. /*
3692
3693	static int
3694	fetch_number (re_string_t input, re_token_t token, reg_syntax_t syntax)
3695	{
3696	int num = -`1`;
3697	unsigned char c;
3698	while (`1`)
3699	{
3700	fetch_token (token, input, syntax);
3701	c = token->opr.c;
3702	if (BE (token->type == END_OF_RE, `0`))
3703	return -`2`;
3704	if (token->type == OP_CLOSE_DUP_NUM \|\| c == `','`)
3705	break;
3706	num = ((token->type != CHARACTER \|\| c < `'0'` \|\| `'9'` < c \|\| num == -`2`)
3707	? -`2` : ((num == -`1`) ? c - `'0'` : num * `10` + c - `'0'`));
3708	num = (num > RE_DUP_MAX) ? -`2` : num;
3709	}
3710	return num;
3711	}
3712
3713	#ifdef RE_ENABLE_I18N
3714	static void
3715	free_charset (re_charset_t *cset)
3716	{
3717	re_free (cset->mbchars);
3718	# ifdef _LIBC
3719	re_free (cset->coll_syms);
3720	re_free (cset->equiv_classes);
3721	re_free (cset->range_starts);
3722	re_free (cset->range_ends);
3723	# endif
3724	re_free (cset->char_classes);
3725	re_free (cset);
3726	}
3727	#endif /* RE_ENABLE_I18N */
3728
3729	/ Functions for binary tree operation. /
3730
3731	/ Create a tree node. /
3732
3733	static bin_tree_t *
3734	create_tree (re_dfa_t dfa, bin_tree_t left, bin_tree_t *right,
3735	re_token_type_t type)
3736	{
3737	re_token_t t;
3738	t.type = type;
3739	return create_token_tree (dfa, left, right, &t);
3740	}
3741
3742	static bin_tree_t *
3743	create_token_tree (re_dfa_t dfa, bin_tree_t left, bin_tree_t *right,
3744	const re_token_t *token)
3745	{
3746	bin_tree_t *tree;
3747	if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, `0`))
3748	{
3749	bin_tree_storage_t *storage = re_malloc (bin_tree_storage_t, `1`);
3750
3751	if (storage == NULL)
3752	return NULL;
3753	storage->next = dfa->str_tree_storage;
3754	dfa->str_tree_storage = storage;
3755	dfa->str_tree_storage_idx = `0`;
3756	}
3757	tree = &dfa->str_tree_storage->data[dfa->str_tree_storage_idx++];
3758
3759	tree->parent = NULL;
3760	tree->left = left;
3761	tree->right = right;
3762	tree->token = *token;
3763	tree->token.duplicated = `0`;
3764	tree->token.opt_subexp = `0`;
3765	tree->first = NULL;
3766	tree->next = NULL;
3767	tree->node_idx = -`1`;
3768
3769	if (left != NULL)
3770	left->parent = tree;
3771	if (right != NULL)
3772	right->parent = tree;
3773	return tree;
3774	}
3775
3776	/ Mark the tree SRC as an optional subexpression.*
3777	To be called from preorder or postorder. /*
3778
3779	static reg_errcode_t
3780	mark_opt_subexp (void extra, bin_tree_t node)
3781	{
3782	int idx = (int) (long) extra;
3783	if (node->token.type == SUBEXP && node->token.opr.idx == idx)
3784	node->token.opt_subexp = `1`;
3785
3786	return REG_NOERROR;
3787	}
3788
3789	/ Free the allocated memory inside NODE. /
3790
3791	static void
3792	free_token (re_token_t *node)
3793	{
3794	#ifdef RE_ENABLE_I18N
3795	if (node->type == COMPLEX_BRACKET && node->duplicated == `0`)
3796	free_charset (node->opr.mbcset);
3797	else
3798	#endif /* RE_ENABLE_I18N */
3799	if (node->type == SIMPLE_BRACKET && node->duplicated == `0`)
3800	re_free (node->opr.sbcset);
3801	}
3802
3803	/ Worker function for tree walking. Free the allocated memory inside NODE*
3804	and its children. /*
3805
3806	static reg_errcode_t
3807	free_tree (void extra, bin_tree_t node)
3808	{
3809	free_token (&node->token);
3810	return REG_NOERROR;
3811	}
3812
3813
3814	/ Duplicate the node SRC, and return new node. This is a preorder*
3815	visit similar to the one implemented by the generic visitor, but
3816	we need more infrastructure to maintain two parallel trees --- so,
3817	it's easier to duplicate. /*
3818
3819	static bin_tree_t *
3820	duplicate_tree (const bin_tree_t root, re_dfa_t dfa)
3821	{
3822	const bin_tree_t *node;
3823	bin_tree_t *dup_root;
3824	bin_tree_t *p_new = &dup_root, dup_node = root->parent;
3825
3826	for (node = root; ; )
3827	{
3828	/ Create a new tree and link it back to the current parent. /
3829	*p_new = create_token_tree (dfa, NULL, NULL, &node->token);
3830	if (*p_new == NULL)
3831	return NULL;
3832	(*p_new)->parent = dup_node;
3833	(*p_new)->token.duplicated = `1`;
3834	dup_node = *p_new;
3835
3836	/ Go to the left node, or up and to the right. /
3837	if (node->left)
3838	{
3839	node = node->left;
3840	p_new = &dup_node->left;
3841	}
3842	else
3843	{
3844	const bin_tree_t *prev = NULL;
3845	while (node->right == prev \|\| node->right == NULL)
3846	{
3847	prev = node;
3848	node = node->parent;
3849	dup_node = dup_node->parent;
3850	if (!node)
3851	return dup_root;
3852	}
3853	node = node->right;
3854	p_new = &dup_node->right;
3855	}
3856	}
3857	}
3858

Browse the source code of glibc_src_2.27/posix/regcomp.c