regexec.c source code [glibc_src_2.31/posix/regexec.c]

1	/ Extended regular expression matching and search library.*
2	Copyright (C) 2002-2020 Free Software Foundation, Inc.
3	This file is part of the GNU C Library.
4	Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6	The GNU C Library is free software; you can redistribute it and/or
7	modify it under the terms of the GNU Lesser General Public
8	License as published by the Free Software Foundation; either
9	version 2.1 of the License, or (at your option) any later version.
10
11	The GNU C Library is distributed in the hope that it will be useful,
12	but WITHOUT ANY WARRANTY; without even the implied warranty of
13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	Lesser General Public License for more details.
15
16	You should have received a copy of the GNU Lesser General Public
17	License along with the GNU C Library; if not, see
18	<https://www.gnu.org/licenses/>. /*
19
20	static reg_errcode_t match_ctx_init (re_match_context_t cache, int* eflags,
21	Idx n);
22	static void match_ctx_clean (re_match_context_t *mctx);
23	static void match_ctx_free (re_match_context_t *cache);
24	static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
25	Idx str_idx, Idx from, Idx to);
26	static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx);
27	static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
28	Idx str_idx);
29	static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
30	Idx node, Idx str_idx);
31	static void sift_ctx_init (re_sift_context_t sctx, re_dfastate_t *sifted_sts,
32	re_dfastate_t **limited_sts, Idx last_node,
33	Idx last_str_idx);
34	static reg_errcode_t re_search_internal (const regex_t *preg,
35	const char *string, Idx length,
36	Idx start, Idx last_start, Idx stop,
37	size_t nmatch, regmatch_t pmatch[],
38	int eflags);
39	static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
40	const char *string1, Idx length1,
41	const char *string2, Idx length2,
42	Idx start, regoff_t range,
43	struct re_registers *regs,
44	Idx stop, bool ret_len);
45	static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
46	const char *string, Idx length, Idx start,
47	regoff_t range, Idx stop,
48	struct re_registers *regs,
49	bool ret_len);
50	static unsigned re_copy_regs (struct re_registers regs, regmatch_t pmatch,
51	Idx nregs, int regs_allocated);
52	static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx);
53	static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
54	Idx *p_match_first);
55	static Idx check_halt_state_context (const re_match_context_t *mctx,
56	const re_dfastate_t *state, Idx idx);
57	static void update_regs (const re_dfa_t dfa, regmatch_t pmatch,
58	regmatch_t *prev_idx_match, Idx cur_node,
59	Idx cur_idx, Idx nmatch);
60	static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
61	Idx str_idx, Idx dest_node, Idx nregs,
62	regmatch_t *regs,
63	re_node_set *eps_via_nodes);
64	static reg_errcode_t set_regs (const regex_t *preg,
65	const re_match_context_t *mctx,
66	size_t nmatch, regmatch_t *pmatch,
67	bool fl_backtrack);
68	static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
69
70	#ifdef RE_ENABLE_I18N
71	static int sift_states_iter_mb (const re_match_context_t *mctx,
72	re_sift_context_t *sctx,
73	Idx node_idx, Idx str_idx, Idx max_str_idx);
74	#endif /* RE_ENABLE_I18N */
75	static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
76	re_sift_context_t *sctx);
77	static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
78	re_sift_context_t *sctx, Idx str_idx,
79	re_node_set *cur_dest);
80	static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
81	re_sift_context_t *sctx,
82	Idx str_idx,
83	re_node_set *dest_nodes);
84	static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
85	re_node_set *dest_nodes,
86	const re_node_set *candidates);
87	static bool check_dst_limits (const re_match_context_t *mctx,
88	const re_node_set *limits,
89	Idx dst_node, Idx dst_idx, Idx src_node,
90	Idx src_idx);
91	static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
92	int boundaries, Idx subexp_idx,
93	Idx from_node, Idx bkref_idx);
94	static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
95	Idx limit, Idx subexp_idx,
96	Idx node, Idx str_idx,
97	Idx bkref_idx);
98	static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
99	re_node_set *dest_nodes,
100	const re_node_set *candidates,
101	re_node_set *limits,
102	struct re_backref_cache_entry *bkref_ents,
103	Idx str_idx);
104	static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
105	re_sift_context_t *sctx,
106	Idx str_idx, const re_node_set *candidates);
107	static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
108	re_dfastate_t **dst,
109	re_dfastate_t **src, Idx num);
110	static re_dfastate_t find_recover_state (reg_errcode_t err,
111	re_match_context_t *mctx);
112	static re_dfastate_t transit_state (reg_errcode_t err,
113	re_match_context_t *mctx,
114	re_dfastate_t *state);
115	static re_dfastate_t merge_state_with_log (reg_errcode_t err,
116	re_match_context_t *mctx,
117	re_dfastate_t *next_state);
118	static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
119	re_node_set *cur_nodes,
120	Idx str_idx);
121	#if 0
122	static re_dfastate_t transit_state_sb (reg_errcode_t err,
123	re_match_context_t *mctx,
124	re_dfastate_t *pstate);
125	#endif
126	#ifdef RE_ENABLE_I18N
127	static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
128	re_dfastate_t *pstate);
129	#endif /* RE_ENABLE_I18N */
130	static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
131	const re_node_set *nodes);
132	static reg_errcode_t get_subexp (re_match_context_t *mctx,
133	Idx bkref_node, Idx bkref_str_idx);
134	static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
135	const re_sub_match_top_t *sub_top,
136	re_sub_match_last_t *sub_last,
137	Idx bkref_node, Idx bkref_str);
138	static Idx find_subexp_node (const re_dfa_t dfa, const* re_node_set *nodes,
139	Idx subexp_idx, int type);
140	static reg_errcode_t check_arrival (re_match_context_t *mctx,
141	state_array_t *path, Idx top_node,
142	Idx top_str, Idx last_node, Idx last_str,
143	int type);
144	static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
145	Idx str_idx,
146	re_node_set *cur_nodes,
147	re_node_set *next_nodes);
148	static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
149	re_node_set *cur_nodes,
150	Idx ex_subexp, int type);
151	static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
152	re_node_set *dst_nodes,
153	Idx target, Idx ex_subexp,
154	int type);
155	static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
156	re_node_set *cur_nodes, Idx cur_str,
157	Idx subexp_num, int type);
158	static bool build_trtable (const re_dfa_t dfa, re_dfastate_t state);
159	#ifdef RE_ENABLE_I18N
160	static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
161	const re_string_t *input, Idx idx);
162	# ifdef _LIBC
163	static unsigned int find_collation_sequence_value (const unsigned char *mbs,
164	size_t name_len);
165	# endif /* _LIBC */
166	#endif /* RE_ENABLE_I18N */
167	static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
168	const re_dfastate_t *state,
169	re_node_set *states_node,
170	bitset_t *states_ch);
171	static bool check_node_accept (const re_match_context_t *mctx,
172	const re_token_t *node, Idx idx);
173	static reg_errcode_t extend_buffers (re_match_context_t mctx, int* min_len);
174
175	/ Entry point for POSIX code. /
176
177	/ regexec searches for a given pattern, specified by PREG, in the*
178	string STRING.
179
180	If NMATCH is zero or REG_NOSUB was set in the cflags argument to
181	'regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
182	least NMATCH elements, and we set them to the offsets of the
183	corresponding matched substrings.
184
185	EFLAGS specifies "execution flags" which affect matching: if
186	REG_NOTBOL is set, then ^ does not match at the beginning of the
187	string; if REG_NOTEOL is set, then $ does not match at the end.
188
189	We return 0 if we find a match and REG_NOMATCH if not. /*
190
191	int
192	regexec (const regex_t *__restrict preg, const char *__restrict string,
193	size_t nmatch, regmatch_t pmatch[], int eflags)
194	{
195	reg_errcode_t err;
196	Idx start, length;
197	re_dfa_t *dfa = preg->buffer;
198
199	if (eflags & ~(REG_NOTBOL \| REG_NOTEOL \| REG_STARTEND))
200	return REG_BADPAT;
201
202	if (eflags & REG_STARTEND)
203	{
204	start = pmatch[`0`].rm_so;
205	length = pmatch[`0`].rm_eo;
206	}
207	else
208	{
209	start = `0`;
210	length = strlen (string);
211	}
212
213	lock_lock (dfa->lock);
214	if (preg->no_sub)
215	err = re_search_internal (preg, string, length, start, length,
216	length, `0`, NULL, eflags);
217	else
218	err = re_search_internal (preg, string, length, start, length,
219	length, nmatch, pmatch, eflags);
220	lock_unlock (dfa->lock);
221	return err != REG_NOERROR;
222	}
223
224	#ifdef _LIBC
225	libc_hidden_def (__regexec)
226
227	# include <shlib-compat.h>
228	versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
229
230	# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
231	__typeof__ (__regexec) __compat_regexec;
232
233	int
234	attribute_compat_text_section
235	__compat_regexec (const regex_t *__restrict preg,
236	const char *__restrict string, size_t nmatch,
237	regmatch_t pmatch[], int eflags)
238	{
239	return regexec (preg, string, nmatch, pmatch,
240	eflags & (REG_NOTBOL \| REG_NOTEOL));
241	}
242	compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
243	# endif
244	#endif
245
246	/ Entry points for GNU code. /
247
248	/ re_match, re_search, re_match_2, re_search_2*
249
250	The former two functions operate on STRING with length LENGTH,
251	while the later two operate on concatenation of STRING1 and STRING2
252	with lengths LENGTH1 and LENGTH2, respectively.
253
254	re_match() matches the compiled pattern in BUFP against the string,
255	starting at index START.
256
257	re_search() first tries matching at index START, then it tries to match
258	starting from index START + 1, and so on. The last start position tried
259	is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same
260	way as re_match().)
261
262	The parameter STOP of re_{match,search}_2 specifies that no match exceeding
263	the first STOP characters of the concatenation of the strings should be
264	concerned.
265
266	If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
267	and all groups is stored in REGS. (For the "_2" variants, the offsets are
268	computed relative to the concatenation, not relative to the individual
269	strings.)
270
271	On success, re_match* functions return the length of the match, re_search*
272	return the position of the start of the match. Return value -1 means no
273	match was found and -2 indicates an internal error. /*
274
275	regoff_t
276	re_match (struct re_pattern_buffer bufp, const* char *string, Idx length,
277	Idx start, struct re_registers *regs)
278	{
279	return re_search_stub (bufp, string, length, start, `0`, length, regs, true);
280	}
281	#ifdef _LIBC
282	weak_alias (__re_match, re_match)
283	#endif
284
285	regoff_t
286	re_search (struct re_pattern_buffer bufp, const* char *string, Idx length,
287	Idx start, regoff_t range, struct re_registers *regs)
288	{
289	return re_search_stub (bufp, string, length, start, range, length, regs,
290	false);
291	}
292	#ifdef _LIBC
293	weak_alias (__re_search, re_search)
294	#endif
295
296	regoff_t
297	re_match_2 (struct re_pattern_buffer bufp, const* char *string1, Idx length1,
298	const char *string2, Idx length2, Idx start,
299	struct re_registers *regs, Idx stop)
300	{
301	return re_search_2_stub (bufp, string1, length1, string2, length2,
302	start, `0`, regs, stop, true);
303	}
304	#ifdef _LIBC
305	weak_alias (__re_match_2, re_match_2)
306	#endif
307
308	regoff_t
309	re_search_2 (struct re_pattern_buffer bufp, const* char *string1, Idx length1,
310	const char *string2, Idx length2, Idx start, regoff_t range,
311	struct re_registers *regs, Idx stop)
312	{
313	return re_search_2_stub (bufp, string1, length1, string2, length2,
314	start, range, regs, stop, false);
315	}
316	#ifdef _LIBC
317	weak_alias (__re_search_2, re_search_2)
318	#endif
319
320	static regoff_t
321	re_search_2_stub (struct re_pattern_buffer bufp, const* char *string1,
322	Idx length1, const char *string2, Idx length2, Idx start,
323	regoff_t range, struct re_registers *regs,
324	Idx stop, bool ret_len)
325	{
326	const char *str;
327	regoff_t rval;
328	Idx len;
329	char *s = NULL;
330
331	if (__glibc_unlikely ((length1 < `0` \|\| length2 < `0` \|\| stop < `0`
332	\|\| INT_ADD_WRAPV (length1, length2, &len))))
333	return -`2`;
334
335	/ Concatenate the strings. /
336	if (length2 > `0`)
337	if (length1 > `0`)
338	{
339	s = re_malloc (char, len);
340
341	if (__glibc_unlikely (s == NULL))
342	return -`2`;
343	#ifdef _LIBC
344	memcpy (__mempcpy (s, string1, length1), string2, length2);
345	#else
346	memcpy (s, string1, length1);
347	memcpy (s + length1, string2, length2);
348	#endif
349	str = s;
350	}
351	else
352	str = string2;
353	else
354	str = string1;
355
356	rval = re_search_stub (bufp, str, len, start, range, stop, regs,
357	ret_len);
358	re_free (s);
359	return rval;
360	}
361
362	/ The parameters have the same meaning as those of re_search.*
363	Additional parameters:
364	If RET_LEN is true the length of the match is returned (re_match style);
365	otherwise the position of the match is returned. /*
366
367	static regoff_t
368	re_search_stub (struct re_pattern_buffer bufp, const* char *string, Idx length,
369	Idx start, regoff_t range, Idx stop, struct re_registers *regs,
370	bool ret_len)
371	{
372	reg_errcode_t result;
373	regmatch_t *pmatch;
374	Idx nregs;
375	regoff_t rval;
376	int eflags = `0`;
377	re_dfa_t *dfa = bufp->buffer;
378	Idx last_start = start + range;
379
380	/ Check for out-of-range. /
381	if (__glibc_unlikely (start < `0` \|\| start > length))
382	return -`1`;
383	if (__glibc_unlikely (length < last_start
384	\|\| (`0` <= range && last_start < start)))
385	last_start = length;
386	else if (__glibc_unlikely (last_start < `0`
387	\|\| (range < `0` && start <= last_start)))
388	last_start = `0`;
389
390	lock_lock (dfa->lock);
391
392	eflags \|= (bufp->not_bol) ? REG_NOTBOL : `0`;
393	eflags \|= (bufp->not_eol) ? REG_NOTEOL : `0`;
394
395	/ Compile fastmap if we haven't yet. /
396	if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
397	re_compile_fastmap (bufp);
398
399	if (__glibc_unlikely (bufp->no_sub))
400	regs = NULL;
401
402	/ We need at least 1 register. /
403	if (regs == NULL)
404	nregs = `1`;
405	else if (__glibc_unlikely (bufp->regs_allocated == REGS_FIXED
406	&& regs->num_regs <= bufp->re_nsub))
407	{
408	nregs = regs->num_regs;
409	if (__glibc_unlikely (nregs < `1`))
410	{
411	/ Nothing can be copied to regs. /
412	regs = NULL;
413	nregs = `1`;
414	}
415	}
416	else
417	nregs = bufp->re_nsub + `1`;
418	pmatch = re_malloc (regmatch_t, nregs);
419	if (__glibc_unlikely (pmatch == NULL))
420	{
421	rval = -`2`;
422	goto out;
423	}
424
425	result = re_search_internal (bufp, string, length, start, last_start, stop,
426	nregs, pmatch, eflags);
427
428	rval = `0`;
429
430	/ I hope we needn't fill their regs with -1's when no match was found. /
431	if (result != REG_NOERROR)
432	rval = result == REG_NOMATCH ? -`1` : -`2`;
433	else if (regs != NULL)
434	{
435	/ If caller wants register contents data back, copy them. /
436	bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
437	bufp->regs_allocated);
438	if (__glibc_unlikely (bufp->regs_allocated == REGS_UNALLOCATED))
439	rval = -`2`;
440	}
441
442	if (__glibc_likely (rval == `0`))
443	{
444	if (ret_len)
445	{
446	DEBUG_ASSERT (pmatch[`0`].rm_so == start);
447	rval = pmatch[`0`].rm_eo - start;
448	}
449	else
450	rval = pmatch[`0`].rm_so;
451	}
452	re_free (pmatch);
453	out:
454	lock_unlock (dfa->lock);
455	return rval;
456	}
457
458	static unsigned
459	re_copy_regs (struct re_registers regs, regmatch_t pmatch, Idx nregs,
460	int regs_allocated)
461	{
462	int rval = REGS_REALLOCATE;
463	Idx i;
464	Idx need_regs = nregs + `1`;
465	/ We need one extra element beyond 'num_regs' for the '-1' marker GNU code*
466	uses. /*
467
468	/ Have the register data arrays been allocated? /
469	if (regs_allocated == REGS_UNALLOCATED)
470	{ / No. So allocate them with malloc. /
471	regs->start = re_malloc (regoff_t, need_regs);
472	if (__glibc_unlikely (regs->start == NULL))
473	return REGS_UNALLOCATED;
474	regs->end = re_malloc (regoff_t, need_regs);
475	if (__glibc_unlikely (regs->end == NULL))
476	{
477	re_free (regs->start);
478	return REGS_UNALLOCATED;
479	}
480	regs->num_regs = need_regs;
481	}
482	else if (regs_allocated == REGS_REALLOCATE)
483	{ / Yes. If we need more elements than were already*
484	allocated, reallocate them. If we need fewer, just
485	leave it alone. /*
486	if (__glibc_unlikely (need_regs > regs->num_regs))
487	{
488	regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
489	regoff_t *new_end;
490	if (__glibc_unlikely (new_start == NULL))
491	return REGS_UNALLOCATED;
492	new_end = re_realloc (regs->end, regoff_t, need_regs);
493	if (__glibc_unlikely (new_end == NULL))
494	{
495	re_free (new_start);
496	return REGS_UNALLOCATED;
497	}
498	regs->start = new_start;
499	regs->end = new_end;
500	regs->num_regs = need_regs;
501	}
502	}
503	else
504	{
505	DEBUG_ASSERT (regs_allocated == REGS_FIXED);
506	/ This function may not be called with REGS_FIXED and nregs too big. /
507	DEBUG_ASSERT (nregs <= regs->num_regs);
508	rval = REGS_FIXED;
509	}
510
511	/ Copy the regs. /
512	for (i = `0`; i < nregs; ++i)
513	{
514	regs->start[i] = pmatch[i].rm_so;
515	regs->end[i] = pmatch[i].rm_eo;
516	}
517	for ( ; i < regs->num_regs; ++i)
518	regs->start[i] = regs->end[i] = -`1`;
519
520	return rval;
521	}
522
523	/ Set REGS to hold NUM_REGS registers, storing them in STARTS and*
524	ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
525	this memory for recording register information. STARTS and ENDS
526	must be allocated using the malloc library routine, and must each
527	be at least NUM_REGS sizeof (regoff_t) bytes long.*
528
529	If NUM_REGS == 0, then subsequent matches should allocate their own
530	register data.
531
532	Unless this function is called, the first search or match using
533	PATTERN_BUFFER will allocate its own register data, without
534	freeing the old data. /*
535
536	void
537	re_set_registers (struct re_pattern_buffer bufp, struct* re_registers *regs,
538	__re_size_t num_regs, regoff_t starts, regoff_t ends)
539	{
540	if (num_regs)
541	{
542	bufp->regs_allocated = REGS_REALLOCATE;
543	regs->num_regs = num_regs;
544	regs->start = starts;
545	regs->end = ends;
546	}
547	else
548	{
549	bufp->regs_allocated = REGS_UNALLOCATED;
550	regs->num_regs = `0`;
551	regs->start = regs->end = NULL;
552	}
553	}
554	#ifdef _LIBC
555	weak_alias (__re_set_registers, re_set_registers)
556	#endif
557
558	/ Entry points compatible with 4.2 BSD regex library. We don't define*
559	them unless specifically requested. /*
560
561	#if defined _REGEX_RE_COMP \|\| defined _LIBC
562	int
563	# ifdef _LIBC
564	weak_function
565	# endif
566	re_exec (const char *s)
567	{
568	return `0` == regexec (&re_comp_buf, s, `0`, NULL, `0`);
569	}
570	#endif /* _REGEX_RE_COMP */
571
572	/ Internal entry point. /
573
574	/ Searches for a compiled pattern PREG in the string STRING, whose*
575	length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
576	meaning as with regexec. LAST_START is START + RANGE, where
577	START and RANGE have the same meaning as with re_search.
578	Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
579	otherwise return the error code.
580	Note: We assume front end functions already check ranges.
581	(0 <= LAST_START && LAST_START <= LENGTH) /*
582
583	static reg_errcode_t
584	__attribute_warn_unused_result__
585	re_search_internal (const regex_t preg, const* char *string, Idx length,
586	Idx start, Idx last_start, Idx stop, size_t nmatch,
587	regmatch_t pmatch[], int eflags)
588	{
589	reg_errcode_t err;
590	const re_dfa_t *dfa = preg->buffer;
591	Idx left_lim, right_lim;
592	int incr;
593	bool fl_longest_match;
594	int match_kind;
595	Idx match_first;
596	Idx match_last = -`1`;
597	Idx extra_nmatch;
598	bool sb;
599	int ch;
600	re_match_context_t mctx = { .dfa = dfa };
601	char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
602	&& start != last_start && !preg->can_be_null)
603	? preg->fastmap : NULL);
604	RE_TRANSLATE_TYPE t = preg->translate;
605
606	extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + `1`) : `0`;
607	nmatch -= extra_nmatch;
608
609	/ Check if the DFA haven't been compiled. /
610	if (__glibc_unlikely (preg->used == `0` \|\| dfa->init_state == NULL
611	\|\| dfa->init_state_word == NULL
612	\|\| dfa->init_state_nl == NULL
613	\|\| dfa->init_state_begbuf == NULL))
614	return REG_NOMATCH;
615
616	/ We assume front-end functions already check them. /
617	DEBUG_ASSERT (`0` <= last_start && last_start <= length);
618
619	/ If initial states with non-begbuf contexts have no elements,*
620	the regex must be anchored. If preg->newline_anchor is set,
621	we'll never use init_state_nl, so do not check it. /*
622	if (dfa->init_state->nodes.nelem == `0`
623	&& dfa->init_state_word->nodes.nelem == `0`
624	&& (dfa->init_state_nl->nodes.nelem == `0`
625	\|\| !preg->newline_anchor))
626	{
627	if (start != `0` && last_start != `0`)
628	return REG_NOMATCH;
629	start = last_start = `0`;
630	}
631
632	/ We must check the longest matching, if nmatch > 0. /
633	fl_longest_match = (nmatch != `0` \|\| dfa->nbackref);
634
635	err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + `1`,
636	preg->translate, (preg->syntax & RE_ICASE) != `0`,
637	dfa);
638	if (__glibc_unlikely (err != REG_NOERROR))
639	goto free_return;
640	mctx.input.stop = stop;
641	mctx.input.raw_stop = stop;
642	mctx.input.newline_anchor = preg->newline_anchor;
643
644	err = match_ctx_init (&mctx, eflags, dfa->nbackref * `2`);
645	if (__glibc_unlikely (err != REG_NOERROR))
646	goto free_return;
647
648	/ We will log all the DFA states through which the dfa pass,*
649	if nmatch > 1, or this dfa has "multibyte node", which is a
650	back-reference or a node which can accept multibyte character or
651	multi character collating element. /*
652	if (nmatch > `1` \|\| dfa->has_mb_node)
653	{
654	/ Avoid overflow. /
655	if (__glibc_unlikely ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
656	<= mctx.input.bufs_len)))
657	{
658	err = REG_ESPACE;
659	goto free_return;
660	}
661
662	mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + `1`);
663	if (__glibc_unlikely (mctx.state_log == NULL))
664	{
665	err = REG_ESPACE;
666	goto free_return;
667	}
668	}
669
670	match_first = start;
671	mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
672	: CONTEXT_NEWLINE \| CONTEXT_BEGBUF;
673
674	/ Check incrementally whether the input string matches. /
675	incr = (last_start < start) ? -`1` : `1`;
676	left_lim = (last_start < start) ? last_start : start;
677	right_lim = (last_start < start) ? start : last_start;
678	sb = dfa->mb_cur_max == `1`;
679	match_kind =
680	(fastmap
681	? ((sb \|\| !(preg->syntax & RE_ICASE \|\| t) ? `4` : `0`)
682	\| (start <= last_start ? `2` : `0`)
683	\| (t != NULL ? `1` : `0`))
684	: `8`);
685
686	for (;; match_first += incr)
687	{
688	err = REG_NOMATCH;
689	if (match_first < left_lim \|\| right_lim < match_first)
690	goto free_return;
691
692	/ Advance as rapidly as possible through the string, until we*
693	find a plausible place to start matching. This may be done
694	with varying efficiency, so there are various possibilities:
695	only the most common of them are specialized, in order to
696	save on code size. We use a switch statement for speed. /*
697	switch (match_kind)
698	{
699	case `8`:
700	/ No fastmap. /
701	break;
702
703	case `7`:
704	/ Fastmap with single-byte translation, match forward. /
705	while (__glibc_likely (match_first < right_lim)
706	&& !fastmap[t[(unsigned char) string[match_first]]])
707	++match_first;
708	goto forward_match_found_start_or_reached_end;
709
710	case `6`:
711	/ Fastmap without translation, match forward. /
712	while (__glibc_likely (match_first < right_lim)
713	&& !fastmap[(unsigned char) string[match_first]])
714	++match_first;
715
716	forward_match_found_start_or_reached_end:
717	if (__glibc_unlikely (match_first == right_lim))
718	{
719	ch = match_first >= length
720	? `0` : (unsigned char) string[match_first];
721	if (!fastmap[t ? t[ch] : ch])
722	goto free_return;
723	}
724	break;
725
726	case `4`:
727	case `5`:
728	/ Fastmap without multi-byte translation, match backwards. /
729	while (match_first >= left_lim)
730	{
731	ch = match_first >= length
732	? `0` : (unsigned char) string[match_first];
733	if (fastmap[t ? t[ch] : ch])
734	break;
735	--match_first;
736	}
737	if (match_first < left_lim)
738	goto free_return;
739	break;
740
741	default:
742	/ In this case, we can't determine easily the current byte,*
743	since it might be a component byte of a multibyte
744	character. Then we use the constructed buffer instead. /*
745	for (;;)
746	{
747	/ If MATCH_FIRST is out of the valid range, reconstruct the*
748	buffers. /*
749	__re_size_t offset = match_first - mctx.input.raw_mbs_idx;
750	if (__glibc_unlikely (offset
751	>= (__re_size_t) mctx.input.valid_raw_len))
752	{
753	err = re_string_reconstruct (&mctx.input, match_first,
754	eflags);
755	if (__glibc_unlikely (err != REG_NOERROR))
756	goto free_return;
757
758	offset = match_first - mctx.input.raw_mbs_idx;
759	}
760	/ If MATCH_FIRST is out of the buffer, leave it as '\0'.*
761	Note that MATCH_FIRST must not be smaller than 0. /*
762	ch = (match_first >= length
763	? `0` : re_string_byte_at (&mctx.input, offset));
764	if (fastmap[ch])
765	break;
766	match_first += incr;
767	if (match_first < left_lim \|\| match_first > right_lim)
768	{
769	err = REG_NOMATCH;
770	goto free_return;
771	}
772	}
773	break;
774	}
775
776	/ Reconstruct the buffers so that the matcher can assume that*
777	the matching starts from the beginning of the buffer. /*
778	err = re_string_reconstruct (&mctx.input, match_first, eflags);
779	if (__glibc_unlikely (err != REG_NOERROR))
780	goto free_return;
781
782	#ifdef RE_ENABLE_I18N
783	/ Don't consider this char as a possible match start if it part,*
784	yet isn't the head, of a multibyte character. /*
785	if (!sb && !re_string_first_byte (&mctx.input, `0`))
786	continue;
787	#endif
788
789	/ It seems to be appropriate one, then use the matcher. /
790	/ We assume that the matching starts from 0. /
791	mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = `0`;
792	match_last = check_matching (&mctx, fl_longest_match,
793	start <= last_start ? &match_first : NULL);
794	if (match_last != -`1`)
795	{
796	if (__glibc_unlikely (match_last == -`2`))
797	{
798	err = REG_ESPACE;
799	goto free_return;
800	}
801	else
802	{
803	mctx.match_last = match_last;
804	if ((!preg->no_sub && nmatch > `1`) \|\| dfa->nbackref)
805	{
806	re_dfastate_t *pstate = mctx.state_log[match_last];
807	mctx.last_node = check_halt_state_context (&mctx, pstate,
808	match_last);
809	}
810	if ((!preg->no_sub && nmatch > `1` && dfa->has_plural_match)
811	\|\| dfa->nbackref)
812	{
813	err = prune_impossible_nodes (&mctx);
814	if (err == REG_NOERROR)
815	break;
816	if (__glibc_unlikely (err != REG_NOMATCH))
817	goto free_return;
818	match_last = -`1`;
819	}
820	else
821	break; / We found a match. /
822	}
823	}
824
825	match_ctx_clean (&mctx);
826	}
827
828	DEBUG_ASSERT (match_last != -`1`);
829	DEBUG_ASSERT (err == REG_NOERROR);
830
831	/ Set pmatch[] if we need. /
832	if (nmatch > `0`)
833	{
834	Idx reg_idx;
835
836	/ Initialize registers. /
837	for (reg_idx = `1`; reg_idx < nmatch; ++reg_idx)
838	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -`1`;
839
840	/ Set the points where matching start/end. /
841	pmatch[`0`].rm_so = `0`;
842	pmatch[`0`].rm_eo = mctx.match_last;
843	/ FIXME: This function should fail if mctx.match_last exceeds*
844	the maximum possible regoff_t value. We need a new error
845	code REG_OVERFLOW. /*
846
847	if (!preg->no_sub && nmatch > `1`)
848	{
849	err = set_regs (preg, &mctx, nmatch, pmatch,
850	dfa->has_plural_match && dfa->nbackref > `0`);
851	if (__glibc_unlikely (err != REG_NOERROR))
852	goto free_return;
853	}
854
855	/ At last, add the offset to each register, since we slid*
856	the buffers so that we could assume that the matching starts
857	from 0. /*
858	for (reg_idx = `0`; reg_idx < nmatch; ++reg_idx)
859	if (pmatch[reg_idx].rm_so != -`1`)
860	{
861	#ifdef RE_ENABLE_I18N
862	if (__glibc_unlikely (mctx.input.offsets_needed != `0`))
863	{
864	pmatch[reg_idx].rm_so =
865	(pmatch[reg_idx].rm_so == mctx.input.valid_len
866	? mctx.input.valid_raw_len
867	: mctx.input.offsets[pmatch[reg_idx].rm_so]);
868	pmatch[reg_idx].rm_eo =
869	(pmatch[reg_idx].rm_eo == mctx.input.valid_len
870	? mctx.input.valid_raw_len
871	: mctx.input.offsets[pmatch[reg_idx].rm_eo]);
872	}
873	#else
874	DEBUG_ASSERT (mctx.input.offsets_needed == `0`);
875	#endif
876	pmatch[reg_idx].rm_so += match_first;
877	pmatch[reg_idx].rm_eo += match_first;
878	}
879	for (reg_idx = `0`; reg_idx < extra_nmatch; ++reg_idx)
880	{
881	pmatch[nmatch + reg_idx].rm_so = -`1`;
882	pmatch[nmatch + reg_idx].rm_eo = -`1`;
883	}
884
885	if (dfa->subexp_map)
886	for (reg_idx = `0`; reg_idx + `1` < nmatch; reg_idx++)
887	if (dfa->subexp_map[reg_idx] != reg_idx)
888	{
889	pmatch[reg_idx + `1`].rm_so
890	= pmatch[dfa->subexp_map[reg_idx] + `1`].rm_so;
891	pmatch[reg_idx + `1`].rm_eo
892	= pmatch[dfa->subexp_map[reg_idx] + `1`].rm_eo;
893	}
894	}
895
896	free_return:
897	re_free (mctx.state_log);
898	if (dfa->nbackref)
899	match_ctx_free (&mctx);
900	re_string_destruct (&mctx.input);
901	return err;
902	}
903
904	static reg_errcode_t
905	__attribute_warn_unused_result__
906	prune_impossible_nodes (re_match_context_t *mctx)
907	{
908	const re_dfa_t *const dfa = mctx->dfa;
909	Idx halt_node, match_last;
910	reg_errcode_t ret;
911	re_dfastate_t **sifted_states;
912	re_dfastate_t **lim_states = NULL;
913	re_sift_context_t sctx;
914	DEBUG_ASSERT (mctx->state_log != NULL);
915	match_last = mctx->match_last;
916	halt_node = mctx->last_node;
917
918	/ Avoid overflow. /
919	if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
920	<= match_last))
921	return REG_ESPACE;
922
923	sifted_states = re_malloc (re_dfastate_t *, match_last + `1`);
924	if (__glibc_unlikely (sifted_states == NULL))
925	{
926	ret = REG_ESPACE;
927	goto free_return;
928	}
929	if (dfa->nbackref)
930	{
931	lim_states = re_malloc (re_dfastate_t *, match_last + `1`);
932	if (__glibc_unlikely (lim_states == NULL))
933	{
934	ret = REG_ESPACE;
935	goto free_return;
936	}
937	while (`1`)
938	{
939	memset (lim_states, `'\0'`,
940	sizeof (re_dfastate_t ) (match_last + `1`));
941	sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
942	match_last);
943	ret = sift_states_backward (mctx, &sctx);
944	re_node_set_free (&sctx.limits);
945	if (__glibc_unlikely (ret != REG_NOERROR))
946	goto free_return;
947	if (sifted_states[`0`] != NULL \|\| lim_states[`0`] != NULL)
948	break;
949	do
950	{
951	--match_last;
952	if (match_last < `0`)
953	{
954	ret = REG_NOMATCH;
955	goto free_return;
956	}
957	} while (mctx->state_log[match_last] == NULL
958	\|\| !mctx->state_log[match_last]->halt);
959	halt_node = check_halt_state_context (mctx,
960	mctx->state_log[match_last],
961	match_last);
962	}
963	ret = merge_state_array (dfa, sifted_states, lim_states,
964	match_last + `1`);
965	re_free (lim_states);
966	lim_states = NULL;
967	if (__glibc_unlikely (ret != REG_NOERROR))
968	goto free_return;
969	}
970	else
971	{
972	sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
973	ret = sift_states_backward (mctx, &sctx);
974	re_node_set_free (&sctx.limits);
975	if (__glibc_unlikely (ret != REG_NOERROR))
976	goto free_return;
977	if (sifted_states[`0`] == NULL)
978	{
979	ret = REG_NOMATCH;
980	goto free_return;
981	}
982	}
983	re_free (mctx->state_log);
984	mctx->state_log = sifted_states;
985	sifted_states = NULL;
986	mctx->last_node = halt_node;
987	mctx->match_last = match_last;
988	ret = REG_NOERROR;
989	free_return:
990	re_free (sifted_states);
991	re_free (lim_states);
992	return ret;
993	}
994
995	/ Acquire an initial state and return it.*
996	We must select appropriate initial state depending on the context,
997	since initial states may have constraints like "\<", "^", etc.. /*
998
999	static inline re_dfastate_t *
1000	__attribute__ ((always_inline))
1001	acquire_init_state_context (reg_errcode_t err, const* re_match_context_t *mctx,
1002	Idx idx)
1003	{
1004	const re_dfa_t *const dfa = mctx->dfa;
1005	if (dfa->init_state->has_constraint)
1006	{
1007	unsigned int context;
1008	context = re_string_context_at (&mctx->input, idx - `1`, mctx->eflags);
1009	if (IS_WORD_CONTEXT (context))
1010	return dfa->init_state_word;
1011	else if (IS_ORDINARY_CONTEXT (context))
1012	return dfa->init_state;
1013	else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1014	return dfa->init_state_begbuf;
1015	else if (IS_NEWLINE_CONTEXT (context))
1016	return dfa->init_state_nl;
1017	else if (IS_BEGBUF_CONTEXT (context))
1018	{
1019	/ It is relatively rare case, then calculate on demand. /
1020	return re_acquire_state_context (err, dfa,
1021	dfa->init_state->entrance_nodes,
1022	context);
1023	}
1024	else
1025	/ Must not happen? /
1026	return dfa->init_state;
1027	}
1028	else
1029	return dfa->init_state;
1030	}
1031
1032	/ Check whether the regular expression match input string INPUT or not,*
1033	and return the index where the matching end. Return -1 if
1034	there is no match, and return -2 in case of an error.
1035	FL_LONGEST_MATCH means we want the POSIX longest matching.
1036	If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1037	next place where we may want to try matching.
1038	Note that the matcher assumes that the matching starts from the current
1039	index of the buffer. /*
1040
1041	static Idx
1042	__attribute_warn_unused_result__
1043	check_matching (re_match_context_t *mctx, bool fl_longest_match,
1044	Idx *p_match_first)
1045	{
1046	const re_dfa_t *const dfa = mctx->dfa;
1047	reg_errcode_t err;
1048	Idx match = `0`;
1049	Idx match_last = -`1`;
1050	Idx cur_str_idx = re_string_cur_idx (&mctx->input);
1051	re_dfastate_t *cur_state;
1052	bool at_init_state = p_match_first != NULL;
1053	Idx next_start_idx = cur_str_idx;
1054
1055	err = REG_NOERROR;
1056	cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1057	/ An initial state must not be NULL (invalid). /
1058	if (__glibc_unlikely (cur_state == NULL))
1059	{
1060	DEBUG_ASSERT (err == REG_ESPACE);
1061	return -`2`;
1062	}
1063
1064	if (mctx->state_log != NULL)
1065	{
1066	mctx->state_log[cur_str_idx] = cur_state;
1067
1068	/ Check OP_OPEN_SUBEXP in the initial state in case that we use them*
1069	later. E.g. Processing back references. /*
1070	if (__glibc_unlikely (dfa->nbackref))
1071	{
1072	at_init_state = false;
1073	err = check_subexp_matching_top (mctx, &cur_state->nodes, `0`);
1074	if (__glibc_unlikely (err != REG_NOERROR))
1075	return err;
1076
1077	if (cur_state->has_backref)
1078	{
1079	err = transit_state_bkref (mctx, &cur_state->nodes);
1080	if (__glibc_unlikely (err != REG_NOERROR))
1081	return err;
1082	}
1083	}
1084	}
1085
1086	/ If the RE accepts NULL string. /
1087	if (__glibc_unlikely (cur_state->halt))
1088	{
1089	if (!cur_state->has_constraint
1090	\|\| check_halt_state_context (mctx, cur_state, cur_str_idx))
1091	{
1092	if (!fl_longest_match)
1093	return cur_str_idx;
1094	else
1095	{
1096	match_last = cur_str_idx;
1097	match = `1`;
1098	}
1099	}
1100	}
1101
1102	while (!re_string_eoi (&mctx->input))
1103	{
1104	re_dfastate_t *old_state = cur_state;
1105	Idx next_char_idx = re_string_cur_idx (&mctx->input) + `1`;
1106
1107	if ((__glibc_unlikely (next_char_idx >= mctx->input.bufs_len)
1108	&& mctx->input.bufs_len < mctx->input.len)
1109	\|\| (__glibc_unlikely (next_char_idx >= mctx->input.valid_len)
1110	&& mctx->input.valid_len < mctx->input.len))
1111	{
1112	err = extend_buffers (mctx, next_char_idx + `1`);
1113	if (__glibc_unlikely (err != REG_NOERROR))
1114	{
1115	DEBUG_ASSERT (err == REG_ESPACE);
1116	return -`2`;
1117	}
1118	}
1119
1120	cur_state = transit_state (&err, mctx, cur_state);
1121	if (mctx->state_log != NULL)
1122	cur_state = merge_state_with_log (&err, mctx, cur_state);
1123
1124	if (cur_state == NULL)
1125	{
1126	/ Reached the invalid state or an error. Try to recover a valid*
1127	state using the state log, if available and if we have not
1128	already found a valid (even if not the longest) match. /*
1129	if (__glibc_unlikely (err != REG_NOERROR))
1130	return -`2`;
1131
1132	if (mctx->state_log == NULL
1133	\|\| (match && !fl_longest_match)
1134	\|\| (cur_state = find_recover_state (&err, mctx)) == NULL)
1135	break;
1136	}
1137
1138	if (__glibc_unlikely (at_init_state))
1139	{
1140	if (old_state == cur_state)
1141	next_start_idx = next_char_idx;
1142	else
1143	at_init_state = false;
1144	}
1145
1146	if (cur_state->halt)
1147	{
1148	/ Reached a halt state.*
1149	Check the halt state can satisfy the current context. /*
1150	if (!cur_state->has_constraint
1151	\|\| check_halt_state_context (mctx, cur_state,
1152	re_string_cur_idx (&mctx->input)))
1153	{
1154	/ We found an appropriate halt state. /
1155	match_last = re_string_cur_idx (&mctx->input);
1156	match = `1`;
1157
1158	/ We found a match, do not modify match_first below. /
1159	p_match_first = NULL;
1160	if (!fl_longest_match)
1161	break;
1162	}
1163	}
1164	}
1165
1166	if (p_match_first)
1167	*p_match_first += next_start_idx;
1168
1169	return match_last;
1170	}
1171
1172	/ Check NODE match the current context. /
1173
1174	static bool
1175	check_halt_node_context (const re_dfa_t dfa, Idx node, unsigned* int context)
1176	{
1177	re_token_type_t type = dfa->nodes[node].type;
1178	unsigned int constraint = dfa->nodes[node].constraint;
1179	if (type != END_OF_RE)
1180	return false;
1181	if (!constraint)
1182	return true;
1183	if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1184	return false;
1185	return true;
1186	}
1187
1188	/ Check the halt state STATE match the current context.*
1189	Return 0 if not match, if the node, STATE has, is a halt node and
1190	match the context, return the node. /*
1191
1192	static Idx
1193	check_halt_state_context (const re_match_context_t *mctx,
1194	const re_dfastate_t *state, Idx idx)
1195	{
1196	Idx i;
1197	unsigned int context;
1198	DEBUG_ASSERT (state->halt);
1199	context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1200	for (i = `0`; i < state->nodes.nelem; ++i)
1201	if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1202	return state->nodes.elems[i];
1203	return `0`;
1204	}
1205
1206	/ Compute the next node to which "NFA" transit from NODE("NFA" is a NFA*
1207	corresponding to the DFA).
1208	Return the destination node, and update EPS_VIA_NODES;
1209	return -1 in case of errors. /*
1210
1211	static Idx
1212	proceed_next_node (const re_match_context_t mctx, Idx nregs, regmatch_t regs,
1213	Idx pidx, Idx node, re_node_set eps_via_nodes,
1214	struct re_fail_stack_t *fs)
1215	{
1216	const re_dfa_t *const dfa = mctx->dfa;
1217	Idx i;
1218	bool ok;
1219	if (IS_EPSILON_NODE (dfa->nodes[node].type))
1220	{
1221	re_node_set cur_nodes = &mctx->state_log[pidx]->nodes;
1222	re_node_set *edests = &dfa->edests[node];
1223	Idx dest_node;
1224	ok = re_node_set_insert (eps_via_nodes, node);
1225	if (__glibc_unlikely (! ok))
1226	return -`2`;
1227	/ Pick up a valid destination, or return -1 if none*
1228	is found. /*
1229	for (dest_node = -`1`, i = `0`; i < edests->nelem; ++i)
1230	{
1231	Idx candidate = edests->elems[i];
1232	if (!re_node_set_contains (cur_nodes, candidate))
1233	continue;
1234	if (dest_node == -`1`)
1235	dest_node = candidate;
1236
1237	else
1238	{
1239	/ In order to avoid infinite loop like "(a)", return the second*
1240	epsilon-transition if the first was already considered. /*
1241	if (re_node_set_contains (eps_via_nodes, dest_node))
1242	return candidate;
1243
1244	/ Otherwise, push the second epsilon-transition on the fail stack. /
1245	else if (fs != NULL
1246	&& push_fail_stack (fs, *pidx, candidate, nregs, regs,
1247	eps_via_nodes))
1248	return -`2`;
1249
1250	/ We know we are going to exit. /
1251	break;
1252	}
1253	}
1254	return dest_node;
1255	}
1256	else
1257	{
1258	Idx naccepted = `0`;
1259	re_token_type_t type = dfa->nodes[node].type;
1260
1261	#ifdef RE_ENABLE_I18N
1262	if (dfa->nodes[node].accept_mb)
1263	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1264	else
1265	#endif /* RE_ENABLE_I18N */
1266	if (type == OP_BACK_REF)
1267	{
1268	Idx subexp_idx = dfa->nodes[node].opr.idx + `1`;
1269	if (subexp_idx < nregs)
1270	naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1271	if (fs != NULL)
1272	{
1273	if (subexp_idx >= nregs
1274	\|\| regs[subexp_idx].rm_so == -`1`
1275	\|\| regs[subexp_idx].rm_eo == -`1`)
1276	return -`1`;
1277	else if (naccepted)
1278	{
1279	char buf = (char* *) re_string_get_buffer (&mctx->input);
1280	if (mctx->input.valid_len - *pidx < naccepted
1281	\|\| (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1282	naccepted)
1283	!= `0`))
1284	return -`1`;
1285	}
1286	}
1287
1288	if (naccepted == `0`)
1289	{
1290	Idx dest_node;
1291	ok = re_node_set_insert (eps_via_nodes, node);
1292	if (__glibc_unlikely (! ok))
1293	return -`2`;
1294	dest_node = dfa->edests[node].elems[`0`];
1295	if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1296	dest_node))
1297	return dest_node;
1298	}
1299	}
1300
1301	if (naccepted != `0`
1302	\|\| check_node_accept (mctx, dfa->nodes + node, *pidx))
1303	{
1304	Idx dest_node = dfa->nexts[node];
1305	pidx = (naccepted == `0`) ? pidx + `1` : *pidx + naccepted;
1306	if (fs && (pidx > mctx->match_last \|\| mctx->state_log[pidx] == NULL
1307	\|\| !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1308	dest_node)))
1309	return -`1`;
1310	re_node_set_empty (eps_via_nodes);
1311	return dest_node;
1312	}
1313	}
1314	return -`1`;
1315	}
1316
1317	static reg_errcode_t
1318	__attribute_warn_unused_result__
1319	push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
1320	Idx nregs, regmatch_t regs, re_node_set eps_via_nodes)
1321	{
1322	reg_errcode_t err;
1323	Idx num = fs->num++;
1324	if (fs->num == fs->alloc)
1325	{
1326	struct re_fail_stack_ent_t *new_array;
1327	new_array = re_realloc (fs->stack, struct re_fail_stack_ent_t,
1328	fs->alloc * `2`);
1329	if (new_array == NULL)
1330	return REG_ESPACE;
1331	fs->alloc *= `2`;
1332	fs->stack = new_array;
1333	}
1334	fs->stack[num].idx = str_idx;
1335	fs->stack[num].node = dest_node;
1336	fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1337	if (fs->stack[num].regs == NULL)
1338	return REG_ESPACE;
1339	memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1340	err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1341	return err;
1342	}
1343
1344	static Idx
1345	pop_fail_stack (struct re_fail_stack_t fs, Idx pidx, Idx nregs,
1346	regmatch_t regs, re_node_set eps_via_nodes)
1347	{
1348	Idx num = --fs->num;
1349	DEBUG_ASSERT (num >= `0`);
1350	*pidx = fs->stack[num].idx;
1351	memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1352	re_node_set_free (eps_via_nodes);
1353	re_free (fs->stack[num].regs);
1354	*eps_via_nodes = fs->stack[num].eps_via_nodes;
1355	return fs->stack[num].node;
1356	}
1357
1358	/ Set the positions where the subexpressions are starts/ends to registers*
1359	PMATCH.
1360	Note: We assume that pmatch[0] is already set, and
1361	pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. /*
1362
1363	static reg_errcode_t
1364	__attribute_warn_unused_result__
1365	set_regs (const regex_t preg, const* re_match_context_t *mctx, size_t nmatch,
1366	regmatch_t *pmatch, bool fl_backtrack)
1367	{
1368	const re_dfa_t *dfa = preg->buffer;
1369	Idx idx, cur_node;
1370	re_node_set eps_via_nodes;
1371	struct re_fail_stack_t *fs;
1372	struct re_fail_stack_t fs_body = { `0`, `2`, NULL };
1373	regmatch_t *prev_idx_match;
1374	bool prev_idx_match_malloced = false;
1375
1376	DEBUG_ASSERT (nmatch > `1`);
1377	DEBUG_ASSERT (mctx->state_log != NULL);
1378	if (fl_backtrack)
1379	{
1380	fs = &fs_body;
1381	fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1382	if (fs->stack == NULL)
1383	return REG_ESPACE;
1384	}
1385	else
1386	fs = NULL;
1387
1388	cur_node = dfa->init_node;
1389	re_node_set_init_empty (&eps_via_nodes);
1390
1391	if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1392	prev_idx_match = (regmatch_t ) alloca (nmatch sizeof (regmatch_t));
1393	else
1394	{
1395	prev_idx_match = re_malloc (regmatch_t, nmatch);
1396	if (prev_idx_match == NULL)
1397	{
1398	free_fail_stack_return (fs);
1399	return REG_ESPACE;
1400	}
1401	prev_idx_match_malloced = true;
1402	}
1403	memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1404
1405	for (idx = pmatch[`0`].rm_so; idx <= pmatch[`0`].rm_eo ;)
1406	{
1407	update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1408
1409	if (idx == pmatch[`0`].rm_eo && cur_node == mctx->last_node)
1410	{
1411	Idx reg_idx;
1412	if (fs)
1413	{
1414	for (reg_idx = `0`; reg_idx < nmatch; ++reg_idx)
1415	if (pmatch[reg_idx].rm_so > -`1` && pmatch[reg_idx].rm_eo == -`1`)
1416	break;
1417	if (reg_idx == nmatch)
1418	{
1419	re_node_set_free (&eps_via_nodes);
1420	if (prev_idx_match_malloced)
1421	re_free (prev_idx_match);
1422	return free_fail_stack_return (fs);
1423	}
1424	cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1425	&eps_via_nodes);
1426	}
1427	else
1428	{
1429	re_node_set_free (&eps_via_nodes);
1430	if (prev_idx_match_malloced)
1431	re_free (prev_idx_match);
1432	return REG_NOERROR;
1433	}
1434	}
1435
1436	/ Proceed to next node. /
1437	cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1438	&eps_via_nodes, fs);
1439
1440	if (__glibc_unlikely (cur_node < `0`))
1441	{
1442	if (__glibc_unlikely (cur_node == -`2`))
1443	{
1444	re_node_set_free (&eps_via_nodes);
1445	if (prev_idx_match_malloced)
1446	re_free (prev_idx_match);
1447	free_fail_stack_return (fs);
1448	return REG_ESPACE;
1449	}
1450	if (fs)
1451	cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1452	&eps_via_nodes);
1453	else
1454	{
1455	re_node_set_free (&eps_via_nodes);
1456	if (prev_idx_match_malloced)
1457	re_free (prev_idx_match);
1458	return REG_NOMATCH;
1459	}
1460	}
1461	}
1462	re_node_set_free (&eps_via_nodes);
1463	if (prev_idx_match_malloced)
1464	re_free (prev_idx_match);
1465	return free_fail_stack_return (fs);
1466	}
1467
1468	static reg_errcode_t
1469	free_fail_stack_return (struct re_fail_stack_t *fs)
1470	{
1471	if (fs)
1472	{
1473	Idx fs_idx;
1474	for (fs_idx = `0`; fs_idx < fs->num; ++fs_idx)
1475	{
1476	re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1477	re_free (fs->stack[fs_idx].regs);
1478	}
1479	re_free (fs->stack);
1480	}
1481	return REG_NOERROR;
1482	}
1483
1484	static void
1485	update_regs (const re_dfa_t dfa, regmatch_t pmatch,
1486	regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
1487	{
1488	int type = dfa->nodes[cur_node].type;
1489	if (type == OP_OPEN_SUBEXP)
1490	{
1491	Idx reg_num = dfa->nodes[cur_node].opr.idx + `1`;
1492
1493	/ We are at the first node of this sub expression. /
1494	if (reg_num < nmatch)
1495	{
1496	pmatch[reg_num].rm_so = cur_idx;
1497	pmatch[reg_num].rm_eo = -`1`;
1498	}
1499	}
1500	else if (type == OP_CLOSE_SUBEXP)
1501	{
1502	Idx reg_num = dfa->nodes[cur_node].opr.idx + `1`;
1503	if (reg_num < nmatch)
1504	{
1505	/ We are at the last node of this sub expression. /
1506	if (pmatch[reg_num].rm_so < cur_idx)
1507	{
1508	pmatch[reg_num].rm_eo = cur_idx;
1509	/ This is a non-empty match or we are not inside an optional*
1510	subexpression. Accept this right away. /*
1511	memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1512	}
1513	else
1514	{
1515	if (dfa->nodes[cur_node].opt_subexp
1516	&& prev_idx_match[reg_num].rm_so != -`1`)
1517	/ We transited through an empty match for an optional*
1518	subexpression, like (a?), and this is not the subexp's*
1519	first match. Copy back the old content of the registers
1520	so that matches of an inner subexpression are undone as
1521	well, like in ((a?)). /
1522	memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1523	else
1524	/ We completed a subexpression, but it may be part of*
1525	an optional one, so do not update PREV_IDX_MATCH. /*
1526	pmatch[reg_num].rm_eo = cur_idx;
1527	}
1528	}
1529	}
1530	}
1531
1532	/ This function checks the STATE_LOG from the SCTX->last_str_idx to 0*
1533	and sift the nodes in each states according to the following rules.
1534	Updated state_log will be wrote to STATE_LOG.
1535
1536	Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if...
1537	1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1538	If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to
1539	the LAST_NODE, we throw away the node 'a'.
1540	2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts
1541	string 's' and transit to 'b':
1542	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1543	away the node 'a'.
1544	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1545	thrown away, we throw away the node 'a'.
1546	3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1547	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1548	node 'a'.
1549	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1550	we throw away the node 'a'. /*
1551
1552	#define STATE_NODE_CONTAINS(state,node) \
1553	((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1554
1555	static reg_errcode_t
1556	sift_states_backward (const re_match_context_t mctx, re_sift_context_t sctx)
1557	{
1558	reg_errcode_t err;
1559	int null_cnt = `0`;
1560	Idx str_idx = sctx->last_str_idx;
1561	re_node_set cur_dest;
1562
1563	DEBUG_ASSERT (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1564
1565	/ Build sifted state_log[str_idx]. It has the nodes which can epsilon*
1566	transit to the last_node and the last_node itself. /*
1567	err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1568	if (__glibc_unlikely (err != REG_NOERROR))
1569	return err;
1570	err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1571	if (__glibc_unlikely (err != REG_NOERROR))
1572	goto free_return;
1573
1574	/ Then check each states in the state_log. /
1575	while (str_idx > `0`)
1576	{
1577	/ Update counters. /
1578	null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + `1` : `0`;
1579	if (null_cnt > mctx->max_mb_elem_len)
1580	{
1581	memset (sctx->sifted_states, `'\0'`,
1582	sizeof (re_dfastate_t ) str_idx);
1583	re_node_set_free (&cur_dest);
1584	return REG_NOERROR;
1585	}
1586	re_node_set_empty (&cur_dest);
1587	--str_idx;
1588
1589	if (mctx->state_log[str_idx])
1590	{
1591	err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1592	if (__glibc_unlikely (err != REG_NOERROR))
1593	goto free_return;
1594	}
1595
1596	/ Add all the nodes which satisfy the following conditions:*
1597	- It can epsilon transit to a node in CUR_DEST.
1598	- It is in CUR_SRC.
1599	And update state_log. /*
1600	err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1601	if (__glibc_unlikely (err != REG_NOERROR))
1602	goto free_return;
1603	}
1604	err = REG_NOERROR;
1605	free_return:
1606	re_node_set_free (&cur_dest);
1607	return err;
1608	}
1609
1610	static reg_errcode_t
1611	__attribute_warn_unused_result__
1612	build_sifted_states (const re_match_context_t mctx, re_sift_context_t sctx,
1613	Idx str_idx, re_node_set *cur_dest)
1614	{
1615	const re_dfa_t *const dfa = mctx->dfa;
1616	const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1617	Idx i;
1618
1619	/ Then build the next sifted state.*
1620	We build the next sifted state on 'cur_dest', and update
1621	'sifted_states[str_idx]' with 'cur_dest'.
1622	Note:
1623	'cur_dest' is the sifted state from 'state_log[str_idx + 1]'.
1624	'cur_src' points the node_set of the old 'state_log[str_idx]'
1625	(with the epsilon nodes pre-filtered out). /*
1626	for (i = `0`; i < cur_src->nelem; i++)
1627	{
1628	Idx prev_node = cur_src->elems[i];
1629	int naccepted = `0`;
1630	bool ok;
1631	DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[prev_node].type));
1632
1633	#ifdef RE_ENABLE_I18N
1634	/ If the node may accept "multi byte". /
1635	if (dfa->nodes[prev_node].accept_mb)
1636	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1637	str_idx, sctx->last_str_idx);
1638	#endif /* RE_ENABLE_I18N */
1639
1640	/ We don't check backreferences here.*
1641	See update_cur_sifted_state(). /*
1642	if (!naccepted
1643	&& check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1644	&& STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + `1`],
1645	dfa->nexts[prev_node]))
1646	naccepted = `1`;
1647
1648	if (naccepted == `0`)
1649	continue;
1650
1651	if (sctx->limits.nelem)
1652	{
1653	Idx to_idx = str_idx + naccepted;
1654	if (check_dst_limits (mctx, &sctx->limits,
1655	dfa->nexts[prev_node], to_idx,
1656	prev_node, str_idx))
1657	continue;
1658	}
1659	ok = re_node_set_insert (cur_dest, prev_node);
1660	if (__glibc_unlikely (! ok))
1661	return REG_ESPACE;
1662	}
1663
1664	return REG_NOERROR;
1665	}
1666
1667	/ Helper functions. /
1668
1669	static reg_errcode_t
1670	clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
1671	{
1672	Idx top = mctx->state_log_top;
1673
1674	if ((next_state_log_idx >= mctx->input.bufs_len
1675	&& mctx->input.bufs_len < mctx->input.len)
1676	\|\| (next_state_log_idx >= mctx->input.valid_len
1677	&& mctx->input.valid_len < mctx->input.len))
1678	{
1679	reg_errcode_t err;
1680	err = extend_buffers (mctx, next_state_log_idx + `1`);
1681	if (__glibc_unlikely (err != REG_NOERROR))
1682	return err;
1683	}
1684
1685	if (top < next_state_log_idx)
1686	{
1687	memset (mctx->state_log + top + `1`, `'\0'`,
1688	sizeof (re_dfastate_t ) (next_state_log_idx - top));
1689	mctx->state_log_top = next_state_log_idx;
1690	}
1691	return REG_NOERROR;
1692	}
1693
1694	static reg_errcode_t
1695	merge_state_array (const re_dfa_t dfa, re_dfastate_t *dst,
1696	re_dfastate_t **src, Idx num)
1697	{
1698	Idx st_idx;
1699	reg_errcode_t err;
1700	for (st_idx = `0`; st_idx < num; ++st_idx)
1701	{
1702	if (dst[st_idx] == NULL)
1703	dst[st_idx] = src[st_idx];
1704	else if (src[st_idx] != NULL)
1705	{
1706	re_node_set merged_set;
1707	err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1708	&src[st_idx]->nodes);
1709	if (__glibc_unlikely (err != REG_NOERROR))
1710	return err;
1711	dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1712	re_node_set_free (&merged_set);
1713	if (__glibc_unlikely (err != REG_NOERROR))
1714	return err;
1715	}
1716	}
1717	return REG_NOERROR;
1718	}
1719
1720	static reg_errcode_t
1721	update_cur_sifted_state (const re_match_context_t *mctx,
1722	re_sift_context_t *sctx, Idx str_idx,
1723	re_node_set *dest_nodes)
1724	{
1725	const re_dfa_t *const dfa = mctx->dfa;
1726	reg_errcode_t err = REG_NOERROR;
1727	const re_node_set *candidates;
1728	candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1729	: &mctx->state_log[str_idx]->nodes);
1730
1731	if (dest_nodes->nelem == `0`)
1732	sctx->sifted_states[str_idx] = NULL;
1733	else
1734	{
1735	if (candidates)
1736	{
1737	/ At first, add the nodes which can epsilon transit to a node in*
1738	DEST_NODE. /*
1739	err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1740	if (__glibc_unlikely (err != REG_NOERROR))
1741	return err;
1742
1743	/ Then, check the limitations in the current sift_context. /
1744	if (sctx->limits.nelem)
1745	{
1746	err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1747	mctx->bkref_ents, str_idx);
1748	if (__glibc_unlikely (err != REG_NOERROR))
1749	return err;
1750	}
1751	}
1752
1753	sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1754	if (__glibc_unlikely (err != REG_NOERROR))
1755	return err;
1756	}
1757
1758	if (candidates && mctx->state_log[str_idx]->has_backref)
1759	{
1760	err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1761	if (__glibc_unlikely (err != REG_NOERROR))
1762	return err;
1763	}
1764	return REG_NOERROR;
1765	}
1766
1767	static reg_errcode_t
1768	__attribute_warn_unused_result__
1769	add_epsilon_src_nodes (const re_dfa_t dfa, re_node_set dest_nodes,
1770	const re_node_set *candidates)
1771	{
1772	reg_errcode_t err = REG_NOERROR;
1773	Idx i;
1774
1775	re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1776	if (__glibc_unlikely (err != REG_NOERROR))
1777	return err;
1778
1779	if (!state->inveclosure.alloc)
1780	{
1781	err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1782	if (__glibc_unlikely (err != REG_NOERROR))
1783	return REG_ESPACE;
1784	for (i = `0`; i < dest_nodes->nelem; i++)
1785	{
1786	err = re_node_set_merge (&state->inveclosure,
1787	dfa->inveclosures + dest_nodes->elems[i]);
1788	if (__glibc_unlikely (err != REG_NOERROR))
1789	return REG_ESPACE;
1790	}
1791	}
1792	return re_node_set_add_intersect (dest_nodes, candidates,
1793	&state->inveclosure);
1794	}
1795
1796	static reg_errcode_t
1797	sub_epsilon_src_nodes (const re_dfa_t dfa, Idx node, re_node_set dest_nodes,
1798	const re_node_set *candidates)
1799	{
1800	Idx ecl_idx;
1801	reg_errcode_t err;
1802	re_node_set *inv_eclosure = dfa->inveclosures + node;
1803	re_node_set except_nodes;
1804	re_node_set_init_empty (&except_nodes);
1805	for (ecl_idx = `0`; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1806	{
1807	Idx cur_node = inv_eclosure->elems[ecl_idx];
1808	if (cur_node == node)
1809	continue;
1810	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1811	{
1812	Idx edst1 = dfa->edests[cur_node].elems[`0`];
1813	Idx edst2 = ((dfa->edests[cur_node].nelem > `1`)
1814	? dfa->edests[cur_node].elems[`1`] : -`1`);
1815	if ((!re_node_set_contains (inv_eclosure, edst1)
1816	&& re_node_set_contains (dest_nodes, edst1))
1817	\|\| (edst2 > `0`
1818	&& !re_node_set_contains (inv_eclosure, edst2)
1819	&& re_node_set_contains (dest_nodes, edst2)))
1820	{
1821	err = re_node_set_add_intersect (&except_nodes, candidates,
1822	dfa->inveclosures + cur_node);
1823	if (__glibc_unlikely (err != REG_NOERROR))
1824	{
1825	re_node_set_free (&except_nodes);
1826	return err;
1827	}
1828	}
1829	}
1830	}
1831	for (ecl_idx = `0`; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1832	{
1833	Idx cur_node = inv_eclosure->elems[ecl_idx];
1834	if (!re_node_set_contains (&except_nodes, cur_node))
1835	{
1836	Idx idx = re_node_set_contains (dest_nodes, cur_node) - `1`;
1837	re_node_set_remove_at (dest_nodes, idx);
1838	}
1839	}
1840	re_node_set_free (&except_nodes);
1841	return REG_NOERROR;
1842	}
1843
1844	static bool
1845	check_dst_limits (const re_match_context_t mctx, const* re_node_set *limits,
1846	Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
1847	{
1848	const re_dfa_t *const dfa = mctx->dfa;
1849	Idx lim_idx, src_pos, dst_pos;
1850
1851	Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1852	Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1853	for (lim_idx = `0`; lim_idx < limits->nelem; ++lim_idx)
1854	{
1855	Idx subexp_idx;
1856	struct re_backref_cache_entry *ent;
1857	ent = mctx->bkref_ents + limits->elems[lim_idx];
1858	subexp_idx = dfa->nodes[ent->node].opr.idx;
1859
1860	dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1861	subexp_idx, dst_node, dst_idx,
1862	dst_bkref_idx);
1863	src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1864	subexp_idx, src_node, src_idx,
1865	src_bkref_idx);
1866
1867	/ In case of:*
1868	<src> <dst> ( <subexp> )
1869	( <subexp> ) <src> <dst>
1870	( <subexp1> <src> <subexp2> <dst> <subexp3> ) /*
1871	if (src_pos == dst_pos)
1872	continue; / This is unrelated limitation. /
1873	else
1874	return true;
1875	}
1876	return false;
1877	}
1878
1879	static int
1880	check_dst_limits_calc_pos_1 (const re_match_context_t mctx, int* boundaries,
1881	Idx subexp_idx, Idx from_node, Idx bkref_idx)
1882	{
1883	const re_dfa_t *const dfa = mctx->dfa;
1884	const re_node_set *eclosures = dfa->eclosures + from_node;
1885	Idx node_idx;
1886
1887	/ Else, we are on the boundary: examine the nodes on the epsilon*
1888	closure. /*
1889	for (node_idx = `0`; node_idx < eclosures->nelem; ++node_idx)
1890	{
1891	Idx node = eclosures->elems[node_idx];
1892	switch (dfa->nodes[node].type)
1893	{
1894	case OP_BACK_REF:
1895	if (bkref_idx != -`1`)
1896	{
1897	struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1898	do
1899	{
1900	Idx dst;
1901	int cpos;
1902
1903	if (ent->node != node)
1904	continue;
1905
1906	if (subexp_idx < BITSET_WORD_BITS
1907	&& !(ent->eps_reachable_subexps_map
1908	& ((bitset_word_t) `1` << subexp_idx)))
1909	continue;
1910
1911	/ Recurse trying to reach the OP_OPEN_SUBEXP and*
1912	OP_CLOSE_SUBEXP cases below. But, if the
1913	destination node is the same node as the source
1914	node, don't recurse because it would cause an
1915	infinite loop: a regex that exhibits this behavior
1916	is ()\1\1* /
1917	dst = dfa->edests[node].elems[`0`];
1918	if (dst == from_node)
1919	{
1920	if (boundaries & `1`)
1921	return -`1`;
1922	else / if (boundaries & 2) /
1923	return `0`;
1924	}
1925
1926	cpos =
1927	check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1928	dst, bkref_idx);
1929	if (cpos == -`1` / && (boundaries & 1) /)
1930	return -`1`;
1931	if (cpos == `0` && (boundaries & `2`))
1932	return `0`;
1933
1934	if (subexp_idx < BITSET_WORD_BITS)
1935	ent->eps_reachable_subexps_map
1936	&= ~((bitset_word_t) `1` << subexp_idx);
1937	}
1938	while (ent++->more);
1939	}
1940	break;
1941
1942	case OP_OPEN_SUBEXP:
1943	if ((boundaries & `1`) && subexp_idx == dfa->nodes[node].opr.idx)
1944	return -`1`;
1945	break;
1946
1947	case OP_CLOSE_SUBEXP:
1948	if ((boundaries & `2`) && subexp_idx == dfa->nodes[node].opr.idx)
1949	return `0`;
1950	break;
1951
1952	default:
1953	break;
1954	}
1955	}
1956
1957	return (boundaries & `2`) ? `1` : `0`;
1958	}
1959
1960	static int
1961	check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
1962	Idx subexp_idx, Idx from_node, Idx str_idx,
1963	Idx bkref_idx)
1964	{
1965	struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
1966	int boundaries;
1967
1968	/ If we are outside the range of the subexpression, return -1 or 1. /
1969	if (str_idx < lim->subexp_from)
1970	return -`1`;
1971
1972	if (lim->subexp_to < str_idx)
1973	return `1`;
1974
1975	/ If we are within the subexpression, return 0. /
1976	boundaries = (str_idx == lim->subexp_from);
1977	boundaries \|= (str_idx == lim->subexp_to) << `1`;
1978	if (boundaries == `0`)
1979	return `0`;
1980
1981	/ Else, examine epsilon closure. /
1982	return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1983	from_node, bkref_idx);
1984	}
1985
1986	/ Check the limitations of sub expressions LIMITS, and remove the nodes*
1987	which are against limitations from DEST_NODES. /*
1988
1989	static reg_errcode_t
1990	check_subexp_limits (const re_dfa_t dfa, re_node_set dest_nodes,
1991	const re_node_set candidates, re_node_set limits,
1992	struct re_backref_cache_entry *bkref_ents, Idx str_idx)
1993	{
1994	reg_errcode_t err;
1995	Idx node_idx, lim_idx;
1996
1997	for (lim_idx = `0`; lim_idx < limits->nelem; ++lim_idx)
1998	{
1999	Idx subexp_idx;
2000	struct re_backref_cache_entry *ent;
2001	ent = bkref_ents + limits->elems[lim_idx];
2002
2003	if (str_idx <= ent->subexp_from \|\| ent->str_idx < str_idx)
2004	continue; / This is unrelated limitation. /
2005
2006	subexp_idx = dfa->nodes[ent->node].opr.idx;
2007	if (ent->subexp_to == str_idx)
2008	{
2009	Idx ops_node = -`1`;
2010	Idx cls_node = -`1`;
2011	for (node_idx = `0`; node_idx < dest_nodes->nelem; ++node_idx)
2012	{
2013	Idx node = dest_nodes->elems[node_idx];
2014	re_token_type_t type = dfa->nodes[node].type;
2015	if (type == OP_OPEN_SUBEXP
2016	&& subexp_idx == dfa->nodes[node].opr.idx)
2017	ops_node = node;
2018	else if (type == OP_CLOSE_SUBEXP
2019	&& subexp_idx == dfa->nodes[node].opr.idx)
2020	cls_node = node;
2021	}
2022
2023	/ Check the limitation of the open subexpression. /
2024	/ Note that (ent->subexp_to = str_idx != ent->subexp_from). /
2025	if (ops_node >= `0`)
2026	{
2027	err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2028	candidates);
2029	if (__glibc_unlikely (err != REG_NOERROR))
2030	return err;
2031	}
2032
2033	/ Check the limitation of the close subexpression. /
2034	if (cls_node >= `0`)
2035	for (node_idx = `0`; node_idx < dest_nodes->nelem; ++node_idx)
2036	{
2037	Idx node = dest_nodes->elems[node_idx];
2038	if (!re_node_set_contains (dfa->inveclosures + node,
2039	cls_node)
2040	&& !re_node_set_contains (dfa->eclosures + node,
2041	cls_node))
2042	{
2043	/ It is against this limitation.*
2044	Remove it form the current sifted state. /*
2045	err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2046	candidates);
2047	if (__glibc_unlikely (err != REG_NOERROR))
2048	return err;
2049	--node_idx;
2050	}
2051	}
2052	}
2053	else / (ent->subexp_to != str_idx) /
2054	{
2055	for (node_idx = `0`; node_idx < dest_nodes->nelem; ++node_idx)
2056	{
2057	Idx node = dest_nodes->elems[node_idx];
2058	re_token_type_t type = dfa->nodes[node].type;
2059	if (type == OP_CLOSE_SUBEXP \|\| type == OP_OPEN_SUBEXP)
2060	{
2061	if (subexp_idx != dfa->nodes[node].opr.idx)
2062	continue;
2063	/ It is against this limitation.*
2064	Remove it form the current sifted state. /*
2065	err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2066	candidates);
2067	if (__glibc_unlikely (err != REG_NOERROR))
2068	return err;
2069	}
2070	}
2071	}
2072	}
2073	return REG_NOERROR;
2074	}
2075
2076	static reg_errcode_t
2077	__attribute_warn_unused_result__
2078	sift_states_bkref (const re_match_context_t mctx, re_sift_context_t sctx,
2079	Idx str_idx, const re_node_set *candidates)
2080	{
2081	const re_dfa_t *const dfa = mctx->dfa;
2082	reg_errcode_t err;
2083	Idx node_idx, node;
2084	re_sift_context_t local_sctx;
2085	Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
2086
2087	if (first_idx == -`1`)
2088	return REG_NOERROR;
2089
2090	local_sctx.sifted_states = NULL; / Mark that it hasn't been initialized. /
2091
2092	for (node_idx = `0`; node_idx < candidates->nelem; ++node_idx)
2093	{
2094	Idx enabled_idx;
2095	re_token_type_t type;
2096	struct re_backref_cache_entry *entry;
2097	node = candidates->elems[node_idx];
2098	type = dfa->nodes[node].type;
2099	/ Avoid infinite loop for the REs like "()\1+". /
2100	if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2101	continue;
2102	if (type != OP_BACK_REF)
2103	continue;
2104
2105	entry = mctx->bkref_ents + first_idx;
2106	enabled_idx = first_idx;
2107	do
2108	{
2109	Idx subexp_len;
2110	Idx to_idx;
2111	Idx dst_node;
2112	bool ok;
2113	re_dfastate_t *cur_state;
2114
2115	if (entry->node != node)
2116	continue;
2117	subexp_len = entry->subexp_to - entry->subexp_from;
2118	to_idx = str_idx + subexp_len;
2119	dst_node = (subexp_len ? dfa->nexts[node]
2120	: dfa->edests[node].elems[`0`]);
2121
2122	if (to_idx > sctx->last_str_idx
2123	\|\| sctx->sifted_states[to_idx] == NULL
2124	\|\| !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2125	\|\| check_dst_limits (mctx, &sctx->limits, node,
2126	str_idx, dst_node, to_idx))
2127	continue;
2128
2129	if (local_sctx.sifted_states == NULL)
2130	{
2131	local_sctx = *sctx;
2132	err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2133	if (__glibc_unlikely (err != REG_NOERROR))
2134	goto free_return;
2135	}
2136	local_sctx.last_node = node;
2137	local_sctx.last_str_idx = str_idx;
2138	ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
2139	if (__glibc_unlikely (! ok))
2140	{
2141	err = REG_ESPACE;
2142	goto free_return;
2143	}
2144	cur_state = local_sctx.sifted_states[str_idx];
2145	err = sift_states_backward (mctx, &local_sctx);
2146	if (__glibc_unlikely (err != REG_NOERROR))
2147	goto free_return;
2148	if (sctx->limited_states != NULL)
2149	{
2150	err = merge_state_array (dfa, sctx->limited_states,
2151	local_sctx.sifted_states,
2152	str_idx + `1`);
2153	if (__glibc_unlikely (err != REG_NOERROR))
2154	goto free_return;
2155	}
2156	local_sctx.sifted_states[str_idx] = cur_state;
2157	re_node_set_remove (&local_sctx.limits, enabled_idx);
2158
2159	/ mctx->bkref_ents may have changed, reload the pointer. /
2160	entry = mctx->bkref_ents + enabled_idx;
2161	}
2162	while (enabled_idx++, entry++->more);
2163	}
2164	err = REG_NOERROR;
2165	free_return:
2166	if (local_sctx.sifted_states != NULL)
2167	{
2168	re_node_set_free (&local_sctx.limits);
2169	}
2170
2171	return err;
2172	}
2173
2174
2175	#ifdef RE_ENABLE_I18N
2176	static int
2177	sift_states_iter_mb (const re_match_context_t mctx, re_sift_context_t sctx,
2178	Idx node_idx, Idx str_idx, Idx max_str_idx)
2179	{
2180	const re_dfa_t *const dfa = mctx->dfa;
2181	int naccepted;
2182	/ Check the node can accept "multi byte". /
2183	naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2184	if (naccepted > `0` && str_idx + naccepted <= max_str_idx
2185	&& !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2186	dfa->nexts[node_idx]))
2187	/ The node can't accept the "multi byte", or the*
2188	destination was already thrown away, then the node
2189	couldn't accept the current input "multi byte". /*
2190	naccepted = `0`;
2191	/ Otherwise, it is sure that the node could accept*
2192	'naccepted' bytes input. /*
2193	return naccepted;
2194	}
2195	#endif /* RE_ENABLE_I18N */
2196
2197
2198	/ Functions for state transition. /
2199
2200	/ Return the next state to which the current state STATE will transit by*
2201	accepting the current input byte, and update STATE_LOG if necessary.
2202	If STATE can accept a multibyte char/collating element/back reference
2203	update the destination of STATE_LOG. /*
2204
2205	static re_dfastate_t *
2206	__attribute_warn_unused_result__
2207	transit_state (reg_errcode_t err, re_match_context_t mctx,
2208	re_dfastate_t *state)
2209	{
2210	re_dfastate_t **trtable;
2211	unsigned char ch;
2212
2213	#ifdef RE_ENABLE_I18N
2214	/ If the current state can accept multibyte. /
2215	if (__glibc_unlikely (state->accept_mb))
2216	{
2217	*err = transit_state_mb (mctx, state);
2218	if (__glibc_unlikely (*err != REG_NOERROR))
2219	return NULL;
2220	}
2221	#endif /* RE_ENABLE_I18N */
2222
2223	/ Then decide the next state with the single byte. /
2224	#if 0
2225	if (`0`)
2226	/ don't use transition table /
2227	return transit_state_sb (err, mctx, state);
2228	#endif
2229
2230	/ Use transition table /
2231	ch = re_string_fetch_byte (&mctx->input);
2232	for (;;)
2233	{
2234	trtable = state->trtable;
2235	if (__glibc_likely (trtable != NULL))
2236	return trtable[ch];
2237
2238	trtable = state->word_trtable;
2239	if (__glibc_likely (trtable != NULL))
2240	{
2241	unsigned int context;
2242	context
2243	= re_string_context_at (&mctx->input,
2244	re_string_cur_idx (&mctx->input) - `1`,
2245	mctx->eflags);
2246	if (IS_WORD_CONTEXT (context))
2247	return trtable[ch + SBC_MAX];
2248	else
2249	return trtable[ch];
2250	}
2251
2252	if (!build_trtable (mctx->dfa, state))
2253	{
2254	*err = REG_ESPACE;
2255	return NULL;
2256	}
2257
2258	/ Retry, we now have a transition table. /
2259	}
2260	}
2261
2262	/ Update the state_log if we need /
2263	static re_dfastate_t *
2264	merge_state_with_log (reg_errcode_t err, re_match_context_t mctx,
2265	re_dfastate_t *next_state)
2266	{
2267	const re_dfa_t *const dfa = mctx->dfa;
2268	Idx cur_idx = re_string_cur_idx (&mctx->input);
2269
2270	if (cur_idx > mctx->state_log_top)
2271	{
2272	mctx->state_log[cur_idx] = next_state;
2273	mctx->state_log_top = cur_idx;
2274	}
2275	else if (mctx->state_log[cur_idx] == `0`)
2276	{
2277	mctx->state_log[cur_idx] = next_state;
2278	}
2279	else
2280	{
2281	re_dfastate_t *pstate;
2282	unsigned int context;
2283	re_node_set next_nodes, log_nodes, table_nodes = NULL;
2284	/ If (state_log[cur_idx] != 0), it implies that cur_idx is*
2285	the destination of a multibyte char/collating element/
2286	back reference. Then the next state is the union set of
2287	these destinations and the results of the transition table. /*
2288	pstate = mctx->state_log[cur_idx];
2289	log_nodes = pstate->entrance_nodes;
2290	if (next_state != NULL)
2291	{
2292	table_nodes = next_state->entrance_nodes;
2293	*err = re_node_set_init_union (&next_nodes, table_nodes,
2294	log_nodes);
2295	if (__glibc_unlikely (*err != REG_NOERROR))
2296	return NULL;
2297	}
2298	else
2299	next_nodes = *log_nodes;
2300	/ Note: We already add the nodes of the initial state,*
2301	then we don't need to add them here. /*
2302
2303	context = re_string_context_at (&mctx->input,
2304	re_string_cur_idx (&mctx->input) - `1`,
2305	mctx->eflags);
2306	next_state = mctx->state_log[cur_idx]
2307	= re_acquire_state_context (err, dfa, &next_nodes, context);
2308	/ We don't need to check errors here, since the return value of*
2309	this function is next_state and ERR is already set. /*
2310
2311	if (table_nodes != NULL)
2312	re_node_set_free (&next_nodes);
2313	}
2314
2315	if (__glibc_unlikely (dfa->nbackref) && next_state != NULL)
2316	{
2317	/ Check OP_OPEN_SUBEXP in the current state in case that we use them*
2318	later. We must check them here, since the back references in the
2319	next state might use them. /*
2320	*err = check_subexp_matching_top (mctx, &next_state->nodes,
2321	cur_idx);
2322	if (__glibc_unlikely (*err != REG_NOERROR))
2323	return NULL;
2324
2325	/ If the next state has back references. /
2326	if (next_state->has_backref)
2327	{
2328	*err = transit_state_bkref (mctx, &next_state->nodes);
2329	if (__glibc_unlikely (*err != REG_NOERROR))
2330	return NULL;
2331	next_state = mctx->state_log[cur_idx];
2332	}
2333	}
2334
2335	return next_state;
2336	}
2337
2338	/ Skip bytes in the input that correspond to part of a*
2339	multi-byte match, then look in the log for a state
2340	from which to restart matching. /*
2341	static re_dfastate_t *
2342	find_recover_state (reg_errcode_t err, re_match_context_t mctx)
2343	{
2344	re_dfastate_t *cur_state;
2345	do
2346	{
2347	Idx max = mctx->state_log_top;
2348	Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2349
2350	do
2351	{
2352	if (++cur_str_idx > max)
2353	return NULL;
2354	re_string_skip_bytes (&mctx->input, `1`);
2355	}
2356	while (mctx->state_log[cur_str_idx] == NULL);
2357
2358	cur_state = merge_state_with_log (err, mctx, NULL);
2359	}
2360	while (*err == REG_NOERROR && cur_state == NULL);
2361	return cur_state;
2362	}
2363
2364	/ Helper functions for transit_state. /
2365
2366	/ From the node set CUR_NODES, pick up the nodes whose types are*
2367	OP_OPEN_SUBEXP and which have corresponding back references in the regular
2368	expression. And register them to use them later for evaluating the
2369	corresponding back references. /*
2370
2371	static reg_errcode_t
2372	check_subexp_matching_top (re_match_context_t mctx, re_node_set cur_nodes,
2373	Idx str_idx)
2374	{
2375	const re_dfa_t *const dfa = mctx->dfa;
2376	Idx node_idx;
2377	reg_errcode_t err;
2378
2379	/ TODO: This isn't efficient.*
2380	Because there might be more than one nodes whose types are
2381	OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2382	nodes.
2383	E.g. RE: (a){2} /*
2384	for (node_idx = `0`; node_idx < cur_nodes->nelem; ++node_idx)
2385	{
2386	Idx node = cur_nodes->elems[node_idx];
2387	if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2388	&& dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2389	&& (dfa->used_bkref_map
2390	& ((bitset_word_t) `1` << dfa->nodes[node].opr.idx)))
2391	{
2392	err = match_ctx_add_subtop (mctx, node, str_idx);
2393	if (__glibc_unlikely (err != REG_NOERROR))
2394	return err;
2395	}
2396	}
2397	return REG_NOERROR;
2398	}
2399
2400	#if 0
2401	/ Return the next state to which the current state STATE will transit by*
2402	accepting the current input byte. /*
2403
2404	static re_dfastate_t *
2405	transit_state_sb (reg_errcode_t err, re_match_context_t mctx,
2406	re_dfastate_t *state)
2407	{
2408	const re_dfa_t *const dfa = mctx->dfa;
2409	re_node_set next_nodes;
2410	re_dfastate_t *next_state;
2411	Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2412	unsigned int context;
2413
2414	*err = re_node_set_alloc (&next_nodes, state->nodes.nelem + `1`);
2415	if (__glibc_unlikely (*err != REG_NOERROR))
2416	return NULL;
2417	for (node_cnt = `0`; node_cnt < state->nodes.nelem; ++node_cnt)
2418	{
2419	Idx cur_node = state->nodes.elems[node_cnt];
2420	if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2421	{
2422	*err = re_node_set_merge (&next_nodes,
2423	dfa->eclosures + dfa->nexts[cur_node]);
2424	if (__glibc_unlikely (*err != REG_NOERROR))
2425	{
2426	re_node_set_free (&next_nodes);
2427	return NULL;
2428	}
2429	}
2430	}
2431	context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2432	next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2433	/ We don't need to check errors here, since the return value of*
2434	this function is next_state and ERR is already set. /*
2435
2436	re_node_set_free (&next_nodes);
2437	re_string_skip_bytes (&mctx->input, `1`);
2438	return next_state;
2439	}
2440	#endif
2441
2442	#ifdef RE_ENABLE_I18N
2443	static reg_errcode_t
2444	transit_state_mb (re_match_context_t mctx, re_dfastate_t pstate)
2445	{
2446	const re_dfa_t *const dfa = mctx->dfa;
2447	reg_errcode_t err;
2448	Idx i;
2449
2450	for (i = `0`; i < pstate->nodes.nelem; ++i)
2451	{
2452	re_node_set dest_nodes, *new_nodes;
2453	Idx cur_node_idx = pstate->nodes.elems[i];
2454	int naccepted;
2455	Idx dest_idx;
2456	unsigned int context;
2457	re_dfastate_t *dest_state;
2458
2459	if (!dfa->nodes[cur_node_idx].accept_mb)
2460	continue;
2461
2462	if (dfa->nodes[cur_node_idx].constraint)
2463	{
2464	context = re_string_context_at (&mctx->input,
2465	re_string_cur_idx (&mctx->input),
2466	mctx->eflags);
2467	if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2468	context))
2469	continue;
2470	}
2471
2472	/ How many bytes the node can accept? /
2473	naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2474	re_string_cur_idx (&mctx->input));
2475	if (naccepted == `0`)
2476	continue;
2477
2478	/ The node can accepts 'naccepted' bytes. /
2479	dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2480	mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2481	: mctx->max_mb_elem_len);
2482	err = clean_state_log_if_needed (mctx, dest_idx);
2483	if (__glibc_unlikely (err != REG_NOERROR))
2484	return err;
2485	DEBUG_ASSERT (dfa->nexts[cur_node_idx] != -`1`);
2486	new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2487
2488	dest_state = mctx->state_log[dest_idx];
2489	if (dest_state == NULL)
2490	dest_nodes = *new_nodes;
2491	else
2492	{
2493	err = re_node_set_init_union (&dest_nodes,
2494	dest_state->entrance_nodes, new_nodes);
2495	if (__glibc_unlikely (err != REG_NOERROR))
2496	return err;
2497	}
2498	context = re_string_context_at (&mctx->input, dest_idx - `1`,
2499	mctx->eflags);
2500	mctx->state_log[dest_idx]
2501	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2502	if (dest_state != NULL)
2503	re_node_set_free (&dest_nodes);
2504	if (__glibc_unlikely (mctx->state_log[dest_idx] == NULL
2505	&& err != REG_NOERROR))
2506	return err;
2507	}
2508	return REG_NOERROR;
2509	}
2510	#endif /* RE_ENABLE_I18N */
2511
2512	static reg_errcode_t
2513	transit_state_bkref (re_match_context_t mctx, const* re_node_set *nodes)
2514	{
2515	const re_dfa_t *const dfa = mctx->dfa;
2516	reg_errcode_t err;
2517	Idx i;
2518	Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2519
2520	for (i = `0`; i < nodes->nelem; ++i)
2521	{
2522	Idx dest_str_idx, prev_nelem, bkc_idx;
2523	Idx node_idx = nodes->elems[i];
2524	unsigned int context;
2525	const re_token_t *node = dfa->nodes + node_idx;
2526	re_node_set *new_dest_nodes;
2527
2528	/ Check whether 'node' is a backreference or not. /
2529	if (node->type != OP_BACK_REF)
2530	continue;
2531
2532	if (node->constraint)
2533	{
2534	context = re_string_context_at (&mctx->input, cur_str_idx,
2535	mctx->eflags);
2536	if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2537	continue;
2538	}
2539
2540	/ 'node' is a backreference.*
2541	Check the substring which the substring matched. /*
2542	bkc_idx = mctx->nbkref_ents;
2543	err = get_subexp (mctx, node_idx, cur_str_idx);
2544	if (__glibc_unlikely (err != REG_NOERROR))
2545	goto free_return;
2546
2547	/ And add the epsilon closures (which is 'new_dest_nodes') of*
2548	the backreference to appropriate state_log. /*
2549	DEBUG_ASSERT (dfa->nexts[node_idx] != -`1`);
2550	for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2551	{
2552	Idx subexp_len;
2553	re_dfastate_t *dest_state;
2554	struct re_backref_cache_entry *bkref_ent;
2555	bkref_ent = mctx->bkref_ents + bkc_idx;
2556	if (bkref_ent->node != node_idx \|\| bkref_ent->str_idx != cur_str_idx)
2557	continue;
2558	subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2559	new_dest_nodes = (subexp_len == `0`
2560	? dfa->eclosures + dfa->edests[node_idx].elems[`0`]
2561	: dfa->eclosures + dfa->nexts[node_idx]);
2562	dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2563	- bkref_ent->subexp_from);
2564	context = re_string_context_at (&mctx->input, dest_str_idx - `1`,
2565	mctx->eflags);
2566	dest_state = mctx->state_log[dest_str_idx];
2567	prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? `0`
2568	: mctx->state_log[cur_str_idx]->nodes.nelem);
2569	/ Add 'new_dest_node' to state_log. /
2570	if (dest_state == NULL)
2571	{
2572	mctx->state_log[dest_str_idx]
2573	= re_acquire_state_context (&err, dfa, new_dest_nodes,
2574	context);
2575	if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL
2576	&& err != REG_NOERROR))
2577	goto free_return;
2578	}
2579	else
2580	{
2581	re_node_set dest_nodes;
2582	err = re_node_set_init_union (&dest_nodes,
2583	dest_state->entrance_nodes,
2584	new_dest_nodes);
2585	if (__glibc_unlikely (err != REG_NOERROR))
2586	{
2587	re_node_set_free (&dest_nodes);
2588	goto free_return;
2589	}
2590	mctx->state_log[dest_str_idx]
2591	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2592	re_node_set_free (&dest_nodes);
2593	if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL
2594	&& err != REG_NOERROR))
2595	goto free_return;
2596	}
2597	/ We need to check recursively if the backreference can epsilon*
2598	transit. /*
2599	if (subexp_len == `0`
2600	&& mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2601	{
2602	err = check_subexp_matching_top (mctx, new_dest_nodes,
2603	cur_str_idx);
2604	if (__glibc_unlikely (err != REG_NOERROR))
2605	goto free_return;
2606	err = transit_state_bkref (mctx, new_dest_nodes);
2607	if (__glibc_unlikely (err != REG_NOERROR))
2608	goto free_return;
2609	}
2610	}
2611	}
2612	err = REG_NOERROR;
2613	free_return:
2614	return err;
2615	}
2616
2617	/ Enumerate all the candidates which the backreference BKREF_NODE can match*
2618	at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2619	Note that we might collect inappropriate candidates here.
2620	However, the cost of checking them strictly here is too high, then we
2621	delay these checking for prune_impossible_nodes(). /*
2622
2623	static reg_errcode_t
2624	__attribute_warn_unused_result__
2625	get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2626	{
2627	const re_dfa_t *const dfa = mctx->dfa;
2628	Idx subexp_num, sub_top_idx;
2629	const char buf = (const* char *) re_string_get_buffer (&mctx->input);
2630	/ Return if we have already checked BKREF_NODE at BKREF_STR_IDX. /
2631	Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2632	if (cache_idx != -`1`)
2633	{
2634	const struct re_backref_cache_entry *entry
2635	= mctx->bkref_ents + cache_idx;
2636	do
2637	if (entry->node == bkref_node)
2638	return REG_NOERROR; / We already checked it. /
2639	while (entry++->more);
2640	}
2641
2642	subexp_num = dfa->nodes[bkref_node].opr.idx;
2643
2644	/ For each sub expression /
2645	for (sub_top_idx = `0`; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2646	{
2647	reg_errcode_t err;
2648	re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2649	re_sub_match_last_t *sub_last;
2650	Idx sub_last_idx, sl_str, bkref_str_off;
2651
2652	if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2653	continue; / It isn't related. /
2654
2655	sl_str = sub_top->str_idx;
2656	bkref_str_off = bkref_str_idx;
2657	/ At first, check the last node of sub expressions we already*
2658	evaluated. /*
2659	for (sub_last_idx = `0`; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2660	{
2661	regoff_t sl_str_diff;
2662	sub_last = sub_top->lasts[sub_last_idx];
2663	sl_str_diff = sub_last->str_idx - sl_str;
2664	/ The matched string by the sub expression match with the substring*
2665	at the back reference? /*
2666	if (sl_str_diff > `0`)
2667	{
2668	if (__glibc_unlikely (bkref_str_off + sl_str_diff
2669	> mctx->input.valid_len))
2670	{
2671	/ Not enough chars for a successful match. /
2672	if (bkref_str_off + sl_str_diff > mctx->input.len)
2673	break;
2674
2675	err = clean_state_log_if_needed (mctx,
2676	bkref_str_off
2677	+ sl_str_diff);
2678	if (__glibc_unlikely (err != REG_NOERROR))
2679	return err;
2680	buf = (const char *) re_string_get_buffer (&mctx->input);
2681	}
2682	if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != `0`)
2683	/ We don't need to search this sub expression any more. /
2684	break;
2685	}
2686	bkref_str_off += sl_str_diff;
2687	sl_str += sl_str_diff;
2688	err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2689	bkref_str_idx);
2690
2691	/ Reload buf, since the preceding call might have reallocated*
2692	the buffer. /*
2693	buf = (const char *) re_string_get_buffer (&mctx->input);
2694
2695	if (err == REG_NOMATCH)
2696	continue;
2697	if (__glibc_unlikely (err != REG_NOERROR))
2698	return err;
2699	}
2700
2701	if (sub_last_idx < sub_top->nlasts)
2702	continue;
2703	if (sub_last_idx > `0`)
2704	++sl_str;
2705	/ Then, search for the other last nodes of the sub expression. /
2706	for (; sl_str <= bkref_str_idx; ++sl_str)
2707	{
2708	Idx cls_node;
2709	regoff_t sl_str_off;
2710	const re_node_set *nodes;
2711	sl_str_off = sl_str - sub_top->str_idx;
2712	/ The matched string by the sub expression match with the substring*
2713	at the back reference? /*
2714	if (sl_str_off > `0`)
2715	{
2716	if (__glibc_unlikely (bkref_str_off >= mctx->input.valid_len))
2717	{
2718	/ If we are at the end of the input, we cannot match. /
2719	if (bkref_str_off >= mctx->input.len)
2720	break;
2721
2722	err = extend_buffers (mctx, bkref_str_off + `1`);
2723	if (__glibc_unlikely (err != REG_NOERROR))
2724	return err;
2725
2726	buf = (const char *) re_string_get_buffer (&mctx->input);
2727	}
2728	if (buf [bkref_str_off++] != buf[sl_str - `1`])
2729	break; / We don't need to search this sub expression*
2730	any more. /*
2731	}
2732	if (mctx->state_log[sl_str] == NULL)
2733	continue;
2734	/ Does this state have a ')' of the sub expression? /
2735	nodes = &mctx->state_log[sl_str]->nodes;
2736	cls_node = find_subexp_node (dfa, nodes, subexp_num,
2737	OP_CLOSE_SUBEXP);
2738	if (cls_node == -`1`)
2739	continue; / No. /
2740	if (sub_top->path == NULL)
2741	{
2742	sub_top->path = calloc (sizeof (state_array_t),
2743	sl_str - sub_top->str_idx + `1`);
2744	if (sub_top->path == NULL)
2745	return REG_ESPACE;
2746	}
2747	/ Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node*
2748	in the current context? /*
2749	err = check_arrival (mctx, sub_top->path, sub_top->node,
2750	sub_top->str_idx, cls_node, sl_str,
2751	OP_CLOSE_SUBEXP);
2752	if (err == REG_NOMATCH)
2753	continue;
2754	if (__glibc_unlikely (err != REG_NOERROR))
2755	return err;
2756	sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2757	if (__glibc_unlikely (sub_last == NULL))
2758	return REG_ESPACE;
2759	err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2760	bkref_str_idx);
2761	buf = (const char *) re_string_get_buffer (&mctx->input);
2762	if (err == REG_NOMATCH)
2763	continue;
2764	if (__glibc_unlikely (err != REG_NOERROR))
2765	return err;
2766	}
2767	}
2768	return REG_NOERROR;
2769	}
2770
2771	/ Helper functions for get_subexp(). /
2772
2773	/ Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.*
2774	If it can arrive, register the sub expression expressed with SUB_TOP
2775	and SUB_LAST. /*
2776
2777	static reg_errcode_t
2778	get_subexp_sub (re_match_context_t mctx, const* re_sub_match_top_t *sub_top,
2779	re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
2780	{
2781	reg_errcode_t err;
2782	Idx to_idx;
2783	/ Can the subexpression arrive the back reference? /
2784	err = check_arrival (mctx, &sub_last->path, sub_last->node,
2785	sub_last->str_idx, bkref_node, bkref_str,
2786	OP_OPEN_SUBEXP);
2787	if (err != REG_NOERROR)
2788	return err;
2789	err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2790	sub_last->str_idx);
2791	if (__glibc_unlikely (err != REG_NOERROR))
2792	return err;
2793	to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2794	return clean_state_log_if_needed (mctx, to_idx);
2795	}
2796
2797	/ Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.*
2798	Search '(' if FL_OPEN, or search ')' otherwise.
2799	TODO: This function isn't efficient...
2800	Because there might be more than one nodes whose types are
2801	OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2802	nodes.
2803	E.g. RE: (a){2} /*
2804
2805	static Idx
2806	find_subexp_node (const re_dfa_t dfa, const* re_node_set *nodes,
2807	Idx subexp_idx, int type)
2808	{
2809	Idx cls_idx;
2810	for (cls_idx = `0`; cls_idx < nodes->nelem; ++cls_idx)
2811	{
2812	Idx cls_node = nodes->elems[cls_idx];
2813	const re_token_t *node = dfa->nodes + cls_node;
2814	if (node->type == type
2815	&& node->opr.idx == subexp_idx)
2816	return cls_node;
2817	}
2818	return -`1`;
2819	}
2820
2821	/ Check whether the node TOP_NODE at TOP_STR can arrive to the node*
2822	LAST_NODE at LAST_STR. We record the path onto PATH since it will be
2823	heavily reused.
2824	Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. /*
2825
2826	static reg_errcode_t
2827	__attribute_warn_unused_result__
2828	check_arrival (re_match_context_t mctx, state_array_t path, Idx top_node,
2829	Idx top_str, Idx last_node, Idx last_str, int type)
2830	{
2831	const re_dfa_t *const dfa = mctx->dfa;
2832	reg_errcode_t err = REG_NOERROR;
2833	Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
2834	re_dfastate_t *cur_state = NULL;
2835	re_node_set *cur_nodes, next_nodes;
2836	re_dfastate_t **backup_state_log;
2837	unsigned int context;
2838
2839	subexp_num = dfa->nodes[top_node].opr.idx;
2840	/ Extend the buffer if we need. /
2841	if (__glibc_unlikely (path->alloc < last_str + mctx->max_mb_elem_len + `1`))
2842	{
2843	re_dfastate_t **new_array;
2844	Idx old_alloc = path->alloc;
2845	Idx incr_alloc = last_str + mctx->max_mb_elem_len + `1`;
2846	Idx new_alloc;
2847	if (__glibc_unlikely (IDX_MAX - old_alloc < incr_alloc))
2848	return REG_ESPACE;
2849	new_alloc = old_alloc + incr_alloc;
2850	if (__glibc_unlikely (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc))
2851	return REG_ESPACE;
2852	new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
2853	if (__glibc_unlikely (new_array == NULL))
2854	return REG_ESPACE;
2855	path->array = new_array;
2856	path->alloc = new_alloc;
2857	memset (new_array + old_alloc, `'\0'`,
2858	sizeof (re_dfastate_t ) (path->alloc - old_alloc));
2859	}
2860
2861	str_idx = path->next_idx ? path->next_idx : top_str;
2862
2863	/ Temporary modify MCTX. /
2864	backup_state_log = mctx->state_log;
2865	backup_cur_idx = mctx->input.cur_idx;
2866	mctx->state_log = path->array;
2867	mctx->input.cur_idx = str_idx;
2868
2869	/ Setup initial node set. /
2870	context = re_string_context_at (&mctx->input, str_idx - `1`, mctx->eflags);
2871	if (str_idx == top_str)
2872	{
2873	err = re_node_set_init_1 (&next_nodes, top_node);
2874	if (__glibc_unlikely (err != REG_NOERROR))
2875	return err;
2876	err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2877	if (__glibc_unlikely (err != REG_NOERROR))
2878	{
2879	re_node_set_free (&next_nodes);
2880	return err;
2881	}
2882	}
2883	else
2884	{
2885	cur_state = mctx->state_log[str_idx];
2886	if (cur_state && cur_state->has_backref)
2887	{
2888	err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2889	if (__glibc_unlikely (err != REG_NOERROR))
2890	return err;
2891	}
2892	else
2893	re_node_set_init_empty (&next_nodes);
2894	}
2895	if (str_idx == top_str \|\| (cur_state && cur_state->has_backref))
2896	{
2897	if (next_nodes.nelem)
2898	{
2899	err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2900	subexp_num, type);
2901	if (__glibc_unlikely (err != REG_NOERROR))
2902	{
2903	re_node_set_free (&next_nodes);
2904	return err;
2905	}
2906	}
2907	cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2908	if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR))
2909	{
2910	re_node_set_free (&next_nodes);
2911	return err;
2912	}
2913	mctx->state_log[str_idx] = cur_state;
2914	}
2915
2916	for (null_cnt = `0`; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2917	{
2918	re_node_set_empty (&next_nodes);
2919	if (mctx->state_log[str_idx + `1`])
2920	{
2921	err = re_node_set_merge (&next_nodes,
2922	&mctx->state_log[str_idx + `1`]->nodes);
2923	if (__glibc_unlikely (err != REG_NOERROR))
2924	{
2925	re_node_set_free (&next_nodes);
2926	return err;
2927	}
2928	}
2929	if (cur_state)
2930	{
2931	err = check_arrival_add_next_nodes (mctx, str_idx,
2932	&cur_state->non_eps_nodes,
2933	&next_nodes);
2934	if (__glibc_unlikely (err != REG_NOERROR))
2935	{
2936	re_node_set_free (&next_nodes);
2937	return err;
2938	}
2939	}
2940	++str_idx;
2941	if (next_nodes.nelem)
2942	{
2943	err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2944	if (__glibc_unlikely (err != REG_NOERROR))
2945	{
2946	re_node_set_free (&next_nodes);
2947	return err;
2948	}
2949	err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2950	subexp_num, type);
2951	if (__glibc_unlikely (err != REG_NOERROR))
2952	{
2953	re_node_set_free (&next_nodes);
2954	return err;
2955	}
2956	}
2957	context = re_string_context_at (&mctx->input, str_idx - `1`, mctx->eflags);
2958	cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2959	if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR))
2960	{
2961	re_node_set_free (&next_nodes);
2962	return err;
2963	}
2964	mctx->state_log[str_idx] = cur_state;
2965	null_cnt = cur_state == NULL ? null_cnt + `1` : `0`;
2966	}
2967	re_node_set_free (&next_nodes);
2968	cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
2969	: &mctx->state_log[last_str]->nodes);
2970	path->next_idx = str_idx;
2971
2972	/ Fix MCTX. /
2973	mctx->state_log = backup_state_log;
2974	mctx->input.cur_idx = backup_cur_idx;
2975
2976	/ Then check the current node set has the node LAST_NODE. /
2977	if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
2978	return REG_NOERROR;
2979
2980	return REG_NOMATCH;
2981	}
2982
2983	/ Helper functions for check_arrival. /
2984
2985	/ Calculate the destination nodes of CUR_NODES at STR_IDX, and append them*
2986	to NEXT_NODES.
2987	TODO: This function is similar to the functions transit_state(),*
2988	however this function has many additional works.
2989	Can't we unify them? /*
2990
2991	static reg_errcode_t
2992	__attribute_warn_unused_result__
2993	check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
2994	re_node_set cur_nodes, re_node_set next_nodes)
2995	{
2996	const re_dfa_t *const dfa = mctx->dfa;
2997	bool ok;
2998	Idx cur_idx;
2999	#ifdef RE_ENABLE_I18N
3000	reg_errcode_t err = REG_NOERROR;
3001	#endif
3002	re_node_set union_set;
3003	re_node_set_init_empty (&union_set);
3004	for (cur_idx = `0`; cur_idx < cur_nodes->nelem; ++cur_idx)
3005	{
3006	int naccepted = `0`;
3007	Idx cur_node = cur_nodes->elems[cur_idx];
3008	DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[cur_node].type));
3009
3010	#ifdef RE_ENABLE_I18N
3011	/ If the node may accept "multi byte". /
3012	if (dfa->nodes[cur_node].accept_mb)
3013	{
3014	naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3015	str_idx);
3016	if (naccepted > `1`)
3017	{
3018	re_dfastate_t *dest_state;
3019	Idx next_node = dfa->nexts[cur_node];
3020	Idx next_idx = str_idx + naccepted;
3021	dest_state = mctx->state_log[next_idx];
3022	re_node_set_empty (&union_set);
3023	if (dest_state)
3024	{
3025	err = re_node_set_merge (&union_set, &dest_state->nodes);
3026	if (__glibc_unlikely (err != REG_NOERROR))
3027	{
3028	re_node_set_free (&union_set);
3029	return err;
3030	}
3031	}
3032	ok = re_node_set_insert (&union_set, next_node);
3033	if (__glibc_unlikely (! ok))
3034	{
3035	re_node_set_free (&union_set);
3036	return REG_ESPACE;
3037	}
3038	mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3039	&union_set);
3040	if (__glibc_unlikely (mctx->state_log[next_idx] == NULL
3041	&& err != REG_NOERROR))
3042	{
3043	re_node_set_free (&union_set);
3044	return err;
3045	}
3046	}
3047	}
3048	#endif /* RE_ENABLE_I18N */
3049	if (naccepted
3050	\|\| check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3051	{
3052	ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3053	if (__glibc_unlikely (! ok))
3054	{
3055	re_node_set_free (&union_set);
3056	return REG_ESPACE;
3057	}
3058	}
3059	}
3060	re_node_set_free (&union_set);
3061	return REG_NOERROR;
3062	}
3063
3064	/ For all the nodes in CUR_NODES, add the epsilon closures of them to*
3065	CUR_NODES, however exclude the nodes which are:
3066	- inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3067	- out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3068	*/
3069
3070	static reg_errcode_t
3071	check_arrival_expand_ecl (const re_dfa_t dfa, re_node_set cur_nodes,
3072	Idx ex_subexp, int type)
3073	{
3074	reg_errcode_t err;
3075	Idx idx, outside_node;
3076	re_node_set new_nodes;
3077	DEBUG_ASSERT (cur_nodes->nelem);
3078	err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3079	if (__glibc_unlikely (err != REG_NOERROR))
3080	return err;
3081	/ Create a new node set NEW_NODES with the nodes which are epsilon*
3082	closures of the node in CUR_NODES. /*
3083
3084	for (idx = `0`; idx < cur_nodes->nelem; ++idx)
3085	{
3086	Idx cur_node = cur_nodes->elems[idx];
3087	const re_node_set *eclosure = dfa->eclosures + cur_node;
3088	outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3089	if (outside_node == -`1`)
3090	{
3091	/ There are no problematic nodes, just merge them. /
3092	err = re_node_set_merge (&new_nodes, eclosure);
3093	if (__glibc_unlikely (err != REG_NOERROR))
3094	{
3095	re_node_set_free (&new_nodes);
3096	return err;
3097	}
3098	}
3099	else
3100	{
3101	/ There are problematic nodes, re-calculate incrementally. /
3102	err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3103	ex_subexp, type);
3104	if (__glibc_unlikely (err != REG_NOERROR))
3105	{
3106	re_node_set_free (&new_nodes);
3107	return err;
3108	}
3109	}
3110	}
3111	re_node_set_free (cur_nodes);
3112	*cur_nodes = new_nodes;
3113	return REG_NOERROR;
3114	}
3115
3116	/ Helper function for check_arrival_expand_ecl.*
3117	Check incrementally the epsilon closure of TARGET, and if it isn't
3118	problematic append it to DST_NODES. /*
3119
3120	static reg_errcode_t
3121	__attribute_warn_unused_result__
3122	check_arrival_expand_ecl_sub (const re_dfa_t dfa, re_node_set dst_nodes,
3123	Idx target, Idx ex_subexp, int type)
3124	{
3125	Idx cur_node;
3126	for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3127	{
3128	bool ok;
3129
3130	if (dfa->nodes[cur_node].type == type
3131	&& dfa->nodes[cur_node].opr.idx == ex_subexp)
3132	{
3133	if (type == OP_CLOSE_SUBEXP)
3134	{
3135	ok = re_node_set_insert (dst_nodes, cur_node);
3136	if (__glibc_unlikely (! ok))
3137	return REG_ESPACE;
3138	}
3139	break;
3140	}
3141	ok = re_node_set_insert (dst_nodes, cur_node);
3142	if (__glibc_unlikely (! ok))
3143	return REG_ESPACE;
3144	if (dfa->edests[cur_node].nelem == `0`)
3145	break;
3146	if (dfa->edests[cur_node].nelem == `2`)
3147	{
3148	reg_errcode_t err;
3149	err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3150	dfa->edests[cur_node].elems[`1`],
3151	ex_subexp, type);
3152	if (__glibc_unlikely (err != REG_NOERROR))
3153	return err;
3154	}
3155	cur_node = dfa->edests[cur_node].elems[`0`];
3156	}
3157	return REG_NOERROR;
3158	}
3159
3160
3161	/ For all the back references in the current state, calculate the*
3162	destination of the back references by the appropriate entry
3163	in MCTX->BKREF_ENTS. /*
3164
3165	static reg_errcode_t
3166	__attribute_warn_unused_result__
3167	expand_bkref_cache (re_match_context_t mctx, re_node_set cur_nodes,
3168	Idx cur_str, Idx subexp_num, int type)
3169	{
3170	const re_dfa_t *const dfa = mctx->dfa;
3171	reg_errcode_t err;
3172	Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3173	struct re_backref_cache_entry *ent;
3174
3175	if (cache_idx_start == -`1`)
3176	return REG_NOERROR;
3177
3178	restart:
3179	ent = mctx->bkref_ents + cache_idx_start;
3180	do
3181	{
3182	Idx to_idx, next_node;
3183
3184	/ Is this entry ENT is appropriate? /
3185	if (!re_node_set_contains (cur_nodes, ent->node))
3186	continue; / No. /
3187
3188	to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3189	/ Calculate the destination of the back reference, and append it*
3190	to MCTX->STATE_LOG. /*
3191	if (to_idx == cur_str)
3192	{
3193	/ The backreference did epsilon transit, we must re-check all the*
3194	node in the current state. /*
3195	re_node_set new_dests;
3196	reg_errcode_t err2, err3;
3197	next_node = dfa->edests[ent->node].elems[`0`];
3198	if (re_node_set_contains (cur_nodes, next_node))
3199	continue;
3200	err = re_node_set_init_1 (&new_dests, next_node);
3201	err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3202	err3 = re_node_set_merge (cur_nodes, &new_dests);
3203	re_node_set_free (&new_dests);
3204	if (__glibc_unlikely (err != REG_NOERROR \|\| err2 != REG_NOERROR
3205	\|\| err3 != REG_NOERROR))
3206	{
3207	err = (err != REG_NOERROR ? err
3208	: (err2 != REG_NOERROR ? err2 : err3));
3209	return err;
3210	}
3211	/ TODO: It is still inefficient... /
3212	goto restart;
3213	}
3214	else
3215	{
3216	re_node_set union_set;
3217	next_node = dfa->nexts[ent->node];
3218	if (mctx->state_log[to_idx])
3219	{
3220	bool ok;
3221	if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3222	next_node))
3223	continue;
3224	err = re_node_set_init_copy (&union_set,
3225	&mctx->state_log[to_idx]->nodes);
3226	ok = re_node_set_insert (&union_set, next_node);
3227	if (__glibc_unlikely (err != REG_NOERROR \|\| ! ok))
3228	{
3229	re_node_set_free (&union_set);
3230	err = err != REG_NOERROR ? err : REG_ESPACE;
3231	return err;
3232	}
3233	}
3234	else
3235	{
3236	err = re_node_set_init_1 (&union_set, next_node);
3237	if (__glibc_unlikely (err != REG_NOERROR))
3238	return err;
3239	}
3240	mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3241	re_node_set_free (&union_set);
3242	if (__glibc_unlikely (mctx->state_log[to_idx] == NULL
3243	&& err != REG_NOERROR))
3244	return err;
3245	}
3246	}
3247	while (ent++->more);
3248	return REG_NOERROR;
3249	}
3250
3251	/ Build transition table for the state.*
3252	Return true if successful. /*
3253
3254	static bool
3255	build_trtable (const re_dfa_t dfa, re_dfastate_t state)
3256	{
3257	reg_errcode_t err;
3258	Idx i, j;
3259	int ch;
3260	bool need_word_trtable = false;
3261	bitset_word_t elem, mask;
3262	bool dests_node_malloced = false;
3263	bool dest_states_malloced = false;
3264	Idx ndests; / Number of the destination states from 'state'. /
3265	re_dfastate_t **trtable;
3266	re_dfastate_t dest_states = NULL, dest_states_word, **dest_states_nl;
3267	re_node_set follows, *dests_node;
3268	bitset_t *dests_ch;
3269	bitset_t acceptable;
3270
3271	struct dests_alloc
3272	{
3273	re_node_set dests_node[SBC_MAX];
3274	bitset_t dests_ch[SBC_MAX];
3275	} *dests_alloc;
3276
3277	/ We build DFA states which corresponds to the destination nodes*
3278	from 'state'. 'dests_node[i]' represents the nodes which i-th
3279	destination state contains, and 'dests_ch[i]' represents the
3280	characters which i-th destination state accepts. /*
3281	if (__libc_use_alloca (sizeof (struct dests_alloc)))
3282	dests_alloc = (struct dests_alloc ) alloca (sizeof* (struct dests_alloc));
3283	else
3284	{
3285	dests_alloc = re_malloc (struct dests_alloc, `1`);
3286	if (__glibc_unlikely (dests_alloc == NULL))
3287	return false;
3288	dests_node_malloced = true;
3289	}
3290	dests_node = dests_alloc->dests_node;
3291	dests_ch = dests_alloc->dests_ch;
3292
3293	/ Initialize transition table. /
3294	state->word_trtable = state->trtable = NULL;
3295
3296	/ At first, group all nodes belonging to 'state' into several*
3297	destinations. /*
3298	ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3299	if (__glibc_unlikely (ndests <= `0`))
3300	{
3301	if (dests_node_malloced)
3302	re_free (dests_alloc);
3303	/ Return false in case of an error, true otherwise. /
3304	if (ndests == `0`)
3305	{
3306	state->trtable = (re_dfastate_t **)
3307	calloc (sizeof (re_dfastate_t *), SBC_MAX);
3308	if (__glibc_unlikely (state->trtable == NULL))
3309	return false;
3310	return true;
3311	}
3312	return false;
3313	}
3314
3315	err = re_node_set_alloc (&follows, ndests + `1`);
3316	if (__glibc_unlikely (err != REG_NOERROR))
3317	goto out_free;
3318
3319	/ Avoid arithmetic overflow in size calculation. /
3320	size_t ndests_max
3321	= ((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
3322	/ (`3` * sizeof (re_dfastate_t *)));
3323	if (__glibc_unlikely (ndests_max < ndests))
3324	goto out_free;
3325
3326	if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
3327	+ ndests * `3` * sizeof (re_dfastate_t *)))
3328	dest_states = (re_dfastate_t **)
3329	alloca (ndests * `3` * sizeof (re_dfastate_t *));
3330	else
3331	{
3332	dest_states = re_malloc (re_dfastate_t , ndests `3`);
3333	if (__glibc_unlikely (dest_states == NULL))
3334	{
3335	out_free:
3336	if (dest_states_malloced)
3337	re_free (dest_states);
3338	re_node_set_free (&follows);
3339	for (i = `0`; i < ndests; ++i)
3340	re_node_set_free (dests_node + i);
3341	if (dests_node_malloced)
3342	re_free (dests_alloc);
3343	return false;
3344	}
3345	dest_states_malloced = true;
3346	}
3347	dest_states_word = dest_states + ndests;
3348	dest_states_nl = dest_states_word + ndests;
3349	bitset_empty (acceptable);
3350
3351	/ Then build the states for all destinations. /
3352	for (i = `0`; i < ndests; ++i)
3353	{
3354	Idx next_node;
3355	re_node_set_empty (&follows);
3356	/ Merge the follows of this destination states. /
3357	for (j = `0`; j < dests_node[i].nelem; ++j)
3358	{
3359	next_node = dfa->nexts[dests_node[i].elems[j]];
3360	if (next_node != -`1`)
3361	{
3362	err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3363	if (__glibc_unlikely (err != REG_NOERROR))
3364	goto out_free;
3365	}
3366	}
3367	dest_states[i] = re_acquire_state_context (&err, dfa, &follows, `0`);
3368	if (__glibc_unlikely (dest_states[i] == NULL && err != REG_NOERROR))
3369	goto out_free;
3370	/ If the new state has context constraint,*
3371	build appropriate states for these contexts. /*
3372	if (dest_states[i]->has_constraint)
3373	{
3374	dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3375	CONTEXT_WORD);
3376	if (__glibc_unlikely (dest_states_word[i] == NULL
3377	&& err != REG_NOERROR))
3378	goto out_free;
3379
3380	if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > `1`)
3381	need_word_trtable = true;
3382
3383	dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3384	CONTEXT_NEWLINE);
3385	if (__glibc_unlikely (dest_states_nl[i] == NULL && err != REG_NOERROR))
3386	goto out_free;
3387	}
3388	else
3389	{
3390	dest_states_word[i] = dest_states[i];
3391	dest_states_nl[i] = dest_states[i];
3392	}
3393	bitset_merge (acceptable, dests_ch[i]);
3394	}
3395
3396	if (!__glibc_unlikely (need_word_trtable))
3397	{
3398	/ We don't care about whether the following character is a word*
3399	character, or we are in a single-byte character set so we can
3400	discern by looking at the character code: allocate a
3401	256-entry transition table. /*
3402	trtable = state->trtable =
3403	(re_dfastate_t ) calloc (sizeof** (re_dfastate_t *), SBC_MAX);
3404	if (__glibc_unlikely (trtable == NULL))
3405	goto out_free;
3406
3407	/ For all characters ch...: /
3408	for (i = `0`; i < BITSET_WORDS; ++i)
3409	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = `1`;
3410	elem;
3411	mask <<= `1`, elem >>= `1`, ++ch)
3412	if (__glibc_unlikely (elem & `1`))
3413	{
3414	/ There must be exactly one destination which accepts*
3415	character ch. See group_nodes_into_DFAstates. /*
3416	for (j = `0`; (dests_ch[j][i] & mask) == `0`; ++j)
3417	;
3418
3419	/ j-th destination accepts the word character ch. /
3420	if (dfa->word_char[i] & mask)
3421	trtable[ch] = dest_states_word[j];
3422	else
3423	trtable[ch] = dest_states[j];
3424	}
3425	}
3426	else
3427	{
3428	/ We care about whether the following character is a word*
3429	character, and we are in a multi-byte character set: discern
3430	by looking at the character code: build two 256-entry
3431	transition tables, one starting at trtable[0] and one
3432	starting at trtable[SBC_MAX]. /*
3433	trtable = state->word_trtable =
3434	(re_dfastate_t ) calloc (sizeof** (re_dfastate_t ), `2` SBC_MAX);
3435	if (__glibc_unlikely (trtable == NULL))
3436	goto out_free;
3437
3438	/ For all characters ch...: /
3439	for (i = `0`; i < BITSET_WORDS; ++i)
3440	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = `1`;
3441	elem;
3442	mask <<= `1`, elem >>= `1`, ++ch)
3443	if (__glibc_unlikely (elem & `1`))
3444	{
3445	/ There must be exactly one destination which accepts*
3446	character ch. See group_nodes_into_DFAstates. /*
3447	for (j = `0`; (dests_ch[j][i] & mask) == `0`; ++j)
3448	;
3449
3450	/ j-th destination accepts the word character ch. /
3451	trtable[ch] = dest_states[j];
3452	trtable[ch + SBC_MAX] = dest_states_word[j];
3453	}
3454	}
3455
3456	/ new line /
3457	if (bitset_contain (acceptable, NEWLINE_CHAR))
3458	{
3459	/ The current state accepts newline character. /
3460	for (j = `0`; j < ndests; ++j)
3461	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3462	{
3463	/ k-th destination accepts newline character. /
3464	trtable[NEWLINE_CHAR] = dest_states_nl[j];
3465	if (need_word_trtable)
3466	trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3467	/ There must be only one destination which accepts*
3468	newline. See group_nodes_into_DFAstates. /*
3469	break;
3470	}
3471	}
3472
3473	if (dest_states_malloced)
3474	re_free (dest_states);
3475
3476	re_node_set_free (&follows);
3477	for (i = `0`; i < ndests; ++i)
3478	re_node_set_free (dests_node + i);
3479
3480	if (dests_node_malloced)
3481	re_free (dests_alloc);
3482
3483	return true;
3484	}
3485
3486	/ Group all nodes belonging to STATE into several destinations.*
3487	Then for all destinations, set the nodes belonging to the destination
3488	to DESTS_NODE[i] and set the characters accepted by the destination
3489	to DEST_CH[i]. This function return the number of destinations. /*
3490
3491	static Idx
3492	group_nodes_into_DFAstates (const re_dfa_t dfa, const* re_dfastate_t *state,
3493	re_node_set dests_node, bitset_t dests_ch)
3494	{
3495	reg_errcode_t err;
3496	bool ok;
3497	Idx i, j, k;
3498	Idx ndests; / Number of the destinations from 'state'. /
3499	bitset_t accepts; / Characters a node can accept. /
3500	const re_node_set *cur_nodes = &state->nodes;
3501	bitset_empty (accepts);
3502	ndests = `0`;
3503
3504	/ For all the nodes belonging to 'state', /
3505	for (i = `0`; i < cur_nodes->nelem; ++i)
3506	{
3507	re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3508	re_token_type_t type = node->type;
3509	unsigned int constraint = node->constraint;
3510
3511	/ Enumerate all single byte character this node can accept. /
3512	if (type == CHARACTER)
3513	bitset_set (accepts, node->opr.c);
3514	else if (type == SIMPLE_BRACKET)
3515	{
3516	bitset_merge (accepts, node->opr.sbcset);
3517	}
3518	else if (type == OP_PERIOD)
3519	{
3520	#ifdef RE_ENABLE_I18N
3521	if (dfa->mb_cur_max > `1`)
3522	bitset_merge (accepts, dfa->sb_char);
3523	else
3524	#endif
3525	bitset_set_all (accepts);
3526	if (!(dfa->syntax & RE_DOT_NEWLINE))
3527	bitset_clear (accepts, `'\n'`);
3528	if (dfa->syntax & RE_DOT_NOT_NULL)
3529	bitset_clear (accepts, `'\0'`);
3530	}
3531	#ifdef RE_ENABLE_I18N
3532	else if (type == OP_UTF8_PERIOD)
3533	{
3534	if (ASCII_CHARS % BITSET_WORD_BITS == `0`)
3535	memset (accepts, -`1`, ASCII_CHARS / CHAR_BIT);
3536	else
3537	bitset_merge (accepts, utf8_sb_map);
3538	if (!(dfa->syntax & RE_DOT_NEWLINE))
3539	bitset_clear (accepts, `'\n'`);
3540	if (dfa->syntax & RE_DOT_NOT_NULL)
3541	bitset_clear (accepts, `'\0'`);
3542	}
3543	#endif
3544	else
3545	continue;
3546
3547	/ Check the 'accepts' and sift the characters which are not*
3548	match it the context. /*
3549	if (constraint)
3550	{
3551	if (constraint & NEXT_NEWLINE_CONSTRAINT)
3552	{
3553	bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3554	bitset_empty (accepts);
3555	if (accepts_newline)
3556	bitset_set (accepts, NEWLINE_CHAR);
3557	else
3558	continue;
3559	}
3560	if (constraint & NEXT_ENDBUF_CONSTRAINT)
3561	{
3562	bitset_empty (accepts);
3563	continue;
3564	}
3565
3566	if (constraint & NEXT_WORD_CONSTRAINT)
3567	{
3568	bitset_word_t any_set = `0`;
3569	if (type == CHARACTER && !node->word_char)
3570	{
3571	bitset_empty (accepts);
3572	continue;
3573	}
3574	#ifdef RE_ENABLE_I18N
3575	if (dfa->mb_cur_max > `1`)
3576	for (j = `0`; j < BITSET_WORDS; ++j)
3577	any_set \|= (accepts[j] &= (dfa->word_char[j] \| ~dfa->sb_char[j]));
3578	else
3579	#endif
3580	for (j = `0`; j < BITSET_WORDS; ++j)
3581	any_set \|= (accepts[j] &= dfa->word_char[j]);
3582	if (!any_set)
3583	continue;
3584	}
3585	if (constraint & NEXT_NOTWORD_CONSTRAINT)
3586	{
3587	bitset_word_t any_set = `0`;
3588	if (type == CHARACTER && node->word_char)
3589	{
3590	bitset_empty (accepts);
3591	continue;
3592	}
3593	#ifdef RE_ENABLE_I18N
3594	if (dfa->mb_cur_max > `1`)
3595	for (j = `0`; j < BITSET_WORDS; ++j)
3596	any_set \|= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3597	else
3598	#endif
3599	for (j = `0`; j < BITSET_WORDS; ++j)
3600	any_set \|= (accepts[j] &= ~dfa->word_char[j]);
3601	if (!any_set)
3602	continue;
3603	}
3604	}
3605
3606	/ Then divide 'accepts' into DFA states, or create a new*
3607	state. Above, we make sure that accepts is not empty. /*
3608	for (j = `0`; j < ndests; ++j)
3609	{
3610	bitset_t intersec; / Intersection sets, see below. /
3611	bitset_t remains;
3612	/ Flags, see below. /
3613	bitset_word_t has_intersec, not_subset, not_consumed;
3614
3615	/ Optimization, skip if this state doesn't accept the character. /
3616	if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3617	continue;
3618
3619	/ Enumerate the intersection set of this state and 'accepts'. /
3620	has_intersec = `0`;
3621	for (k = `0`; k < BITSET_WORDS; ++k)
3622	has_intersec \|= intersec[k] = accepts[k] & dests_ch[j][k];
3623	/ And skip if the intersection set is empty. /
3624	if (!has_intersec)
3625	continue;
3626
3627	/ Then check if this state is a subset of 'accepts'. /
3628	not_subset = not_consumed = `0`;
3629	for (k = `0`; k < BITSET_WORDS; ++k)
3630	{
3631	not_subset \|= remains[k] = ~accepts[k] & dests_ch[j][k];
3632	not_consumed \|= accepts[k] = accepts[k] & ~dests_ch[j][k];
3633	}
3634
3635	/ If this state isn't a subset of 'accepts', create a*
3636	new group state, which has the 'remains'. /*
3637	if (not_subset)
3638	{
3639	bitset_copy (dests_ch[ndests], remains);
3640	bitset_copy (dests_ch[j], intersec);
3641	err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3642	if (__glibc_unlikely (err != REG_NOERROR))
3643	goto error_return;
3644	++ndests;
3645	}
3646
3647	/ Put the position in the current group. /
3648	ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3649	if (__glibc_unlikely (! ok))
3650	goto error_return;
3651
3652	/ If all characters are consumed, go to next node. /
3653	if (!not_consumed)
3654	break;
3655	}
3656	/ Some characters remain, create a new group. /
3657	if (j == ndests)
3658	{
3659	bitset_copy (dests_ch[ndests], accepts);
3660	err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3661	if (__glibc_unlikely (err != REG_NOERROR))
3662	goto error_return;
3663	++ndests;
3664	bitset_empty (accepts);
3665	}
3666	}
3667	assume (ndests <= SBC_MAX);
3668	return ndests;
3669	error_return:
3670	for (j = `0`; j < ndests; ++j)
3671	re_node_set_free (dests_node + j);
3672	return -`1`;
3673	}
3674
3675	#ifdef RE_ENABLE_I18N
3676	/ Check how many bytes the node 'dfa->nodes[node_idx]' accepts.*
3677	Return the number of the bytes the node accepts.
3678	STR_IDX is the current index of the input string.
3679
3680	This function handles the nodes which can accept one character, or
3681	one collating element like '.', '[a-z]', opposite to the other nodes
3682	can only accept one byte. /*
3683
3684	# ifdef _LIBC
3685	# include <locale/weight.h>
3686	# endif
3687
3688	static int
3689	check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
3690	const re_string_t *input, Idx str_idx)
3691	{
3692	const re_token_t *node = dfa->nodes + node_idx;
3693	int char_len, elem_len;
3694	Idx i;
3695
3696	if (__glibc_unlikely (node->type == OP_UTF8_PERIOD))
3697	{
3698	unsigned char c = re_string_byte_at (input, str_idx), d;
3699	if (__glibc_likely (c < `0xc2`))
3700	return `0`;
3701
3702	if (str_idx + `2` > input->len)
3703	return `0`;
3704
3705	d = re_string_byte_at (input, str_idx + `1`);
3706	if (c < `0xe0`)
3707	return (d < `0x80` \|\| d > `0xbf`) ? `0` : `2`;
3708	else if (c < `0xf0`)
3709	{
3710	char_len = `3`;
3711	if (c == `0xe0` && d < `0xa0`)
3712	return `0`;
3713	}
3714	else if (c < `0xf8`)
3715	{
3716	char_len = `4`;
3717	if (c == `0xf0` && d < `0x90`)
3718	return `0`;
3719	}
3720	else if (c < `0xfc`)
3721	{
3722	char_len = `5`;
3723	if (c == `0xf8` && d < `0x88`)
3724	return `0`;
3725	}
3726	else if (c < `0xfe`)
3727	{
3728	char_len = `6`;
3729	if (c == `0xfc` && d < `0x84`)
3730	return `0`;
3731	}
3732	else
3733	return `0`;
3734
3735	if (str_idx + char_len > input->len)
3736	return `0`;
3737
3738	for (i = `1`; i < char_len; ++i)
3739	{
3740	d = re_string_byte_at (input, str_idx + i);
3741	if (d < `0x80` \|\| d > `0xbf`)
3742	return `0`;
3743	}
3744	return char_len;
3745	}
3746
3747	char_len = re_string_char_size_at (input, str_idx);
3748	if (node->type == OP_PERIOD)
3749	{
3750	if (char_len <= `1`)
3751	return `0`;
3752	/ FIXME: I don't think this if is needed, as both '\n'*
3753	and '\0' are char_len == 1. /*
3754	/ '.' accepts any one character except the following two cases. /
3755	if ((!(dfa->syntax & RE_DOT_NEWLINE)
3756	&& re_string_byte_at (input, str_idx) == `'\n'`)
3757	\|\| ((dfa->syntax & RE_DOT_NOT_NULL)
3758	&& re_string_byte_at (input, str_idx) == `'\0'`))
3759	return `0`;
3760	return char_len;
3761	}
3762
3763	elem_len = re_string_elem_size_at (input, str_idx);
3764	if ((elem_len <= `1` && char_len <= `1`) \|\| char_len == `0`)
3765	return `0`;
3766
3767	if (node->type == COMPLEX_BRACKET)
3768	{
3769	const re_charset_t *cset = node->opr.mbcset;
3770	# ifdef _LIBC
3771	const unsigned char *pin
3772	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3773	Idx j;
3774	uint32_t nrules;
3775	# endif /* _LIBC */
3776	int match_len = `0`;
3777	wchar_t wc = ((cset->nranges \|\| cset->nchar_classes \|\| cset->nmbchars)
3778	? re_string_wchar_at (input, str_idx) : `0`);
3779
3780	/ match with multibyte character? /
3781	for (i = `0`; i < cset->nmbchars; ++i)
3782	if (wc == cset->mbchars[i])
3783	{
3784	match_len = char_len;
3785	goto check_node_accept_bytes_match;
3786	}
3787	/ match with character_class? /
3788	for (i = `0`; i < cset->nchar_classes; ++i)
3789	{
3790	wctype_t wt = cset->char_classes[i];
3791	if (__iswctype (wc, wt))
3792	{
3793	match_len = char_len;
3794	goto check_node_accept_bytes_match;
3795	}
3796	}
3797
3798	# ifdef _LIBC
3799	nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3800	if (nrules != `0`)
3801	{
3802	unsigned int in_collseq = `0`;
3803	const int32_t table, indirect;
3804	const unsigned char weights, extra;
3805	const char *collseqwc;
3806
3807	/ match with collating_symbol? /
3808	if (cset->ncoll_syms)
3809	extra = (const unsigned char *)
3810	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3811	for (i = `0`; i < cset->ncoll_syms; ++i)
3812	{
3813	const unsigned char *coll_sym = extra + cset->coll_syms[i];
3814	/ Compare the length of input collating element and*
3815	the length of current collating element. /*
3816	if (*coll_sym != elem_len)
3817	continue;
3818	/ Compare each bytes. /
3819	for (j = `0`; j < *coll_sym; j++)
3820	if (pin[j] != coll_sym[`1` + j])
3821	break;
3822	if (j == *coll_sym)
3823	{
3824	/ Match if every bytes is equal. /
3825	match_len = j;
3826	goto check_node_accept_bytes_match;
3827	}
3828	}
3829
3830	if (cset->nranges)
3831	{
3832	if (elem_len <= char_len)
3833	{
3834	collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3835	in_collseq = __collseq_table_lookup (collseqwc, wc);
3836	}
3837	else
3838	in_collseq = find_collation_sequence_value (pin, elem_len);
3839	}
3840	/ match with range expression? /
3841	/ FIXME: Implement rational ranges here, too. /
3842	for (i = `0`; i < cset->nranges; ++i)
3843	if (cset->range_starts[i] <= in_collseq
3844	&& in_collseq <= cset->range_ends[i])
3845	{
3846	match_len = elem_len;
3847	goto check_node_accept_bytes_match;
3848	}
3849
3850	/ match with equivalence_class? /
3851	if (cset->nequiv_classes)
3852	{
3853	const unsigned char *cp = pin;
3854	table = (const int32_t *)
3855	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3856	weights = (const unsigned char *)
3857	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3858	extra = (const unsigned char *)
3859	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3860	indirect = (const int32_t *)
3861	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3862	int32_t idx = findidx (table, indirect, extra, &cp, elem_len);
3863	int32_t rule = idx >> `24`;
3864	idx &= `0xffffff`;
3865	if (idx > `0`)
3866	{
3867	size_t weight_len = weights[idx];
3868	for (i = `0`; i < cset->nequiv_classes; ++i)
3869	{
3870	int32_t equiv_class_idx = cset->equiv_classes[i];
3871	int32_t equiv_class_rule = equiv_class_idx >> `24`;
3872	equiv_class_idx &= `0xffffff`;
3873	if (weights[equiv_class_idx] == weight_len
3874	&& equiv_class_rule == rule
3875	&& memcmp (weights + idx + `1`,
3876	weights + equiv_class_idx + `1`,
3877	weight_len) == `0`)
3878	{
3879	match_len = elem_len;
3880	goto check_node_accept_bytes_match;
3881	}
3882	}
3883	}
3884	}
3885	}
3886	else
3887	# endif /* _LIBC */
3888	{
3889	/ match with range expression? /
3890	for (i = `0`; i < cset->nranges; ++i)
3891	{
3892	if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i])
3893	{
3894	match_len = char_len;
3895	goto check_node_accept_bytes_match;
3896	}
3897	}
3898	}
3899	check_node_accept_bytes_match:
3900	if (!cset->non_match)
3901	return match_len;
3902	else
3903	{
3904	if (match_len > `0`)
3905	return `0`;
3906	else
3907	return (elem_len > char_len) ? elem_len : char_len;
3908	}
3909	}
3910	return `0`;
3911	}
3912
3913	# ifdef _LIBC
3914	static unsigned int
3915	find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
3916	{
3917	uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3918	if (nrules == `0`)
3919	{
3920	if (mbs_len == `1`)
3921	{
3922	/ No valid character. Match it as a single byte character. /
3923	const unsigned char collseq = (const* unsigned char *)
3924	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3925	return collseq[mbs[`0`]];
3926	}
3927	return UINT_MAX;
3928	}
3929	else
3930	{
3931	int32_t idx;
3932	const unsigned char extra = (const* unsigned char *)
3933	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3934	int32_t extrasize = (const unsigned char *)
3935	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + `1`) - extra;
3936
3937	for (idx = `0`; idx < extrasize;)
3938	{
3939	int mbs_cnt;
3940	bool found = false;
3941	int32_t elem_mbs_len;
3942	/ Skip the name of collating element name. /
3943	idx = idx + extra[idx] + `1`;
3944	elem_mbs_len = extra[idx++];
3945	if (mbs_len == elem_mbs_len)
3946	{
3947	for (mbs_cnt = `0`; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3948	if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3949	break;
3950	if (mbs_cnt == elem_mbs_len)
3951	/ Found the entry. /
3952	found = true;
3953	}
3954	/ Skip the byte sequence of the collating element. /
3955	idx += elem_mbs_len;
3956	/ Adjust for the alignment. /
3957	idx = (idx + `3`) & ~`3`;
3958	/ Skip the collation sequence value. /
3959	idx += sizeof (uint32_t);
3960	/ Skip the wide char sequence of the collating element. /
3961	idx = idx + sizeof (uint32_t) * ((int32_t ) (extra + idx) + `1`);
3962	/ If we found the entry, return the sequence value. /
3963	if (found)
3964	return (uint32_t ) (extra + idx);
3965	/ Skip the collation sequence value. /
3966	idx += sizeof (uint32_t);
3967	}
3968	return UINT_MAX;
3969	}
3970	}
3971	# endif /* _LIBC */
3972	#endif /* RE_ENABLE_I18N */
3973
3974	/ Check whether the node accepts the byte which is IDX-th*
3975	byte of the INPUT. /*
3976
3977	static bool
3978	check_node_accept (const re_match_context_t mctx, const* re_token_t *node,
3979	Idx idx)
3980	{
3981	unsigned char ch;
3982	ch = re_string_byte_at (&mctx->input, idx);
3983	switch (node->type)
3984	{
3985	case CHARACTER:
3986	if (node->opr.c != ch)
3987	return false;
3988	break;
3989
3990	case SIMPLE_BRACKET:
3991	if (!bitset_contain (node->opr.sbcset, ch))
3992	return false;
3993	break;
3994
3995	#ifdef RE_ENABLE_I18N
3996	case OP_UTF8_PERIOD:
3997	if (ch >= ASCII_CHARS)
3998	return false;
3999	FALLTHROUGH;
4000	#endif
4001	case OP_PERIOD:
4002	if ((ch == `'\n'` && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4003	\|\| (ch == `'\0'` && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
4004	return false;
4005	break;
4006
4007	default:
4008	return false;
4009	}
4010
4011	if (node->constraint)
4012	{
4013	/ The node has constraints. Check whether the current context*
4014	satisfies the constraints. /*
4015	unsigned int context = re_string_context_at (&mctx->input, idx,
4016	mctx->eflags);
4017	if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4018	return false;
4019	}
4020
4021	return true;
4022	}
4023
4024	/ Extend the buffers, if the buffers have run out. /
4025
4026	static reg_errcode_t
4027	__attribute_warn_unused_result__
4028	extend_buffers (re_match_context_t mctx, int* min_len)
4029	{
4030	reg_errcode_t ret;
4031	re_string_t *pstr = &mctx->input;
4032
4033	/ Avoid overflow. /
4034	if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / `2`
4035	<= pstr->bufs_len))
4036	return REG_ESPACE;
4037
4038	/ Double the lengths of the buffers, but allocate at least MIN_LEN. /
4039	ret = re_string_realloc_buffers (pstr,
4040	MAX (min_len,
4041	MIN (pstr->len, pstr->bufs_len * `2`)));
4042	if (__glibc_unlikely (ret != REG_NOERROR))
4043	return ret;
4044
4045	if (mctx->state_log != NULL)
4046	{
4047	/ And double the length of state_log. /
4048	/ XXX We have no indication of the size of this buffer. If this*
4049	allocation fail we have no indication that the state_log array
4050	does not have the right size. /*
4051	re_dfastate_t *new_array = re_realloc (mctx->state_log, re_dfastate_t ,
4052	pstr->bufs_len + `1`);
4053	if (__glibc_unlikely (new_array == NULL))
4054	return REG_ESPACE;
4055	mctx->state_log = new_array;
4056	}
4057
4058	/ Then reconstruct the buffers. /
4059	if (pstr->icase)
4060	{
4061	#ifdef RE_ENABLE_I18N
4062	if (pstr->mb_cur_max > `1`)
4063	{
4064	ret = build_wcs_upper_buffer (pstr);
4065	if (__glibc_unlikely (ret != REG_NOERROR))
4066	return ret;
4067	}
4068	else
4069	#endif /* RE_ENABLE_I18N */
4070	build_upper_buffer (pstr);
4071	}
4072	else
4073	{
4074	#ifdef RE_ENABLE_I18N
4075	if (pstr->mb_cur_max > `1`)
4076	build_wcs_buffer (pstr);
4077	else
4078	#endif /* RE_ENABLE_I18N */
4079	{
4080	if (pstr->trans != NULL)
4081	re_string_translate_buffer (pstr);
4082	}
4083	}
4084	return REG_NOERROR;
4085	}
4086
4087
4088	/ Functions for matching context. /
4089
4090	/ Initialize MCTX. /
4091
4092	static reg_errcode_t
4093	__attribute_warn_unused_result__
4094	match_ctx_init (re_match_context_t mctx, int* eflags, Idx n)
4095	{
4096	mctx->eflags = eflags;
4097	mctx->match_last = -`1`;
4098	if (n > `0`)
4099	{
4100	/ Avoid overflow. /
4101	size_t max_object_size =
4102	MAX (sizeof (struct re_backref_cache_entry),
4103	sizeof (re_sub_match_top_t *));
4104	if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n))
4105	return REG_ESPACE;
4106
4107	mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4108	mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4109	if (__glibc_unlikely (mctx->bkref_ents == NULL \|\| mctx->sub_tops == NULL))
4110	return REG_ESPACE;
4111	}
4112	/ Already zero-ed by the caller.*
4113	else
4114	mctx->bkref_ents = NULL;
4115	mctx->nbkref_ents = 0;
4116	mctx->nsub_tops = 0; /*
4117	mctx->abkref_ents = n;
4118	mctx->max_mb_elem_len = `1`;
4119	mctx->asub_tops = n;
4120	return REG_NOERROR;
4121	}
4122
4123	/ Clean the entries which depend on the current input in MCTX.*
4124	This function must be invoked when the matcher changes the start index
4125	of the input, or changes the input string. /*
4126
4127	static void
4128	match_ctx_clean (re_match_context_t *mctx)
4129	{
4130	Idx st_idx;
4131	for (st_idx = `0`; st_idx < mctx->nsub_tops; ++st_idx)
4132	{
4133	Idx sl_idx;
4134	re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4135	for (sl_idx = `0`; sl_idx < top->nlasts; ++sl_idx)
4136	{
4137	re_sub_match_last_t *last = top->lasts[sl_idx];
4138	re_free (last->path.array);
4139	re_free (last);
4140	}
4141	re_free (top->lasts);
4142	if (top->path)
4143	{
4144	re_free (top->path->array);
4145	re_free (top->path);
4146	}
4147	re_free (top);
4148	}
4149
4150	mctx->nsub_tops = `0`;
4151	mctx->nbkref_ents = `0`;
4152	}
4153
4154	/ Free all the memory associated with MCTX. /
4155
4156	static void
4157	match_ctx_free (re_match_context_t *mctx)
4158	{
4159	/ First, free all the memory associated with MCTX->SUB_TOPS. /
4160	match_ctx_clean (mctx);
4161	re_free (mctx->sub_tops);
4162	re_free (mctx->bkref_ents);
4163	}
4164
4165	/ Add a new backreference entry to MCTX.*
4166	Note that we assume that caller never call this function with duplicate
4167	entry, and call with STR_IDX which isn't smaller than any existing entry.
4168	*/
4169
4170	static reg_errcode_t
4171	__attribute_warn_unused_result__
4172	match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
4173	Idx to)
4174	{
4175	if (mctx->nbkref_ents >= mctx->abkref_ents)
4176	{
4177	struct re_backref_cache_entry* new_entry;
4178	new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4179	mctx->abkref_ents * `2`);
4180	if (__glibc_unlikely (new_entry == NULL))
4181	{
4182	re_free (mctx->bkref_ents);
4183	return REG_ESPACE;
4184	}
4185	mctx->bkref_ents = new_entry;
4186	memset (mctx->bkref_ents + mctx->nbkref_ents, `'\0'`,
4187	sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4188	mctx->abkref_ents *= `2`;
4189	}
4190	if (mctx->nbkref_ents > `0`
4191	&& mctx->bkref_ents[mctx->nbkref_ents - `1`].str_idx == str_idx)
4192	mctx->bkref_ents[mctx->nbkref_ents - `1`].more = `1`;
4193
4194	mctx->bkref_ents[mctx->nbkref_ents].node = node;
4195	mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4196	mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4197	mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4198
4199	/ This is a cache that saves negative results of check_dst_limits_calc_pos.*
4200	If bit N is clear, means that this entry won't epsilon-transition to
4201	an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If
4202	it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4203	such node.
4204
4205	A backreference does not epsilon-transition unless it is empty, so set
4206	to all zeros if FROM != TO. /*
4207	mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4208	= (from == to ? -`1` : `0`);
4209
4210	mctx->bkref_ents[mctx->nbkref_ents++].more = `0`;
4211	if (mctx->max_mb_elem_len < to - from)
4212	mctx->max_mb_elem_len = to - from;
4213	return REG_NOERROR;
4214	}
4215
4216	/ Return the first entry with the same str_idx, or -1 if none is*
4217	found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. /*
4218
4219	static Idx
4220	search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
4221	{
4222	Idx left, right, mid, last;
4223	last = right = mctx->nbkref_ents;
4224	for (left = `0`; left < right;)
4225	{
4226	mid = (left + right) / `2`;
4227	if (mctx->bkref_ents[mid].str_idx < str_idx)
4228	left = mid + `1`;
4229	else
4230	right = mid;
4231	}
4232	if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4233	return left;
4234	else
4235	return -`1`;
4236	}
4237
4238	/ Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches*
4239	at STR_IDX. /*
4240
4241	static reg_errcode_t
4242	__attribute_warn_unused_result__
4243	match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
4244	{
4245	DEBUG_ASSERT (mctx->sub_tops != NULL);
4246	DEBUG_ASSERT (mctx->asub_tops > `0`);
4247	if (__glibc_unlikely (mctx->nsub_tops == mctx->asub_tops))
4248	{
4249	Idx new_asub_tops = mctx->asub_tops * `2`;
4250	re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4251	re_sub_match_top_t *,
4252	new_asub_tops);
4253	if (__glibc_unlikely (new_array == NULL))
4254	return REG_ESPACE;
4255	mctx->sub_tops = new_array;
4256	mctx->asub_tops = new_asub_tops;
4257	}
4258	mctx->sub_tops[mctx->nsub_tops] = calloc (`1`, sizeof (re_sub_match_top_t));
4259	if (__glibc_unlikely (mctx->sub_tops[mctx->nsub_tops] == NULL))
4260	return REG_ESPACE;
4261	mctx->sub_tops[mctx->nsub_tops]->node = node;
4262	mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4263	return REG_NOERROR;
4264	}
4265
4266	/ Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches*
4267	at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. /*
4268
4269	static re_sub_match_last_t *
4270	match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
4271	{
4272	re_sub_match_last_t *new_entry;
4273	if (__glibc_unlikely (subtop->nlasts == subtop->alasts))
4274	{
4275	Idx new_alasts = `2` * subtop->alasts + `1`;
4276	re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4277	re_sub_match_last_t *,
4278	new_alasts);
4279	if (__glibc_unlikely (new_array == NULL))
4280	return NULL;
4281	subtop->lasts = new_array;
4282	subtop->alasts = new_alasts;
4283	}
4284	new_entry = calloc (`1`, sizeof (re_sub_match_last_t));
4285	if (__glibc_likely (new_entry != NULL))
4286	{
4287	subtop->lasts[subtop->nlasts] = new_entry;
4288	new_entry->node = node;
4289	new_entry->str_idx = str_idx;
4290	++subtop->nlasts;
4291	}
4292	return new_entry;
4293	}
4294
4295	static void
4296	sift_ctx_init (re_sift_context_t sctx, re_dfastate_t *sifted_sts,
4297	re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
4298	{
4299	sctx->sifted_states = sifted_sts;
4300	sctx->limited_states = limited_sts;
4301	sctx->last_node = last_node;
4302	sctx->last_str_idx = last_str_idx;
4303	re_node_set_init_empty (&sctx->limits);
4304	}
4305

Browse the source code of glibc_src_2.31/posix/regexec.c