regexec.c source code [glibc_src_2.28/posix/regexec.c]

1	/ Extended regular expression matching and search library.*
2	Copyright (C) 2002-2018 Free Software Foundation, Inc.
3	This file is part of the GNU C Library.
4	Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
5
6	The GNU C Library is free software; you can redistribute it and/or
7	modify it under the terms of the GNU Lesser General Public
8	License as published by the Free Software Foundation; either
9	version 2.1 of the License, or (at your option) any later version.
10
11	The GNU C Library is distributed in the hope that it will be useful,
12	but WITHOUT ANY WARRANTY; without even the implied warranty of
13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14	Lesser General Public License for more details.
15
16	You should have received a copy of the GNU Lesser General Public
17	License along with the GNU C Library; if not, see
18	<https://www.gnu.org/licenses/>. /*
19
20	static reg_errcode_t match_ctx_init (re_match_context_t cache, int* eflags,
21	Idx n);
22	static void match_ctx_clean (re_match_context_t *mctx);
23	static void match_ctx_free (re_match_context_t *cache);
24	static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
25	Idx str_idx, Idx from, Idx to);
26	static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx);
27	static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
28	Idx str_idx);
29	static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
30	Idx node, Idx str_idx);
31	static void sift_ctx_init (re_sift_context_t sctx, re_dfastate_t *sifted_sts,
32	re_dfastate_t **limited_sts, Idx last_node,
33	Idx last_str_idx);
34	static reg_errcode_t re_search_internal (const regex_t *preg,
35	const char *string, Idx length,
36	Idx start, Idx last_start, Idx stop,
37	size_t nmatch, regmatch_t pmatch[],
38	int eflags);
39	static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
40	const char *string1, Idx length1,
41	const char *string2, Idx length2,
42	Idx start, regoff_t range,
43	struct re_registers *regs,
44	Idx stop, bool ret_len);
45	static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
46	const char *string, Idx length, Idx start,
47	regoff_t range, Idx stop,
48	struct re_registers *regs,
49	bool ret_len);
50	static unsigned re_copy_regs (struct re_registers regs, regmatch_t pmatch,
51	Idx nregs, int regs_allocated);
52	static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx);
53	static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
54	Idx *p_match_first);
55	static Idx check_halt_state_context (const re_match_context_t *mctx,
56	const re_dfastate_t *state, Idx idx);
57	static void update_regs (const re_dfa_t dfa, regmatch_t pmatch,
58	regmatch_t *prev_idx_match, Idx cur_node,
59	Idx cur_idx, Idx nmatch);
60	static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
61	Idx str_idx, Idx dest_node, Idx nregs,
62	regmatch_t *regs,
63	re_node_set *eps_via_nodes);
64	static reg_errcode_t set_regs (const regex_t *preg,
65	const re_match_context_t *mctx,
66	size_t nmatch, regmatch_t *pmatch,
67	bool fl_backtrack);
68	static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
69
70	#ifdef RE_ENABLE_I18N
71	static int sift_states_iter_mb (const re_match_context_t *mctx,
72	re_sift_context_t *sctx,
73	Idx node_idx, Idx str_idx, Idx max_str_idx);
74	#endif /* RE_ENABLE_I18N */
75	static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
76	re_sift_context_t *sctx);
77	static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
78	re_sift_context_t *sctx, Idx str_idx,
79	re_node_set *cur_dest);
80	static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
81	re_sift_context_t *sctx,
82	Idx str_idx,
83	re_node_set *dest_nodes);
84	static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
85	re_node_set *dest_nodes,
86	const re_node_set *candidates);
87	static bool check_dst_limits (const re_match_context_t *mctx,
88	const re_node_set *limits,
89	Idx dst_node, Idx dst_idx, Idx src_node,
90	Idx src_idx);
91	static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
92	int boundaries, Idx subexp_idx,
93	Idx from_node, Idx bkref_idx);
94	static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
95	Idx limit, Idx subexp_idx,
96	Idx node, Idx str_idx,
97	Idx bkref_idx);
98	static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
99	re_node_set *dest_nodes,
100	const re_node_set *candidates,
101	re_node_set *limits,
102	struct re_backref_cache_entry *bkref_ents,
103	Idx str_idx);
104	static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
105	re_sift_context_t *sctx,
106	Idx str_idx, const re_node_set *candidates);
107	static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
108	re_dfastate_t **dst,
109	re_dfastate_t **src, Idx num);
110	static re_dfastate_t find_recover_state (reg_errcode_t err,
111	re_match_context_t *mctx);
112	static re_dfastate_t transit_state (reg_errcode_t err,
113	re_match_context_t *mctx,
114	re_dfastate_t *state);
115	static re_dfastate_t merge_state_with_log (reg_errcode_t err,
116	re_match_context_t *mctx,
117	re_dfastate_t *next_state);
118	static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
119	re_node_set *cur_nodes,
120	Idx str_idx);
121	#if 0
122	static re_dfastate_t transit_state_sb (reg_errcode_t err,
123	re_match_context_t *mctx,
124	re_dfastate_t *pstate);
125	#endif
126	#ifdef RE_ENABLE_I18N
127	static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
128	re_dfastate_t *pstate);
129	#endif /* RE_ENABLE_I18N */
130	static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
131	const re_node_set *nodes);
132	static reg_errcode_t get_subexp (re_match_context_t *mctx,
133	Idx bkref_node, Idx bkref_str_idx);
134	static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
135	const re_sub_match_top_t *sub_top,
136	re_sub_match_last_t *sub_last,
137	Idx bkref_node, Idx bkref_str);
138	static Idx find_subexp_node (const re_dfa_t dfa, const* re_node_set *nodes,
139	Idx subexp_idx, int type);
140	static reg_errcode_t check_arrival (re_match_context_t *mctx,
141	state_array_t *path, Idx top_node,
142	Idx top_str, Idx last_node, Idx last_str,
143	int type);
144	static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
145	Idx str_idx,
146	re_node_set *cur_nodes,
147	re_node_set *next_nodes);
148	static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
149	re_node_set *cur_nodes,
150	Idx ex_subexp, int type);
151	static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
152	re_node_set *dst_nodes,
153	Idx target, Idx ex_subexp,
154	int type);
155	static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
156	re_node_set *cur_nodes, Idx cur_str,
157	Idx subexp_num, int type);
158	static bool build_trtable (const re_dfa_t dfa, re_dfastate_t state);
159	#ifdef RE_ENABLE_I18N
160	static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
161	const re_string_t *input, Idx idx);
162	# ifdef _LIBC
163	static unsigned int find_collation_sequence_value (const unsigned char *mbs,
164	size_t name_len);
165	# endif /* _LIBC */
166	#endif /* RE_ENABLE_I18N */
167	static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
168	const re_dfastate_t *state,
169	re_node_set *states_node,
170	bitset_t *states_ch);
171	static bool check_node_accept (const re_match_context_t *mctx,
172	const re_token_t *node, Idx idx);
173	static reg_errcode_t extend_buffers (re_match_context_t mctx, int* min_len);
174
175	/ Entry point for POSIX code. /
176
177	/ regexec searches for a given pattern, specified by PREG, in the*
178	string STRING.
179
180	If NMATCH is zero or REG_NOSUB was set in the cflags argument to
181	'regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
182	least NMATCH elements, and we set them to the offsets of the
183	corresponding matched substrings.
184
185	EFLAGS specifies "execution flags" which affect matching: if
186	REG_NOTBOL is set, then ^ does not match at the beginning of the
187	string; if REG_NOTEOL is set, then $ does not match at the end.
188
189	We return 0 if we find a match and REG_NOMATCH if not. /*
190
191	int
192	regexec (const regex_t _Restrict_ preg, const* char *_Restrict_ string,
193	size_t nmatch, regmatch_t pmatch[], int eflags)
194	{
195	reg_errcode_t err;
196	Idx start, length;
197	re_dfa_t *dfa = preg->buffer;
198
199	if (eflags & ~(REG_NOTBOL \| REG_NOTEOL \| REG_STARTEND))
200	return REG_BADPAT;
201
202	if (eflags & REG_STARTEND)
203	{
204	start = pmatch[`0`].rm_so;
205	length = pmatch[`0`].rm_eo;
206	}
207	else
208	{
209	start = `0`;
210	length = strlen (string);
211	}
212
213	lock_lock (dfa->lock);
214	if (preg->no_sub)
215	err = re_search_internal (preg, string, length, start, length,
216	length, `0`, NULL, eflags);
217	else
218	err = re_search_internal (preg, string, length, start, length,
219	length, nmatch, pmatch, eflags);
220	lock_unlock (dfa->lock);
221	return err != REG_NOERROR;
222	}
223
224	#ifdef _LIBC
225	libc_hidden_def (__regexec)
226
227	# include <shlib-compat.h>
228	versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
229
230	# if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
231	__typeof__ (__regexec) __compat_regexec;
232
233	int
234	attribute_compat_text_section
235	__compat_regexec (const regex_t *_Restrict_ preg,
236	const char *_Restrict_ string, size_t nmatch,
237	regmatch_t pmatch[], int eflags)
238	{
239	return regexec (preg, string, nmatch, pmatch,
240	eflags & (REG_NOTBOL \| REG_NOTEOL));
241	}
242	compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
243	# endif
244	#endif
245
246	/ Entry points for GNU code. /
247
248	/ re_match, re_search, re_match_2, re_search_2*
249
250	The former two functions operate on STRING with length LENGTH,
251	while the later two operate on concatenation of STRING1 and STRING2
252	with lengths LENGTH1 and LENGTH2, respectively.
253
254	re_match() matches the compiled pattern in BUFP against the string,
255	starting at index START.
256
257	re_search() first tries matching at index START, then it tries to match
258	starting from index START + 1, and so on. The last start position tried
259	is START + RANGE. (Thus RANGE = 0 forces re_search to operate the same
260	way as re_match().)
261
262	The parameter STOP of re_{match,search}_2 specifies that no match exceeding
263	the first STOP characters of the concatenation of the strings should be
264	concerned.
265
266	If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
267	and all groups is stored in REGS. (For the "_2" variants, the offsets are
268	computed relative to the concatenation, not relative to the individual
269	strings.)
270
271	On success, re_match* functions return the length of the match, re_search*
272	return the position of the start of the match. Return value -1 means no
273	match was found and -2 indicates an internal error. /*
274
275	regoff_t
276	re_match (struct re_pattern_buffer bufp, const* char *string, Idx length,
277	Idx start, struct re_registers *regs)
278	{
279	return re_search_stub (bufp, string, length, start, `0`, length, regs, true);
280	}
281	#ifdef _LIBC
282	weak_alias (__re_match, re_match)
283	#endif
284
285	regoff_t
286	re_search (struct re_pattern_buffer bufp, const* char *string, Idx length,
287	Idx start, regoff_t range, struct re_registers *regs)
288	{
289	return re_search_stub (bufp, string, length, start, range, length, regs,
290	false);
291	}
292	#ifdef _LIBC
293	weak_alias (__re_search, re_search)
294	#endif
295
296	regoff_t
297	re_match_2 (struct re_pattern_buffer bufp, const* char *string1, Idx length1,
298	const char *string2, Idx length2, Idx start,
299	struct re_registers *regs, Idx stop)
300	{
301	return re_search_2_stub (bufp, string1, length1, string2, length2,
302	start, `0`, regs, stop, true);
303	}
304	#ifdef _LIBC
305	weak_alias (__re_match_2, re_match_2)
306	#endif
307
308	regoff_t
309	re_search_2 (struct re_pattern_buffer bufp, const* char *string1, Idx length1,
310	const char *string2, Idx length2, Idx start, regoff_t range,
311	struct re_registers *regs, Idx stop)
312	{
313	return re_search_2_stub (bufp, string1, length1, string2, length2,
314	start, range, regs, stop, false);
315	}
316	#ifdef _LIBC
317	weak_alias (__re_search_2, re_search_2)
318	#endif
319
320	static regoff_t
321	re_search_2_stub (struct re_pattern_buffer bufp, const* char *string1,
322	Idx length1, const char *string2, Idx length2, Idx start,
323	regoff_t range, struct re_registers *regs,
324	Idx stop, bool ret_len)
325	{
326	const char *str;
327	regoff_t rval;
328	Idx len;
329	char *s = NULL;
330
331	if (BE ((length1 < `0` \|\| length2 < `0` \|\| stop < `0`
332	\|\| INT_ADD_WRAPV (length1, length2, &len)),
333	`0`))
334	return -`2`;
335
336	/ Concatenate the strings. /
337	if (length2 > `0`)
338	if (length1 > `0`)
339	{
340	s = re_malloc (char, len);
341
342	if (BE (s == NULL, `0`))
343	return -`2`;
344	#ifdef _LIBC
345	memcpy (__mempcpy (s, string1, length1), string2, length2);
346	#else
347	memcpy (s, string1, length1);
348	memcpy (s + length1, string2, length2);
349	#endif
350	str = s;
351	}
352	else
353	str = string2;
354	else
355	str = string1;
356
357	rval = re_search_stub (bufp, str, len, start, range, stop, regs,
358	ret_len);
359	re_free (s);
360	return rval;
361	}
362
363	/ The parameters have the same meaning as those of re_search.*
364	Additional parameters:
365	If RET_LEN is true the length of the match is returned (re_match style);
366	otherwise the position of the match is returned. /*
367
368	static regoff_t
369	re_search_stub (struct re_pattern_buffer bufp, const* char *string, Idx length,
370	Idx start, regoff_t range, Idx stop, struct re_registers *regs,
371	bool ret_len)
372	{
373	reg_errcode_t result;
374	regmatch_t *pmatch;
375	Idx nregs;
376	regoff_t rval;
377	int eflags = `0`;
378	re_dfa_t *dfa = bufp->buffer;
379	Idx last_start = start + range;
380
381	/ Check for out-of-range. /
382	if (BE (start < `0` \|\| start > length, `0`))
383	return -`1`;
384	if (BE (length < last_start \|\| (`0` <= range && last_start < start), `0`))
385	last_start = length;
386	else if (BE (last_start < `0` \|\| (range < `0` && start <= last_start), `0`))
387	last_start = `0`;
388
389	lock_lock (dfa->lock);
390
391	eflags \|= (bufp->not_bol) ? REG_NOTBOL : `0`;
392	eflags \|= (bufp->not_eol) ? REG_NOTEOL : `0`;
393
394	/ Compile fastmap if we haven't yet. /
395	if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
396	re_compile_fastmap (bufp);
397
398	if (BE (bufp->no_sub, `0`))
399	regs = NULL;
400
401	/ We need at least 1 register. /
402	if (regs == NULL)
403	nregs = `1`;
404	else if (BE (bufp->regs_allocated == REGS_FIXED
405	&& regs->num_regs <= bufp->re_nsub, `0`))
406	{
407	nregs = regs->num_regs;
408	if (BE (nregs < `1`, `0`))
409	{
410	/ Nothing can be copied to regs. /
411	regs = NULL;
412	nregs = `1`;
413	}
414	}
415	else
416	nregs = bufp->re_nsub + `1`;
417	pmatch = re_malloc (regmatch_t, nregs);
418	if (BE (pmatch == NULL, `0`))
419	{
420	rval = -`2`;
421	goto out;
422	}
423
424	result = re_search_internal (bufp, string, length, start, last_start, stop,
425	nregs, pmatch, eflags);
426
427	rval = `0`;
428
429	/ I hope we needn't fill their regs with -1's when no match was found. /
430	if (result != REG_NOERROR)
431	rval = result == REG_NOMATCH ? -`1` : -`2`;
432	else if (regs != NULL)
433	{
434	/ If caller wants register contents data back, copy them. /
435	bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
436	bufp->regs_allocated);
437	if (BE (bufp->regs_allocated == REGS_UNALLOCATED, `0`))
438	rval = -`2`;
439	}
440
441	if (BE (rval == `0`, `1`))
442	{
443	if (ret_len)
444	{
445	assert (pmatch[`0`].rm_so == start);
446	rval = pmatch[`0`].rm_eo - start;
447	}
448	else
449	rval = pmatch[`0`].rm_so;
450	}
451	re_free (pmatch);
452	out:
453	lock_unlock (dfa->lock);
454	return rval;
455	}
456
457	static unsigned
458	re_copy_regs (struct re_registers regs, regmatch_t pmatch, Idx nregs,
459	int regs_allocated)
460	{
461	int rval = REGS_REALLOCATE;
462	Idx i;
463	Idx need_regs = nregs + `1`;
464	/ We need one extra element beyond 'num_regs' for the '-1' marker GNU code*
465	uses. /*
466
467	/ Have the register data arrays been allocated? /
468	if (regs_allocated == REGS_UNALLOCATED)
469	{ / No. So allocate them with malloc. /
470	regs->start = re_malloc (regoff_t, need_regs);
471	if (BE (regs->start == NULL, `0`))
472	return REGS_UNALLOCATED;
473	regs->end = re_malloc (regoff_t, need_regs);
474	if (BE (regs->end == NULL, `0`))
475	{
476	re_free (regs->start);
477	return REGS_UNALLOCATED;
478	}
479	regs->num_regs = need_regs;
480	}
481	else if (regs_allocated == REGS_REALLOCATE)
482	{ / Yes. If we need more elements than were already*
483	allocated, reallocate them. If we need fewer, just
484	leave it alone. /*
485	if (BE (need_regs > regs->num_regs, `0`))
486	{
487	regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
488	regoff_t *new_end;
489	if (BE (new_start == NULL, `0`))
490	return REGS_UNALLOCATED;
491	new_end = re_realloc (regs->end, regoff_t, need_regs);
492	if (BE (new_end == NULL, `0`))
493	{
494	re_free (new_start);
495	return REGS_UNALLOCATED;
496	}
497	regs->start = new_start;
498	regs->end = new_end;
499	regs->num_regs = need_regs;
500	}
501	}
502	else
503	{
504	assert (regs_allocated == REGS_FIXED);
505	/ This function may not be called with REGS_FIXED and nregs too big. /
506	assert (regs->num_regs >= nregs);
507	rval = REGS_FIXED;
508	}
509
510	/ Copy the regs. /
511	for (i = `0`; i < nregs; ++i)
512	{
513	regs->start[i] = pmatch[i].rm_so;
514	regs->end[i] = pmatch[i].rm_eo;
515	}
516	for ( ; i < regs->num_regs; ++i)
517	regs->start[i] = regs->end[i] = -`1`;
518
519	return rval;
520	}
521
522	/ Set REGS to hold NUM_REGS registers, storing them in STARTS and*
523	ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
524	this memory for recording register information. STARTS and ENDS
525	must be allocated using the malloc library routine, and must each
526	be at least NUM_REGS sizeof (regoff_t) bytes long.*
527
528	If NUM_REGS == 0, then subsequent matches should allocate their own
529	register data.
530
531	Unless this function is called, the first search or match using
532	PATTERN_BUFFER will allocate its own register data, without
533	freeing the old data. /*
534
535	void
536	re_set_registers (struct re_pattern_buffer bufp, struct* re_registers *regs,
537	__re_size_t num_regs, regoff_t starts, regoff_t ends)
538	{
539	if (num_regs)
540	{
541	bufp->regs_allocated = REGS_REALLOCATE;
542	regs->num_regs = num_regs;
543	regs->start = starts;
544	regs->end = ends;
545	}
546	else
547	{
548	bufp->regs_allocated = REGS_UNALLOCATED;
549	regs->num_regs = `0`;
550	regs->start = regs->end = NULL;
551	}
552	}
553	#ifdef _LIBC
554	weak_alias (__re_set_registers, re_set_registers)
555	#endif
556
557	/ Entry points compatible with 4.2 BSD regex library. We don't define*
558	them unless specifically requested. /*
559
560	#if defined _REGEX_RE_COMP \|\| defined _LIBC
561	int
562	# ifdef _LIBC
563	weak_function
564	# endif
565	re_exec (const char *s)
566	{
567	return `0` == regexec (&re_comp_buf, s, `0`, NULL, `0`);
568	}
569	#endif /* _REGEX_RE_COMP */
570
571	/ Internal entry point. /
572
573	/ Searches for a compiled pattern PREG in the string STRING, whose*
574	length is LENGTH. NMATCH, PMATCH, and EFLAGS have the same
575	meaning as with regexec. LAST_START is START + RANGE, where
576	START and RANGE have the same meaning as with re_search.
577	Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
578	otherwise return the error code.
579	Note: We assume front end functions already check ranges.
580	(0 <= LAST_START && LAST_START <= LENGTH) /*
581
582	static reg_errcode_t
583	__attribute_warn_unused_result__
584	re_search_internal (const regex_t preg, const* char *string, Idx length,
585	Idx start, Idx last_start, Idx stop, size_t nmatch,
586	regmatch_t pmatch[], int eflags)
587	{
588	reg_errcode_t err;
589	const re_dfa_t *dfa = preg->buffer;
590	Idx left_lim, right_lim;
591	int incr;
592	bool fl_longest_match;
593	int match_kind;
594	Idx match_first;
595	Idx match_last = -`1`;
596	Idx extra_nmatch;
597	bool sb;
598	int ch;
599	#if defined _LIBC \|\| (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
600	re_match_context_t mctx = { .dfa = dfa };
601	#else
602	re_match_context_t mctx;
603	#endif
604	char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
605	&& start != last_start && !preg->can_be_null)
606	? preg->fastmap : NULL);
607	RE_TRANSLATE_TYPE t = preg->translate;
608
609	#if !(defined _LIBC \|\| (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L))
610	memset (&mctx, `'\0'`, sizeof (re_match_context_t));
611	mctx.dfa = dfa;
612	#endif
613
614	extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + `1`) : `0`;
615	nmatch -= extra_nmatch;
616
617	/ Check if the DFA haven't been compiled. /
618	if (BE (preg->used == `0` \|\| dfa->init_state == NULL
619	\|\| dfa->init_state_word == NULL \|\| dfa->init_state_nl == NULL
620	\|\| dfa->init_state_begbuf == NULL, `0`))
621	return REG_NOMATCH;
622
623	#ifdef DEBUG
624	/ We assume front-end functions already check them. /
625	assert (`0` <= last_start && last_start <= length);
626	#endif
627
628	/ If initial states with non-begbuf contexts have no elements,*
629	the regex must be anchored. If preg->newline_anchor is set,
630	we'll never use init_state_nl, so do not check it. /*
631	if (dfa->init_state->nodes.nelem == `0`
632	&& dfa->init_state_word->nodes.nelem == `0`
633	&& (dfa->init_state_nl->nodes.nelem == `0`
634	\|\| !preg->newline_anchor))
635	{
636	if (start != `0` && last_start != `0`)
637	return REG_NOMATCH;
638	start = last_start = `0`;
639	}
640
641	/ We must check the longest matching, if nmatch > 0. /
642	fl_longest_match = (nmatch != `0` \|\| dfa->nbackref);
643
644	err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + `1`,
645	preg->translate, (preg->syntax & RE_ICASE) != `0`,
646	dfa);
647	if (BE (err != REG_NOERROR, `0`))
648	goto free_return;
649	mctx.input.stop = stop;
650	mctx.input.raw_stop = stop;
651	mctx.input.newline_anchor = preg->newline_anchor;
652
653	err = match_ctx_init (&mctx, eflags, dfa->nbackref * `2`);
654	if (BE (err != REG_NOERROR, `0`))
655	goto free_return;
656
657	/ We will log all the DFA states through which the dfa pass,*
658	if nmatch > 1, or this dfa has "multibyte node", which is a
659	back-reference or a node which can accept multibyte character or
660	multi character collating element. /*
661	if (nmatch > `1` \|\| dfa->has_mb_node)
662	{
663	/ Avoid overflow. /
664	if (BE ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
665	<= mctx.input.bufs_len), `0`))
666	{
667	err = REG_ESPACE;
668	goto free_return;
669	}
670
671	mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + `1`);
672	if (BE (mctx.state_log == NULL, `0`))
673	{
674	err = REG_ESPACE;
675	goto free_return;
676	}
677	}
678	else
679	mctx.state_log = NULL;
680
681	match_first = start;
682	mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
683	: CONTEXT_NEWLINE \| CONTEXT_BEGBUF;
684
685	/ Check incrementally whether the input string matches. /
686	incr = (last_start < start) ? -`1` : `1`;
687	left_lim = (last_start < start) ? last_start : start;
688	right_lim = (last_start < start) ? start : last_start;
689	sb = dfa->mb_cur_max == `1`;
690	match_kind =
691	(fastmap
692	? ((sb \|\| !(preg->syntax & RE_ICASE \|\| t) ? `4` : `0`)
693	\| (start <= last_start ? `2` : `0`)
694	\| (t != NULL ? `1` : `0`))
695	: `8`);
696
697	for (;; match_first += incr)
698	{
699	err = REG_NOMATCH;
700	if (match_first < left_lim \|\| right_lim < match_first)
701	goto free_return;
702
703	/ Advance as rapidly as possible through the string, until we*
704	find a plausible place to start matching. This may be done
705	with varying efficiency, so there are various possibilities:
706	only the most common of them are specialized, in order to
707	save on code size. We use a switch statement for speed. /*
708	switch (match_kind)
709	{
710	case `8`:
711	/ No fastmap. /
712	break;
713
714	case `7`:
715	/ Fastmap with single-byte translation, match forward. /
716	while (BE (match_first < right_lim, `1`)
717	&& !fastmap[t[(unsigned char) string[match_first]]])
718	++match_first;
719	goto forward_match_found_start_or_reached_end;
720
721	case `6`:
722	/ Fastmap without translation, match forward. /
723	while (BE (match_first < right_lim, `1`)
724	&& !fastmap[(unsigned char) string[match_first]])
725	++match_first;
726
727	forward_match_found_start_or_reached_end:
728	if (BE (match_first == right_lim, `0`))
729	{
730	ch = match_first >= length
731	? `0` : (unsigned char) string[match_first];
732	if (!fastmap[t ? t[ch] : ch])
733	goto free_return;
734	}
735	break;
736
737	case `4`:
738	case `5`:
739	/ Fastmap without multi-byte translation, match backwards. /
740	while (match_first >= left_lim)
741	{
742	ch = match_first >= length
743	? `0` : (unsigned char) string[match_first];
744	if (fastmap[t ? t[ch] : ch])
745	break;
746	--match_first;
747	}
748	if (match_first < left_lim)
749	goto free_return;
750	break;
751
752	default:
753	/ In this case, we can't determine easily the current byte,*
754	since it might be a component byte of a multibyte
755	character. Then we use the constructed buffer instead. /*
756	for (;;)
757	{
758	/ If MATCH_FIRST is out of the valid range, reconstruct the*
759	buffers. /*
760	__re_size_t offset = match_first - mctx.input.raw_mbs_idx;
761	if (BE (offset >= (__re_size_t) mctx.input.valid_raw_len, `0`))
762	{
763	err = re_string_reconstruct (&mctx.input, match_first,
764	eflags);
765	if (BE (err != REG_NOERROR, `0`))
766	goto free_return;
767
768	offset = match_first - mctx.input.raw_mbs_idx;
769	}
770	/ If MATCH_FIRST is out of the buffer, leave it as '\0'.*
771	Note that MATCH_FIRST must not be smaller than 0. /*
772	ch = (match_first >= length
773	? `0` : re_string_byte_at (&mctx.input, offset));
774	if (fastmap[ch])
775	break;
776	match_first += incr;
777	if (match_first < left_lim \|\| match_first > right_lim)
778	{
779	err = REG_NOMATCH;
780	goto free_return;
781	}
782	}
783	break;
784	}
785
786	/ Reconstruct the buffers so that the matcher can assume that*
787	the matching starts from the beginning of the buffer. /*
788	err = re_string_reconstruct (&mctx.input, match_first, eflags);
789	if (BE (err != REG_NOERROR, `0`))
790	goto free_return;
791
792	#ifdef RE_ENABLE_I18N
793	/ Don't consider this char as a possible match start if it part,*
794	yet isn't the head, of a multibyte character. /*
795	if (!sb && !re_string_first_byte (&mctx.input, `0`))
796	continue;
797	#endif
798
799	/ It seems to be appropriate one, then use the matcher. /
800	/ We assume that the matching starts from 0. /
801	mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = `0`;
802	match_last = check_matching (&mctx, fl_longest_match,
803	start <= last_start ? &match_first : NULL);
804	if (match_last != -`1`)
805	{
806	if (BE (match_last == -`2`, `0`))
807	{
808	err = REG_ESPACE;
809	goto free_return;
810	}
811	else
812	{
813	mctx.match_last = match_last;
814	if ((!preg->no_sub && nmatch > `1`) \|\| dfa->nbackref)
815	{
816	re_dfastate_t *pstate = mctx.state_log[match_last];
817	mctx.last_node = check_halt_state_context (&mctx, pstate,
818	match_last);
819	}
820	if ((!preg->no_sub && nmatch > `1` && dfa->has_plural_match)
821	\|\| dfa->nbackref)
822	{
823	err = prune_impossible_nodes (&mctx);
824	if (err == REG_NOERROR)
825	break;
826	if (BE (err != REG_NOMATCH, `0`))
827	goto free_return;
828	match_last = -`1`;
829	}
830	else
831	break; / We found a match. /
832	}
833	}
834
835	match_ctx_clean (&mctx);
836	}
837
838	#ifdef DEBUG
839	assert (match_last != -`1`);
840	assert (err == REG_NOERROR);
841	#endif
842
843	/ Set pmatch[] if we need. /
844	if (nmatch > `0`)
845	{
846	Idx reg_idx;
847
848	/ Initialize registers. /
849	for (reg_idx = `1`; reg_idx < nmatch; ++reg_idx)
850	pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -`1`;
851
852	/ Set the points where matching start/end. /
853	pmatch[`0`].rm_so = `0`;
854	pmatch[`0`].rm_eo = mctx.match_last;
855	/ FIXME: This function should fail if mctx.match_last exceeds*
856	the maximum possible regoff_t value. We need a new error
857	code REG_OVERFLOW. /*
858
859	if (!preg->no_sub && nmatch > `1`)
860	{
861	err = set_regs (preg, &mctx, nmatch, pmatch,
862	dfa->has_plural_match && dfa->nbackref > `0`);
863	if (BE (err != REG_NOERROR, `0`))
864	goto free_return;
865	}
866
867	/ At last, add the offset to each register, since we slid*
868	the buffers so that we could assume that the matching starts
869	from 0. /*
870	for (reg_idx = `0`; reg_idx < nmatch; ++reg_idx)
871	if (pmatch[reg_idx].rm_so != -`1`)
872	{
873	#ifdef RE_ENABLE_I18N
874	if (BE (mctx.input.offsets_needed != `0`, `0`))
875	{
876	pmatch[reg_idx].rm_so =
877	(pmatch[reg_idx].rm_so == mctx.input.valid_len
878	? mctx.input.valid_raw_len
879	: mctx.input.offsets[pmatch[reg_idx].rm_so]);
880	pmatch[reg_idx].rm_eo =
881	(pmatch[reg_idx].rm_eo == mctx.input.valid_len
882	? mctx.input.valid_raw_len
883	: mctx.input.offsets[pmatch[reg_idx].rm_eo]);
884	}
885	#else
886	assert (mctx.input.offsets_needed == `0`);
887	#endif
888	pmatch[reg_idx].rm_so += match_first;
889	pmatch[reg_idx].rm_eo += match_first;
890	}
891	for (reg_idx = `0`; reg_idx < extra_nmatch; ++reg_idx)
892	{
893	pmatch[nmatch + reg_idx].rm_so = -`1`;
894	pmatch[nmatch + reg_idx].rm_eo = -`1`;
895	}
896
897	if (dfa->subexp_map)
898	for (reg_idx = `0`; reg_idx + `1` < nmatch; reg_idx++)
899	if (dfa->subexp_map[reg_idx] != reg_idx)
900	{
901	pmatch[reg_idx + `1`].rm_so
902	= pmatch[dfa->subexp_map[reg_idx] + `1`].rm_so;
903	pmatch[reg_idx + `1`].rm_eo
904	= pmatch[dfa->subexp_map[reg_idx] + `1`].rm_eo;
905	}
906	}
907
908	free_return:
909	re_free (mctx.state_log);
910	if (dfa->nbackref)
911	match_ctx_free (&mctx);
912	re_string_destruct (&mctx.input);
913	return err;
914	}
915
916	static reg_errcode_t
917	__attribute_warn_unused_result__
918	prune_impossible_nodes (re_match_context_t *mctx)
919	{
920	const re_dfa_t *const dfa = mctx->dfa;
921	Idx halt_node, match_last;
922	reg_errcode_t ret;
923	re_dfastate_t **sifted_states;
924	re_dfastate_t **lim_states = NULL;
925	re_sift_context_t sctx;
926	#ifdef DEBUG
927	assert (mctx->state_log != NULL);
928	#endif
929	match_last = mctx->match_last;
930	halt_node = mctx->last_node;
931
932	/ Avoid overflow. /
933	if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) <= match_last, `0`))
934	return REG_ESPACE;
935
936	sifted_states = re_malloc (re_dfastate_t *, match_last + `1`);
937	if (BE (sifted_states == NULL, `0`))
938	{
939	ret = REG_ESPACE;
940	goto free_return;
941	}
942	if (dfa->nbackref)
943	{
944	lim_states = re_malloc (re_dfastate_t *, match_last + `1`);
945	if (BE (lim_states == NULL, `0`))
946	{
947	ret = REG_ESPACE;
948	goto free_return;
949	}
950	while (`1`)
951	{
952	memset (lim_states, `'\0'`,
953	sizeof (re_dfastate_t ) (match_last + `1`));
954	sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
955	match_last);
956	ret = sift_states_backward (mctx, &sctx);
957	re_node_set_free (&sctx.limits);
958	if (BE (ret != REG_NOERROR, `0`))
959	goto free_return;
960	if (sifted_states[`0`] != NULL \|\| lim_states[`0`] != NULL)
961	break;
962	do
963	{
964	--match_last;
965	if (match_last < `0`)
966	{
967	ret = REG_NOMATCH;
968	goto free_return;
969	}
970	} while (mctx->state_log[match_last] == NULL
971	\|\| !mctx->state_log[match_last]->halt);
972	halt_node = check_halt_state_context (mctx,
973	mctx->state_log[match_last],
974	match_last);
975	}
976	ret = merge_state_array (dfa, sifted_states, lim_states,
977	match_last + `1`);
978	re_free (lim_states);
979	lim_states = NULL;
980	if (BE (ret != REG_NOERROR, `0`))
981	goto free_return;
982	}
983	else
984	{
985	sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
986	ret = sift_states_backward (mctx, &sctx);
987	re_node_set_free (&sctx.limits);
988	if (BE (ret != REG_NOERROR, `0`))
989	goto free_return;
990	if (sifted_states[`0`] == NULL)
991	{
992	ret = REG_NOMATCH;
993	goto free_return;
994	}
995	}
996	re_free (mctx->state_log);
997	mctx->state_log = sifted_states;
998	sifted_states = NULL;
999	mctx->last_node = halt_node;
1000	mctx->match_last = match_last;
1001	ret = REG_NOERROR;
1002	free_return:
1003	re_free (sifted_states);
1004	re_free (lim_states);
1005	return ret;
1006	}
1007
1008	/ Acquire an initial state and return it.*
1009	We must select appropriate initial state depending on the context,
1010	since initial states may have constraints like "\<", "^", etc.. /*
1011
1012	static inline re_dfastate_t *
1013	__attribute__ ((always_inline))
1014	acquire_init_state_context (reg_errcode_t err, const* re_match_context_t *mctx,
1015	Idx idx)
1016	{
1017	const re_dfa_t *const dfa = mctx->dfa;
1018	if (dfa->init_state->has_constraint)
1019	{
1020	unsigned int context;
1021	context = re_string_context_at (&mctx->input, idx - `1`, mctx->eflags);
1022	if (IS_WORD_CONTEXT (context))
1023	return dfa->init_state_word;
1024	else if (IS_ORDINARY_CONTEXT (context))
1025	return dfa->init_state;
1026	else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1027	return dfa->init_state_begbuf;
1028	else if (IS_NEWLINE_CONTEXT (context))
1029	return dfa->init_state_nl;
1030	else if (IS_BEGBUF_CONTEXT (context))
1031	{
1032	/ It is relatively rare case, then calculate on demand. /
1033	return re_acquire_state_context (err, dfa,
1034	dfa->init_state->entrance_nodes,
1035	context);
1036	}
1037	else
1038	/ Must not happen? /
1039	return dfa->init_state;
1040	}
1041	else
1042	return dfa->init_state;
1043	}
1044
1045	/ Check whether the regular expression match input string INPUT or not,*
1046	and return the index where the matching end. Return -1 if
1047	there is no match, and return -2 in case of an error.
1048	FL_LONGEST_MATCH means we want the POSIX longest matching.
1049	If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
1050	next place where we may want to try matching.
1051	Note that the matcher assumes that the matching starts from the current
1052	index of the buffer. /*
1053
1054	static Idx
1055	__attribute_warn_unused_result__
1056	check_matching (re_match_context_t *mctx, bool fl_longest_match,
1057	Idx *p_match_first)
1058	{
1059	const re_dfa_t *const dfa = mctx->dfa;
1060	reg_errcode_t err;
1061	Idx match = `0`;
1062	Idx match_last = -`1`;
1063	Idx cur_str_idx = re_string_cur_idx (&mctx->input);
1064	re_dfastate_t *cur_state;
1065	bool at_init_state = p_match_first != NULL;
1066	Idx next_start_idx = cur_str_idx;
1067
1068	err = REG_NOERROR;
1069	cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1070	/ An initial state must not be NULL (invalid). /
1071	if (BE (cur_state == NULL, `0`))
1072	{
1073	assert (err == REG_ESPACE);
1074	return -`2`;
1075	}
1076
1077	if (mctx->state_log != NULL)
1078	{
1079	mctx->state_log[cur_str_idx] = cur_state;
1080
1081	/ Check OP_OPEN_SUBEXP in the initial state in case that we use them*
1082	later. E.g. Processing back references. /*
1083	if (BE (dfa->nbackref, `0`))
1084	{
1085	at_init_state = false;
1086	err = check_subexp_matching_top (mctx, &cur_state->nodes, `0`);
1087	if (BE (err != REG_NOERROR, `0`))
1088	return err;
1089
1090	if (cur_state->has_backref)
1091	{
1092	err = transit_state_bkref (mctx, &cur_state->nodes);
1093	if (BE (err != REG_NOERROR, `0`))
1094	return err;
1095	}
1096	}
1097	}
1098
1099	/ If the RE accepts NULL string. /
1100	if (BE (cur_state->halt, `0`))
1101	{
1102	if (!cur_state->has_constraint
1103	\|\| check_halt_state_context (mctx, cur_state, cur_str_idx))
1104	{
1105	if (!fl_longest_match)
1106	return cur_str_idx;
1107	else
1108	{
1109	match_last = cur_str_idx;
1110	match = `1`;
1111	}
1112	}
1113	}
1114
1115	while (!re_string_eoi (&mctx->input))
1116	{
1117	re_dfastate_t *old_state = cur_state;
1118	Idx next_char_idx = re_string_cur_idx (&mctx->input) + `1`;
1119
1120	if ((BE (next_char_idx >= mctx->input.bufs_len, `0`)
1121	&& mctx->input.bufs_len < mctx->input.len)
1122	\|\| (BE (next_char_idx >= mctx->input.valid_len, `0`)
1123	&& mctx->input.valid_len < mctx->input.len))
1124	{
1125	err = extend_buffers (mctx, next_char_idx + `1`);
1126	if (BE (err != REG_NOERROR, `0`))
1127	{
1128	assert (err == REG_ESPACE);
1129	return -`2`;
1130	}
1131	}
1132
1133	cur_state = transit_state (&err, mctx, cur_state);
1134	if (mctx->state_log != NULL)
1135	cur_state = merge_state_with_log (&err, mctx, cur_state);
1136
1137	if (cur_state == NULL)
1138	{
1139	/ Reached the invalid state or an error. Try to recover a valid*
1140	state using the state log, if available and if we have not
1141	already found a valid (even if not the longest) match. /*
1142	if (BE (err != REG_NOERROR, `0`))
1143	return -`2`;
1144
1145	if (mctx->state_log == NULL
1146	\|\| (match && !fl_longest_match)
1147	\|\| (cur_state = find_recover_state (&err, mctx)) == NULL)
1148	break;
1149	}
1150
1151	if (BE (at_init_state, `0`))
1152	{
1153	if (old_state == cur_state)
1154	next_start_idx = next_char_idx;
1155	else
1156	at_init_state = false;
1157	}
1158
1159	if (cur_state->halt)
1160	{
1161	/ Reached a halt state.*
1162	Check the halt state can satisfy the current context. /*
1163	if (!cur_state->has_constraint
1164	\|\| check_halt_state_context (mctx, cur_state,
1165	re_string_cur_idx (&mctx->input)))
1166	{
1167	/ We found an appropriate halt state. /
1168	match_last = re_string_cur_idx (&mctx->input);
1169	match = `1`;
1170
1171	/ We found a match, do not modify match_first below. /
1172	p_match_first = NULL;
1173	if (!fl_longest_match)
1174	break;
1175	}
1176	}
1177	}
1178
1179	if (p_match_first)
1180	*p_match_first += next_start_idx;
1181
1182	return match_last;
1183	}
1184
1185	/ Check NODE match the current context. /
1186
1187	static bool
1188	check_halt_node_context (const re_dfa_t dfa, Idx node, unsigned* int context)
1189	{
1190	re_token_type_t type = dfa->nodes[node].type;
1191	unsigned int constraint = dfa->nodes[node].constraint;
1192	if (type != END_OF_RE)
1193	return false;
1194	if (!constraint)
1195	return true;
1196	if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1197	return false;
1198	return true;
1199	}
1200
1201	/ Check the halt state STATE match the current context.*
1202	Return 0 if not match, if the node, STATE has, is a halt node and
1203	match the context, return the node. /*
1204
1205	static Idx
1206	check_halt_state_context (const re_match_context_t *mctx,
1207	const re_dfastate_t *state, Idx idx)
1208	{
1209	Idx i;
1210	unsigned int context;
1211	#ifdef DEBUG
1212	assert (state->halt);
1213	#endif
1214	context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1215	for (i = `0`; i < state->nodes.nelem; ++i)
1216	if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1217	return state->nodes.elems[i];
1218	return `0`;
1219	}
1220
1221	/ Compute the next node to which "NFA" transit from NODE("NFA" is a NFA*
1222	corresponding to the DFA).
1223	Return the destination node, and update EPS_VIA_NODES;
1224	return -1 in case of errors. /*
1225
1226	static Idx
1227	proceed_next_node (const re_match_context_t mctx, Idx nregs, regmatch_t regs,
1228	Idx pidx, Idx node, re_node_set eps_via_nodes,
1229	struct re_fail_stack_t *fs)
1230	{
1231	const re_dfa_t *const dfa = mctx->dfa;
1232	Idx i;
1233	bool ok;
1234	if (IS_EPSILON_NODE (dfa->nodes[node].type))
1235	{
1236	re_node_set cur_nodes = &mctx->state_log[pidx]->nodes;
1237	re_node_set *edests = &dfa->edests[node];
1238	Idx dest_node;
1239	ok = re_node_set_insert (eps_via_nodes, node);
1240	if (BE (! ok, `0`))
1241	return -`2`;
1242	/ Pick up a valid destination, or return -1 if none*
1243	is found. /*
1244	for (dest_node = -`1`, i = `0`; i < edests->nelem; ++i)
1245	{
1246	Idx candidate = edests->elems[i];
1247	if (!re_node_set_contains (cur_nodes, candidate))
1248	continue;
1249	if (dest_node == -`1`)
1250	dest_node = candidate;
1251
1252	else
1253	{
1254	/ In order to avoid infinite loop like "(a)", return the second*
1255	epsilon-transition if the first was already considered. /*
1256	if (re_node_set_contains (eps_via_nodes, dest_node))
1257	return candidate;
1258
1259	/ Otherwise, push the second epsilon-transition on the fail stack. /
1260	else if (fs != NULL
1261	&& push_fail_stack (fs, *pidx, candidate, nregs, regs,
1262	eps_via_nodes))
1263	return -`2`;
1264
1265	/ We know we are going to exit. /
1266	break;
1267	}
1268	}
1269	return dest_node;
1270	}
1271	else
1272	{
1273	Idx naccepted = `0`;
1274	re_token_type_t type = dfa->nodes[node].type;
1275
1276	#ifdef RE_ENABLE_I18N
1277	if (dfa->nodes[node].accept_mb)
1278	naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1279	else
1280	#endif /* RE_ENABLE_I18N */
1281	if (type == OP_BACK_REF)
1282	{
1283	Idx subexp_idx = dfa->nodes[node].opr.idx + `1`;
1284	naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1285	if (fs != NULL)
1286	{
1287	if (regs[subexp_idx].rm_so == -`1` \|\| regs[subexp_idx].rm_eo == -`1`)
1288	return -`1`;
1289	else if (naccepted)
1290	{
1291	char buf = (char* *) re_string_get_buffer (&mctx->input);
1292	if (mctx->input.valid_len - *pidx < naccepted
1293	\|\| (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1294	naccepted)
1295	!= `0`))
1296	return -`1`;
1297	}
1298	}
1299
1300	if (naccepted == `0`)
1301	{
1302	Idx dest_node;
1303	ok = re_node_set_insert (eps_via_nodes, node);
1304	if (BE (! ok, `0`))
1305	return -`2`;
1306	dest_node = dfa->edests[node].elems[`0`];
1307	if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1308	dest_node))
1309	return dest_node;
1310	}
1311	}
1312
1313	if (naccepted != `0`
1314	\|\| check_node_accept (mctx, dfa->nodes + node, *pidx))
1315	{
1316	Idx dest_node = dfa->nexts[node];
1317	pidx = (naccepted == `0`) ? pidx + `1` : *pidx + naccepted;
1318	if (fs && (pidx > mctx->match_last \|\| mctx->state_log[pidx] == NULL
1319	\|\| !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1320	dest_node)))
1321	return -`1`;
1322	re_node_set_empty (eps_via_nodes);
1323	return dest_node;
1324	}
1325	}
1326	return -`1`;
1327	}
1328
1329	static reg_errcode_t
1330	__attribute_warn_unused_result__
1331	push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
1332	Idx nregs, regmatch_t regs, re_node_set eps_via_nodes)
1333	{
1334	reg_errcode_t err;
1335	Idx num = fs->num++;
1336	if (fs->num == fs->alloc)
1337	{
1338	struct re_fail_stack_ent_t *new_array;
1339	new_array = re_realloc (fs->stack, struct re_fail_stack_ent_t,
1340	fs->alloc * `2`);
1341	if (new_array == NULL)
1342	return REG_ESPACE;
1343	fs->alloc *= `2`;
1344	fs->stack = new_array;
1345	}
1346	fs->stack[num].idx = str_idx;
1347	fs->stack[num].node = dest_node;
1348	fs->stack[num].regs = re_malloc (regmatch_t, nregs);
1349	if (fs->stack[num].regs == NULL)
1350	return REG_ESPACE;
1351	memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1352	err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1353	return err;
1354	}
1355
1356	static Idx
1357	pop_fail_stack (struct re_fail_stack_t fs, Idx pidx, Idx nregs,
1358	regmatch_t regs, re_node_set eps_via_nodes)
1359	{
1360	Idx num = --fs->num;
1361	assert (num >= `0`);
1362	*pidx = fs->stack[num].idx;
1363	memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1364	re_node_set_free (eps_via_nodes);
1365	re_free (fs->stack[num].regs);
1366	*eps_via_nodes = fs->stack[num].eps_via_nodes;
1367	return fs->stack[num].node;
1368	}
1369
1370	/ Set the positions where the subexpressions are starts/ends to registers*
1371	PMATCH.
1372	Note: We assume that pmatch[0] is already set, and
1373	pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch. /*
1374
1375	static reg_errcode_t
1376	__attribute_warn_unused_result__
1377	set_regs (const regex_t preg, const* re_match_context_t *mctx, size_t nmatch,
1378	regmatch_t *pmatch, bool fl_backtrack)
1379	{
1380	const re_dfa_t *dfa = preg->buffer;
1381	Idx idx, cur_node;
1382	re_node_set eps_via_nodes;
1383	struct re_fail_stack_t *fs;
1384	struct re_fail_stack_t fs_body = { `0`, `2`, NULL };
1385	regmatch_t *prev_idx_match;
1386	bool prev_idx_match_malloced = false;
1387
1388	#ifdef DEBUG
1389	assert (nmatch > `1`);
1390	assert (mctx->state_log != NULL);
1391	#endif
1392	if (fl_backtrack)
1393	{
1394	fs = &fs_body;
1395	fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1396	if (fs->stack == NULL)
1397	return REG_ESPACE;
1398	}
1399	else
1400	fs = NULL;
1401
1402	cur_node = dfa->init_node;
1403	re_node_set_init_empty (&eps_via_nodes);
1404
1405	if (__libc_use_alloca (nmatch * sizeof (regmatch_t)))
1406	prev_idx_match = (regmatch_t ) alloca (nmatch sizeof (regmatch_t));
1407	else
1408	{
1409	prev_idx_match = re_malloc (regmatch_t, nmatch);
1410	if (prev_idx_match == NULL)
1411	{
1412	free_fail_stack_return (fs);
1413	return REG_ESPACE;
1414	}
1415	prev_idx_match_malloced = true;
1416	}
1417	memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1418
1419	for (idx = pmatch[`0`].rm_so; idx <= pmatch[`0`].rm_eo ;)
1420	{
1421	update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1422
1423	if (idx == pmatch[`0`].rm_eo && cur_node == mctx->last_node)
1424	{
1425	Idx reg_idx;
1426	if (fs)
1427	{
1428	for (reg_idx = `0`; reg_idx < nmatch; ++reg_idx)
1429	if (pmatch[reg_idx].rm_so > -`1` && pmatch[reg_idx].rm_eo == -`1`)
1430	break;
1431	if (reg_idx == nmatch)
1432	{
1433	re_node_set_free (&eps_via_nodes);
1434	if (prev_idx_match_malloced)
1435	re_free (prev_idx_match);
1436	return free_fail_stack_return (fs);
1437	}
1438	cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1439	&eps_via_nodes);
1440	}
1441	else
1442	{
1443	re_node_set_free (&eps_via_nodes);
1444	if (prev_idx_match_malloced)
1445	re_free (prev_idx_match);
1446	return REG_NOERROR;
1447	}
1448	}
1449
1450	/ Proceed to next node. /
1451	cur_node = proceed_next_node (mctx, nmatch, pmatch, &idx, cur_node,
1452	&eps_via_nodes, fs);
1453
1454	if (BE (cur_node < `0`, `0`))
1455	{
1456	if (BE (cur_node == -`2`, `0`))
1457	{
1458	re_node_set_free (&eps_via_nodes);
1459	if (prev_idx_match_malloced)
1460	re_free (prev_idx_match);
1461	free_fail_stack_return (fs);
1462	return REG_ESPACE;
1463	}
1464	if (fs)
1465	cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1466	&eps_via_nodes);
1467	else
1468	{
1469	re_node_set_free (&eps_via_nodes);
1470	if (prev_idx_match_malloced)
1471	re_free (prev_idx_match);
1472	return REG_NOMATCH;
1473	}
1474	}
1475	}
1476	re_node_set_free (&eps_via_nodes);
1477	if (prev_idx_match_malloced)
1478	re_free (prev_idx_match);
1479	return free_fail_stack_return (fs);
1480	}
1481
1482	static reg_errcode_t
1483	free_fail_stack_return (struct re_fail_stack_t *fs)
1484	{
1485	if (fs)
1486	{
1487	Idx fs_idx;
1488	for (fs_idx = `0`; fs_idx < fs->num; ++fs_idx)
1489	{
1490	re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1491	re_free (fs->stack[fs_idx].regs);
1492	}
1493	re_free (fs->stack);
1494	}
1495	return REG_NOERROR;
1496	}
1497
1498	static void
1499	update_regs (const re_dfa_t dfa, regmatch_t pmatch,
1500	regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
1501	{
1502	int type = dfa->nodes[cur_node].type;
1503	if (type == OP_OPEN_SUBEXP)
1504	{
1505	Idx reg_num = dfa->nodes[cur_node].opr.idx + `1`;
1506
1507	/ We are at the first node of this sub expression. /
1508	if (reg_num < nmatch)
1509	{
1510	pmatch[reg_num].rm_so = cur_idx;
1511	pmatch[reg_num].rm_eo = -`1`;
1512	}
1513	}
1514	else if (type == OP_CLOSE_SUBEXP)
1515	{
1516	Idx reg_num = dfa->nodes[cur_node].opr.idx + `1`;
1517	if (reg_num < nmatch)
1518	{
1519	/ We are at the last node of this sub expression. /
1520	if (pmatch[reg_num].rm_so < cur_idx)
1521	{
1522	pmatch[reg_num].rm_eo = cur_idx;
1523	/ This is a non-empty match or we are not inside an optional*
1524	subexpression. Accept this right away. /*
1525	memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1526	}
1527	else
1528	{
1529	if (dfa->nodes[cur_node].opt_subexp
1530	&& prev_idx_match[reg_num].rm_so != -`1`)
1531	/ We transited through an empty match for an optional*
1532	subexpression, like (a?), and this is not the subexp's*
1533	first match. Copy back the old content of the registers
1534	so that matches of an inner subexpression are undone as
1535	well, like in ((a?)). /
1536	memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1537	else
1538	/ We completed a subexpression, but it may be part of*
1539	an optional one, so do not update PREV_IDX_MATCH. /*
1540	pmatch[reg_num].rm_eo = cur_idx;
1541	}
1542	}
1543	}
1544	}
1545
1546	/ This function checks the STATE_LOG from the SCTX->last_str_idx to 0*
1547	and sift the nodes in each states according to the following rules.
1548	Updated state_log will be wrote to STATE_LOG.
1549
1550	Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if...
1551	1. When STR_IDX == MATCH_LAST(the last index in the state_log):
1552	If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to
1553	the LAST_NODE, we throw away the node 'a'.
1554	2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts
1555	string 's' and transit to 'b':
1556	i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
1557	away the node 'a'.
1558	ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
1559	thrown away, we throw away the node 'a'.
1560	3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
1561	i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
1562	node 'a'.
1563	ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
1564	we throw away the node 'a'. /*
1565
1566	#define STATE_NODE_CONTAINS(state,node) \
1567	((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1568
1569	static reg_errcode_t
1570	sift_states_backward (const re_match_context_t mctx, re_sift_context_t sctx)
1571	{
1572	reg_errcode_t err;
1573	int null_cnt = `0`;
1574	Idx str_idx = sctx->last_str_idx;
1575	re_node_set cur_dest;
1576
1577	#ifdef DEBUG
1578	assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1579	#endif
1580
1581	/ Build sifted state_log[str_idx]. It has the nodes which can epsilon*
1582	transit to the last_node and the last_node itself. /*
1583	err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1584	if (BE (err != REG_NOERROR, `0`))
1585	return err;
1586	err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1587	if (BE (err != REG_NOERROR, `0`))
1588	goto free_return;
1589
1590	/ Then check each states in the state_log. /
1591	while (str_idx > `0`)
1592	{
1593	/ Update counters. /
1594	null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + `1` : `0`;
1595	if (null_cnt > mctx->max_mb_elem_len)
1596	{
1597	memset (sctx->sifted_states, `'\0'`,
1598	sizeof (re_dfastate_t ) str_idx);
1599	re_node_set_free (&cur_dest);
1600	return REG_NOERROR;
1601	}
1602	re_node_set_empty (&cur_dest);
1603	--str_idx;
1604
1605	if (mctx->state_log[str_idx])
1606	{
1607	err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1608	if (BE (err != REG_NOERROR, `0`))
1609	goto free_return;
1610	}
1611
1612	/ Add all the nodes which satisfy the following conditions:*
1613	- It can epsilon transit to a node in CUR_DEST.
1614	- It is in CUR_SRC.
1615	And update state_log. /*
1616	err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1617	if (BE (err != REG_NOERROR, `0`))
1618	goto free_return;
1619	}
1620	err = REG_NOERROR;
1621	free_return:
1622	re_node_set_free (&cur_dest);
1623	return err;
1624	}
1625
1626	static reg_errcode_t
1627	__attribute_warn_unused_result__
1628	build_sifted_states (const re_match_context_t mctx, re_sift_context_t sctx,
1629	Idx str_idx, re_node_set *cur_dest)
1630	{
1631	const re_dfa_t *const dfa = mctx->dfa;
1632	const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1633	Idx i;
1634
1635	/ Then build the next sifted state.*
1636	We build the next sifted state on 'cur_dest', and update
1637	'sifted_states[str_idx]' with 'cur_dest'.
1638	Note:
1639	'cur_dest' is the sifted state from 'state_log[str_idx + 1]'.
1640	'cur_src' points the node_set of the old 'state_log[str_idx]'
1641	(with the epsilon nodes pre-filtered out). /*
1642	for (i = `0`; i < cur_src->nelem; i++)
1643	{
1644	Idx prev_node = cur_src->elems[i];
1645	int naccepted = `0`;
1646	bool ok;
1647
1648	#ifdef DEBUG
1649	re_token_type_t type = dfa->nodes[prev_node].type;
1650	assert (!IS_EPSILON_NODE (type));
1651	#endif
1652	#ifdef RE_ENABLE_I18N
1653	/ If the node may accept "multi byte". /
1654	if (dfa->nodes[prev_node].accept_mb)
1655	naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1656	str_idx, sctx->last_str_idx);
1657	#endif /* RE_ENABLE_I18N */
1658
1659	/ We don't check backreferences here.*
1660	See update_cur_sifted_state(). /*
1661	if (!naccepted
1662	&& check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1663	&& STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + `1`],
1664	dfa->nexts[prev_node]))
1665	naccepted = `1`;
1666
1667	if (naccepted == `0`)
1668	continue;
1669
1670	if (sctx->limits.nelem)
1671	{
1672	Idx to_idx = str_idx + naccepted;
1673	if (check_dst_limits (mctx, &sctx->limits,
1674	dfa->nexts[prev_node], to_idx,
1675	prev_node, str_idx))
1676	continue;
1677	}
1678	ok = re_node_set_insert (cur_dest, prev_node);
1679	if (BE (! ok, `0`))
1680	return REG_ESPACE;
1681	}
1682
1683	return REG_NOERROR;
1684	}
1685
1686	/ Helper functions. /
1687
1688	static reg_errcode_t
1689	clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
1690	{
1691	Idx top = mctx->state_log_top;
1692
1693	if ((next_state_log_idx >= mctx->input.bufs_len
1694	&& mctx->input.bufs_len < mctx->input.len)
1695	\|\| (next_state_log_idx >= mctx->input.valid_len
1696	&& mctx->input.valid_len < mctx->input.len))
1697	{
1698	reg_errcode_t err;
1699	err = extend_buffers (mctx, next_state_log_idx + `1`);
1700	if (BE (err != REG_NOERROR, `0`))
1701	return err;
1702	}
1703
1704	if (top < next_state_log_idx)
1705	{
1706	memset (mctx->state_log + top + `1`, `'\0'`,
1707	sizeof (re_dfastate_t ) (next_state_log_idx - top));
1708	mctx->state_log_top = next_state_log_idx;
1709	}
1710	return REG_NOERROR;
1711	}
1712
1713	static reg_errcode_t
1714	merge_state_array (const re_dfa_t dfa, re_dfastate_t *dst,
1715	re_dfastate_t **src, Idx num)
1716	{
1717	Idx st_idx;
1718	reg_errcode_t err;
1719	for (st_idx = `0`; st_idx < num; ++st_idx)
1720	{
1721	if (dst[st_idx] == NULL)
1722	dst[st_idx] = src[st_idx];
1723	else if (src[st_idx] != NULL)
1724	{
1725	re_node_set merged_set;
1726	err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1727	&src[st_idx]->nodes);
1728	if (BE (err != REG_NOERROR, `0`))
1729	return err;
1730	dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1731	re_node_set_free (&merged_set);
1732	if (BE (err != REG_NOERROR, `0`))
1733	return err;
1734	}
1735	}
1736	return REG_NOERROR;
1737	}
1738
1739	static reg_errcode_t
1740	update_cur_sifted_state (const re_match_context_t *mctx,
1741	re_sift_context_t *sctx, Idx str_idx,
1742	re_node_set *dest_nodes)
1743	{
1744	const re_dfa_t *const dfa = mctx->dfa;
1745	reg_errcode_t err = REG_NOERROR;
1746	const re_node_set *candidates;
1747	candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1748	: &mctx->state_log[str_idx]->nodes);
1749
1750	if (dest_nodes->nelem == `0`)
1751	sctx->sifted_states[str_idx] = NULL;
1752	else
1753	{
1754	if (candidates)
1755	{
1756	/ At first, add the nodes which can epsilon transit to a node in*
1757	DEST_NODE. /*
1758	err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1759	if (BE (err != REG_NOERROR, `0`))
1760	return err;
1761
1762	/ Then, check the limitations in the current sift_context. /
1763	if (sctx->limits.nelem)
1764	{
1765	err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1766	mctx->bkref_ents, str_idx);
1767	if (BE (err != REG_NOERROR, `0`))
1768	return err;
1769	}
1770	}
1771
1772	sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1773	if (BE (err != REG_NOERROR, `0`))
1774	return err;
1775	}
1776
1777	if (candidates && mctx->state_log[str_idx]->has_backref)
1778	{
1779	err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1780	if (BE (err != REG_NOERROR, `0`))
1781	return err;
1782	}
1783	return REG_NOERROR;
1784	}
1785
1786	static reg_errcode_t
1787	__attribute_warn_unused_result__
1788	add_epsilon_src_nodes (const re_dfa_t dfa, re_node_set dest_nodes,
1789	const re_node_set *candidates)
1790	{
1791	reg_errcode_t err = REG_NOERROR;
1792	Idx i;
1793
1794	re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1795	if (BE (err != REG_NOERROR, `0`))
1796	return err;
1797
1798	if (!state->inveclosure.alloc)
1799	{
1800	err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1801	if (BE (err != REG_NOERROR, `0`))
1802	return REG_ESPACE;
1803	for (i = `0`; i < dest_nodes->nelem; i++)
1804	{
1805	err = re_node_set_merge (&state->inveclosure,
1806	dfa->inveclosures + dest_nodes->elems[i]);
1807	if (BE (err != REG_NOERROR, `0`))
1808	return REG_ESPACE;
1809	}
1810	}
1811	return re_node_set_add_intersect (dest_nodes, candidates,
1812	&state->inveclosure);
1813	}
1814
1815	static reg_errcode_t
1816	sub_epsilon_src_nodes (const re_dfa_t dfa, Idx node, re_node_set dest_nodes,
1817	const re_node_set *candidates)
1818	{
1819	Idx ecl_idx;
1820	reg_errcode_t err;
1821	re_node_set *inv_eclosure = dfa->inveclosures + node;
1822	re_node_set except_nodes;
1823	re_node_set_init_empty (&except_nodes);
1824	for (ecl_idx = `0`; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1825	{
1826	Idx cur_node = inv_eclosure->elems[ecl_idx];
1827	if (cur_node == node)
1828	continue;
1829	if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1830	{
1831	Idx edst1 = dfa->edests[cur_node].elems[`0`];
1832	Idx edst2 = ((dfa->edests[cur_node].nelem > `1`)
1833	? dfa->edests[cur_node].elems[`1`] : -`1`);
1834	if ((!re_node_set_contains (inv_eclosure, edst1)
1835	&& re_node_set_contains (dest_nodes, edst1))
1836	\|\| (edst2 > `0`
1837	&& !re_node_set_contains (inv_eclosure, edst2)
1838	&& re_node_set_contains (dest_nodes, edst2)))
1839	{
1840	err = re_node_set_add_intersect (&except_nodes, candidates,
1841	dfa->inveclosures + cur_node);
1842	if (BE (err != REG_NOERROR, `0`))
1843	{
1844	re_node_set_free (&except_nodes);
1845	return err;
1846	}
1847	}
1848	}
1849	}
1850	for (ecl_idx = `0`; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1851	{
1852	Idx cur_node = inv_eclosure->elems[ecl_idx];
1853	if (!re_node_set_contains (&except_nodes, cur_node))
1854	{
1855	Idx idx = re_node_set_contains (dest_nodes, cur_node) - `1`;
1856	re_node_set_remove_at (dest_nodes, idx);
1857	}
1858	}
1859	re_node_set_free (&except_nodes);
1860	return REG_NOERROR;
1861	}
1862
1863	static bool
1864	check_dst_limits (const re_match_context_t mctx, const* re_node_set *limits,
1865	Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
1866	{
1867	const re_dfa_t *const dfa = mctx->dfa;
1868	Idx lim_idx, src_pos, dst_pos;
1869
1870	Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1871	Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1872	for (lim_idx = `0`; lim_idx < limits->nelem; ++lim_idx)
1873	{
1874	Idx subexp_idx;
1875	struct re_backref_cache_entry *ent;
1876	ent = mctx->bkref_ents + limits->elems[lim_idx];
1877	subexp_idx = dfa->nodes[ent->node].opr.idx;
1878
1879	dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1880	subexp_idx, dst_node, dst_idx,
1881	dst_bkref_idx);
1882	src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1883	subexp_idx, src_node, src_idx,
1884	src_bkref_idx);
1885
1886	/ In case of:*
1887	<src> <dst> ( <subexp> )
1888	( <subexp> ) <src> <dst>
1889	( <subexp1> <src> <subexp2> <dst> <subexp3> ) /*
1890	if (src_pos == dst_pos)
1891	continue; / This is unrelated limitation. /
1892	else
1893	return true;
1894	}
1895	return false;
1896	}
1897
1898	static int
1899	check_dst_limits_calc_pos_1 (const re_match_context_t mctx, int* boundaries,
1900	Idx subexp_idx, Idx from_node, Idx bkref_idx)
1901	{
1902	const re_dfa_t *const dfa = mctx->dfa;
1903	const re_node_set *eclosures = dfa->eclosures + from_node;
1904	Idx node_idx;
1905
1906	/ Else, we are on the boundary: examine the nodes on the epsilon*
1907	closure. /*
1908	for (node_idx = `0`; node_idx < eclosures->nelem; ++node_idx)
1909	{
1910	Idx node = eclosures->elems[node_idx];
1911	switch (dfa->nodes[node].type)
1912	{
1913	case OP_BACK_REF:
1914	if (bkref_idx != -`1`)
1915	{
1916	struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1917	do
1918	{
1919	Idx dst;
1920	int cpos;
1921
1922	if (ent->node != node)
1923	continue;
1924
1925	if (subexp_idx < BITSET_WORD_BITS
1926	&& !(ent->eps_reachable_subexps_map
1927	& ((bitset_word_t) `1` << subexp_idx)))
1928	continue;
1929
1930	/ Recurse trying to reach the OP_OPEN_SUBEXP and*
1931	OP_CLOSE_SUBEXP cases below. But, if the
1932	destination node is the same node as the source
1933	node, don't recurse because it would cause an
1934	infinite loop: a regex that exhibits this behavior
1935	is ()\1\1* /
1936	dst = dfa->edests[node].elems[`0`];
1937	if (dst == from_node)
1938	{
1939	if (boundaries & `1`)
1940	return -`1`;
1941	else / if (boundaries & 2) /
1942	return `0`;
1943	}
1944
1945	cpos =
1946	check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1947	dst, bkref_idx);
1948	if (cpos == -`1` / && (boundaries & 1) /)
1949	return -`1`;
1950	if (cpos == `0` && (boundaries & `2`))
1951	return `0`;
1952
1953	if (subexp_idx < BITSET_WORD_BITS)
1954	ent->eps_reachable_subexps_map
1955	&= ~((bitset_word_t) `1` << subexp_idx);
1956	}
1957	while (ent++->more);
1958	}
1959	break;
1960
1961	case OP_OPEN_SUBEXP:
1962	if ((boundaries & `1`) && subexp_idx == dfa->nodes[node].opr.idx)
1963	return -`1`;
1964	break;
1965
1966	case OP_CLOSE_SUBEXP:
1967	if ((boundaries & `2`) && subexp_idx == dfa->nodes[node].opr.idx)
1968	return `0`;
1969	break;
1970
1971	default:
1972	break;
1973	}
1974	}
1975
1976	return (boundaries & `2`) ? `1` : `0`;
1977	}
1978
1979	static int
1980	check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
1981	Idx subexp_idx, Idx from_node, Idx str_idx,
1982	Idx bkref_idx)
1983	{
1984	struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
1985	int boundaries;
1986
1987	/ If we are outside the range of the subexpression, return -1 or 1. /
1988	if (str_idx < lim->subexp_from)
1989	return -`1`;
1990
1991	if (lim->subexp_to < str_idx)
1992	return `1`;
1993
1994	/ If we are within the subexpression, return 0. /
1995	boundaries = (str_idx == lim->subexp_from);
1996	boundaries \|= (str_idx == lim->subexp_to) << `1`;
1997	if (boundaries == `0`)
1998	return `0`;
1999
2000	/ Else, examine epsilon closure. /
2001	return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
2002	from_node, bkref_idx);
2003	}
2004
2005	/ Check the limitations of sub expressions LIMITS, and remove the nodes*
2006	which are against limitations from DEST_NODES. /*
2007
2008	static reg_errcode_t
2009	check_subexp_limits (const re_dfa_t dfa, re_node_set dest_nodes,
2010	const re_node_set candidates, re_node_set limits,
2011	struct re_backref_cache_entry *bkref_ents, Idx str_idx)
2012	{
2013	reg_errcode_t err;
2014	Idx node_idx, lim_idx;
2015
2016	for (lim_idx = `0`; lim_idx < limits->nelem; ++lim_idx)
2017	{
2018	Idx subexp_idx;
2019	struct re_backref_cache_entry *ent;
2020	ent = bkref_ents + limits->elems[lim_idx];
2021
2022	if (str_idx <= ent->subexp_from \|\| ent->str_idx < str_idx)
2023	continue; / This is unrelated limitation. /
2024
2025	subexp_idx = dfa->nodes[ent->node].opr.idx;
2026	if (ent->subexp_to == str_idx)
2027	{
2028	Idx ops_node = -`1`;
2029	Idx cls_node = -`1`;
2030	for (node_idx = `0`; node_idx < dest_nodes->nelem; ++node_idx)
2031	{
2032	Idx node = dest_nodes->elems[node_idx];
2033	re_token_type_t type = dfa->nodes[node].type;
2034	if (type == OP_OPEN_SUBEXP
2035	&& subexp_idx == dfa->nodes[node].opr.idx)
2036	ops_node = node;
2037	else if (type == OP_CLOSE_SUBEXP
2038	&& subexp_idx == dfa->nodes[node].opr.idx)
2039	cls_node = node;
2040	}
2041
2042	/ Check the limitation of the open subexpression. /
2043	/ Note that (ent->subexp_to = str_idx != ent->subexp_from). /
2044	if (ops_node >= `0`)
2045	{
2046	err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2047	candidates);
2048	if (BE (err != REG_NOERROR, `0`))
2049	return err;
2050	}
2051
2052	/ Check the limitation of the close subexpression. /
2053	if (cls_node >= `0`)
2054	for (node_idx = `0`; node_idx < dest_nodes->nelem; ++node_idx)
2055	{
2056	Idx node = dest_nodes->elems[node_idx];
2057	if (!re_node_set_contains (dfa->inveclosures + node,
2058	cls_node)
2059	&& !re_node_set_contains (dfa->eclosures + node,
2060	cls_node))
2061	{
2062	/ It is against this limitation.*
2063	Remove it form the current sifted state. /*
2064	err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2065	candidates);
2066	if (BE (err != REG_NOERROR, `0`))
2067	return err;
2068	--node_idx;
2069	}
2070	}
2071	}
2072	else / (ent->subexp_to != str_idx) /
2073	{
2074	for (node_idx = `0`; node_idx < dest_nodes->nelem; ++node_idx)
2075	{
2076	Idx node = dest_nodes->elems[node_idx];
2077	re_token_type_t type = dfa->nodes[node].type;
2078	if (type == OP_CLOSE_SUBEXP \|\| type == OP_OPEN_SUBEXP)
2079	{
2080	if (subexp_idx != dfa->nodes[node].opr.idx)
2081	continue;
2082	/ It is against this limitation.*
2083	Remove it form the current sifted state. /*
2084	err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2085	candidates);
2086	if (BE (err != REG_NOERROR, `0`))
2087	return err;
2088	}
2089	}
2090	}
2091	}
2092	return REG_NOERROR;
2093	}
2094
2095	static reg_errcode_t
2096	__attribute_warn_unused_result__
2097	sift_states_bkref (const re_match_context_t mctx, re_sift_context_t sctx,
2098	Idx str_idx, const re_node_set *candidates)
2099	{
2100	const re_dfa_t *const dfa = mctx->dfa;
2101	reg_errcode_t err;
2102	Idx node_idx, node;
2103	re_sift_context_t local_sctx;
2104	Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
2105
2106	if (first_idx == -`1`)
2107	return REG_NOERROR;
2108
2109	local_sctx.sifted_states = NULL; / Mark that it hasn't been initialized. /
2110
2111	for (node_idx = `0`; node_idx < candidates->nelem; ++node_idx)
2112	{
2113	Idx enabled_idx;
2114	re_token_type_t type;
2115	struct re_backref_cache_entry *entry;
2116	node = candidates->elems[node_idx];
2117	type = dfa->nodes[node].type;
2118	/ Avoid infinite loop for the REs like "()\1+". /
2119	if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2120	continue;
2121	if (type != OP_BACK_REF)
2122	continue;
2123
2124	entry = mctx->bkref_ents + first_idx;
2125	enabled_idx = first_idx;
2126	do
2127	{
2128	Idx subexp_len;
2129	Idx to_idx;
2130	Idx dst_node;
2131	bool ok;
2132	re_dfastate_t *cur_state;
2133
2134	if (entry->node != node)
2135	continue;
2136	subexp_len = entry->subexp_to - entry->subexp_from;
2137	to_idx = str_idx + subexp_len;
2138	dst_node = (subexp_len ? dfa->nexts[node]
2139	: dfa->edests[node].elems[`0`]);
2140
2141	if (to_idx > sctx->last_str_idx
2142	\|\| sctx->sifted_states[to_idx] == NULL
2143	\|\| !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2144	\|\| check_dst_limits (mctx, &sctx->limits, node,
2145	str_idx, dst_node, to_idx))
2146	continue;
2147
2148	if (local_sctx.sifted_states == NULL)
2149	{
2150	local_sctx = *sctx;
2151	err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2152	if (BE (err != REG_NOERROR, `0`))
2153	goto free_return;
2154	}
2155	local_sctx.last_node = node;
2156	local_sctx.last_str_idx = str_idx;
2157	ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
2158	if (BE (! ok, `0`))
2159	{
2160	err = REG_ESPACE;
2161	goto free_return;
2162	}
2163	cur_state = local_sctx.sifted_states[str_idx];
2164	err = sift_states_backward (mctx, &local_sctx);
2165	if (BE (err != REG_NOERROR, `0`))
2166	goto free_return;
2167	if (sctx->limited_states != NULL)
2168	{
2169	err = merge_state_array (dfa, sctx->limited_states,
2170	local_sctx.sifted_states,
2171	str_idx + `1`);
2172	if (BE (err != REG_NOERROR, `0`))
2173	goto free_return;
2174	}
2175	local_sctx.sifted_states[str_idx] = cur_state;
2176	re_node_set_remove (&local_sctx.limits, enabled_idx);
2177
2178	/ mctx->bkref_ents may have changed, reload the pointer. /
2179	entry = mctx->bkref_ents + enabled_idx;
2180	}
2181	while (enabled_idx++, entry++->more);
2182	}
2183	err = REG_NOERROR;
2184	free_return:
2185	if (local_sctx.sifted_states != NULL)
2186	{
2187	re_node_set_free (&local_sctx.limits);
2188	}
2189
2190	return err;
2191	}
2192
2193
2194	#ifdef RE_ENABLE_I18N
2195	static int
2196	sift_states_iter_mb (const re_match_context_t mctx, re_sift_context_t sctx,
2197	Idx node_idx, Idx str_idx, Idx max_str_idx)
2198	{
2199	const re_dfa_t *const dfa = mctx->dfa;
2200	int naccepted;
2201	/ Check the node can accept "multi byte". /
2202	naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2203	if (naccepted > `0` && str_idx + naccepted <= max_str_idx &&
2204	!STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2205	dfa->nexts[node_idx]))
2206	/ The node can't accept the "multi byte", or the*
2207	destination was already thrown away, then the node
2208	could't accept the current input "multi byte". /*
2209	naccepted = `0`;
2210	/ Otherwise, it is sure that the node could accept*
2211	'naccepted' bytes input. /*
2212	return naccepted;
2213	}
2214	#endif /* RE_ENABLE_I18N */
2215
2216
2217	/ Functions for state transition. /
2218
2219	/ Return the next state to which the current state STATE will transit by*
2220	accepting the current input byte, and update STATE_LOG if necessary.
2221	If STATE can accept a multibyte char/collating element/back reference
2222	update the destination of STATE_LOG. /*
2223
2224	static re_dfastate_t *
2225	__attribute_warn_unused_result__
2226	transit_state (reg_errcode_t err, re_match_context_t mctx,
2227	re_dfastate_t *state)
2228	{
2229	re_dfastate_t **trtable;
2230	unsigned char ch;
2231
2232	#ifdef RE_ENABLE_I18N
2233	/ If the current state can accept multibyte. /
2234	if (BE (state->accept_mb, `0`))
2235	{
2236	*err = transit_state_mb (mctx, state);
2237	if (BE (*err != REG_NOERROR, `0`))
2238	return NULL;
2239	}
2240	#endif /* RE_ENABLE_I18N */
2241
2242	/ Then decide the next state with the single byte. /
2243	#if 0
2244	if (`0`)
2245	/ don't use transition table /
2246	return transit_state_sb (err, mctx, state);
2247	#endif
2248
2249	/ Use transition table /
2250	ch = re_string_fetch_byte (&mctx->input);
2251	for (;;)
2252	{
2253	trtable = state->trtable;
2254	if (BE (trtable != NULL, `1`))
2255	return trtable[ch];
2256
2257	trtable = state->word_trtable;
2258	if (BE (trtable != NULL, `1`))
2259	{
2260	unsigned int context;
2261	context
2262	= re_string_context_at (&mctx->input,
2263	re_string_cur_idx (&mctx->input) - `1`,
2264	mctx->eflags);
2265	if (IS_WORD_CONTEXT (context))
2266	return trtable[ch + SBC_MAX];
2267	else
2268	return trtable[ch];
2269	}
2270
2271	if (!build_trtable (mctx->dfa, state))
2272	{
2273	*err = REG_ESPACE;
2274	return NULL;
2275	}
2276
2277	/ Retry, we now have a transition table. /
2278	}
2279	}
2280
2281	/ Update the state_log if we need /
2282	static re_dfastate_t *
2283	merge_state_with_log (reg_errcode_t err, re_match_context_t mctx,
2284	re_dfastate_t *next_state)
2285	{
2286	const re_dfa_t *const dfa = mctx->dfa;
2287	Idx cur_idx = re_string_cur_idx (&mctx->input);
2288
2289	if (cur_idx > mctx->state_log_top)
2290	{
2291	mctx->state_log[cur_idx] = next_state;
2292	mctx->state_log_top = cur_idx;
2293	}
2294	else if (mctx->state_log[cur_idx] == `0`)
2295	{
2296	mctx->state_log[cur_idx] = next_state;
2297	}
2298	else
2299	{
2300	re_dfastate_t *pstate;
2301	unsigned int context;
2302	re_node_set next_nodes, log_nodes, table_nodes = NULL;
2303	/ If (state_log[cur_idx] != 0), it implies that cur_idx is*
2304	the destination of a multibyte char/collating element/
2305	back reference. Then the next state is the union set of
2306	these destinations and the results of the transition table. /*
2307	pstate = mctx->state_log[cur_idx];
2308	log_nodes = pstate->entrance_nodes;
2309	if (next_state != NULL)
2310	{
2311	table_nodes = next_state->entrance_nodes;
2312	*err = re_node_set_init_union (&next_nodes, table_nodes,
2313	log_nodes);
2314	if (BE (*err != REG_NOERROR, `0`))
2315	return NULL;
2316	}
2317	else
2318	next_nodes = *log_nodes;
2319	/ Note: We already add the nodes of the initial state,*
2320	then we don't need to add them here. /*
2321
2322	context = re_string_context_at (&mctx->input,
2323	re_string_cur_idx (&mctx->input) - `1`,
2324	mctx->eflags);
2325	next_state = mctx->state_log[cur_idx]
2326	= re_acquire_state_context (err, dfa, &next_nodes, context);
2327	/ We don't need to check errors here, since the return value of*
2328	this function is next_state and ERR is already set. /*
2329
2330	if (table_nodes != NULL)
2331	re_node_set_free (&next_nodes);
2332	}
2333
2334	if (BE (dfa->nbackref, `0`) && next_state != NULL)
2335	{
2336	/ Check OP_OPEN_SUBEXP in the current state in case that we use them*
2337	later. We must check them here, since the back references in the
2338	next state might use them. /*
2339	*err = check_subexp_matching_top (mctx, &next_state->nodes,
2340	cur_idx);
2341	if (BE (*err != REG_NOERROR, `0`))
2342	return NULL;
2343
2344	/ If the next state has back references. /
2345	if (next_state->has_backref)
2346	{
2347	*err = transit_state_bkref (mctx, &next_state->nodes);
2348	if (BE (*err != REG_NOERROR, `0`))
2349	return NULL;
2350	next_state = mctx->state_log[cur_idx];
2351	}
2352	}
2353
2354	return next_state;
2355	}
2356
2357	/ Skip bytes in the input that correspond to part of a*
2358	multi-byte match, then look in the log for a state
2359	from which to restart matching. /*
2360	static re_dfastate_t *
2361	find_recover_state (reg_errcode_t err, re_match_context_t mctx)
2362	{
2363	re_dfastate_t *cur_state;
2364	do
2365	{
2366	Idx max = mctx->state_log_top;
2367	Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2368
2369	do
2370	{
2371	if (++cur_str_idx > max)
2372	return NULL;
2373	re_string_skip_bytes (&mctx->input, `1`);
2374	}
2375	while (mctx->state_log[cur_str_idx] == NULL);
2376
2377	cur_state = merge_state_with_log (err, mctx, NULL);
2378	}
2379	while (*err == REG_NOERROR && cur_state == NULL);
2380	return cur_state;
2381	}
2382
2383	/ Helper functions for transit_state. /
2384
2385	/ From the node set CUR_NODES, pick up the nodes whose types are*
2386	OP_OPEN_SUBEXP and which have corresponding back references in the regular
2387	expression. And register them to use them later for evaluating the
2388	corresponding back references. /*
2389
2390	static reg_errcode_t
2391	check_subexp_matching_top (re_match_context_t mctx, re_node_set cur_nodes,
2392	Idx str_idx)
2393	{
2394	const re_dfa_t *const dfa = mctx->dfa;
2395	Idx node_idx;
2396	reg_errcode_t err;
2397
2398	/ TODO: This isn't efficient.*
2399	Because there might be more than one nodes whose types are
2400	OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2401	nodes.
2402	E.g. RE: (a){2} /*
2403	for (node_idx = `0`; node_idx < cur_nodes->nelem; ++node_idx)
2404	{
2405	Idx node = cur_nodes->elems[node_idx];
2406	if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2407	&& dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2408	&& (dfa->used_bkref_map
2409	& ((bitset_word_t) `1` << dfa->nodes[node].opr.idx)))
2410	{
2411	err = match_ctx_add_subtop (mctx, node, str_idx);
2412	if (BE (err != REG_NOERROR, `0`))
2413	return err;
2414	}
2415	}
2416	return REG_NOERROR;
2417	}
2418
2419	#if 0
2420	/ Return the next state to which the current state STATE will transit by*
2421	accepting the current input byte. /*
2422
2423	static re_dfastate_t *
2424	transit_state_sb (reg_errcode_t err, re_match_context_t mctx,
2425	re_dfastate_t *state)
2426	{
2427	const re_dfa_t *const dfa = mctx->dfa;
2428	re_node_set next_nodes;
2429	re_dfastate_t *next_state;
2430	Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2431	unsigned int context;
2432
2433	*err = re_node_set_alloc (&next_nodes, state->nodes.nelem + `1`);
2434	if (BE (*err != REG_NOERROR, `0`))
2435	return NULL;
2436	for (node_cnt = `0`; node_cnt < state->nodes.nelem; ++node_cnt)
2437	{
2438	Idx cur_node = state->nodes.elems[node_cnt];
2439	if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2440	{
2441	*err = re_node_set_merge (&next_nodes,
2442	dfa->eclosures + dfa->nexts[cur_node]);
2443	if (BE (*err != REG_NOERROR, `0`))
2444	{
2445	re_node_set_free (&next_nodes);
2446	return NULL;
2447	}
2448	}
2449	}
2450	context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2451	next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2452	/ We don't need to check errors here, since the return value of*
2453	this function is next_state and ERR is already set. /*
2454
2455	re_node_set_free (&next_nodes);
2456	re_string_skip_bytes (&mctx->input, `1`);
2457	return next_state;
2458	}
2459	#endif
2460
2461	#ifdef RE_ENABLE_I18N
2462	static reg_errcode_t
2463	transit_state_mb (re_match_context_t mctx, re_dfastate_t pstate)
2464	{
2465	const re_dfa_t *const dfa = mctx->dfa;
2466	reg_errcode_t err;
2467	Idx i;
2468
2469	for (i = `0`; i < pstate->nodes.nelem; ++i)
2470	{
2471	re_node_set dest_nodes, *new_nodes;
2472	Idx cur_node_idx = pstate->nodes.elems[i];
2473	int naccepted;
2474	Idx dest_idx;
2475	unsigned int context;
2476	re_dfastate_t *dest_state;
2477
2478	if (!dfa->nodes[cur_node_idx].accept_mb)
2479	continue;
2480
2481	if (dfa->nodes[cur_node_idx].constraint)
2482	{
2483	context = re_string_context_at (&mctx->input,
2484	re_string_cur_idx (&mctx->input),
2485	mctx->eflags);
2486	if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2487	context))
2488	continue;
2489	}
2490
2491	/ How many bytes the node can accept? /
2492	naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2493	re_string_cur_idx (&mctx->input));
2494	if (naccepted == `0`)
2495	continue;
2496
2497	/ The node can accepts 'naccepted' bytes. /
2498	dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2499	mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2500	: mctx->max_mb_elem_len);
2501	err = clean_state_log_if_needed (mctx, dest_idx);
2502	if (BE (err != REG_NOERROR, `0`))
2503	return err;
2504	#ifdef DEBUG
2505	assert (dfa->nexts[cur_node_idx] != -`1`);
2506	#endif
2507	new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2508
2509	dest_state = mctx->state_log[dest_idx];
2510	if (dest_state == NULL)
2511	dest_nodes = *new_nodes;
2512	else
2513	{
2514	err = re_node_set_init_union (&dest_nodes,
2515	dest_state->entrance_nodes, new_nodes);
2516	if (BE (err != REG_NOERROR, `0`))
2517	return err;
2518	}
2519	context = re_string_context_at (&mctx->input, dest_idx - `1`,
2520	mctx->eflags);
2521	mctx->state_log[dest_idx]
2522	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2523	if (dest_state != NULL)
2524	re_node_set_free (&dest_nodes);
2525	if (BE (mctx->state_log[dest_idx] == NULL && err != REG_NOERROR, `0`))
2526	return err;
2527	}
2528	return REG_NOERROR;
2529	}
2530	#endif /* RE_ENABLE_I18N */
2531
2532	static reg_errcode_t
2533	transit_state_bkref (re_match_context_t mctx, const* re_node_set *nodes)
2534	{
2535	const re_dfa_t *const dfa = mctx->dfa;
2536	reg_errcode_t err;
2537	Idx i;
2538	Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2539
2540	for (i = `0`; i < nodes->nelem; ++i)
2541	{
2542	Idx dest_str_idx, prev_nelem, bkc_idx;
2543	Idx node_idx = nodes->elems[i];
2544	unsigned int context;
2545	const re_token_t *node = dfa->nodes + node_idx;
2546	re_node_set *new_dest_nodes;
2547
2548	/ Check whether 'node' is a backreference or not. /
2549	if (node->type != OP_BACK_REF)
2550	continue;
2551
2552	if (node->constraint)
2553	{
2554	context = re_string_context_at (&mctx->input, cur_str_idx,
2555	mctx->eflags);
2556	if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2557	continue;
2558	}
2559
2560	/ 'node' is a backreference.*
2561	Check the substring which the substring matched. /*
2562	bkc_idx = mctx->nbkref_ents;
2563	err = get_subexp (mctx, node_idx, cur_str_idx);
2564	if (BE (err != REG_NOERROR, `0`))
2565	goto free_return;
2566
2567	/ And add the epsilon closures (which is 'new_dest_nodes') of*
2568	the backreference to appropriate state_log. /*
2569	#ifdef DEBUG
2570	assert (dfa->nexts[node_idx] != -`1`);
2571	#endif
2572	for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2573	{
2574	Idx subexp_len;
2575	re_dfastate_t *dest_state;
2576	struct re_backref_cache_entry *bkref_ent;
2577	bkref_ent = mctx->bkref_ents + bkc_idx;
2578	if (bkref_ent->node != node_idx \|\| bkref_ent->str_idx != cur_str_idx)
2579	continue;
2580	subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2581	new_dest_nodes = (subexp_len == `0`
2582	? dfa->eclosures + dfa->edests[node_idx].elems[`0`]
2583	: dfa->eclosures + dfa->nexts[node_idx]);
2584	dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2585	- bkref_ent->subexp_from);
2586	context = re_string_context_at (&mctx->input, dest_str_idx - `1`,
2587	mctx->eflags);
2588	dest_state = mctx->state_log[dest_str_idx];
2589	prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? `0`
2590	: mctx->state_log[cur_str_idx]->nodes.nelem);
2591	/ Add 'new_dest_node' to state_log. /
2592	if (dest_state == NULL)
2593	{
2594	mctx->state_log[dest_str_idx]
2595	= re_acquire_state_context (&err, dfa, new_dest_nodes,
2596	context);
2597	if (BE (mctx->state_log[dest_str_idx] == NULL
2598	&& err != REG_NOERROR, `0`))
2599	goto free_return;
2600	}
2601	else
2602	{
2603	re_node_set dest_nodes;
2604	err = re_node_set_init_union (&dest_nodes,
2605	dest_state->entrance_nodes,
2606	new_dest_nodes);
2607	if (BE (err != REG_NOERROR, `0`))
2608	{
2609	re_node_set_free (&dest_nodes);
2610	goto free_return;
2611	}
2612	mctx->state_log[dest_str_idx]
2613	= re_acquire_state_context (&err, dfa, &dest_nodes, context);
2614	re_node_set_free (&dest_nodes);
2615	if (BE (mctx->state_log[dest_str_idx] == NULL
2616	&& err != REG_NOERROR, `0`))
2617	goto free_return;
2618	}
2619	/ We need to check recursively if the backreference can epsilon*
2620	transit. /*
2621	if (subexp_len == `0`
2622	&& mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2623	{
2624	err = check_subexp_matching_top (mctx, new_dest_nodes,
2625	cur_str_idx);
2626	if (BE (err != REG_NOERROR, `0`))
2627	goto free_return;
2628	err = transit_state_bkref (mctx, new_dest_nodes);
2629	if (BE (err != REG_NOERROR, `0`))
2630	goto free_return;
2631	}
2632	}
2633	}
2634	err = REG_NOERROR;
2635	free_return:
2636	return err;
2637	}
2638
2639	/ Enumerate all the candidates which the backreference BKREF_NODE can match*
2640	at BKREF_STR_IDX, and register them by match_ctx_add_entry().
2641	Note that we might collect inappropriate candidates here.
2642	However, the cost of checking them strictly here is too high, then we
2643	delay these checking for prune_impossible_nodes(). /*
2644
2645	static reg_errcode_t
2646	__attribute_warn_unused_result__
2647	get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2648	{
2649	const re_dfa_t *const dfa = mctx->dfa;
2650	Idx subexp_num, sub_top_idx;
2651	const char buf = (const* char *) re_string_get_buffer (&mctx->input);
2652	/ Return if we have already checked BKREF_NODE at BKREF_STR_IDX. /
2653	Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2654	if (cache_idx != -`1`)
2655	{
2656	const struct re_backref_cache_entry *entry
2657	= mctx->bkref_ents + cache_idx;
2658	do
2659	if (entry->node == bkref_node)
2660	return REG_NOERROR; / We already checked it. /
2661	while (entry++->more);
2662	}
2663
2664	subexp_num = dfa->nodes[bkref_node].opr.idx;
2665
2666	/ For each sub expression /
2667	for (sub_top_idx = `0`; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2668	{
2669	reg_errcode_t err;
2670	re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2671	re_sub_match_last_t *sub_last;
2672	Idx sub_last_idx, sl_str, bkref_str_off;
2673
2674	if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2675	continue; / It isn't related. /
2676
2677	sl_str = sub_top->str_idx;
2678	bkref_str_off = bkref_str_idx;
2679	/ At first, check the last node of sub expressions we already*
2680	evaluated. /*
2681	for (sub_last_idx = `0`; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2682	{
2683	regoff_t sl_str_diff;
2684	sub_last = sub_top->lasts[sub_last_idx];
2685	sl_str_diff = sub_last->str_idx - sl_str;
2686	/ The matched string by the sub expression match with the substring*
2687	at the back reference? /*
2688	if (sl_str_diff > `0`)
2689	{
2690	if (BE (bkref_str_off + sl_str_diff > mctx->input.valid_len, `0`))
2691	{
2692	/ Not enough chars for a successful match. /
2693	if (bkref_str_off + sl_str_diff > mctx->input.len)
2694	break;
2695
2696	err = clean_state_log_if_needed (mctx,
2697	bkref_str_off
2698	+ sl_str_diff);
2699	if (BE (err != REG_NOERROR, `0`))
2700	return err;
2701	buf = (const char *) re_string_get_buffer (&mctx->input);
2702	}
2703	if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != `0`)
2704	/ We don't need to search this sub expression any more. /
2705	break;
2706	}
2707	bkref_str_off += sl_str_diff;
2708	sl_str += sl_str_diff;
2709	err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2710	bkref_str_idx);
2711
2712	/ Reload buf, since the preceding call might have reallocated*
2713	the buffer. /*
2714	buf = (const char *) re_string_get_buffer (&mctx->input);
2715
2716	if (err == REG_NOMATCH)
2717	continue;
2718	if (BE (err != REG_NOERROR, `0`))
2719	return err;
2720	}
2721
2722	if (sub_last_idx < sub_top->nlasts)
2723	continue;
2724	if (sub_last_idx > `0`)
2725	++sl_str;
2726	/ Then, search for the other last nodes of the sub expression. /
2727	for (; sl_str <= bkref_str_idx; ++sl_str)
2728	{
2729	Idx cls_node;
2730	regoff_t sl_str_off;
2731	const re_node_set *nodes;
2732	sl_str_off = sl_str - sub_top->str_idx;
2733	/ The matched string by the sub expression match with the substring*
2734	at the back reference? /*
2735	if (sl_str_off > `0`)
2736	{
2737	if (BE (bkref_str_off >= mctx->input.valid_len, `0`))
2738	{
2739	/ If we are at the end of the input, we cannot match. /
2740	if (bkref_str_off >= mctx->input.len)
2741	break;
2742
2743	err = extend_buffers (mctx, bkref_str_off + `1`);
2744	if (BE (err != REG_NOERROR, `0`))
2745	return err;
2746
2747	buf = (const char *) re_string_get_buffer (&mctx->input);
2748	}
2749	if (buf [bkref_str_off++] != buf[sl_str - `1`])
2750	break; / We don't need to search this sub expression*
2751	any more. /*
2752	}
2753	if (mctx->state_log[sl_str] == NULL)
2754	continue;
2755	/ Does this state have a ')' of the sub expression? /
2756	nodes = &mctx->state_log[sl_str]->nodes;
2757	cls_node = find_subexp_node (dfa, nodes, subexp_num,
2758	OP_CLOSE_SUBEXP);
2759	if (cls_node == -`1`)
2760	continue; / No. /
2761	if (sub_top->path == NULL)
2762	{
2763	sub_top->path = calloc (sizeof (state_array_t),
2764	sl_str - sub_top->str_idx + `1`);
2765	if (sub_top->path == NULL)
2766	return REG_ESPACE;
2767	}
2768	/ Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node*
2769	in the current context? /*
2770	err = check_arrival (mctx, sub_top->path, sub_top->node,
2771	sub_top->str_idx, cls_node, sl_str,
2772	OP_CLOSE_SUBEXP);
2773	if (err == REG_NOMATCH)
2774	continue;
2775	if (BE (err != REG_NOERROR, `0`))
2776	return err;
2777	sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2778	if (BE (sub_last == NULL, `0`))
2779	return REG_ESPACE;
2780	err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2781	bkref_str_idx);
2782	if (err == REG_NOMATCH)
2783	continue;
2784	}
2785	}
2786	return REG_NOERROR;
2787	}
2788
2789	/ Helper functions for get_subexp(). /
2790
2791	/ Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.*
2792	If it can arrive, register the sub expression expressed with SUB_TOP
2793	and SUB_LAST. /*
2794
2795	static reg_errcode_t
2796	get_subexp_sub (re_match_context_t mctx, const* re_sub_match_top_t *sub_top,
2797	re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
2798	{
2799	reg_errcode_t err;
2800	Idx to_idx;
2801	/ Can the subexpression arrive the back reference? /
2802	err = check_arrival (mctx, &sub_last->path, sub_last->node,
2803	sub_last->str_idx, bkref_node, bkref_str,
2804	OP_OPEN_SUBEXP);
2805	if (err != REG_NOERROR)
2806	return err;
2807	err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2808	sub_last->str_idx);
2809	if (BE (err != REG_NOERROR, `0`))
2810	return err;
2811	to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2812	return clean_state_log_if_needed (mctx, to_idx);
2813	}
2814
2815	/ Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.*
2816	Search '(' if FL_OPEN, or search ')' otherwise.
2817	TODO: This function isn't efficient...
2818	Because there might be more than one nodes whose types are
2819	OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
2820	nodes.
2821	E.g. RE: (a){2} /*
2822
2823	static Idx
2824	find_subexp_node (const re_dfa_t dfa, const* re_node_set *nodes,
2825	Idx subexp_idx, int type)
2826	{
2827	Idx cls_idx;
2828	for (cls_idx = `0`; cls_idx < nodes->nelem; ++cls_idx)
2829	{
2830	Idx cls_node = nodes->elems[cls_idx];
2831	const re_token_t *node = dfa->nodes + cls_node;
2832	if (node->type == type
2833	&& node->opr.idx == subexp_idx)
2834	return cls_node;
2835	}
2836	return -`1`;
2837	}
2838
2839	/ Check whether the node TOP_NODE at TOP_STR can arrive to the node*
2840	LAST_NODE at LAST_STR. We record the path onto PATH since it will be
2841	heavily reused.
2842	Return REG_NOERROR if it can arrive, or REG_NOMATCH otherwise. /*
2843
2844	static reg_errcode_t
2845	__attribute_warn_unused_result__
2846	check_arrival (re_match_context_t mctx, state_array_t path, Idx top_node,
2847	Idx top_str, Idx last_node, Idx last_str, int type)
2848	{
2849	const re_dfa_t *const dfa = mctx->dfa;
2850	reg_errcode_t err = REG_NOERROR;
2851	Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
2852	re_dfastate_t *cur_state = NULL;
2853	re_node_set *cur_nodes, next_nodes;
2854	re_dfastate_t **backup_state_log;
2855	unsigned int context;
2856
2857	subexp_num = dfa->nodes[top_node].opr.idx;
2858	/ Extend the buffer if we need. /
2859	if (BE (path->alloc < last_str + mctx->max_mb_elem_len + `1`, `0`))
2860	{
2861	re_dfastate_t **new_array;
2862	Idx old_alloc = path->alloc;
2863	Idx incr_alloc = last_str + mctx->max_mb_elem_len + `1`;
2864	Idx new_alloc;
2865	if (BE (IDX_MAX - old_alloc < incr_alloc, `0`))
2866	return REG_ESPACE;
2867	new_alloc = old_alloc + incr_alloc;
2868	if (BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, `0`))
2869	return REG_ESPACE;
2870	new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
2871	if (BE (new_array == NULL, `0`))
2872	return REG_ESPACE;
2873	path->array = new_array;
2874	path->alloc = new_alloc;
2875	memset (new_array + old_alloc, `'\0'`,
2876	sizeof (re_dfastate_t ) (path->alloc - old_alloc));
2877	}
2878
2879	str_idx = path->next_idx ? path->next_idx : top_str;
2880
2881	/ Temporary modify MCTX. /
2882	backup_state_log = mctx->state_log;
2883	backup_cur_idx = mctx->input.cur_idx;
2884	mctx->state_log = path->array;
2885	mctx->input.cur_idx = str_idx;
2886
2887	/ Setup initial node set. /
2888	context = re_string_context_at (&mctx->input, str_idx - `1`, mctx->eflags);
2889	if (str_idx == top_str)
2890	{
2891	err = re_node_set_init_1 (&next_nodes, top_node);
2892	if (BE (err != REG_NOERROR, `0`))
2893	return err;
2894	err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2895	if (BE (err != REG_NOERROR, `0`))
2896	{
2897	re_node_set_free (&next_nodes);
2898	return err;
2899	}
2900	}
2901	else
2902	{
2903	cur_state = mctx->state_log[str_idx];
2904	if (cur_state && cur_state->has_backref)
2905	{
2906	err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2907	if (BE (err != REG_NOERROR, `0`))
2908	return err;
2909	}
2910	else
2911	re_node_set_init_empty (&next_nodes);
2912	}
2913	if (str_idx == top_str \|\| (cur_state && cur_state->has_backref))
2914	{
2915	if (next_nodes.nelem)
2916	{
2917	err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2918	subexp_num, type);
2919	if (BE (err != REG_NOERROR, `0`))
2920	{
2921	re_node_set_free (&next_nodes);
2922	return err;
2923	}
2924	}
2925	cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2926	if (BE (cur_state == NULL && err != REG_NOERROR, `0`))
2927	{
2928	re_node_set_free (&next_nodes);
2929	return err;
2930	}
2931	mctx->state_log[str_idx] = cur_state;
2932	}
2933
2934	for (null_cnt = `0`; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2935	{
2936	re_node_set_empty (&next_nodes);
2937	if (mctx->state_log[str_idx + `1`])
2938	{
2939	err = re_node_set_merge (&next_nodes,
2940	&mctx->state_log[str_idx + `1`]->nodes);
2941	if (BE (err != REG_NOERROR, `0`))
2942	{
2943	re_node_set_free (&next_nodes);
2944	return err;
2945	}
2946	}
2947	if (cur_state)
2948	{
2949	err = check_arrival_add_next_nodes (mctx, str_idx,
2950	&cur_state->non_eps_nodes,
2951	&next_nodes);
2952	if (BE (err != REG_NOERROR, `0`))
2953	{
2954	re_node_set_free (&next_nodes);
2955	return err;
2956	}
2957	}
2958	++str_idx;
2959	if (next_nodes.nelem)
2960	{
2961	err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2962	if (BE (err != REG_NOERROR, `0`))
2963	{
2964	re_node_set_free (&next_nodes);
2965	return err;
2966	}
2967	err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2968	subexp_num, type);
2969	if (BE (err != REG_NOERROR, `0`))
2970	{
2971	re_node_set_free (&next_nodes);
2972	return err;
2973	}
2974	}
2975	context = re_string_context_at (&mctx->input, str_idx - `1`, mctx->eflags);
2976	cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2977	if (BE (cur_state == NULL && err != REG_NOERROR, `0`))
2978	{
2979	re_node_set_free (&next_nodes);
2980	return err;
2981	}
2982	mctx->state_log[str_idx] = cur_state;
2983	null_cnt = cur_state == NULL ? null_cnt + `1` : `0`;
2984	}
2985	re_node_set_free (&next_nodes);
2986	cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
2987	: &mctx->state_log[last_str]->nodes);
2988	path->next_idx = str_idx;
2989
2990	/ Fix MCTX. /
2991	mctx->state_log = backup_state_log;
2992	mctx->input.cur_idx = backup_cur_idx;
2993
2994	/ Then check the current node set has the node LAST_NODE. /
2995	if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
2996	return REG_NOERROR;
2997
2998	return REG_NOMATCH;
2999	}
3000
3001	/ Helper functions for check_arrival. /
3002
3003	/ Calculate the destination nodes of CUR_NODES at STR_IDX, and append them*
3004	to NEXT_NODES.
3005	TODO: This function is similar to the functions transit_state(),*
3006	however this function has many additional works.
3007	Can't we unify them? /*
3008
3009	static reg_errcode_t
3010	__attribute_warn_unused_result__
3011	check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
3012	re_node_set cur_nodes, re_node_set next_nodes)
3013	{
3014	const re_dfa_t *const dfa = mctx->dfa;
3015	bool ok;
3016	Idx cur_idx;
3017	#ifdef RE_ENABLE_I18N
3018	reg_errcode_t err = REG_NOERROR;
3019	#endif
3020	re_node_set union_set;
3021	re_node_set_init_empty (&union_set);
3022	for (cur_idx = `0`; cur_idx < cur_nodes->nelem; ++cur_idx)
3023	{
3024	int naccepted = `0`;
3025	Idx cur_node = cur_nodes->elems[cur_idx];
3026	#ifdef DEBUG
3027	re_token_type_t type = dfa->nodes[cur_node].type;
3028	assert (!IS_EPSILON_NODE (type));
3029	#endif
3030	#ifdef RE_ENABLE_I18N
3031	/ If the node may accept "multi byte". /
3032	if (dfa->nodes[cur_node].accept_mb)
3033	{
3034	naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
3035	str_idx);
3036	if (naccepted > `1`)
3037	{
3038	re_dfastate_t *dest_state;
3039	Idx next_node = dfa->nexts[cur_node];
3040	Idx next_idx = str_idx + naccepted;
3041	dest_state = mctx->state_log[next_idx];
3042	re_node_set_empty (&union_set);
3043	if (dest_state)
3044	{
3045	err = re_node_set_merge (&union_set, &dest_state->nodes);
3046	if (BE (err != REG_NOERROR, `0`))
3047	{
3048	re_node_set_free (&union_set);
3049	return err;
3050	}
3051	}
3052	ok = re_node_set_insert (&union_set, next_node);
3053	if (BE (! ok, `0`))
3054	{
3055	re_node_set_free (&union_set);
3056	return REG_ESPACE;
3057	}
3058	mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3059	&union_set);
3060	if (BE (mctx->state_log[next_idx] == NULL
3061	&& err != REG_NOERROR, `0`))
3062	{
3063	re_node_set_free (&union_set);
3064	return err;
3065	}
3066	}
3067	}
3068	#endif /* RE_ENABLE_I18N */
3069	if (naccepted
3070	\|\| check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3071	{
3072	ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3073	if (BE (! ok, `0`))
3074	{
3075	re_node_set_free (&union_set);
3076	return REG_ESPACE;
3077	}
3078	}
3079	}
3080	re_node_set_free (&union_set);
3081	return REG_NOERROR;
3082	}
3083
3084	/ For all the nodes in CUR_NODES, add the epsilon closures of them to*
3085	CUR_NODES, however exclude the nodes which are:
3086	- inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
3087	- out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
3088	*/
3089
3090	static reg_errcode_t
3091	check_arrival_expand_ecl (const re_dfa_t dfa, re_node_set cur_nodes,
3092	Idx ex_subexp, int type)
3093	{
3094	reg_errcode_t err;
3095	Idx idx, outside_node;
3096	re_node_set new_nodes;
3097	#ifdef DEBUG
3098	assert (cur_nodes->nelem);
3099	#endif
3100	err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3101	if (BE (err != REG_NOERROR, `0`))
3102	return err;
3103	/ Create a new node set NEW_NODES with the nodes which are epsilon*
3104	closures of the node in CUR_NODES. /*
3105
3106	for (idx = `0`; idx < cur_nodes->nelem; ++idx)
3107	{
3108	Idx cur_node = cur_nodes->elems[idx];
3109	const re_node_set *eclosure = dfa->eclosures + cur_node;
3110	outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3111	if (outside_node == -`1`)
3112	{
3113	/ There are no problematic nodes, just merge them. /
3114	err = re_node_set_merge (&new_nodes, eclosure);
3115	if (BE (err != REG_NOERROR, `0`))
3116	{
3117	re_node_set_free (&new_nodes);
3118	return err;
3119	}
3120	}
3121	else
3122	{
3123	/ There are problematic nodes, re-calculate incrementally. /
3124	err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3125	ex_subexp, type);
3126	if (BE (err != REG_NOERROR, `0`))
3127	{
3128	re_node_set_free (&new_nodes);
3129	return err;
3130	}
3131	}
3132	}
3133	re_node_set_free (cur_nodes);
3134	*cur_nodes = new_nodes;
3135	return REG_NOERROR;
3136	}
3137
3138	/ Helper function for check_arrival_expand_ecl.*
3139	Check incrementally the epsilon closure of TARGET, and if it isn't
3140	problematic append it to DST_NODES. /*
3141
3142	static reg_errcode_t
3143	__attribute_warn_unused_result__
3144	check_arrival_expand_ecl_sub (const re_dfa_t dfa, re_node_set dst_nodes,
3145	Idx target, Idx ex_subexp, int type)
3146	{
3147	Idx cur_node;
3148	for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3149	{
3150	bool ok;
3151
3152	if (dfa->nodes[cur_node].type == type
3153	&& dfa->nodes[cur_node].opr.idx == ex_subexp)
3154	{
3155	if (type == OP_CLOSE_SUBEXP)
3156	{
3157	ok = re_node_set_insert (dst_nodes, cur_node);
3158	if (BE (! ok, `0`))
3159	return REG_ESPACE;
3160	}
3161	break;
3162	}
3163	ok = re_node_set_insert (dst_nodes, cur_node);
3164	if (BE (! ok, `0`))
3165	return REG_ESPACE;
3166	if (dfa->edests[cur_node].nelem == `0`)
3167	break;
3168	if (dfa->edests[cur_node].nelem == `2`)
3169	{
3170	reg_errcode_t err;
3171	err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3172	dfa->edests[cur_node].elems[`1`],
3173	ex_subexp, type);
3174	if (BE (err != REG_NOERROR, `0`))
3175	return err;
3176	}
3177	cur_node = dfa->edests[cur_node].elems[`0`];
3178	}
3179	return REG_NOERROR;
3180	}
3181
3182
3183	/ For all the back references in the current state, calculate the*
3184	destination of the back references by the appropriate entry
3185	in MCTX->BKREF_ENTS. /*
3186
3187	static reg_errcode_t
3188	__attribute_warn_unused_result__
3189	expand_bkref_cache (re_match_context_t mctx, re_node_set cur_nodes,
3190	Idx cur_str, Idx subexp_num, int type)
3191	{
3192	const re_dfa_t *const dfa = mctx->dfa;
3193	reg_errcode_t err;
3194	Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3195	struct re_backref_cache_entry *ent;
3196
3197	if (cache_idx_start == -`1`)
3198	return REG_NOERROR;
3199
3200	restart:
3201	ent = mctx->bkref_ents + cache_idx_start;
3202	do
3203	{
3204	Idx to_idx, next_node;
3205
3206	/ Is this entry ENT is appropriate? /
3207	if (!re_node_set_contains (cur_nodes, ent->node))
3208	continue; / No. /
3209
3210	to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3211	/ Calculate the destination of the back reference, and append it*
3212	to MCTX->STATE_LOG. /*
3213	if (to_idx == cur_str)
3214	{
3215	/ The backreference did epsilon transit, we must re-check all the*
3216	node in the current state. /*
3217	re_node_set new_dests;
3218	reg_errcode_t err2, err3;
3219	next_node = dfa->edests[ent->node].elems[`0`];
3220	if (re_node_set_contains (cur_nodes, next_node))
3221	continue;
3222	err = re_node_set_init_1 (&new_dests, next_node);
3223	err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3224	err3 = re_node_set_merge (cur_nodes, &new_dests);
3225	re_node_set_free (&new_dests);
3226	if (BE (err != REG_NOERROR \|\| err2 != REG_NOERROR
3227	\|\| err3 != REG_NOERROR, `0`))
3228	{
3229	err = (err != REG_NOERROR ? err
3230	: (err2 != REG_NOERROR ? err2 : err3));
3231	return err;
3232	}
3233	/ TODO: It is still inefficient... /
3234	goto restart;
3235	}
3236	else
3237	{
3238	re_node_set union_set;
3239	next_node = dfa->nexts[ent->node];
3240	if (mctx->state_log[to_idx])
3241	{
3242	bool ok;
3243	if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3244	next_node))
3245	continue;
3246	err = re_node_set_init_copy (&union_set,
3247	&mctx->state_log[to_idx]->nodes);
3248	ok = re_node_set_insert (&union_set, next_node);
3249	if (BE (err != REG_NOERROR \|\| ! ok, `0`))
3250	{
3251	re_node_set_free (&union_set);
3252	err = err != REG_NOERROR ? err : REG_ESPACE;
3253	return err;
3254	}
3255	}
3256	else
3257	{
3258	err = re_node_set_init_1 (&union_set, next_node);
3259	if (BE (err != REG_NOERROR, `0`))
3260	return err;
3261	}
3262	mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3263	re_node_set_free (&union_set);
3264	if (BE (mctx->state_log[to_idx] == NULL
3265	&& err != REG_NOERROR, `0`))
3266	return err;
3267	}
3268	}
3269	while (ent++->more);
3270	return REG_NOERROR;
3271	}
3272
3273	/ Build transition table for the state.*
3274	Return true if successful. /*
3275
3276	static bool
3277	build_trtable (const re_dfa_t dfa, re_dfastate_t state)
3278	{
3279	reg_errcode_t err;
3280	Idx i, j;
3281	int ch;
3282	bool need_word_trtable = false;
3283	bitset_word_t elem, mask;
3284	bool dests_node_malloced = false;
3285	bool dest_states_malloced = false;
3286	Idx ndests; / Number of the destination states from 'state'. /
3287	re_dfastate_t **trtable;
3288	re_dfastate_t dest_states = NULL, dest_states_word, **dest_states_nl;
3289	re_node_set follows, *dests_node;
3290	bitset_t *dests_ch;
3291	bitset_t acceptable;
3292
3293	struct dests_alloc
3294	{
3295	re_node_set dests_node[SBC_MAX];
3296	bitset_t dests_ch[SBC_MAX];
3297	} *dests_alloc;
3298
3299	/ We build DFA states which corresponds to the destination nodes*
3300	from 'state'. 'dests_node[i]' represents the nodes which i-th
3301	destination state contains, and 'dests_ch[i]' represents the
3302	characters which i-th destination state accepts. /*
3303	if (__libc_use_alloca (sizeof (struct dests_alloc)))
3304	dests_alloc = (struct dests_alloc ) alloca (sizeof* (struct dests_alloc));
3305	else
3306	{
3307	dests_alloc = re_malloc (struct dests_alloc, `1`);
3308	if (BE (dests_alloc == NULL, `0`))
3309	return false;
3310	dests_node_malloced = true;
3311	}
3312	dests_node = dests_alloc->dests_node;
3313	dests_ch = dests_alloc->dests_ch;
3314
3315	/ Initialize transition table. /
3316	state->word_trtable = state->trtable = NULL;
3317
3318	/ At first, group all nodes belonging to 'state' into several*
3319	destinations. /*
3320	ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3321	if (BE (ndests <= `0`, `0`))
3322	{
3323	if (dests_node_malloced)
3324	re_free (dests_alloc);
3325	/ Return false in case of an error, true otherwise. /
3326	if (ndests == `0`)
3327	{
3328	state->trtable = (re_dfastate_t **)
3329	calloc (sizeof (re_dfastate_t *), SBC_MAX);
3330	if (BE (state->trtable == NULL, `0`))
3331	return false;
3332	return true;
3333	}
3334	return false;
3335	}
3336
3337	err = re_node_set_alloc (&follows, ndests + `1`);
3338	if (BE (err != REG_NOERROR, `0`))
3339	goto out_free;
3340
3341	/ Avoid arithmetic overflow in size calculation. /
3342	if (BE ((((SIZE_MAX - (sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX)
3343	/ (`3` * sizeof (re_dfastate_t *)))
3344	< ndests),
3345	`0`))
3346	goto out_free;
3347
3348	if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset_t)) * SBC_MAX
3349	+ ndests * `3` * sizeof (re_dfastate_t *)))
3350	dest_states = (re_dfastate_t **)
3351	alloca (ndests * `3` * sizeof (re_dfastate_t *));
3352	else
3353	{
3354	dest_states = re_malloc (re_dfastate_t , ndests `3`);
3355	if (BE (dest_states == NULL, `0`))
3356	{
3357	out_free:
3358	if (dest_states_malloced)
3359	re_free (dest_states);
3360	re_node_set_free (&follows);
3361	for (i = `0`; i < ndests; ++i)
3362	re_node_set_free (dests_node + i);
3363	if (dests_node_malloced)
3364	re_free (dests_alloc);
3365	return false;
3366	}
3367	dest_states_malloced = true;
3368	}
3369	dest_states_word = dest_states + ndests;
3370	dest_states_nl = dest_states_word + ndests;
3371	bitset_empty (acceptable);
3372
3373	/ Then build the states for all destinations. /
3374	for (i = `0`; i < ndests; ++i)
3375	{
3376	Idx next_node;
3377	re_node_set_empty (&follows);
3378	/ Merge the follows of this destination states. /
3379	for (j = `0`; j < dests_node[i].nelem; ++j)
3380	{
3381	next_node = dfa->nexts[dests_node[i].elems[j]];
3382	if (next_node != -`1`)
3383	{
3384	err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3385	if (BE (err != REG_NOERROR, `0`))
3386	goto out_free;
3387	}
3388	}
3389	dest_states[i] = re_acquire_state_context (&err, dfa, &follows, `0`);
3390	if (BE (dest_states[i] == NULL && err != REG_NOERROR, `0`))
3391	goto out_free;
3392	/ If the new state has context constraint,*
3393	build appropriate states for these contexts. /*
3394	if (dest_states[i]->has_constraint)
3395	{
3396	dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3397	CONTEXT_WORD);
3398	if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, `0`))
3399	goto out_free;
3400
3401	if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > `1`)
3402	need_word_trtable = true;
3403
3404	dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3405	CONTEXT_NEWLINE);
3406	if (BE (dest_states_nl[i] == NULL && err != REG_NOERROR, `0`))
3407	goto out_free;
3408	}
3409	else
3410	{
3411	dest_states_word[i] = dest_states[i];
3412	dest_states_nl[i] = dest_states[i];
3413	}
3414	bitset_merge (acceptable, dests_ch[i]);
3415	}
3416
3417	if (!BE (need_word_trtable, `0`))
3418	{
3419	/ We don't care about whether the following character is a word*
3420	character, or we are in a single-byte character set so we can
3421	discern by looking at the character code: allocate a
3422	256-entry transition table. /*
3423	trtable = state->trtable =
3424	(re_dfastate_t ) calloc (sizeof** (re_dfastate_t *), SBC_MAX);
3425	if (BE (trtable == NULL, `0`))
3426	goto out_free;
3427
3428	/ For all characters ch...: /
3429	for (i = `0`; i < BITSET_WORDS; ++i)
3430	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = `1`;
3431	elem;
3432	mask <<= `1`, elem >>= `1`, ++ch)
3433	if (BE (elem & `1`, `0`))
3434	{
3435	/ There must be exactly one destination which accepts*
3436	character ch. See group_nodes_into_DFAstates. /*
3437	for (j = `0`; (dests_ch[j][i] & mask) == `0`; ++j)
3438	;
3439
3440	/ j-th destination accepts the word character ch. /
3441	if (dfa->word_char[i] & mask)
3442	trtable[ch] = dest_states_word[j];
3443	else
3444	trtable[ch] = dest_states[j];
3445	}
3446	}
3447	else
3448	{
3449	/ We care about whether the following character is a word*
3450	character, and we are in a multi-byte character set: discern
3451	by looking at the character code: build two 256-entry
3452	transition tables, one starting at trtable[0] and one
3453	starting at trtable[SBC_MAX]. /*
3454	trtable = state->word_trtable =
3455	(re_dfastate_t ) calloc (sizeof** (re_dfastate_t ), `2` SBC_MAX);
3456	if (BE (trtable == NULL, `0`))
3457	goto out_free;
3458
3459	/ For all characters ch...: /
3460	for (i = `0`; i < BITSET_WORDS; ++i)
3461	for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = `1`;
3462	elem;
3463	mask <<= `1`, elem >>= `1`, ++ch)
3464	if (BE (elem & `1`, `0`))
3465	{
3466	/ There must be exactly one destination which accepts*
3467	character ch. See group_nodes_into_DFAstates. /*
3468	for (j = `0`; (dests_ch[j][i] & mask) == `0`; ++j)
3469	;
3470
3471	/ j-th destination accepts the word character ch. /
3472	trtable[ch] = dest_states[j];
3473	trtable[ch + SBC_MAX] = dest_states_word[j];
3474	}
3475	}
3476
3477	/ new line /
3478	if (bitset_contain (acceptable, NEWLINE_CHAR))
3479	{
3480	/ The current state accepts newline character. /
3481	for (j = `0`; j < ndests; ++j)
3482	if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3483	{
3484	/ k-th destination accepts newline character. /
3485	trtable[NEWLINE_CHAR] = dest_states_nl[j];
3486	if (need_word_trtable)
3487	trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3488	/ There must be only one destination which accepts*
3489	newline. See group_nodes_into_DFAstates. /*
3490	break;
3491	}
3492	}
3493
3494	if (dest_states_malloced)
3495	re_free (dest_states);
3496
3497	re_node_set_free (&follows);
3498	for (i = `0`; i < ndests; ++i)
3499	re_node_set_free (dests_node + i);
3500
3501	if (dests_node_malloced)
3502	re_free (dests_alloc);
3503
3504	return true;
3505	}
3506
3507	/ Group all nodes belonging to STATE into several destinations.*
3508	Then for all destinations, set the nodes belonging to the destination
3509	to DESTS_NODE[i] and set the characters accepted by the destination
3510	to DEST_CH[i]. This function return the number of destinations. /*
3511
3512	static Idx
3513	group_nodes_into_DFAstates (const re_dfa_t dfa, const* re_dfastate_t *state,
3514	re_node_set dests_node, bitset_t dests_ch)
3515	{
3516	reg_errcode_t err;
3517	bool ok;
3518	Idx i, j, k;
3519	Idx ndests; / Number of the destinations from 'state'. /
3520	bitset_t accepts; / Characters a node can accept. /
3521	const re_node_set *cur_nodes = &state->nodes;
3522	bitset_empty (accepts);
3523	ndests = `0`;
3524
3525	/ For all the nodes belonging to 'state', /
3526	for (i = `0`; i < cur_nodes->nelem; ++i)
3527	{
3528	re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3529	re_token_type_t type = node->type;
3530	unsigned int constraint = node->constraint;
3531
3532	/ Enumerate all single byte character this node can accept. /
3533	if (type == CHARACTER)
3534	bitset_set (accepts, node->opr.c);
3535	else if (type == SIMPLE_BRACKET)
3536	{
3537	bitset_merge (accepts, node->opr.sbcset);
3538	}
3539	else if (type == OP_PERIOD)
3540	{
3541	#ifdef RE_ENABLE_I18N
3542	if (dfa->mb_cur_max > `1`)
3543	bitset_merge (accepts, dfa->sb_char);
3544	else
3545	#endif
3546	bitset_set_all (accepts);
3547	if (!(dfa->syntax & RE_DOT_NEWLINE))
3548	bitset_clear (accepts, `'\n'`);
3549	if (dfa->syntax & RE_DOT_NOT_NULL)
3550	bitset_clear (accepts, `'\0'`);
3551	}
3552	#ifdef RE_ENABLE_I18N
3553	else if (type == OP_UTF8_PERIOD)
3554	{
3555	if (ASCII_CHARS % BITSET_WORD_BITS == `0`)
3556	memset (accepts, -`1`, ASCII_CHARS / CHAR_BIT);
3557	else
3558	bitset_merge (accepts, utf8_sb_map);
3559	if (!(dfa->syntax & RE_DOT_NEWLINE))
3560	bitset_clear (accepts, `'\n'`);
3561	if (dfa->syntax & RE_DOT_NOT_NULL)
3562	bitset_clear (accepts, `'\0'`);
3563	}
3564	#endif
3565	else
3566	continue;
3567
3568	/ Check the 'accepts' and sift the characters which are not*
3569	match it the context. /*
3570	if (constraint)
3571	{
3572	if (constraint & NEXT_NEWLINE_CONSTRAINT)
3573	{
3574	bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3575	bitset_empty (accepts);
3576	if (accepts_newline)
3577	bitset_set (accepts, NEWLINE_CHAR);
3578	else
3579	continue;
3580	}
3581	if (constraint & NEXT_ENDBUF_CONSTRAINT)
3582	{
3583	bitset_empty (accepts);
3584	continue;
3585	}
3586
3587	if (constraint & NEXT_WORD_CONSTRAINT)
3588	{
3589	bitset_word_t any_set = `0`;
3590	if (type == CHARACTER && !node->word_char)
3591	{
3592	bitset_empty (accepts);
3593	continue;
3594	}
3595	#ifdef RE_ENABLE_I18N
3596	if (dfa->mb_cur_max > `1`)
3597	for (j = `0`; j < BITSET_WORDS; ++j)
3598	any_set \|= (accepts[j] &= (dfa->word_char[j] \| ~dfa->sb_char[j]));
3599	else
3600	#endif
3601	for (j = `0`; j < BITSET_WORDS; ++j)
3602	any_set \|= (accepts[j] &= dfa->word_char[j]);
3603	if (!any_set)
3604	continue;
3605	}
3606	if (constraint & NEXT_NOTWORD_CONSTRAINT)
3607	{
3608	bitset_word_t any_set = `0`;
3609	if (type == CHARACTER && node->word_char)
3610	{
3611	bitset_empty (accepts);
3612	continue;
3613	}
3614	#ifdef RE_ENABLE_I18N
3615	if (dfa->mb_cur_max > `1`)
3616	for (j = `0`; j < BITSET_WORDS; ++j)
3617	any_set \|= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3618	else
3619	#endif
3620	for (j = `0`; j < BITSET_WORDS; ++j)
3621	any_set \|= (accepts[j] &= ~dfa->word_char[j]);
3622	if (!any_set)
3623	continue;
3624	}
3625	}
3626
3627	/ Then divide 'accepts' into DFA states, or create a new*
3628	state. Above, we make sure that accepts is not empty. /*
3629	for (j = `0`; j < ndests; ++j)
3630	{
3631	bitset_t intersec; / Intersection sets, see below. /
3632	bitset_t remains;
3633	/ Flags, see below. /
3634	bitset_word_t has_intersec, not_subset, not_consumed;
3635
3636	/ Optimization, skip if this state doesn't accept the character. /
3637	if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3638	continue;
3639
3640	/ Enumerate the intersection set of this state and 'accepts'. /
3641	has_intersec = `0`;
3642	for (k = `0`; k < BITSET_WORDS; ++k)
3643	has_intersec \|= intersec[k] = accepts[k] & dests_ch[j][k];
3644	/ And skip if the intersection set is empty. /
3645	if (!has_intersec)
3646	continue;
3647
3648	/ Then check if this state is a subset of 'accepts'. /
3649	not_subset = not_consumed = `0`;
3650	for (k = `0`; k < BITSET_WORDS; ++k)
3651	{
3652	not_subset \|= remains[k] = ~accepts[k] & dests_ch[j][k];
3653	not_consumed \|= accepts[k] = accepts[k] & ~dests_ch[j][k];
3654	}
3655
3656	/ If this state isn't a subset of 'accepts', create a*
3657	new group state, which has the 'remains'. /*
3658	if (not_subset)
3659	{
3660	bitset_copy (dests_ch[ndests], remains);
3661	bitset_copy (dests_ch[j], intersec);
3662	err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3663	if (BE (err != REG_NOERROR, `0`))
3664	goto error_return;
3665	++ndests;
3666	}
3667
3668	/ Put the position in the current group. /
3669	ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3670	if (BE (! ok, `0`))
3671	goto error_return;
3672
3673	/ If all characters are consumed, go to next node. /
3674	if (!not_consumed)
3675	break;
3676	}
3677	/ Some characters remain, create a new group. /
3678	if (j == ndests)
3679	{
3680	bitset_copy (dests_ch[ndests], accepts);
3681	err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3682	if (BE (err != REG_NOERROR, `0`))
3683	goto error_return;
3684	++ndests;
3685	bitset_empty (accepts);
3686	}
3687	}
3688	return ndests;
3689	error_return:
3690	for (j = `0`; j < ndests; ++j)
3691	re_node_set_free (dests_node + j);
3692	return -`1`;
3693	}
3694
3695	#ifdef RE_ENABLE_I18N
3696	/ Check how many bytes the node 'dfa->nodes[node_idx]' accepts.*
3697	Return the number of the bytes the node accepts.
3698	STR_IDX is the current index of the input string.
3699
3700	This function handles the nodes which can accept one character, or
3701	one collating element like '.', '[a-z]', opposite to the other nodes
3702	can only accept one byte. /*
3703
3704	# ifdef _LIBC
3705	# include <locale/weight.h>
3706	# endif
3707
3708	static int
3709	check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
3710	const re_string_t *input, Idx str_idx)
3711	{
3712	const re_token_t *node = dfa->nodes + node_idx;
3713	int char_len, elem_len;
3714	Idx i;
3715
3716	if (BE (node->type == OP_UTF8_PERIOD, `0`))
3717	{
3718	unsigned char c = re_string_byte_at (input, str_idx), d;
3719	if (BE (c < `0xc2`, `1`))
3720	return `0`;
3721
3722	if (str_idx + `2` > input->len)
3723	return `0`;
3724
3725	d = re_string_byte_at (input, str_idx + `1`);
3726	if (c < `0xe0`)
3727	return (d < `0x80` \|\| d > `0xbf`) ? `0` : `2`;
3728	else if (c < `0xf0`)
3729	{
3730	char_len = `3`;
3731	if (c == `0xe0` && d < `0xa0`)
3732	return `0`;
3733	}
3734	else if (c < `0xf8`)
3735	{
3736	char_len = `4`;
3737	if (c == `0xf0` && d < `0x90`)
3738	return `0`;
3739	}
3740	else if (c < `0xfc`)
3741	{
3742	char_len = `5`;
3743	if (c == `0xf8` && d < `0x88`)
3744	return `0`;
3745	}
3746	else if (c < `0xfe`)
3747	{
3748	char_len = `6`;
3749	if (c == `0xfc` && d < `0x84`)
3750	return `0`;
3751	}
3752	else
3753	return `0`;
3754
3755	if (str_idx + char_len > input->len)
3756	return `0`;
3757
3758	for (i = `1`; i < char_len; ++i)
3759	{
3760	d = re_string_byte_at (input, str_idx + i);
3761	if (d < `0x80` \|\| d > `0xbf`)
3762	return `0`;
3763	}
3764	return char_len;
3765	}
3766
3767	char_len = re_string_char_size_at (input, str_idx);
3768	if (node->type == OP_PERIOD)
3769	{
3770	if (char_len <= `1`)
3771	return `0`;
3772	/ FIXME: I don't think this if is needed, as both '\n'*
3773	and '\0' are char_len == 1. /*
3774	/ '.' accepts any one character except the following two cases. /
3775	if ((!(dfa->syntax & RE_DOT_NEWLINE) &&
3776	re_string_byte_at (input, str_idx) == `'\n'`) \|\|
3777	((dfa->syntax & RE_DOT_NOT_NULL) &&
3778	re_string_byte_at (input, str_idx) == `'\0'`))
3779	return `0`;
3780	return char_len;
3781	}
3782
3783	elem_len = re_string_elem_size_at (input, str_idx);
3784	if ((elem_len <= `1` && char_len <= `1`) \|\| char_len == `0`)
3785	return `0`;
3786
3787	if (node->type == COMPLEX_BRACKET)
3788	{
3789	const re_charset_t *cset = node->opr.mbcset;
3790	# ifdef _LIBC
3791	const unsigned char *pin
3792	= ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3793	Idx j;
3794	uint32_t nrules;
3795	# endif /* _LIBC */
3796	int match_len = `0`;
3797	wchar_t wc = ((cset->nranges \|\| cset->nchar_classes \|\| cset->nmbchars)
3798	? re_string_wchar_at (input, str_idx) : `0`);
3799
3800	/ match with multibyte character? /
3801	for (i = `0`; i < cset->nmbchars; ++i)
3802	if (wc == cset->mbchars[i])
3803	{
3804	match_len = char_len;
3805	goto check_node_accept_bytes_match;
3806	}
3807	/ match with character_class? /
3808	for (i = `0`; i < cset->nchar_classes; ++i)
3809	{
3810	wctype_t wt = cset->char_classes[i];
3811	if (__iswctype (wc, wt))
3812	{
3813	match_len = char_len;
3814	goto check_node_accept_bytes_match;
3815	}
3816	}
3817
3818	# ifdef _LIBC
3819	nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3820	if (nrules != `0`)
3821	{
3822	unsigned int in_collseq = `0`;
3823	const int32_t table, indirect;
3824	const unsigned char weights, extra;
3825	const char *collseqwc;
3826
3827	/ match with collating_symbol? /
3828	if (cset->ncoll_syms)
3829	extra = (const unsigned char *)
3830	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3831	for (i = `0`; i < cset->ncoll_syms; ++i)
3832	{
3833	const unsigned char *coll_sym = extra + cset->coll_syms[i];
3834	/ Compare the length of input collating element and*
3835	the length of current collating element. /*
3836	if (*coll_sym != elem_len)
3837	continue;
3838	/ Compare each bytes. /
3839	for (j = `0`; j < *coll_sym; j++)
3840	if (pin[j] != coll_sym[`1` + j])
3841	break;
3842	if (j == *coll_sym)
3843	{
3844	/ Match if every bytes is equal. /
3845	match_len = j;
3846	goto check_node_accept_bytes_match;
3847	}
3848	}
3849
3850	if (cset->nranges)
3851	{
3852	if (elem_len <= char_len)
3853	{
3854	collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3855	in_collseq = __collseq_table_lookup (collseqwc, wc);
3856	}
3857	else
3858	in_collseq = find_collation_sequence_value (pin, elem_len);
3859	}
3860	/ match with range expression? /
3861	/ FIXME: Implement rational ranges here, too. /
3862	for (i = `0`; i < cset->nranges; ++i)
3863	if (cset->range_starts[i] <= in_collseq
3864	&& in_collseq <= cset->range_ends[i])
3865	{
3866	match_len = elem_len;
3867	goto check_node_accept_bytes_match;
3868	}
3869
3870	/ match with equivalence_class? /
3871	if (cset->nequiv_classes)
3872	{
3873	const unsigned char *cp = pin;
3874	table = (const int32_t *)
3875	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3876	weights = (const unsigned char *)
3877	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3878	extra = (const unsigned char *)
3879	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3880	indirect = (const int32_t *)
3881	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3882	int32_t idx = findidx (table, indirect, extra, &cp, elem_len);
3883	int32_t rule = idx >> `24`;
3884	idx &= `0xffffff`;
3885	if (idx > `0`)
3886	{
3887	size_t weight_len = weights[idx];
3888	for (i = `0`; i < cset->nequiv_classes; ++i)
3889	{
3890	int32_t equiv_class_idx = cset->equiv_classes[i];
3891	int32_t equiv_class_rule = equiv_class_idx >> `24`;
3892	equiv_class_idx &= `0xffffff`;
3893	if (weights[equiv_class_idx] == weight_len
3894	&& equiv_class_rule == rule
3895	&& memcmp (weights + idx + `1`,
3896	weights + equiv_class_idx + `1`,
3897	weight_len) == `0`)
3898	{
3899	match_len = elem_len;
3900	goto check_node_accept_bytes_match;
3901	}
3902	}
3903	}
3904	}
3905	}
3906	else
3907	# endif /* _LIBC */
3908	{
3909	/ match with range expression? /
3910	for (i = `0`; i < cset->nranges; ++i)
3911	{
3912	if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i])
3913	{
3914	match_len = char_len;
3915	goto check_node_accept_bytes_match;
3916	}
3917	}
3918	}
3919	check_node_accept_bytes_match:
3920	if (!cset->non_match)
3921	return match_len;
3922	else
3923	{
3924	if (match_len > `0`)
3925	return `0`;
3926	else
3927	return (elem_len > char_len) ? elem_len : char_len;
3928	}
3929	}
3930	return `0`;
3931	}
3932
3933	# ifdef _LIBC
3934	static unsigned int
3935	find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
3936	{
3937	uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3938	if (nrules == `0`)
3939	{
3940	if (mbs_len == `1`)
3941	{
3942	/ No valid character. Match it as a single byte character. /
3943	const unsigned char collseq = (const* unsigned char *)
3944	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3945	return collseq[mbs[`0`]];
3946	}
3947	return UINT_MAX;
3948	}
3949	else
3950	{
3951	int32_t idx;
3952	const unsigned char extra = (const* unsigned char *)
3953	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3954	int32_t extrasize = (const unsigned char *)
3955	_NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + `1`) - extra;
3956
3957	for (idx = `0`; idx < extrasize;)
3958	{
3959	int mbs_cnt;
3960	bool found = false;
3961	int32_t elem_mbs_len;
3962	/ Skip the name of collating element name. /
3963	idx = idx + extra[idx] + `1`;
3964	elem_mbs_len = extra[idx++];
3965	if (mbs_len == elem_mbs_len)
3966	{
3967	for (mbs_cnt = `0`; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3968	if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3969	break;
3970	if (mbs_cnt == elem_mbs_len)
3971	/ Found the entry. /
3972	found = true;
3973	}
3974	/ Skip the byte sequence of the collating element. /
3975	idx += elem_mbs_len;
3976	/ Adjust for the alignment. /
3977	idx = (idx + `3`) & ~`3`;
3978	/ Skip the collation sequence value. /
3979	idx += sizeof (uint32_t);
3980	/ Skip the wide char sequence of the collating element. /
3981	idx = idx + sizeof (uint32_t) * ((int32_t ) (extra + idx) + `1`);
3982	/ If we found the entry, return the sequence value. /
3983	if (found)
3984	return (uint32_t ) (extra + idx);
3985	/ Skip the collation sequence value. /
3986	idx += sizeof (uint32_t);
3987	}
3988	return UINT_MAX;
3989	}
3990	}
3991	# endif /* _LIBC */
3992	#endif /* RE_ENABLE_I18N */
3993
3994	/ Check whether the node accepts the byte which is IDX-th*
3995	byte of the INPUT. /*
3996
3997	static bool
3998	check_node_accept (const re_match_context_t mctx, const* re_token_t *node,
3999	Idx idx)
4000	{
4001	unsigned char ch;
4002	ch = re_string_byte_at (&mctx->input, idx);
4003	switch (node->type)
4004	{
4005	case CHARACTER:
4006	if (node->opr.c != ch)
4007	return false;
4008	break;
4009
4010	case SIMPLE_BRACKET:
4011	if (!bitset_contain (node->opr.sbcset, ch))
4012	return false;
4013	break;
4014
4015	#ifdef RE_ENABLE_I18N
4016	case OP_UTF8_PERIOD:
4017	if (ch >= ASCII_CHARS)
4018	return false;
4019	FALLTHROUGH;
4020	#endif
4021	case OP_PERIOD:
4022	if ((ch == `'\n'` && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
4023	\|\| (ch == `'\0'` && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
4024	return false;
4025	break;
4026
4027	default:
4028	return false;
4029	}
4030
4031	if (node->constraint)
4032	{
4033	/ The node has constraints. Check whether the current context*
4034	satisfies the constraints. /*
4035	unsigned int context = re_string_context_at (&mctx->input, idx,
4036	mctx->eflags);
4037	if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
4038	return false;
4039	}
4040
4041	return true;
4042	}
4043
4044	/ Extend the buffers, if the buffers have run out. /
4045
4046	static reg_errcode_t
4047	__attribute_warn_unused_result__
4048	extend_buffers (re_match_context_t mctx, int* min_len)
4049	{
4050	reg_errcode_t ret;
4051	re_string_t *pstr = &mctx->input;
4052
4053	/ Avoid overflow. /
4054	if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / `2`
4055	<= pstr->bufs_len, `0`))
4056	return REG_ESPACE;
4057
4058	/ Double the lengths of the buffers, but allocate at least MIN_LEN. /
4059	ret = re_string_realloc_buffers (pstr,
4060	MAX (min_len,
4061	MIN (pstr->len, pstr->bufs_len * `2`)));
4062	if (BE (ret != REG_NOERROR, `0`))
4063	return ret;
4064
4065	if (mctx->state_log != NULL)
4066	{
4067	/ And double the length of state_log. /
4068	/ XXX We have no indication of the size of this buffer. If this*
4069	allocation fail we have no indication that the state_log array
4070	does not have the right size. /*
4071	re_dfastate_t *new_array = re_realloc (mctx->state_log, re_dfastate_t ,
4072	pstr->bufs_len + `1`);
4073	if (BE (new_array == NULL, `0`))
4074	return REG_ESPACE;
4075	mctx->state_log = new_array;
4076	}
4077
4078	/ Then reconstruct the buffers. /
4079	if (pstr->icase)
4080	{
4081	#ifdef RE_ENABLE_I18N
4082	if (pstr->mb_cur_max > `1`)
4083	{
4084	ret = build_wcs_upper_buffer (pstr);
4085	if (BE (ret != REG_NOERROR, `0`))
4086	return ret;
4087	}
4088	else
4089	#endif /* RE_ENABLE_I18N */
4090	build_upper_buffer (pstr);
4091	}
4092	else
4093	{
4094	#ifdef RE_ENABLE_I18N
4095	if (pstr->mb_cur_max > `1`)
4096	build_wcs_buffer (pstr);
4097	else
4098	#endif /* RE_ENABLE_I18N */
4099	{
4100	if (pstr->trans != NULL)
4101	re_string_translate_buffer (pstr);
4102	}
4103	}
4104	return REG_NOERROR;
4105	}
4106
4107
4108	/ Functions for matching context. /
4109
4110	/ Initialize MCTX. /
4111
4112	static reg_errcode_t
4113	__attribute_warn_unused_result__
4114	match_ctx_init (re_match_context_t mctx, int* eflags, Idx n)
4115	{
4116	mctx->eflags = eflags;
4117	mctx->match_last = -`1`;
4118	if (n > `0`)
4119	{
4120	/ Avoid overflow. /
4121	size_t max_object_size =
4122	MAX (sizeof (struct re_backref_cache_entry),
4123	sizeof (re_sub_match_top_t *));
4124	if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n, `0`))
4125	return REG_ESPACE;
4126
4127	mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4128	mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4129	if (BE (mctx->bkref_ents == NULL \|\| mctx->sub_tops == NULL, `0`))
4130	return REG_ESPACE;
4131	}
4132	/ Already zero-ed by the caller.*
4133	else
4134	mctx->bkref_ents = NULL;
4135	mctx->nbkref_ents = 0;
4136	mctx->nsub_tops = 0; /*
4137	mctx->abkref_ents = n;
4138	mctx->max_mb_elem_len = `1`;
4139	mctx->asub_tops = n;
4140	return REG_NOERROR;
4141	}
4142
4143	/ Clean the entries which depend on the current input in MCTX.*
4144	This function must be invoked when the matcher changes the start index
4145	of the input, or changes the input string. /*
4146
4147	static void
4148	match_ctx_clean (re_match_context_t *mctx)
4149	{
4150	Idx st_idx;
4151	for (st_idx = `0`; st_idx < mctx->nsub_tops; ++st_idx)
4152	{
4153	Idx sl_idx;
4154	re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4155	for (sl_idx = `0`; sl_idx < top->nlasts; ++sl_idx)
4156	{
4157	re_sub_match_last_t *last = top->lasts[sl_idx];
4158	re_free (last->path.array);
4159	re_free (last);
4160	}
4161	re_free (top->lasts);
4162	if (top->path)
4163	{
4164	re_free (top->path->array);
4165	re_free (top->path);
4166	}
4167	re_free (top);
4168	}
4169
4170	mctx->nsub_tops = `0`;
4171	mctx->nbkref_ents = `0`;
4172	}
4173
4174	/ Free all the memory associated with MCTX. /
4175
4176	static void
4177	match_ctx_free (re_match_context_t *mctx)
4178	{
4179	/ First, free all the memory associated with MCTX->SUB_TOPS. /
4180	match_ctx_clean (mctx);
4181	re_free (mctx->sub_tops);
4182	re_free (mctx->bkref_ents);
4183	}
4184
4185	/ Add a new backreference entry to MCTX.*
4186	Note that we assume that caller never call this function with duplicate
4187	entry, and call with STR_IDX which isn't smaller than any existing entry.
4188	*/
4189
4190	static reg_errcode_t
4191	__attribute_warn_unused_result__
4192	match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
4193	Idx to)
4194	{
4195	if (mctx->nbkref_ents >= mctx->abkref_ents)
4196	{
4197	struct re_backref_cache_entry* new_entry;
4198	new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4199	mctx->abkref_ents * `2`);
4200	if (BE (new_entry == NULL, `0`))
4201	{
4202	re_free (mctx->bkref_ents);
4203	return REG_ESPACE;
4204	}
4205	mctx->bkref_ents = new_entry;
4206	memset (mctx->bkref_ents + mctx->nbkref_ents, `'\0'`,
4207	sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4208	mctx->abkref_ents *= `2`;
4209	}
4210	if (mctx->nbkref_ents > `0`
4211	&& mctx->bkref_ents[mctx->nbkref_ents - `1`].str_idx == str_idx)
4212	mctx->bkref_ents[mctx->nbkref_ents - `1`].more = `1`;
4213
4214	mctx->bkref_ents[mctx->nbkref_ents].node = node;
4215	mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4216	mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4217	mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4218
4219	/ This is a cache that saves negative results of check_dst_limits_calc_pos.*
4220	If bit N is clear, means that this entry won't epsilon-transition to
4221	an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If
4222	it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
4223	such node.
4224
4225	A backreference does not epsilon-transition unless it is empty, so set
4226	to all zeros if FROM != TO. /*
4227	mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4228	= (from == to ? -`1` : `0`);
4229
4230	mctx->bkref_ents[mctx->nbkref_ents++].more = `0`;
4231	if (mctx->max_mb_elem_len < to - from)
4232	mctx->max_mb_elem_len = to - from;
4233	return REG_NOERROR;
4234	}
4235
4236	/ Return the first entry with the same str_idx, or -1 if none is*
4237	found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. /*
4238
4239	static Idx
4240	search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
4241	{
4242	Idx left, right, mid, last;
4243	last = right = mctx->nbkref_ents;
4244	for (left = `0`; left < right;)
4245	{
4246	mid = (left + right) / `2`;
4247	if (mctx->bkref_ents[mid].str_idx < str_idx)
4248	left = mid + `1`;
4249	else
4250	right = mid;
4251	}
4252	if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4253	return left;
4254	else
4255	return -`1`;
4256	}
4257
4258	/ Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches*
4259	at STR_IDX. /*
4260
4261	static reg_errcode_t
4262	__attribute_warn_unused_result__
4263	match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
4264	{
4265	#ifdef DEBUG
4266	assert (mctx->sub_tops != NULL);
4267	assert (mctx->asub_tops > `0`);
4268	#endif
4269	if (BE (mctx->nsub_tops == mctx->asub_tops, `0`))
4270	{
4271	Idx new_asub_tops = mctx->asub_tops * `2`;
4272	re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4273	re_sub_match_top_t *,
4274	new_asub_tops);
4275	if (BE (new_array == NULL, `0`))
4276	return REG_ESPACE;
4277	mctx->sub_tops = new_array;
4278	mctx->asub_tops = new_asub_tops;
4279	}
4280	mctx->sub_tops[mctx->nsub_tops] = calloc (`1`, sizeof (re_sub_match_top_t));
4281	if (BE (mctx->sub_tops[mctx->nsub_tops] == NULL, `0`))
4282	return REG_ESPACE;
4283	mctx->sub_tops[mctx->nsub_tops]->node = node;
4284	mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4285	return REG_NOERROR;
4286	}
4287
4288	/ Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches*
4289	at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP. /*
4290
4291	static re_sub_match_last_t *
4292	match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
4293	{
4294	re_sub_match_last_t *new_entry;
4295	if (BE (subtop->nlasts == subtop->alasts, `0`))
4296	{
4297	Idx new_alasts = `2` * subtop->alasts + `1`;
4298	re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4299	re_sub_match_last_t *,
4300	new_alasts);
4301	if (BE (new_array == NULL, `0`))
4302	return NULL;
4303	subtop->lasts = new_array;
4304	subtop->alasts = new_alasts;
4305	}
4306	new_entry = calloc (`1`, sizeof (re_sub_match_last_t));
4307	if (BE (new_entry != NULL, `1`))
4308	{
4309	subtop->lasts[subtop->nlasts] = new_entry;
4310	new_entry->node = node;
4311	new_entry->str_idx = str_idx;
4312	++subtop->nlasts;
4313	}
4314	return new_entry;
4315	}
4316
4317	static void
4318	sift_ctx_init (re_sift_context_t sctx, re_dfastate_t *sifted_sts,
4319	re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
4320	{
4321	sctx->sifted_states = sifted_sts;
4322	sctx->limited_states = limited_sts;
4323	sctx->last_node = last_node;
4324	sctx->last_str_idx = last_str_idx;
4325	re_node_set_init_empty (&sctx->limits);
4326	}
4327

Browse the source code of glibc_src_2.28/posix/regexec.c