regex_internal.c source code [glibc_src_2.30/posix/regex_internal.c]

/* Extended regular expression matching and search library.
   Copyright (C) 2002-2019 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */
19
20	static void re_string_construct_common (const char *str, Idx len,
21	re_string_t *pstr,
22	RE_TRANSLATE_TYPE trans, bool icase,
23	const re_dfa_t *dfa);
24	static re_dfastate_t create_ci_newstate (const* re_dfa_t *dfa,
25	const re_node_set *nodes,
26	re_hashval_t hash);
27	static re_dfastate_t create_cd_newstate (const* re_dfa_t *dfa,
28	const re_node_set *nodes,
29	unsigned int context,
30	re_hashval_t hash);
31	static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
32	Idx new_buf_len);
33	#ifdef RE_ENABLE_I18N
34	static void build_wcs_buffer (re_string_t *pstr);
35	static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr);
36	#endif /* RE_ENABLE_I18N */
37	static void build_upper_buffer (re_string_t *pstr);
38	static void re_string_translate_buffer (re_string_t *pstr);
39	static unsigned int re_string_context_at (const re_string_t *input, Idx idx,
40	int eflags) __attribute__ ((pure));
41
/* Functions for string operation.  */
43
44	/ This function allocate the buffers. It is necessary to call*
45	re_string_reconstruct before using the object. /*
46
47	static reg_errcode_t
48	__attribute_warn_unused_result__
49	re_string_allocate (re_string_t pstr, const* char *str, Idx len, Idx init_len,
50	RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
51	{
52	reg_errcode_t ret;
53	Idx init_buf_len;
54
55	/ Ensure at least one character fits into the buffers. /
56	if (init_len < dfa->mb_cur_max)
57	init_len = dfa->mb_cur_max;
58	init_buf_len = (len + `1` < init_len) ? len + `1`: init_len;
59	re_string_construct_common (str, len, pstr, trans, icase, dfa);
60
61	ret = re_string_realloc_buffers (pstr, init_buf_len);
62	if (__glibc_unlikely (ret != REG_NOERROR))
63	return ret;
64
65	pstr->word_char = dfa->word_char;
66	pstr->word_ops_used = dfa->word_ops_used;
67	pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
68	pstr->valid_len = (pstr->mbs_allocated \|\| dfa->mb_cur_max > `1`) ? `0` : len;
69	pstr->valid_raw_len = pstr->valid_len;
70	return REG_NOERROR;
71	}
72
73	/ This function allocate the buffers, and initialize them. /
74
75	static reg_errcode_t
76	__attribute_warn_unused_result__
77	re_string_construct (re_string_t pstr, const* char *str, Idx len,
78	RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
79	{
80	reg_errcode_t ret;
81	memset (pstr, `'\0'`, sizeof (re_string_t));
82	re_string_construct_common (str, len, pstr, trans, icase, dfa);
83
84	if (len > `0`)
85	{
86	ret = re_string_realloc_buffers (pstr, len + `1`);
87	if (__glibc_unlikely (ret != REG_NOERROR))
88	return ret;
89	}
90	pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
91
92	if (icase)
93	{
94	#ifdef RE_ENABLE_I18N
95	if (dfa->mb_cur_max > `1`)
96	{
97	while (`1`)
98	{
99	ret = build_wcs_upper_buffer (pstr);
100	if (__glibc_unlikely (ret != REG_NOERROR))
101	return ret;
102	if (pstr->valid_raw_len >= len)
103	break;
104	if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
105	break;
106	ret = re_string_realloc_buffers (pstr, pstr->bufs_len * `2`);
107	if (__glibc_unlikely (ret != REG_NOERROR))
108	return ret;
109	}
110	}
111	else
112	#endif /* RE_ENABLE_I18N */
113	build_upper_buffer (pstr);
114	}
115	else
116	{
117	#ifdef RE_ENABLE_I18N
118	if (dfa->mb_cur_max > `1`)
119	build_wcs_buffer (pstr);
120	else
121	#endif /* RE_ENABLE_I18N */
122	{
123	if (trans != NULL)
124	re_string_translate_buffer (pstr);
125	else
126	{
127	pstr->valid_len = pstr->bufs_len;
128	pstr->valid_raw_len = pstr->bufs_len;
129	}
130	}
131	}
132
133	return REG_NOERROR;
134	}
135
136	/ Helper functions for re_string_allocate, and re_string_construct. /
137
138	static reg_errcode_t
139	__attribute_warn_unused_result__
140	re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
141	{
142	#ifdef RE_ENABLE_I18N
143	if (pstr->mb_cur_max > `1`)
144	{
145	wint_t *new_wcs;
146
147	/ Avoid overflow in realloc. /
148	const size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx));
149	if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size)
150	< new_buf_len))
151	return REG_ESPACE;
152
153	new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
154	if (__glibc_unlikely (new_wcs == NULL))
155	return REG_ESPACE;
156	pstr->wcs = new_wcs;
157	if (pstr->offsets != NULL)
158	{
159	Idx *new_offsets = re_realloc (pstr->offsets, Idx, new_buf_len);
160	if (__glibc_unlikely (new_offsets == NULL))
161	return REG_ESPACE;
162	pstr->offsets = new_offsets;
163	}
164	}
165	#endif /* RE_ENABLE_I18N */
166	if (pstr->mbs_allocated)
167	{
168	unsigned char new_mbs = re_realloc (pstr->mbs, unsigned* char,
169	new_buf_len);
170	if (__glibc_unlikely (new_mbs == NULL))
171	return REG_ESPACE;
172	pstr->mbs = new_mbs;
173	}
174	pstr->bufs_len = new_buf_len;
175	return REG_NOERROR;
176	}
177
178
179	static void
180	re_string_construct_common (const char str, Idx len, re_string_t pstr,
181	RE_TRANSLATE_TYPE trans, bool icase,
182	const re_dfa_t *dfa)
183	{
184	pstr->raw_mbs = (const unsigned char *) str;
185	pstr->len = len;
186	pstr->raw_len = len;
187	pstr->trans = trans;
188	pstr->icase = icase;
189	pstr->mbs_allocated = (trans != NULL \|\| icase);
190	pstr->mb_cur_max = dfa->mb_cur_max;
191	pstr->is_utf8 = dfa->is_utf8;
192	pstr->map_notascii = dfa->map_notascii;
193	pstr->stop = pstr->len;
194	pstr->raw_stop = pstr->stop;
195	}
196
197	#ifdef RE_ENABLE_I18N
198
199	/ Build wide character buffer PSTR->WCS.*
200	If the byte sequence of the string are:
201	<mb1>(0), <mb1>(1), <mb2>(0), <mb2>(1), <sb3>
202	Then wide character buffer will be:
203	<wc1> , WEOF , <wc2> , WEOF , <wc3>
204	We use WEOF for padding, they indicate that the position isn't
205	a first byte of a multibyte character.
206
207	Note that this function assumes PSTR->VALID_LEN elements are already
208	built and starts from PSTR->VALID_LEN. /*
209
210	static void
211	build_wcs_buffer (re_string_t *pstr)
212	{
213	#ifdef _LIBC
214	unsigned char buf[MB_LEN_MAX];
215	assert (MB_LEN_MAX >= pstr->mb_cur_max);
216	#else
217	unsigned char buf[`64`];
218	#endif
219	mbstate_t prev_st;
220	Idx byte_idx, end_idx, remain_len;
221	size_t mbclen;
222
223	/ Build the buffers from pstr->valid_len to either pstr->len or*
224	pstr->bufs_len. /*
225	end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
226	for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
227	{
228	wchar_t wc;
229	const char *p;
230
231	remain_len = end_idx - byte_idx;
232	prev_st = pstr->cur_state;
233	/ Apply the translation if we need. /
234	if (__glibc_unlikely (pstr->trans != NULL))
235	{
236	int i, ch;
237
238	for (i = `0`; i < pstr->mb_cur_max && i < remain_len; ++i)
239	{
240	ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
241	buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
242	}
243	p = (const char *) buf;
244	}
245	else
246	p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
247	mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
248	if (__glibc_unlikely (mbclen == (size_t) -`1` \|\| mbclen == `0`
249	\|\| (mbclen == (size_t) -`2`
250	&& pstr->bufs_len >= pstr->len)))
251	{
252	/ We treat these cases as a singlebyte character. /
253	mbclen = `1`;
254	wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
255	if (__glibc_unlikely (pstr->trans != NULL))
256	wc = pstr->trans[wc];
257	pstr->cur_state = prev_st;
258	}
259	else if (__glibc_unlikely (mbclen == (size_t) -`2`))
260	{
261	/ The buffer doesn't have enough space, finish to build. /
262	pstr->cur_state = prev_st;
263	break;
264	}
265
266	/ Write wide character and padding. /
267	pstr->wcs[byte_idx++] = wc;
268	/ Write paddings. /
269	for (remain_len = byte_idx + mbclen - `1`; byte_idx < remain_len ;)
270	pstr->wcs[byte_idx++] = WEOF;
271	}
272	pstr->valid_len = byte_idx;
273	pstr->valid_raw_len = byte_idx;
274	}
275
276	/ Build wide character buffer PSTR->WCS like build_wcs_buffer,*
277	but for REG_ICASE. /*
278
279	static reg_errcode_t
280	__attribute_warn_unused_result__
281	build_wcs_upper_buffer (re_string_t *pstr)
282	{
283	mbstate_t prev_st;
284	Idx src_idx, byte_idx, end_idx, remain_len;
285	size_t mbclen;
286	#ifdef _LIBC
287	char buf[MB_LEN_MAX];
288	assert (MB_LEN_MAX >= pstr->mb_cur_max);
289	#else
290	char buf[`64`];
291	#endif
292
293	byte_idx = pstr->valid_len;
294	end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
295
296	/ The following optimization assumes that ASCII characters can be*
297	mapped to wide characters with a simple cast. /*
298	if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
299	{
300	while (byte_idx < end_idx)
301	{
302	wchar_t wc;
303
304	if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx])
305	&& mbsinit (&pstr->cur_state))
306	{
307	/ In case of a singlebyte character. /
308	pstr->mbs[byte_idx]
309	= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
310	/ The next step uses the assumption that wchar_t is encoded*
311	ASCII-safe: all ASCII values can be converted like this. /*
312	pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
313	++byte_idx;
314	continue;
315	}
316
317	remain_len = end_idx - byte_idx;
318	prev_st = pstr->cur_state;
319	mbclen = __mbrtowc (&wc,
320	((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
321	+ byte_idx), remain_len, &pstr->cur_state);
322	if (__glibc_likely (`0` < mbclen && mbclen < (size_t) -`2`))
323	{
324	wchar_t wcu = __towupper (wc);
325	if (wcu != wc)
326	{
327	size_t mbcdlen;
328
329	mbcdlen = __wcrtomb (buf, wcu, &prev_st);
330	if (__glibc_likely (mbclen == mbcdlen))
331	memcpy (pstr->mbs + byte_idx, buf, mbclen);
332	else
333	{
334	src_idx = byte_idx;
335	goto offsets_needed;
336	}
337	}
338	else
339	memcpy (pstr->mbs + byte_idx,
340	pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
341	pstr->wcs[byte_idx++] = wcu;
342	/ Write paddings. /
343	for (remain_len = byte_idx + mbclen - `1`; byte_idx < remain_len ;)
344	pstr->wcs[byte_idx++] = WEOF;
345	}
346	else if (mbclen == (size_t) -`1` \|\| mbclen == `0`
347	\|\| (mbclen == (size_t) -`2` && pstr->bufs_len >= pstr->len))
348	{
349	/ It is an invalid character, an incomplete character*
350	at the end of the string, or '\0'. Just use the byte. /*
351	int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
352	pstr->mbs[byte_idx] = ch;
353	/ And also cast it to wide char. /
354	pstr->wcs[byte_idx++] = (wchar_t) ch;
355	if (__glibc_unlikely (mbclen == (size_t) -`1`))
356	pstr->cur_state = prev_st;
357	}
358	else
359	{
360	/ The buffer doesn't have enough space, finish to build. /
361	pstr->cur_state = prev_st;
362	break;
363	}
364	}
365	pstr->valid_len = byte_idx;
366	pstr->valid_raw_len = byte_idx;
367	return REG_NOERROR;
368	}
369	else
370	for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
371	{
372	wchar_t wc;
373	const char *p;
374	offsets_needed:
375	remain_len = end_idx - byte_idx;
376	prev_st = pstr->cur_state;
377	if (__glibc_unlikely (pstr->trans != NULL))
378	{
379	int i, ch;
380
381	for (i = `0`; i < pstr->mb_cur_max && i < remain_len; ++i)
382	{
383	ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
384	buf[i] = pstr->trans[ch];
385	}
386	p = (const char *) buf;
387	}
388	else
389	p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
390	mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
391	if (__glibc_likely (`0` < mbclen && mbclen < (size_t) -`2`))
392	{
393	wchar_t wcu = __towupper (wc);
394	if (wcu != wc)
395	{
396	size_t mbcdlen;
397
398	mbcdlen = __wcrtomb ((char *) buf, wcu, &prev_st);
399	if (__glibc_likely (mbclen == mbcdlen))
400	memcpy (pstr->mbs + byte_idx, buf, mbclen);
401	else if (mbcdlen != (size_t) -`1`)
402	{
403	size_t i;
404
405	if (byte_idx + mbcdlen > pstr->bufs_len)
406	{
407	pstr->cur_state = prev_st;
408	break;
409	}
410
411	if (pstr->offsets == NULL)
412	{
413	pstr->offsets = re_malloc (Idx, pstr->bufs_len);
414
415	if (pstr->offsets == NULL)
416	return REG_ESPACE;
417	}
418	if (!pstr->offsets_needed)
419	{
420	for (i = `0`; i < (size_t) byte_idx; ++i)
421	pstr->offsets[i] = i;
422	pstr->offsets_needed = `1`;
423	}
424
425	memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
426	pstr->wcs[byte_idx] = wcu;
427	pstr->offsets[byte_idx] = src_idx;
428	for (i = `1`; i < mbcdlen; ++i)
429	{
430	pstr->offsets[byte_idx + i]
431	= src_idx + (i < mbclen ? i : mbclen - `1`);
432	pstr->wcs[byte_idx + i] = WEOF;
433	}
434	pstr->len += mbcdlen - mbclen;
435	if (pstr->raw_stop > src_idx)
436	pstr->stop += mbcdlen - mbclen;
437	end_idx = (pstr->bufs_len > pstr->len)
438	? pstr->len : pstr->bufs_len;
439	byte_idx += mbcdlen;
440	src_idx += mbclen;
441	continue;
442	}
443	else
444	memcpy (pstr->mbs + byte_idx, p, mbclen);
445	}
446	else
447	memcpy (pstr->mbs + byte_idx, p, mbclen);
448
449	if (__glibc_unlikely (pstr->offsets_needed != `0`))
450	{
451	size_t i;
452	for (i = `0`; i < mbclen; ++i)
453	pstr->offsets[byte_idx + i] = src_idx + i;
454	}
455	src_idx += mbclen;
456
457	pstr->wcs[byte_idx++] = wcu;
458	/ Write paddings. /
459	for (remain_len = byte_idx + mbclen - `1`; byte_idx < remain_len ;)
460	pstr->wcs[byte_idx++] = WEOF;
461	}
462	else if (mbclen == (size_t) -`1` \|\| mbclen == `0`
463	\|\| (mbclen == (size_t) -`2` && pstr->bufs_len >= pstr->len))
464	{
465	/ It is an invalid character or '\0'. Just use the byte. /
466	int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
467
468	if (__glibc_unlikely (pstr->trans != NULL))
469	ch = pstr->trans [ch];
470	pstr->mbs[byte_idx] = ch;
471
472	if (__glibc_unlikely (pstr->offsets_needed != `0`))
473	pstr->offsets[byte_idx] = src_idx;
474	++src_idx;
475
476	/ And also cast it to wide char. /
477	pstr->wcs[byte_idx++] = (wchar_t) ch;
478	if (__glibc_unlikely (mbclen == (size_t) -`1`))
479	pstr->cur_state = prev_st;
480	}
481	else
482	{
483	/ The buffer doesn't have enough space, finish to build. /
484	pstr->cur_state = prev_st;
485	break;
486	}
487	}
488	pstr->valid_len = byte_idx;
489	pstr->valid_raw_len = src_idx;
490	return REG_NOERROR;
491	}
492
493	/ Skip characters until the index becomes greater than NEW_RAW_IDX.*
494	Return the index. /*
495
496	static Idx
497	re_string_skip_chars (re_string_t pstr, Idx new_raw_idx, wint_t last_wc)
498	{
499	mbstate_t prev_st;
500	Idx rawbuf_idx;
501	size_t mbclen;
502	wint_t wc = WEOF;
503
504	/ Skip the characters which are not necessary to check. /
505	for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
506	rawbuf_idx < new_raw_idx;)
507	{
508	wchar_t wc2;
509	Idx remain_len = pstr->raw_len - rawbuf_idx;
510	prev_st = pstr->cur_state;
511	mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
512	remain_len, &pstr->cur_state);
513	if (__glibc_unlikely (mbclen == (size_t) -`2` \|\| mbclen == (size_t) -`1`
514	\|\| mbclen == `0`))
515	{
516	/ We treat these cases as a single byte character. /
517	if (mbclen == `0` \|\| remain_len == `0`)
518	wc = L`'\0'`;
519	else
520	wc = (unsigned* char *) (pstr->raw_mbs + rawbuf_idx);
521	mbclen = `1`;
522	pstr->cur_state = prev_st;
523	}
524	else
525	wc = wc2;
526	/ Then proceed the next character. /
527	rawbuf_idx += mbclen;
528	}
529	*last_wc = wc;
530	return rawbuf_idx;
531	}
532	#endif /* RE_ENABLE_I18N */
533
534	/ Build the buffer PSTR->MBS, and apply the translation if we need.*
535	This function is used in case of REG_ICASE. /*
536
537	static void
538	build_upper_buffer (re_string_t *pstr)
539	{
540	Idx char_idx, end_idx;
541	end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
542
543	for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
544	{
545	int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
546	if (__glibc_unlikely (pstr->trans != NULL))
547	ch = pstr->trans[ch];
548	pstr->mbs[char_idx] = toupper (ch);
549	}
550	pstr->valid_len = char_idx;
551	pstr->valid_raw_len = char_idx;
552	}
553
554	/ Apply TRANS to the buffer in PSTR. /
555
556	static void
557	re_string_translate_buffer (re_string_t *pstr)
558	{
559	Idx buf_idx, end_idx;
560	end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
561
562	for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
563	{
564	int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
565	pstr->mbs[buf_idx] = pstr->trans[ch];
566	}
567
568	pstr->valid_len = buf_idx;
569	pstr->valid_raw_len = buf_idx;
570	}
571
572	/ This function re-construct the buffers.*
573	Concretely, convert to wide character in case of pstr->mb_cur_max > 1,
574	convert to upper case in case of REG_ICASE, apply translation. /*
575
576	static reg_errcode_t
577	__attribute_warn_unused_result__
578	re_string_reconstruct (re_string_t pstr, Idx idx, int* eflags)
579	{
580	Idx offset;
581
582	if (__glibc_unlikely (pstr->raw_mbs_idx <= idx))
583	offset = idx - pstr->raw_mbs_idx;
584	else
585	{
586	/ Reset buffer. /
587	#ifdef RE_ENABLE_I18N
588	if (pstr->mb_cur_max > `1`)
589	memset (&pstr->cur_state, `'\0'`, sizeof (mbstate_t));
590	#endif /* RE_ENABLE_I18N */
591	pstr->len = pstr->raw_len;
592	pstr->stop = pstr->raw_stop;
593	pstr->valid_len = `0`;
594	pstr->raw_mbs_idx = `0`;
595	pstr->valid_raw_len = `0`;
596	pstr->offsets_needed = `0`;
597	pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
598	: CONTEXT_NEWLINE \| CONTEXT_BEGBUF);
599	if (!pstr->mbs_allocated)
600	pstr->mbs = (unsigned char *) pstr->raw_mbs;
601	offset = idx;
602	}
603
604	if (__glibc_likely (offset != `0`))
605	{
606	/ Should the already checked characters be kept? /
607	if (__glibc_likely (offset < pstr->valid_raw_len))
608	{
609	/ Yes, move them to the front of the buffer. /
610	#ifdef RE_ENABLE_I18N
611	if (__glibc_unlikely (pstr->offsets_needed))
612	{
613	Idx low = `0`, high = pstr->valid_len, mid;
614	do
615	{
616	mid = (high + low) / `2`;
617	if (pstr->offsets[mid] > offset)
618	high = mid;
619	else if (pstr->offsets[mid] < offset)
620	low = mid + `1`;
621	else
622	break;
623	}
624	while (low < high);
625	if (pstr->offsets[mid] < offset)
626	++mid;
627	pstr->tip_context = re_string_context_at (pstr, mid - `1`,
628	eflags);
629	/ This can be quite complicated, so handle specially*
630	only the common and easy case where the character with
631	different length representation of lower and upper
632	case is present at or after offset. /*
633	if (pstr->valid_len > offset
634	&& mid == offset && pstr->offsets[mid] == offset)
635	{
636	memmove (pstr->wcs, pstr->wcs + offset,
637	(pstr->valid_len - offset) * sizeof (wint_t));
638	memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
639	pstr->valid_len -= offset;
640	pstr->valid_raw_len -= offset;
641	for (low = `0`; low < pstr->valid_len; low++)
642	pstr->offsets[low] = pstr->offsets[low + offset] - offset;
643	}
644	else
645	{
646	/ Otherwise, just find out how long the partial multibyte*
647	character at offset is and fill it with WEOF/255. /*
648	pstr->len = pstr->raw_len - idx + offset;
649	pstr->stop = pstr->raw_stop - idx + offset;
650	pstr->offsets_needed = `0`;
651	while (mid > `0` && pstr->offsets[mid - `1`] == offset)
652	--mid;
653	while (mid < pstr->valid_len)
654	if (pstr->wcs[mid] != WEOF)
655	break;
656	else
657	++mid;
658	if (mid == pstr->valid_len)
659	pstr->valid_len = `0`;
660	else
661	{
662	pstr->valid_len = pstr->offsets[mid] - offset;
663	if (pstr->valid_len)
664	{
665	for (low = `0`; low < pstr->valid_len; ++low)
666	pstr->wcs[low] = WEOF;
667	memset (pstr->mbs, `255`, pstr->valid_len);
668	}
669	}
670	pstr->valid_raw_len = pstr->valid_len;
671	}
672	}
673	else
674	#endif
675	{
676	pstr->tip_context = re_string_context_at (pstr, offset - `1`,
677	eflags);
678	#ifdef RE_ENABLE_I18N
679	if (pstr->mb_cur_max > `1`)
680	memmove (pstr->wcs, pstr->wcs + offset,
681	(pstr->valid_len - offset) * sizeof (wint_t));
682	#endif /* RE_ENABLE_I18N */
683	if (__glibc_unlikely (pstr->mbs_allocated))
684	memmove (pstr->mbs, pstr->mbs + offset,
685	pstr->valid_len - offset);
686	pstr->valid_len -= offset;
687	pstr->valid_raw_len -= offset;
688	#if defined DEBUG && DEBUG
689	assert (pstr->valid_len > `0`);
690	#endif
691	}
692	}
693	else
694	{
695	#ifdef RE_ENABLE_I18N
696	/ No, skip all characters until IDX. /
697	Idx prev_valid_len = pstr->valid_len;
698
699	if (__glibc_unlikely (pstr->offsets_needed))
700	{
701	pstr->len = pstr->raw_len - idx + offset;
702	pstr->stop = pstr->raw_stop - idx + offset;
703	pstr->offsets_needed = `0`;
704	}
705	#endif
706	pstr->valid_len = `0`;
707	#ifdef RE_ENABLE_I18N
708	if (pstr->mb_cur_max > `1`)
709	{
710	Idx wcs_idx;
711	wint_t wc = WEOF;
712
713	if (pstr->is_utf8)
714	{
715	const unsigned char raw, p, *end;
716
717	/ Special case UTF-8. Multi-byte chars start with any*
718	byte other than 0x80 - 0xbf. /*
719	raw = pstr->raw_mbs + pstr->raw_mbs_idx;
720	end = raw + (offset - pstr->mb_cur_max);
721	if (end < pstr->raw_mbs)
722	end = pstr->raw_mbs;
723	p = raw + offset - `1`;
724	#ifdef _LIBC
725	/ We know the wchar_t encoding is UCS4, so for the simple*
726	case, ASCII characters, skip the conversion step. /*
727	if (isascii (*p) && __glibc_likely (pstr->trans == NULL))
728	{
729	memset (&pstr->cur_state, `'\0'`, sizeof (mbstate_t));
730	/ pstr->valid_len = 0; /
731	wc = (wchar_t) *p;
732	}
733	else
734	#endif
735	for (; p >= end; --p)
736	if ((*p & `0xc0`) != `0x80`)
737	{
738	mbstate_t cur_state;
739	wchar_t wc2;
740	Idx mlen = raw + pstr->len - p;
741	unsigned char buf[`6`];
742	size_t mbclen;
743
744	const unsigned char *pp = p;
745	if (__glibc_unlikely (pstr->trans != NULL))
746	{
747	int i = mlen < `6` ? mlen : `6`;
748	while (--i >= `0`)
749	buf[i] = pstr->trans[p[i]];
750	pp = buf;
751	}
752	/ XXX Don't use mbrtowc, we know which conversion*
753	to use (UTF-8 -> UCS4). /*
754	memset (&cur_state, `0`, sizeof (cur_state));
755	mbclen = __mbrtowc (&wc2, (const char *) pp, mlen,
756	&cur_state);
757	if (raw + offset - p <= mbclen
758	&& mbclen < (size_t) -`2`)
759	{
760	memset (&pstr->cur_state, `'\0'`,
761	sizeof (mbstate_t));
762	pstr->valid_len = mbclen - (raw + offset - p);
763	wc = wc2;
764	}
765	break;
766	}
767	}
768
769	if (wc == WEOF)
770	pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
771	if (wc == WEOF)
772	pstr->tip_context
773	= re_string_context_at (pstr, prev_valid_len - `1`, eflags);
774	else
775	pstr->tip_context = ((__glibc_unlikely (pstr->word_ops_used != `0`)
776	&& IS_WIDE_WORD_CHAR (wc))
777	? CONTEXT_WORD
778	: ((IS_WIDE_NEWLINE (wc)
779	&& pstr->newline_anchor)
780	? CONTEXT_NEWLINE : `0`));
781	if (__glibc_unlikely (pstr->valid_len))
782	{
783	for (wcs_idx = `0`; wcs_idx < pstr->valid_len; ++wcs_idx)
784	pstr->wcs[wcs_idx] = WEOF;
785	if (pstr->mbs_allocated)
786	memset (pstr->mbs, `255`, pstr->valid_len);
787	}
788	pstr->valid_raw_len = pstr->valid_len;
789	}
790	else
791	#endif /* RE_ENABLE_I18N */
792	{
793	int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - `1`];
794	pstr->valid_raw_len = `0`;
795	if (pstr->trans)
796	c = pstr->trans[c];
797	pstr->tip_context = (bitset_contain (pstr->word_char, c)
798	? CONTEXT_WORD
799	: ((IS_NEWLINE (c) && pstr->newline_anchor)
800	? CONTEXT_NEWLINE : `0`));
801	}
802	}
803	if (!__glibc_unlikely (pstr->mbs_allocated))
804	pstr->mbs += offset;
805	}
806	pstr->raw_mbs_idx = idx;
807	pstr->len -= offset;
808	pstr->stop -= offset;
809
810	/ Then build the buffers. /
811	#ifdef RE_ENABLE_I18N
812	if (pstr->mb_cur_max > `1`)
813	{
814	if (pstr->icase)
815	{
816	reg_errcode_t ret = build_wcs_upper_buffer (pstr);
817	if (__glibc_unlikely (ret != REG_NOERROR))
818	return ret;
819	}
820	else
821	build_wcs_buffer (pstr);
822	}
823	else
824	#endif /* RE_ENABLE_I18N */
825	if (__glibc_unlikely (pstr->mbs_allocated))
826	{
827	if (pstr->icase)
828	build_upper_buffer (pstr);
829	else if (pstr->trans != NULL)
830	re_string_translate_buffer (pstr);
831	}
832	else
833	pstr->valid_len = pstr->len;
834
835	pstr->cur_idx = `0`;
836	return REG_NOERROR;
837	}
838
839	static unsigned char
840	__attribute__ ((pure))
841	re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
842	{
843	int ch;
844	Idx off;
845
846	/ Handle the common (easiest) cases first. /
847	if (__glibc_likely (!pstr->mbs_allocated))
848	return re_string_peek_byte (pstr, idx);
849
850	#ifdef RE_ENABLE_I18N
851	if (pstr->mb_cur_max > `1`
852	&& ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
853	return re_string_peek_byte (pstr, idx);
854	#endif
855
856	off = pstr->cur_idx + idx;
857	#ifdef RE_ENABLE_I18N
858	if (pstr->offsets_needed)
859	off = pstr->offsets[off];
860	#endif
861
862	ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
863
864	#ifdef RE_ENABLE_I18N
865	/ Ensure that e.g. for tr_TR.UTF-8 BACKSLASH DOTLESS SMALL LETTER I*
866	this function returns CAPITAL LETTER I instead of first byte of
867	DOTLESS SMALL LETTER I. The latter would confuse the parser,
868	since peek_byte_case doesn't advance cur_idx in any way. /*
869	if (pstr->offsets_needed && !isascii (ch))
870	return re_string_peek_byte (pstr, idx);
871	#endif
872
873	return ch;
874	}
875
876	static unsigned char
877	re_string_fetch_byte_case (re_string_t *pstr)
878	{
879	if (__glibc_likely (!pstr->mbs_allocated))
880	return re_string_fetch_byte (pstr);
881
882	#ifdef RE_ENABLE_I18N
883	if (pstr->offsets_needed)
884	{
885	Idx off;
886	int ch;
887
888	/ For tr_TR.UTF-8 [[:islower:]] there is*
889	[[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip
890	in that case the whole multi-byte character and return
891	the original letter. On the other side, with
892	[[: DOTLESS SMALL LETTER I return [[:I, as doing
893	anything else would complicate things too much. /*
894
895	if (!re_string_first_byte (pstr, pstr->cur_idx))
896	return re_string_fetch_byte (pstr);
897
898	off = pstr->offsets[pstr->cur_idx];
899	ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
900
901	if (! isascii (ch))
902	return re_string_fetch_byte (pstr);
903
904	re_string_skip_bytes (pstr,
905	re_string_char_size_at (pstr, pstr->cur_idx));
906	return ch;
907	}
908	#endif
909
910	return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
911	}
912
913	static void
914	re_string_destruct (re_string_t *pstr)
915	{
916	#ifdef RE_ENABLE_I18N
917	re_free (pstr->wcs);
918	re_free (pstr->offsets);
919	#endif /* RE_ENABLE_I18N */
920	if (pstr->mbs_allocated)
921	re_free (pstr->mbs);
922	}
923
924	/ Return the context at IDX in INPUT. /
925
926	static unsigned int
927	re_string_context_at (const re_string_t input, Idx idx, int* eflags)
928	{
929	int c;
930	if (__glibc_unlikely (idx < `0`))
931	/ In this case, we use the value stored in input->tip_context,*
932	since we can't know the character in input->mbs[-1] here. /*
933	return input->tip_context;
934	if (__glibc_unlikely (idx == input->len))
935	return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
936	: CONTEXT_NEWLINE \| CONTEXT_ENDBUF);
937	#ifdef RE_ENABLE_I18N
938	if (input->mb_cur_max > `1`)
939	{
940	wint_t wc;
941	Idx wc_idx = idx;
942	while(input->wcs[wc_idx] == WEOF)
943	{
944	#if defined DEBUG && DEBUG
945	/ It must not happen. /
946	assert (wc_idx >= `0`);
947	#endif
948	--wc_idx;
949	if (wc_idx < `0`)
950	return input->tip_context;
951	}
952	wc = input->wcs[wc_idx];
953	if (__glibc_unlikely (input->word_ops_used != `0`)
954	&& IS_WIDE_WORD_CHAR (wc))
955	return CONTEXT_WORD;
956	return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
957	? CONTEXT_NEWLINE : `0`);
958	}
959	else
960	#endif
961	{
962	c = re_string_byte_at (input, idx);
963	if (bitset_contain (input->word_char, c))
964	return CONTEXT_WORD;
965	return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : `0`;
966	}
967	}
968
/* Functions for set operation.  */
970
971	static reg_errcode_t
972	__attribute_warn_unused_result__
973	re_node_set_alloc (re_node_set *set, Idx size)
974	{
975	set->alloc = size;
976	set->nelem = `0`;
977	set->elems = re_malloc (Idx, size);
978	if (__glibc_unlikely (set->elems == NULL)
979	&& (MALLOC_0_IS_NONNULL \|\| size != `0`))
980	return REG_ESPACE;
981	return REG_NOERROR;
982	}
983
984	static reg_errcode_t
985	__attribute_warn_unused_result__
986	re_node_set_init_1 (re_node_set *set, Idx elem)
987	{
988	set->alloc = `1`;
989	set->nelem = `1`;
990	set->elems = re_malloc (Idx, `1`);
991	if (__glibc_unlikely (set->elems == NULL))
992	{
993	set->alloc = set->nelem = `0`;
994	return REG_ESPACE;
995	}
996	set->elems[`0`] = elem;
997	return REG_NOERROR;
998	}
999
1000	static reg_errcode_t
1001	__attribute_warn_unused_result__
1002	re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2)
1003	{
1004	set->alloc = `2`;
1005	set->elems = re_malloc (Idx, `2`);
1006	if (__glibc_unlikely (set->elems == NULL))
1007	return REG_ESPACE;
1008	if (elem1 == elem2)
1009	{
1010	set->nelem = `1`;
1011	set->elems[`0`] = elem1;
1012	}
1013	else
1014	{
1015	set->nelem = `2`;
1016	if (elem1 < elem2)
1017	{
1018	set->elems[`0`] = elem1;
1019	set->elems[`1`] = elem2;
1020	}
1021	else
1022	{
1023	set->elems[`0`] = elem2;
1024	set->elems[`1`] = elem1;
1025	}
1026	}
1027	return REG_NOERROR;
1028	}
1029
1030	static reg_errcode_t
1031	__attribute_warn_unused_result__
1032	re_node_set_init_copy (re_node_set dest, const* re_node_set *src)
1033	{
1034	dest->nelem = src->nelem;
1035	if (src->nelem > `0`)
1036	{
1037	dest->alloc = dest->nelem;
1038	dest->elems = re_malloc (Idx, dest->alloc);
1039	if (__glibc_unlikely (dest->elems == NULL))
1040	{
1041	dest->alloc = dest->nelem = `0`;
1042	return REG_ESPACE;
1043	}
1044	memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
1045	}
1046	else
1047	re_node_set_init_empty (dest);
1048	return REG_NOERROR;
1049	}
1050
1051	/ Calculate the intersection of the sets SRC1 and SRC2. And merge it to*
1052	DEST. Return value indicate the error code or REG_NOERROR if succeeded.
1053	Note: We assume dest->elems is NULL, when dest->alloc is 0. /*
1054
1055	static reg_errcode_t
1056	__attribute_warn_unused_result__
1057	re_node_set_add_intersect (re_node_set dest, const* re_node_set *src1,
1058	const re_node_set *src2)
1059	{
1060	Idx i1, i2, is, id, delta, sbase;
1061	if (src1->nelem == `0` \|\| src2->nelem == `0`)
1062	return REG_NOERROR;
1063
1064	/ We need dest->nelem + 2 * elems_in_intersection; this is a*
1065	conservative estimate. /*
1066	if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
1067	{
1068	Idx new_alloc = src1->nelem + src2->nelem + dest->alloc;
1069	Idx *new_elems = re_realloc (dest->elems, Idx, new_alloc);
1070	if (__glibc_unlikely (new_elems == NULL))
1071	return REG_ESPACE;
1072	dest->elems = new_elems;
1073	dest->alloc = new_alloc;
1074	}
1075
1076	/ Find the items in the intersection of SRC1 and SRC2, and copy*
1077	into the top of DEST those that are not already in DEST itself. /*
1078	sbase = dest->nelem + src1->nelem + src2->nelem;
1079	i1 = src1->nelem - `1`;
1080	i2 = src2->nelem - `1`;
1081	id = dest->nelem - `1`;
1082	for (;;)
1083	{
1084	if (src1->elems[i1] == src2->elems[i2])
1085	{
1086	/ Try to find the item in DEST. Maybe we could binary search? /
1087	while (id >= `0` && dest->elems[id] > src1->elems[i1])
1088	--id;
1089
1090	if (id < `0` \|\| dest->elems[id] != src1->elems[i1])
1091	dest->elems[--sbase] = src1->elems[i1];
1092
1093	if (--i1 < `0` \|\| --i2 < `0`)
1094	break;
1095	}
1096
1097	/ Lower the highest of the two items. /
1098	else if (src1->elems[i1] < src2->elems[i2])
1099	{
1100	if (--i2 < `0`)
1101	break;
1102	}
1103	else
1104	{
1105	if (--i1 < `0`)
1106	break;
1107	}
1108	}
1109
1110	id = dest->nelem - `1`;
1111	is = dest->nelem + src1->nelem + src2->nelem - `1`;
1112	delta = is - sbase + `1`;
1113
1114	/ Now copy. When DELTA becomes zero, the remaining*
1115	DEST elements are already in place; this is more or
1116	less the same loop that is in re_node_set_merge. /*
1117	dest->nelem += delta;
1118	if (delta > `0` && id >= `0`)
1119	for (;;)
1120	{
1121	if (dest->elems[is] > dest->elems[id])
1122	{
1123	/ Copy from the top. /
1124	dest->elems[id + delta--] = dest->elems[is--];
1125	if (delta == `0`)
1126	break;
1127	}
1128	else
1129	{
1130	/ Slide from the bottom. /
1131	dest->elems[id + delta] = dest->elems[id];
1132	if (--id < `0`)
1133	break;
1134	}
1135	}
1136
1137	/ Copy remaining SRC elements. /
1138	memcpy (dest->elems, dest->elems + sbase, delta * sizeof (Idx));
1139
1140	return REG_NOERROR;
1141	}
1142
1143	/ Calculate the union set of the sets SRC1 and SRC2. And store it to*
1144	DEST. Return value indicate the error code or REG_NOERROR if succeeded. /*
1145
1146	static reg_errcode_t
1147	__attribute_warn_unused_result__
1148	re_node_set_init_union (re_node_set dest, const* re_node_set *src1,
1149	const re_node_set *src2)
1150	{
1151	Idx i1, i2, id;
1152	if (src1 != NULL && src1->nelem > `0` && src2 != NULL && src2->nelem > `0`)
1153	{
1154	dest->alloc = src1->nelem + src2->nelem;
1155	dest->elems = re_malloc (Idx, dest->alloc);
1156	if (__glibc_unlikely (dest->elems == NULL))
1157	return REG_ESPACE;
1158	}
1159	else
1160	{
1161	if (src1 != NULL && src1->nelem > `0`)
1162	return re_node_set_init_copy (dest, src1);
1163	else if (src2 != NULL && src2->nelem > `0`)
1164	return re_node_set_init_copy (dest, src2);
1165	else
1166	re_node_set_init_empty (dest);
1167	return REG_NOERROR;
1168	}
1169	for (i1 = i2 = id = `0` ; i1 < src1->nelem && i2 < src2->nelem ;)
1170	{
1171	if (src1->elems[i1] > src2->elems[i2])
1172	{
1173	dest->elems[id++] = src2->elems[i2++];
1174	continue;
1175	}
1176	if (src1->elems[i1] == src2->elems[i2])
1177	++i2;
1178	dest->elems[id++] = src1->elems[i1++];
1179	}
1180	if (i1 < src1->nelem)
1181	{
1182	memcpy (dest->elems + id, src1->elems + i1,
1183	(src1->nelem - i1) * sizeof (Idx));
1184	id += src1->nelem - i1;
1185	}
1186	else if (i2 < src2->nelem)
1187	{
1188	memcpy (dest->elems + id, src2->elems + i2,
1189	(src2->nelem - i2) * sizeof (Idx));
1190	id += src2->nelem - i2;
1191	}
1192	dest->nelem = id;
1193	return REG_NOERROR;
1194	}
1195
1196	/ Calculate the union set of the sets DEST and SRC. And store it to*
1197	DEST. Return value indicate the error code or REG_NOERROR if succeeded. /*
1198
1199	static reg_errcode_t
1200	__attribute_warn_unused_result__
1201	re_node_set_merge (re_node_set dest, const* re_node_set *src)
1202	{
1203	Idx is, id, sbase, delta;
1204	if (src == NULL \|\| src->nelem == `0`)
1205	return REG_NOERROR;
1206	if (dest->alloc < `2` * src->nelem + dest->nelem)
1207	{
1208	Idx new_alloc = `2` * (src->nelem + dest->alloc);
1209	Idx *new_buffer = re_realloc (dest->elems, Idx, new_alloc);
1210	if (__glibc_unlikely (new_buffer == NULL))
1211	return REG_ESPACE;
1212	dest->elems = new_buffer;
1213	dest->alloc = new_alloc;
1214	}
1215
1216	if (__glibc_unlikely (dest->nelem == `0`))
1217	{
1218	dest->nelem = src->nelem;
1219	memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
1220	return REG_NOERROR;
1221	}
1222
1223	/ Copy into the top of DEST the items of SRC that are not*
1224	found in DEST. Maybe we could binary search in DEST? /*
1225	for (sbase = dest->nelem + `2` * src->nelem,
1226	is = src->nelem - `1`, id = dest->nelem - `1`; is >= `0` && id >= `0`; )
1227	{
1228	if (dest->elems[id] == src->elems[is])
1229	is--, id--;
1230	else if (dest->elems[id] < src->elems[is])
1231	dest->elems[--sbase] = src->elems[is--];
1232	else / if (dest->elems[id] > src->elems[is]) /
1233	--id;
1234	}
1235
1236	if (is >= `0`)
1237	{
1238	/ If DEST is exhausted, the remaining items of SRC must be unique. /
1239	sbase -= is + `1`;
1240	memcpy (dest->elems + sbase, src->elems, (is + `1`) * sizeof (Idx));
1241	}
1242
1243	id = dest->nelem - `1`;
1244	is = dest->nelem + `2` * src->nelem - `1`;
1245	delta = is - sbase + `1`;
1246	if (delta == `0`)
1247	return REG_NOERROR;
1248
1249	/ Now copy. When DELTA becomes zero, the remaining*
1250	DEST elements are already in place. /*
1251	dest->nelem += delta;
1252	for (;;)
1253	{
1254	if (dest->elems[is] > dest->elems[id])
1255	{
1256	/ Copy from the top. /
1257	dest->elems[id + delta--] = dest->elems[is--];
1258	if (delta == `0`)
1259	break;
1260	}
1261	else
1262	{
1263	/ Slide from the bottom. /
1264	dest->elems[id + delta] = dest->elems[id];
1265	if (--id < `0`)
1266	{
1267	/ Copy remaining SRC elements. /
1268	memcpy (dest->elems, dest->elems + sbase,
1269	delta * sizeof (Idx));
1270	break;
1271	}
1272	}
1273	}
1274
1275	return REG_NOERROR;
1276	}
1277
1278	/ Insert the new element ELEM to the re_node_set* SET.*
1279	SET should not already have ELEM.
1280	Return true if successful. /*
1281
1282	static bool
1283	__attribute_warn_unused_result__
1284	re_node_set_insert (re_node_set *set, Idx elem)
1285	{
1286	Idx idx;
1287	/ In case the set is empty. /
1288	if (set->alloc == `0`)
1289	return __glibc_likely (re_node_set_init_1 (set, elem) == REG_NOERROR);
1290
1291	if (__glibc_unlikely (set->nelem) == `0`)
1292	{
1293	/ We already guaranteed above that set->alloc != 0. /
1294	set->elems[`0`] = elem;
1295	++set->nelem;
1296	return true;
1297	}
1298
1299	/ Realloc if we need. /
1300	if (set->alloc == set->nelem)
1301	{
1302	Idx *new_elems;
1303	set->alloc = set->alloc * `2`;
1304	new_elems = re_realloc (set->elems, Idx, set->alloc);
1305	if (__glibc_unlikely (new_elems == NULL))
1306	return false;
1307	set->elems = new_elems;
1308	}
1309
1310	/ Move the elements which follows the new element. Test the*
1311	first element separately to skip a check in the inner loop. /*
1312	if (elem < set->elems[`0`])
1313	{
1314	idx = `0`;
1315	for (idx = set->nelem; idx > `0`; idx--)
1316	set->elems[idx] = set->elems[idx - `1`];
1317	}
1318	else
1319	{
1320	for (idx = set->nelem; set->elems[idx - `1`] > elem; idx--)
1321	set->elems[idx] = set->elems[idx - `1`];
1322	}
1323
1324	/ Insert the new element. /
1325	set->elems[idx] = elem;
1326	++set->nelem;
1327	return true;
1328	}
1329
1330	/ Insert the new element ELEM to the re_node_set* SET.*
1331	SET should not already have any element greater than or equal to ELEM.
1332	Return true if successful. /*
1333
1334	static bool
1335	__attribute_warn_unused_result__
1336	re_node_set_insert_last (re_node_set *set, Idx elem)
1337	{
1338	/ Realloc if we need. /
1339	if (set->alloc == set->nelem)
1340	{
1341	Idx *new_elems;
1342	set->alloc = (set->alloc + `1`) * `2`;
1343	new_elems = re_realloc (set->elems, Idx, set->alloc);
1344	if (__glibc_unlikely (new_elems == NULL))
1345	return false;
1346	set->elems = new_elems;
1347	}
1348
1349	/ Insert the new element. /
1350	set->elems[set->nelem++] = elem;
1351	return true;
1352	}
1353
1354	/ Compare two node sets SET1 and SET2.*
1355	Return true if SET1 and SET2 are equivalent. /*
1356
1357	static bool
1358	__attribute__ ((pure))
1359	re_node_set_compare (const re_node_set set1, const* re_node_set *set2)
1360	{
1361	Idx i;
1362	if (set1 == NULL \|\| set2 == NULL \|\| set1->nelem != set2->nelem)
1363	return false;
1364	for (i = set1->nelem ; --i >= `0` ; )
1365	if (set1->elems[i] != set2->elems[i])
1366	return false;
1367	return true;
1368	}
1369
1370	/ Return (idx + 1) if SET contains the element ELEM, return 0 otherwise. /
1371
1372	static Idx
1373	__attribute__ ((pure))
1374	re_node_set_contains (const re_node_set *set, Idx elem)
1375	{
1376	__re_size_t idx, right, mid;
1377	if (set->nelem <= `0`)
1378	return `0`;
1379
1380	/ Binary search the element. /
1381	idx = `0`;
1382	right = set->nelem - `1`;
1383	while (idx < right)
1384	{
1385	mid = (idx + right) / `2`;
1386	if (set->elems[mid] < elem)
1387	idx = mid + `1`;
1388	else
1389	right = mid;
1390	}
1391	return set->elems[idx] == elem ? idx + `1` : `0`;
1392	}
1393
1394	static void
1395	re_node_set_remove_at (re_node_set *set, Idx idx)
1396	{
1397	if (idx < `0` \|\| idx >= set->nelem)
1398	return;
1399	--set->nelem;
1400	for (; idx < set->nelem; idx++)
1401	set->elems[idx] = set->elems[idx + `1`];
1402	}
1403
1404
1405	/ Add the token TOKEN to dfa->nodes, and return the index of the token.*
1406	Or return -1 if an error occurred. /*
1407
1408	static Idx
1409	re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
1410	{
1411	if (__glibc_unlikely (dfa->nodes_len >= dfa->nodes_alloc))
1412	{
1413	size_t new_nodes_alloc = dfa->nodes_alloc * `2`;
1414	Idx new_nexts, new_indices;
1415	re_node_set new_edests, new_eclosures;
1416	re_token_t *new_nodes;
1417
1418	/ Avoid overflows in realloc. /
1419	const size_t max_object_size = MAX (sizeof (re_token_t),
1420	MAX (sizeof (re_node_set),
1421	sizeof (Idx)));
1422	if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size)
1423	< new_nodes_alloc))
1424	return -`1`;
1425
1426	new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
1427	if (__glibc_unlikely (new_nodes == NULL))
1428	return -`1`;
1429	dfa->nodes = new_nodes;
1430	new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
1431	new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
1432	new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
1433	new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
1434	if (__glibc_unlikely (new_nexts == NULL \|\| new_indices == NULL
1435	\|\| new_edests == NULL \|\| new_eclosures == NULL))
1436	{
1437	re_free (new_nexts);
1438	re_free (new_indices);
1439	re_free (new_edests);
1440	re_free (new_eclosures);
1441	return -`1`;
1442	}
1443	dfa->nexts = new_nexts;
1444	dfa->org_indices = new_indices;
1445	dfa->edests = new_edests;
1446	dfa->eclosures = new_eclosures;
1447	dfa->nodes_alloc = new_nodes_alloc;
1448	}
1449	dfa->nodes[dfa->nodes_len] = token;
1450	dfa->nodes[dfa->nodes_len].constraint = `0`;
1451	#ifdef RE_ENABLE_I18N
1452	dfa->nodes[dfa->nodes_len].accept_mb =
1453	((token.type == OP_PERIOD && dfa->mb_cur_max > `1`)
1454	\|\| token.type == COMPLEX_BRACKET);
1455	#endif
1456	dfa->nexts[dfa->nodes_len] = -`1`;
1457	re_node_set_init_empty (dfa->edests + dfa->nodes_len);
1458	re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
1459	return dfa->nodes_len++;
1460	}
1461
1462	static re_hashval_t
1463	calc_state_hash (const re_node_set nodes, unsigned* int context)
1464	{
1465	re_hashval_t hash = nodes->nelem + context;
1466	Idx i;
1467	for (i = `0` ; i < nodes->nelem ; i++)
1468	hash += nodes->elems[i];
1469	return hash;
1470	}
1471
1472	/ Search for the state whose node_set is equivalent to NODES.*
1473	Return the pointer to the state, if we found it in the DFA.
1474	Otherwise create the new one and return it. In case of an error
1475	return NULL and set the error code in ERR.
1476	Note: - We assume NULL as the invalid state, then it is possible that
1477	return value is NULL and ERR is REG_NOERROR.
1478	- We never return non-NULL value in case of any errors, it is for
1479	optimization. /*
1480
1481	static re_dfastate_t *
1482	__attribute_warn_unused_result__
1483	re_acquire_state (reg_errcode_t err, const* re_dfa_t *dfa,
1484	const re_node_set *nodes)
1485	{
1486	re_hashval_t hash;
1487	re_dfastate_t *new_state;
1488	struct re_state_table_entry *spot;
1489	Idx i;
1490	#if defined GCC_LINT \|\| defined lint
1491	/ Suppress bogus uninitialized-variable warnings. /
1492	*err = REG_NOERROR;
1493	#endif
1494	if (__glibc_unlikely (nodes->nelem == `0`))
1495	{
1496	*err = REG_NOERROR;
1497	return NULL;
1498	}
1499	hash = calc_state_hash (nodes, `0`);
1500	spot = dfa->state_table + (hash & dfa->state_hash_mask);
1501
1502	for (i = `0` ; i < spot->num ; i++)
1503	{
1504	re_dfastate_t *state = spot->array[i];
1505	if (hash != state->hash)
1506	continue;
1507	if (re_node_set_compare (&state->nodes, nodes))
1508	return state;
1509	}
1510
1511	/ There are no appropriate state in the dfa, create the new one. /
1512	new_state = create_ci_newstate (dfa, nodes, hash);
1513	if (__glibc_unlikely (new_state == NULL))
1514	*err = REG_ESPACE;
1515
1516	return new_state;
1517	}
1518
1519	/ Search for the state whose node_set is equivalent to NODES and*
1520	whose context is equivalent to CONTEXT.
1521	Return the pointer to the state, if we found it in the DFA.
1522	Otherwise create the new one and return it. In case of an error
1523	return NULL and set the error code in ERR.
1524	Note: - We assume NULL as the invalid state, then it is possible that
1525	return value is NULL and ERR is REG_NOERROR.
1526	- We never return non-NULL value in case of any errors, it is for
1527	optimization. /*
1528
1529	static re_dfastate_t *
1530	__attribute_warn_unused_result__
1531	re_acquire_state_context (reg_errcode_t err, const* re_dfa_t *dfa,
1532	const re_node_set nodes, unsigned* int context)
1533	{
1534	re_hashval_t hash;
1535	re_dfastate_t *new_state;
1536	struct re_state_table_entry *spot;
1537	Idx i;
1538	#if defined GCC_LINT \|\| defined lint
1539	/ Suppress bogus uninitialized-variable warnings. /
1540	*err = REG_NOERROR;
1541	#endif
1542	if (nodes->nelem == `0`)
1543	{
1544	*err = REG_NOERROR;
1545	return NULL;
1546	}
1547	hash = calc_state_hash (nodes, context);
1548	spot = dfa->state_table + (hash & dfa->state_hash_mask);
1549
1550	for (i = `0` ; i < spot->num ; i++)
1551	{
1552	re_dfastate_t *state = spot->array[i];
1553	if (state->hash == hash
1554	&& state->context == context
1555	&& re_node_set_compare (state->entrance_nodes, nodes))
1556	return state;
1557	}
1558	/ There are no appropriate state in 'dfa', create the new one. /
1559	new_state = create_cd_newstate (dfa, nodes, context, hash);
1560	if (__glibc_unlikely (new_state == NULL))
1561	*err = REG_ESPACE;
1562
1563	return new_state;
1564	}
1565
1566	/ Finish initialization of the new state NEWSTATE, and using its hash value*
1567	HASH put in the appropriate bucket of DFA's state table. Return value
1568	indicates the error code if failed. /*
1569
1570	static reg_errcode_t
1571	__attribute_warn_unused_result__
1572	register_state (const re_dfa_t dfa, re_dfastate_t newstate,
1573	re_hashval_t hash)
1574	{
1575	struct re_state_table_entry *spot;
1576	reg_errcode_t err;
1577	Idx i;
1578
1579	newstate->hash = hash;
1580	err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
1581	if (__glibc_unlikely (err != REG_NOERROR))
1582	return REG_ESPACE;
1583	for (i = `0`; i < newstate->nodes.nelem; i++)
1584	{
1585	Idx elem = newstate->nodes.elems[i];
1586	if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
1587	if (! re_node_set_insert_last (&newstate->non_eps_nodes, elem))
1588	return REG_ESPACE;
1589	}
1590
1591	spot = dfa->state_table + (hash & dfa->state_hash_mask);
1592	if (__glibc_unlikely (spot->alloc <= spot->num))
1593	{
1594	Idx new_alloc = `2` * spot->num + `2`;
1595	re_dfastate_t *new_array = re_realloc (spot->array, re_dfastate_t ,
1596	new_alloc);
1597	if (__glibc_unlikely (new_array == NULL))
1598	return REG_ESPACE;
1599	spot->array = new_array;
1600	spot->alloc = new_alloc;
1601	}
1602	spot->array[spot->num++] = newstate;
1603	return REG_NOERROR;
1604	}
1605
1606	static void
1607	free_state (re_dfastate_t *state)
1608	{
1609	re_node_set_free (&state->non_eps_nodes);
1610	re_node_set_free (&state->inveclosure);
1611	if (state->entrance_nodes != &state->nodes)
1612	{
1613	re_node_set_free (state->entrance_nodes);
1614	re_free (state->entrance_nodes);
1615	}
1616	re_node_set_free (&state->nodes);
1617	re_free (state->word_trtable);
1618	re_free (state->trtable);
1619	re_free (state);
1620	}
1621
1622	/ Create the new state which is independent of contexts.*
1623	Return the new state if succeeded, otherwise return NULL. /*
1624
1625	static re_dfastate_t *
1626	__attribute_warn_unused_result__
1627	create_ci_newstate (const re_dfa_t dfa, const* re_node_set *nodes,
1628	re_hashval_t hash)
1629	{
1630	Idx i;
1631	reg_errcode_t err;
1632	re_dfastate_t *newstate;
1633
1634	newstate = (re_dfastate_t ) calloc (sizeof* (re_dfastate_t), `1`);
1635	if (__glibc_unlikely (newstate == NULL))
1636	return NULL;
1637	err = re_node_set_init_copy (&newstate->nodes, nodes);
1638	if (__glibc_unlikely (err != REG_NOERROR))
1639	{
1640	re_free (newstate);
1641	return NULL;
1642	}
1643
1644	newstate->entrance_nodes = &newstate->nodes;
1645	for (i = `0` ; i < nodes->nelem ; i++)
1646	{
1647	re_token_t *node = dfa->nodes + nodes->elems[i];
1648	re_token_type_t type = node->type;
1649	if (type == CHARACTER && !node->constraint)
1650	continue;
1651	#ifdef RE_ENABLE_I18N
1652	newstate->accept_mb \|= node->accept_mb;
1653	#endif /* RE_ENABLE_I18N */
1654
1655	/ If the state has the halt node, the state is a halt state. /
1656	if (type == END_OF_RE)
1657	newstate->halt = `1`;
1658	else if (type == OP_BACK_REF)
1659	newstate->has_backref = `1`;
1660	else if (type == ANCHOR \|\| node->constraint)
1661	newstate->has_constraint = `1`;
1662	}
1663	err = register_state (dfa, newstate, hash);
1664	if (__glibc_unlikely (err != REG_NOERROR))
1665	{
1666	free_state (newstate);
1667	newstate = NULL;
1668	}
1669	return newstate;
1670	}
1671
1672	/ Create the new state which is depend on the context CONTEXT.*
1673	Return the new state if succeeded, otherwise return NULL. /*
1674
1675	static re_dfastate_t *
1676	__attribute_warn_unused_result__
1677	create_cd_newstate (const re_dfa_t dfa, const* re_node_set *nodes,
1678	unsigned int context, re_hashval_t hash)
1679	{
1680	Idx i, nctx_nodes = `0`;
1681	reg_errcode_t err;
1682	re_dfastate_t *newstate;
1683
1684	newstate = (re_dfastate_t ) calloc (sizeof* (re_dfastate_t), `1`);
1685	if (__glibc_unlikely (newstate == NULL))
1686	return NULL;
1687	err = re_node_set_init_copy (&newstate->nodes, nodes);
1688	if (__glibc_unlikely (err != REG_NOERROR))
1689	{
1690	re_free (newstate);
1691	return NULL;
1692	}
1693
1694	newstate->context = context;
1695	newstate->entrance_nodes = &newstate->nodes;
1696
1697	for (i = `0` ; i < nodes->nelem ; i++)
1698	{
1699	re_token_t *node = dfa->nodes + nodes->elems[i];
1700	re_token_type_t type = node->type;
1701	unsigned int constraint = node->constraint;
1702
1703	if (type == CHARACTER && !constraint)
1704	continue;
1705	#ifdef RE_ENABLE_I18N
1706	newstate->accept_mb \|= node->accept_mb;
1707	#endif /* RE_ENABLE_I18N */
1708
1709	/ If the state has the halt node, the state is a halt state. /
1710	if (type == END_OF_RE)
1711	newstate->halt = `1`;
1712	else if (type == OP_BACK_REF)
1713	newstate->has_backref = `1`;
1714
1715	if (constraint)
1716	{
1717	if (newstate->entrance_nodes == &newstate->nodes)
1718	{
1719	newstate->entrance_nodes = re_malloc (re_node_set, `1`);
1720	if (__glibc_unlikely (newstate->entrance_nodes == NULL))
1721	{
1722	free_state (newstate);
1723	return NULL;
1724	}
1725	if (re_node_set_init_copy (newstate->entrance_nodes, nodes)
1726	!= REG_NOERROR)
1727	return NULL;
1728	nctx_nodes = `0`;
1729	newstate->has_constraint = `1`;
1730	}
1731
1732	if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1733	{
1734	re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1735	++nctx_nodes;
1736	}
1737	}
1738	}
1739	err = register_state (dfa, newstate, hash);
1740	if (__glibc_unlikely (err != REG_NOERROR))
1741	{
1742	free_state (newstate);
1743	newstate = NULL;
1744	}
1745	return newstate;
1746	}
1747

Browse the source code of glibc_src_2.30/posix/regex_internal.c