ld-ctype.c source code [glibc_src_2.25/locale/programs/ld-ctype.c]

/* Copyright (C) 1995-2017 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.  */
17
18	#ifdef HAVE_CONFIG_H
19	# include <config.h>
20	#endif
21
22	#include <alloca.h>
23	#include <byteswap.h>
24	#include <endian.h>
25	#include <errno.h>
26	#include <limits.h>
27	#include <obstack.h>
28	#include <stdlib.h>
29	#include <string.h>
30	#include <wchar.h>
31	#include <wctype.h>
32	#include <stdint.h>
33	#include <sys/uio.h>
34
35	#include "localedef.h"
36	#include "charmap.h"
37	#include "localeinfo.h"
38	#include "langinfo.h"
39	#include "linereader.h"
40	#include "locfile-token.h"
41	#include "locfile.h"
42
43	#include <assert.h>
44
45
/* The bit used for representing a special class.  BITPOS turns a class
   token into its zero-based position relative to tok_upper; BIT and
   BITw turn that position into a mask through _ISbit and _ISwbit
   respectively.  */
#define BITPOS(class) ((class) - tok_upper)
#define BIT(class) (_ISbit (BITPOS (class)))
#define BITw(class) (_ISwbit (BITPOS (class)))

/* Expand to the lvalue find_idx returns for VALUE in the table
   ctype->COLLECTION, passing the matching COLLECTION_max and
   COLLECTION_act members along.  IDX is pasted textually after each
   member name; the callers visible in this file pass it empty.  */
#define ELEM(ctype, collection, idx, value) \
  *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx, \
             &ctype->collection##_act idx, value)


/* To be compatible with former implementations we for now restrict
   the number of bits for character classes to 16.  When compatibility
   is not necessary anymore increase the number to 32.  */
#define char_class_t uint16_t
#define char_class32_t uint32_t
61
62
/* Type to describe a transliteration action.  We have a possibly
   multiple character from-string and a set of multiple character
   to-strings.  All are 32bit values since this is what is used in
   the gconv functions.  */
struct translit_to_t
{
  /* One replacement string, stored as 32-bit values.  */
  uint32_t *str;

  /* Next alternative replacement in the singly linked list.  */
  struct translit_to_t *next;
};
73
/* One transliteration rule: a from-string together with the list of
   its replacement strings.  */
struct translit_t
{
  /* The string to be replaced, stored as 32-bit values.  */
  uint32_t *from;

  /* File name and line number recorded for this rule
     (presumably where it was defined; the code that fills them is
     not visible here).  */
  const char *fname;
  size_t lineno;

  /* List of replacement strings.  */
  struct translit_to_t *to;

  /* Next rule in the singly linked list.  */
  struct translit_t *next;
};
85
/* One entry of the translit_ignore list.  ctype_finish orders these
   entries by FROM, and ctype_output writes FROM, TO and STEP of every
   entry to the locale file in list order.  */
struct translit_ignore_t
{
  uint32_t from;
  uint32_t to;
  uint32_t step;

  /* File name and line number recorded for this entry
     (presumably where it was defined; not set in the visible code).  */
  const char *fname;
  size_t lineno;

  /* Next entry in the singly linked list.  */
  struct translit_ignore_t *next;
};
97
98
/* Type to describe a transliteration include statement.  */
struct translit_include_t
{
  /* Names given in the include statement; judging by the member names
     these are the locale and the repertoire map to take the
     transliteration data from (the consuming code is not visible
     here).  */
  const char *copy_locale;
  const char *copy_repertoire;

  /* Next include statement in the singly linked list.  */
  struct translit_include_t *next;
};
107
108	/ Provide some dummy pointer for empty string. /
109	static uint32_t no_str[] = { `0` };
110
111
112	/ Sparse table of uint32_t. /
113	#define TABLE idx_table
114	#define ELEMENT uint32_t
115	#define DEFAULT ((uint32_t) ~0)
116	#define NO_ADD_LOCALE
117	#include "3level.h"
118
119	#define TABLE wcwidth_table
120	#define ELEMENT uint8_t
121	#define DEFAULT 0xff
122	#include "3level.h"
123
124	#define TABLE wctrans_table
125	#define ELEMENT int32_t
126	#define DEFAULT 0
127	#define wctrans_table_add wctrans_table_add_internal
128	#include "3level.h"
129	#undef wctrans_table_add
/* Record in T that WC maps to MAPPED_WC.  The wctrans_table does not
   hold the mapped character itself but the difference between the
   desired result and the argument, so that difference is what gets
   handed to the generated insertion routine.  */
static inline void
wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
{
  const uint32_t delta = mapped_wc - wc;

  wctrans_table_add_internal (t, wc, delta);
}
137
/* Construction of sparse 3-level tables.
   See wchar-lookup.h for their structure and the meaning of p and q.  */

struct wctype_table
{
  /* Parameters.  */
  unsigned int p;
  unsigned int q;
  /* Working representation.  Each level has a data array together
     with an *_alloc and a *_size counter (judging by the names, the
     allocated capacity and the number of entries in use).  */
  size_t level1_alloc;
  size_t level1_size;
  uint32_t *level1;
  size_t level2_alloc;
  size_t level2_size;
  uint32_t *level2;
  size_t level3_alloc;
  size_t level3_size;
  uint32_t *level3;
  size_t result_size;
};

/* Append the table T to the locale file FILE; ctype_output calls this
   once per character class.  The definition is not in this part of
   the file.  */
static void add_locale_wctype_table (struct locale_file *file,
                                     struct wctype_table *t);
161
162	/ The real definition of the struct for the LC_CTYPE locale. /
163	struct locale_ctype_t
164	{
165	uint32_t *charnames;
166	size_t charnames_max;
167	size_t charnames_act;
168	/ An index lookup table, to speedup find_idx. /
169	struct idx_table charnames_idx;
170
171	struct repertoire_t *repertoire;
172
173	/ We will allow up to 8 * sizeof (uint32_t) character classes. /
174	#define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
175	size_t nr_charclass;
176	const char *classnames[MAX_NR_CHARCLASS];
177	uint32_t last_class_char;
178	uint32_t class256_collection[`256`];
179	uint32_t *class_collection;
180	size_t class_collection_max;
181	size_t class_collection_act;
182	uint32_t class_done;
183	uint32_t class_offset;
184
185	struct charseq **mbdigits;
186	size_t mbdigits_act;
187	size_t mbdigits_max;
188	uint32_t *wcdigits;
189	size_t wcdigits_act;
190	size_t wcdigits_max;
191
192	struct charseq *mboutdigits[`10`];
193	uint32_t wcoutdigits[`10`];
194	size_t outdigits_act;
195
196	/ If the following number ever turns out to be too small simply*
197	increase it. But I doubt it will. --drepper@gnu /*
198	#define MAX_NR_CHARMAP 16
199	const char *mapnames[MAX_NR_CHARMAP];
200	uint32_t *map_collection[MAX_NR_CHARMAP];
201	uint32_t map256_collection[`2`][`256`];
202	size_t map_collection_max[MAX_NR_CHARMAP];
203	size_t map_collection_act[MAX_NR_CHARMAP];
204	size_t map_collection_nr;
205	size_t last_map_idx;
206	int tomap_done[MAX_NR_CHARMAP];
207	uint32_t map_offset;
208
209	/ Transliteration information. /
210	struct translit_include_t *translit_include;
211	struct translit_t *translit;
212	struct translit_ignore_t *translit_ignore;
213	uint32_t ntranslit_ignore;
214
215	uint32_t *default_missing;
216	const char *default_missing_file;
217	size_t default_missing_lineno;
218
219	uint32_t to_nonascii;
220	uint32_t nonascii_case;
221
222	/ The arrays for the binary representation. /
223	char_class_t *ctype_b;
224	char_class32_t *ctype32_b;
225	uint32_t **map_b;
226	uint32_t **map32_b;
227	uint32_t **class_b;
228	struct wctype_table *class_3level;
229	struct wctrans_table *map_3level;
230	uint32_t *class_name_ptr;
231	uint32_t *map_name_ptr;
232	struct wcwidth_table width;
233	uint32_t mb_cur_max;
234	const char *codeset_name;
235	uint32_t *translit_from_idx;
236	uint32_t *translit_from_tbl;
237	uint32_t *translit_to_idx;
238	uint32_t *translit_to_tbl;
239	uint32_t translit_idx_size;
240	size_t translit_from_tbl_size;
241	size_t translit_to_tbl_size;
242
243	struct obstack mempool;
244	};
245
246
/* Marker for an empty slot.  This has the value 0xFFFFFFFF, regardless
   whether 'int' is 16 bit, 32 bit, or 64 bit.  */
#define EMPTY ((uint32_t) ~0)


/* Allocation hooks used by the obstack macros (obstack_init and
   friends): chunks are obtained with xmalloc and released with free.  */
#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free
254
255
/* Prototypes for local functions.  */

/* Create the LC_CTYPE data of LOCALE (or share the one of COPY_LOCALE)
   unless IGNORE_CONTENT is set or the data already exists.  */
static void ctype_startup (struct linereader *lr, struct localedef_t *locale,
                           const struct charmap_t *charmap,
                           struct localedef_t *copy_locale,
                           int ignore_content);
/* Register a new character class called NAME.  */
static void ctype_class_new (struct linereader *lr,
                             struct locale_ctype_t *ctype, const char *name);
/* Register a new character map called NAME.  */
static void ctype_map_new (struct linereader *lr,
                           struct locale_ctype_t *ctype,
                           const char *name, const struct charmap_t *charmap);
/* Return the slot for IDX in *TABLE.  The ELEM macro dereferences the
   result; callers in this file also pass NULL for TABLE, MAX and ACT
   when they only want IDX to be entered into the name array.  */
static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table,
                           size_t *max, size_t *act, uint32_t idx);
/* Set default value for classes not specified.  */
static void set_class_defaults (struct locale_ctype_t *ctype,
                                const struct charmap_t *charmap,
                                struct repertoire_t *repertoire);
/* Prepare the output: find the sizes of the tables and fill them.  */
static void allocate_arrays (struct locale_ctype_t *ctype,
                             const struct charmap_t *charmap,
                             struct repertoire_t *repertoire);
274
275
/* Symbolic names of the ten decimal digits; ctype_finish tries these
   in the charmap when the single-character names from `digits' are
   not found.  */
static const char *longnames[] =
{
  "zero", "one", "two", "three", "four",
  "five", "six", "seven", "eight", "nine"
};
/* The U+0030..U+0039 code points of the ten decimal digits written as
   Uxxxxxxxx symbol names.  */
static const char *uninames[] =
{
  "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
  "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
};
/* The ten decimal digits as single characters; used both as charmap
   lookup keys and as the fallback byte values in ctype_finish.  */
static const unsigned char digits[] = "0123456789";
287
288
289	static void
290	ctype_startup (struct linereader lr, struct* localedef_t *locale,
291	const struct charmap_t *charmap,
292	struct localedef_t copy_locale, int* ignore_content)
293	{
294	unsigned int cnt;
295	struct locale_ctype_t *ctype;
296
297	if (!ignore_content && locale->categories[LC_CTYPE].ctype == NULL)
298	{
299	if (copy_locale == NULL)
300	{
301	/ Allocate the needed room. /
302	locale->categories[LC_CTYPE].ctype = ctype =
303	(struct locale_ctype_t *) xcalloc (`1`,
304	sizeof (struct locale_ctype_t));
305
306	/ We have seen no names yet. /
307	ctype->charnames_max = charmap->mb_cur_max == `1` ? `256` : `512`;
308	ctype->charnames = (uint32_t *) xmalloc (ctype->charnames_max
309	* sizeof (uint32_t));
310	for (cnt = `0`; cnt < `256`; ++cnt)
311	ctype->charnames[cnt] = cnt;
312	ctype->charnames_act = `256`;
313	idx_table_init (&ctype->charnames_idx);
314
315	/ Fill character class information. /
316	ctype->last_class_char = ILLEGAL_CHAR_VALUE;
317	/ The order of the following instructions determines the bit*
318	positions! /*
319	ctype_class_new (lr, ctype, "upper");
320	ctype_class_new (lr, ctype, "lower");
321	ctype_class_new (lr, ctype, "alpha");
322	ctype_class_new (lr, ctype, "digit");
323	ctype_class_new (lr, ctype, "xdigit");
324	ctype_class_new (lr, ctype, "space");
325	ctype_class_new (lr, ctype, "print");
326	ctype_class_new (lr, ctype, "graph");
327	ctype_class_new (lr, ctype, "blank");
328	ctype_class_new (lr, ctype, "cntrl");
329	ctype_class_new (lr, ctype, "punct");
330	ctype_class_new (lr, ctype, "alnum");
331
332	ctype->class_collection_max = charmap->mb_cur_max == `1` ? `256` : `512`;
333	ctype->class_collection
334	= (uint32_t ) xcalloc (sizeof* (unsigned long int),
335	ctype->class_collection_max);
336	ctype->class_collection_act = `256`;
337
338	/ Fill character map information. /
339	ctype->last_map_idx = MAX_NR_CHARMAP;
340	ctype_map_new (lr, ctype, "toupper", charmap);
341	ctype_map_new (lr, ctype, "tolower", charmap);
342
343	/ Fill first 256 entries in `toXXX' arrays. /
344	for (cnt = `0`; cnt < `256`; ++cnt)
345	{
346	ctype->map_collection[`0`][cnt] = cnt;
347	ctype->map_collection[`1`][cnt] = cnt;
348
349	ctype->map256_collection[`0`][cnt] = cnt;
350	ctype->map256_collection[`1`][cnt] = cnt;
351	}
352
353	if (enc_not_ascii_compatible)
354	ctype->to_nonascii = `1`;
355
356	obstack_init (&ctype->mempool);
357	}
358	else
359	ctype = locale->categories[LC_CTYPE].ctype =
360	copy_locale->categories[LC_CTYPE].ctype;
361	}
362	}
363
364
365	void
366	ctype_finish (struct localedef_t locale, const* struct charmap_t *charmap)
367	{
368	/ See POSIX.2, table 2-6 for the meaning of the following table. /
369	#define NCLASS 12
370	static const struct
371	{
372	const char *name;
373	const char allow[NCLASS];
374	}
375	valid_table[NCLASS] =
376	{
377	/ The order is important. See token.h for more information.*
378	M = Always, D = Default, - = Permitted, X = Mutually exclusive /*
379	{ "upper", "--MX-XDDXXX-" },
380	{ "lower", "--MX-XDDXXX-" },
381	{ "alpha", "---X-XDDXXX-" },
382	{ "digit", "XXX--XDDXXX-" },
383	{ "xdigit", "-----XDDXXX-" },
384	{ "space", "XXXXX------X" },
385	{ "print", "---------X--" },
386	{ "graph", "---------X--" },
387	{ "blank", "XXXXXM-----X" },
388	{ "cntrl", "XXXXX-XX--XX" },
389	{ "punct", "XXXXX-DD-X-X" },
390	{ "alnum", "-----XDDXXX-" }
391	};
392	size_t cnt;
393	int cls1, cls2;
394	uint32_t space_value;
395	struct charseq *space_seq;
396	struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
397	int warned;
398	const void *key;
399	size_t len;
400	void *vdata;
401	void *curs;
402
403	/ Now resolve copying and also handle completely missing definitions. /
404	if (ctype == NULL)
405	{
406	const char *repertoire_name;
407
408	/ First see whether we were supposed to copy. If yes, find the*
409	actual definition. /*
410	if (locale->copy_name[LC_CTYPE] != NULL)
411	{
412	/ Find the copying locale. This has to happen transitively since*
413	the locale we are copying from might also copying another one. /*
414	struct localedef_t *from = locale;
415
416	do
417	from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
418	from->repertoire_name, charmap);
419	while (from->categories[LC_CTYPE].ctype == NULL
420	&& from->copy_name[LC_CTYPE] != NULL);
421
422	ctype = locale->categories[LC_CTYPE].ctype
423	= from->categories[LC_CTYPE].ctype;
424	}
425
426	/ If there is still no definition issue an warning and create an*
427	empty one. /*
428	if (ctype == NULL)
429	{
430	if (! be_quiet)
431	WITH_CUR_LOCALE (error (`0`, `0`, _("\
432	No definition for %s category found"), "LC_CTYPE"));
433	ctype_startup (NULL, locale, charmap, NULL, `0`);
434	ctype = locale->categories[LC_CTYPE].ctype;
435	}
436
437	/ Get the repertoire we have to use. /
438	repertoire_name = locale->repertoire_name ?: repertoire_global;
439	if (repertoire_name != NULL)
440	ctype->repertoire = repertoire_read (repertoire_name);
441	}
442
443	/ We need the name of the currently used 8-bit character set to*
444	make correct conversion between this 8-bit representation and the
445	ISO 10646 character set used internally for wide characters. /*
446	ctype->codeset_name = charmap->code_set_name;
447	if (ctype->codeset_name == NULL)
448	{
449	if (! be_quiet)
450	WITH_CUR_LOCALE (error (`0`, `0`, _("\
451	No character set name specified in charmap")));
452	ctype->codeset_name = "//UNKNOWN//";
453	}
454
455	/ Set default value for classes not specified. /
456	set_class_defaults (ctype, charmap, ctype->repertoire);
457
458	/ Check according to table. /
459	for (cnt = `0`; cnt < ctype->class_collection_act; ++cnt)
460	{
461	uint32_t tmp = ctype->class_collection[cnt];
462
463	if (tmp != `0`)
464	{
465	for (cls1 = `0`; cls1 < NCLASS; ++cls1)
466	if ((tmp & _ISwbit (cls1)) != `0`)
467	for (cls2 = `0`; cls2 < NCLASS; ++cls2)
468	if (valid_table[cls1].allow[cls2] != `'-'`)
469	{
470	int eq = (tmp & _ISwbit (cls2)) != `0`;
471	switch (valid_table[cls1].allow[cls2])
472	{
473	case `'M'`:
474	if (!eq)
475	{
476	uint32_t value = ctype->charnames[cnt];
477
478	if (!be_quiet)
479	WITH_CUR_LOCALE (error (`0`, `0`, _("\
480	character L'\\u%0*x' in class `%s' must be in class `%s'"),
481	value > `0xffff` ? `8` : `4`,
482	value,
483	valid_table[cls1].name,
484	valid_table[cls2].name));
485	}
486	break;
487
488	case `'X'`:
489	if (eq)
490	{
491	uint32_t value = ctype->charnames[cnt];
492
493	if (!be_quiet)
494	WITH_CUR_LOCALE (error (`0`, `0`, _("\
495	character L'\\u%0*x' in class `%s' must not be in class `%s'"),
496	value > `0xffff` ? `8` : `4`,
497	value,
498	valid_table[cls1].name,
499	valid_table[cls2].name));
500	}
501	break;
502
503	case `'D'`:
504	ctype->class_collection[cnt] \|= _ISwbit (cls2);
505	break;
506
507	default:
508	WITH_CUR_LOCALE (error (`5`, `0`, _("\
509	internal error in %s, line %u"), __FUNCTION__, __LINE__));
510	}
511	}
512	}
513	}
514
515	for (cnt = `0`; cnt < `256`; ++cnt)
516	{
517	uint32_t tmp = ctype->class256_collection[cnt];
518
519	if (tmp != `0`)
520	{
521	for (cls1 = `0`; cls1 < NCLASS; ++cls1)
522	if ((tmp & _ISbit (cls1)) != `0`)
523	for (cls2 = `0`; cls2 < NCLASS; ++cls2)
524	if (valid_table[cls1].allow[cls2] != `'-'`)
525	{
526	int eq = (tmp & _ISbit (cls2)) != `0`;
527	switch (valid_table[cls1].allow[cls2])
528	{
529	case `'M'`:
530	if (!eq)
531	{
532	char buf[`17`];
533
534	snprintf (buf, sizeof buf, "\\%Zo", cnt);
535
536	if (!be_quiet)
537	WITH_CUR_LOCALE (error (`0`, `0`, _("\
538	character '%s' in class `%s' must be in class `%s'"),
539	buf,
540	valid_table[cls1].name,
541	valid_table[cls2].name));
542	}
543	break;
544
545	case `'X'`:
546	if (eq)
547	{
548	char buf[`17`];
549
550	snprintf (buf, sizeof buf, "\\%Zo", cnt);
551
552	if (!be_quiet)
553	WITH_CUR_LOCALE (error (`0`, `0`, _("\
554	character '%s' in class `%s' must not be in class `%s'"),
555	buf,
556	valid_table[cls1].name,
557	valid_table[cls2].name));
558	}
559	break;
560
561	case `'D'`:
562	ctype->class256_collection[cnt] \|= _ISbit (cls2);
563	break;
564
565	default:
566	WITH_CUR_LOCALE (error (`5`, `0`, _("\
567	internal error in %s, line %u"), __FUNCTION__, __LINE__));
568	}
569	}
570	}
571	}
572
573	/ ... and now test <SP> as a special case. /
574	space_value = `32`;
575	if (((cnt = BITPOS (tok_space),
576	(ELEM (ctype, class_collection, , space_value)
577	& BITw (tok_space)) == `0`)
578	\|\| (cnt = BITPOS (tok_blank),
579	(ELEM (ctype, class_collection, , space_value)
580	& BITw (tok_blank)) == `0`)))
581	{
582	if (!be_quiet)
583	WITH_CUR_LOCALE (error (`0`, `0`, _("<SP> character not in class `%s'"),
584	valid_table[cnt].name));
585	}
586	else if (((cnt = BITPOS (tok_punct),
587	(ELEM (ctype, class_collection, , space_value)
588	& BITw (tok_punct)) != `0`)
589	\|\| (cnt = BITPOS (tok_graph),
590	(ELEM (ctype, class_collection, , space_value)
591	& BITw (tok_graph))
592	!= `0`)))
593	{
594	if (!be_quiet)
595	WITH_CUR_LOCALE (error (`0`, `0`, _("\
596	<SP> character must not be in class `%s'"),
597	valid_table[cnt].name));
598	}
599	else
600	ELEM (ctype, class_collection, , space_value) \|= BITw (tok_print);
601
602	space_seq = charmap_find_value (charmap, "SP", `2`);
603	if (space_seq == NULL)
604	space_seq = charmap_find_value (charmap, "space", `5`);
605	if (space_seq == NULL)
606	space_seq = charmap_find_value (charmap, "U00000020", `9`);
607	if (space_seq == NULL \|\| space_seq->nbytes != `1`)
608	{
609	if (!be_quiet)
610	WITH_CUR_LOCALE (error (`0`, `0`, _("\
611	character <SP> not defined in character map")));
612	}
613	else if (((cnt = BITPOS (tok_space),
614	(ctype->class256_collection[space_seq->bytes[`0`]]
615	& BIT (tok_space)) == `0`)
616	\|\| (cnt = BITPOS (tok_blank),
617	(ctype->class256_collection[space_seq->bytes[`0`]]
618	& BIT (tok_blank)) == `0`)))
619	{
620	if (!be_quiet)
621	WITH_CUR_LOCALE (error (`0`, `0`, _("<SP> character not in class `%s'"),
622	valid_table[cnt].name));
623	}
624	else if (((cnt = BITPOS (tok_punct),
625	(ctype->class256_collection[space_seq->bytes[`0`]]
626	& BIT (tok_punct)) != `0`)
627	\|\| (cnt = BITPOS (tok_graph),
628	(ctype->class256_collection[space_seq->bytes[`0`]]
629	& BIT (tok_graph)) != `0`)))
630	{
631	if (!be_quiet)
632	WITH_CUR_LOCALE (error (`0`, `0`, _("\
633	<SP> character must not be in class `%s'"),
634	valid_table[cnt].name));
635	}
636	else
637	ctype->class256_collection[space_seq->bytes[`0`]] \|= BIT (tok_print);
638
639	/ Check whether all single-byte characters make to their upper/lowercase*
640	equivalent according to the ASCII rules. /*
641	for (cnt = `'A'`; cnt <= `'Z'`; ++cnt)
642	{
643	uint32_t uppval = ctype->map256_collection[`0`][cnt];
644	uint32_t lowval = ctype->map256_collection[`1`][cnt];
645	uint32_t lowuppval = ctype->map256_collection[`0`][lowval];
646	uint32_t lowlowval = ctype->map256_collection[`1`][lowval];
647
648	if (uppval != cnt
649	\|\| lowval != cnt + `0x20`
650	\|\| lowuppval != cnt
651	\|\| lowlowval != cnt + `0x20`)
652	ctype->nonascii_case = `1`;
653	}
654	for (cnt = `0`; cnt < `256`; ++cnt)
655	if (cnt < `'A'` \|\| (cnt > `'Z'` && cnt < `'a'`) \|\| cnt > `'z'`)
656	if (ctype->map256_collection[`0`][cnt] != cnt
657	\|\| ctype->map256_collection[`1`][cnt] != cnt)
658	ctype->nonascii_case = `1`;
659
660	/ Now that the tests are done make sure the name array contains all*
661	characters which are handled in the WIDTH section of the
662	character set definition file. /*
663	if (charmap->width_rules != NULL)
664	for (cnt = `0`; cnt < charmap->nwidth_rules; ++cnt)
665	{
666	unsigned char bytes[charmap->mb_cur_max];
667	int nbytes = charmap->width_rules[cnt].from->nbytes;
668
669	/ We have the range of character for which the width is*
670	specified described using byte sequences of the multibyte
671	charset. We have to convert this to UCS4 now. And we
672	cannot simply convert the beginning and the end of the
673	sequence, we have to iterate over the byte sequence and
674	convert it for every single character. /*
675	memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
676
677	while (nbytes < charmap->width_rules[cnt].to->nbytes
678	\|\| memcmp (bytes, charmap->width_rules[cnt].to->bytes,
679	nbytes) <= `0`)
680	{
681	/ Find the UCS value for `bytes'. /
682	int inner;
683	uint32_t wch;
684	struct charseq *seq
685	= charmap_find_symbol (charmap, (char *) bytes, nbytes);
686
687	if (seq == NULL)
688	wch = ILLEGAL_CHAR_VALUE;
689	else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
690	wch = seq->ucs4;
691	else
692	wch = repertoire_find_value (ctype->repertoire, seq->name,
693	strlen (seq->name));
694
695	if (wch != ILLEGAL_CHAR_VALUE)
696	/ We are only interested in the side-effects of the*
697	`find_idx' call. It will add appropriate entries in
698	the name array if this is necessary. /*
699	(void) find_idx (ctype, NULL, NULL, NULL, wch);
700
701	/ "Increment" the bytes sequence. /
702	inner = nbytes - `1`;
703	while (inner >= `0` && bytes[inner] == `0xff`)
704	--inner;
705
706	if (inner < `0`)
707	{
708	/ We have to extend the byte sequence. /
709	if (nbytes >= charmap->width_rules[cnt].to->nbytes)
710	break;
711
712	bytes[`0`] = `1`;
713	memset (&bytes[`1`], `0`, nbytes);
714	++nbytes;
715	}
716	else
717	{
718	++bytes[inner];
719	while (++inner < nbytes)
720	bytes[inner] = `0`;
721	}
722	}
723	}
724
725	/ Now set all the other characters of the character set to the*
726	default width. /*
727	curs = NULL;
728	while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == `0`)
729	{
730	struct charseq data = (struct* charseq *) vdata;
731
732	if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
733	data->ucs4 = repertoire_find_value (ctype->repertoire,
734	data->name, len);
735
736	if (data->ucs4 != ILLEGAL_CHAR_VALUE)
737	(void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
738	}
739
740	/ There must be a multiple of 10 digits. /
741	if (ctype->mbdigits_act % `10` != `0`)
742	{
743	assert (ctype->mbdigits_act == ctype->wcdigits_act);
744	ctype->wcdigits_act -= ctype->mbdigits_act % `10`;
745	ctype->mbdigits_act -= ctype->mbdigits_act % `10`;
746	WITH_CUR_LOCALE (error (`0`, `0`, _("\
747	`digit' category has not entries in groups of ten")));
748	}
749
750	/ Check the input digits. There must be a multiple of ten available.*
751	In each group it could be that one or the other character is missing.
752	In this case the whole group must be removed. /*
753	cnt = `0`;
754	while (cnt < ctype->mbdigits_act)
755	{
756	size_t inner;
757	for (inner = `0`; inner < `10`; ++inner)
758	if (ctype->mbdigits[cnt + inner] == NULL)
759	break;
760
761	if (inner == `10`)
762	cnt += `10`;
763	else
764	{
765	/ Remove the group. /
766	memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + `10`],
767	((ctype->wcdigits_act - cnt - `10`)
768	* sizeof (ctype->mbdigits[`0`])));
769	ctype->mbdigits_act -= `10`;
770	}
771	}
772
773	/ If no input digits are given use the default. /
774	if (ctype->mbdigits_act == `0`)
775	{
776	if (ctype->mbdigits_max == `0`)
777	{
778	ctype->mbdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
779	`10` * sizeof (struct charseq *));
780	ctype->mbdigits_max = `10`;
781	}
782
783	for (cnt = `0`; cnt < `10`; ++cnt)
784	{
785	ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
786	(char *) digits + cnt, `1`);
787	if (ctype->mbdigits[cnt] == NULL)
788	{
789	ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
790	longnames[cnt],
791	strlen (longnames[cnt]));
792	if (ctype->mbdigits[cnt] == NULL)
793	{
794	/ Hum, this ain't good. /
795	WITH_CUR_LOCALE (error (`0`, `0`, _("\
796	no input digits defined and none of the standard names in the charmap")));
797
798	ctype->mbdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
799	sizeof (struct charseq) + `1`);
800
801	/ This is better than nothing. /
802	ctype->mbdigits[cnt]->bytes[`0`] = digits[cnt];
803	ctype->mbdigits[cnt]->nbytes = `1`;
804	}
805	}
806	}
807
808	ctype->mbdigits_act = `10`;
809	}
810
811	/ Check the wide character input digits. There must be a multiple*
812	of ten available. In each group it could be that one or the other
813	character is missing. In this case the whole group must be
814	removed. /*
815	cnt = `0`;
816	while (cnt < ctype->wcdigits_act)
817	{
818	size_t inner;
819	for (inner = `0`; inner < `10`; ++inner)
820	if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
821	break;
822
823	if (inner == `10`)
824	cnt += `10`;
825	else
826	{
827	/ Remove the group. /
828	memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + `10`],
829	((ctype->wcdigits_act - cnt - `10`)
830	* sizeof (ctype->wcdigits[`0`])));
831	ctype->wcdigits_act -= `10`;
832	}
833	}
834
835	/ If no input digits are given use the default. /
836	if (ctype->wcdigits_act == `0`)
837	{
838	if (ctype->wcdigits_max == `0`)
839	{
840	ctype->wcdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
841	`10` * sizeof (uint32_t));
842	ctype->wcdigits_max = `10`;
843	}
844
845	for (cnt = `0`; cnt < `10`; ++cnt)
846	ctype->wcdigits[cnt] = L`'0'` + cnt;
847
848	ctype->mbdigits_act = `10`;
849	}
850
851	/ Check the outdigits. /
852	warned = `0`;
853	for (cnt = `0`; cnt < `10`; ++cnt)
854	if (ctype->mboutdigits[cnt] == NULL)
855	{
856	static struct charseq replace[`2`];
857
858	if (!warned)
859	{
860	WITH_CUR_LOCALE (error (`0`, `0`, _("\
861	not all characters used in `outdigit' are available in the charmap")));
862	warned = `1`;
863	}
864
865	replace[`0`].nbytes = `1`;
866	replace[`0`].bytes[`0`] = `'?'`;
867	replace[`0`].bytes[`1`] = `'\0'`;
868	ctype->mboutdigits[cnt] = &replace[`0`];
869	}
870
871	warned = `0`;
872	for (cnt = `0`; cnt < `10`; ++cnt)
873	if (ctype->wcoutdigits[cnt] == `0`)
874	{
875	if (!warned)
876	{
877	WITH_CUR_LOCALE (error (`0`, `0`, _("\
878	not all characters used in `outdigit' are available in the repertoire")));
879	warned = `1`;
880	}
881
882	ctype->wcoutdigits[cnt] = L`'?'`;
883	}
884
885	/ Sort the entries in the translit_ignore list. /
886	if (ctype->translit_ignore != NULL)
887	{
888	struct translit_ignore_t *firstp = ctype->translit_ignore;
889	struct translit_ignore_t *runp;
890
891	ctype->ntranslit_ignore = `1`;
892
893	for (runp = firstp->next; runp != NULL; runp = runp->next)
894	{
895	struct translit_ignore_t *lastp = NULL;
896	struct translit_ignore_t *cmpp;
897
898	++ctype->ntranslit_ignore;
899
900	for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next)
901	if (runp->from < cmpp->from)
902	break;
903
904	runp->next = lastp;
905	if (lastp == NULL)
906	firstp = runp;
907	}
908
909	ctype->translit_ignore = firstp;
910	}
911	}
912
913
914	void
915	ctype_output (struct localedef_t locale, const* struct charmap_t *charmap,
916	const char *output_path)
917	{
918	struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
919	const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)
920	+ ctype->nr_charclass + ctype->map_collection_nr);
921	struct locale_file file;
922	uint32_t default_missing_len;
923	size_t elem, cnt;
924
925	/ Now prepare the output: Find the sizes of the table we can use. /
926	allocate_arrays (ctype, charmap, ctype->repertoire);
927
928	default_missing_len = (ctype->default_missing
929	? wcslen ((wchar_t *) ctype->default_missing)
930	: `0`);
931
932	init_locale_data (&file, nelems);
933	for (elem = `0`; elem < nelems; ++elem)
934	{
935	if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))
936	switch (elem)
937	{
938	#define CTYPE_EMPTY(name) \
939	case name: \
940	add_locale_empty (&file); \
941	break
942
943	CTYPE_EMPTY(_NL_CTYPE_GAP1);
944	CTYPE_EMPTY(_NL_CTYPE_GAP2);
945	CTYPE_EMPTY(_NL_CTYPE_GAP3);
946	CTYPE_EMPTY(_NL_CTYPE_GAP4);
947	CTYPE_EMPTY(_NL_CTYPE_GAP5);
948	CTYPE_EMPTY(_NL_CTYPE_GAP6);
949
950	#define CTYPE_RAW_DATA(name, base, size) \
951	case _NL_ITEM_INDEX (name): \
952	add_locale_raw_data (&file, base, size); \
953	break
954
955	CTYPE_RAW_DATA (_NL_CTYPE_CLASS,
956	ctype->ctype_b,
957	(`256` + `128`) * sizeof (char_class_t));
958
959	#define CTYPE_UINT32_ARRAY(name, base, n_elems) \
960	case _NL_ITEM_INDEX (name): \
961	add_locale_uint32_array (&file, base, n_elems); \
962	break
963
964	CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER, ctype->map_b[`0`], `256` + `128`);
965	CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER, ctype->map_b[`1`], `256` + `128`);
966	CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER32, ctype->map32_b[`0`], `256`);
967	CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER32, ctype->map32_b[`1`], `256`);
968	CTYPE_RAW_DATA (_NL_CTYPE_CLASS32,
969	ctype->ctype32_b,
970	`256` * sizeof (char_class32_t));
971
972	#define CTYPE_UINT32(name, value) \
973	case _NL_ITEM_INDEX (name): \
974	add_locale_uint32 (&file, value); \
975	break
976
977	CTYPE_UINT32 (_NL_CTYPE_CLASS_OFFSET, ctype->class_offset);
978	CTYPE_UINT32 (_NL_CTYPE_MAP_OFFSET, ctype->map_offset);
979	CTYPE_UINT32 (_NL_CTYPE_TRANSLIT_TAB_SIZE, ctype->translit_idx_size);
980
981	CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_IDX,
982	ctype->translit_from_idx,
983	ctype->translit_idx_size);
984
985	CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_TBL,
986	ctype->translit_from_tbl,
987	ctype->translit_from_tbl_size
988	/ sizeof (uint32_t));
989
990	CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_IDX,
991	ctype->translit_to_idx,
992	ctype->translit_idx_size);
993
994	CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_TBL,
995	ctype->translit_to_tbl,
996	ctype->translit_to_tbl_size / sizeof (uint32_t));
997
998	case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
999	/ The class name array. /
1000	start_locale_structure (&file);
1001	for (cnt = `0`; cnt < ctype->nr_charclass; ++cnt)
1002	add_locale_string (&file, ctype->classnames[cnt]);
1003	add_locale_char (&file, `0`);
1004	align_locale_data (&file, LOCFILE_ALIGN);
1005	end_locale_structure (&file);
1006	break;
1007
1008	case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
1009	/ The class name array. /
1010	start_locale_structure (&file);
1011	for (cnt = `0`; cnt < ctype->map_collection_nr; ++cnt)
1012	add_locale_string (&file, ctype->mapnames[cnt]);
1013	add_locale_char (&file, `0`);
1014	align_locale_data (&file, LOCFILE_ALIGN);
1015	end_locale_structure (&file);
1016	break;
1017
1018	case _NL_ITEM_INDEX (_NL_CTYPE_WIDTH):
1019	add_locale_wcwidth_table (&file, &ctype->width);
1020	break;
1021
1022	CTYPE_UINT32 (_NL_CTYPE_MB_CUR_MAX, ctype->mb_cur_max);
1023
1024	case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
1025	add_locale_string (&file, ctype->codeset_name);
1026	break;
1027
1028	CTYPE_UINT32 (_NL_CTYPE_MAP_TO_NONASCII, ctype->to_nonascii);
1029
1030	CTYPE_UINT32 (_NL_CTYPE_NONASCII_CASE, ctype->nonascii_case);
1031
1032	case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
1033	add_locale_uint32 (&file, ctype->mbdigits_act / `10`);
1034	break;
1035
1036	case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
1037	add_locale_uint32 (&file, ctype->wcdigits_act / `10`);
1038	break;
1039
1040	case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
1041	start_locale_structure (&file);
1042	for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1043	cnt < ctype->mbdigits_act; cnt += `10`)
1044	{
1045	add_locale_raw_data (&file, ctype->mbdigits[cnt]->bytes,
1046	ctype->mbdigits[cnt]->nbytes);
1047	add_locale_char (&file, `0`);
1048	}
1049	end_locale_structure (&file);
1050	break;
1051
1052	case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
1053	start_locale_structure (&file);
1054	cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
1055	add_locale_raw_data (&file, ctype->mboutdigits[cnt]->bytes,
1056	ctype->mboutdigits[cnt]->nbytes);
1057	add_locale_char (&file, `0`);
1058	end_locale_structure (&file);
1059	break;
1060
1061	case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
1062	start_locale_structure (&file);
1063	for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
1064	cnt < ctype->wcdigits_act; cnt += `10`)
1065	add_locale_uint32 (&file, ctype->wcdigits[cnt]);
1066	end_locale_structure (&file);
1067	break;
1068
1069	case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
1070	cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
1071	add_locale_uint32 (&file, ctype->wcoutdigits[cnt]);
1072	break;
1073
1074	case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
1075	add_locale_uint32 (&file, default_missing_len);
1076	break;
1077
1078	case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
1079	add_locale_uint32_array (&file, ctype->default_missing,
1080	default_missing_len);
1081	break;
1082
1083	case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
1084	add_locale_uint32 (&file, ctype->ntranslit_ignore);
1085	break;
1086
1087	case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
1088	start_locale_structure (&file);
1089	{
1090	struct translit_ignore_t *runp;
1091	for (runp = ctype->translit_ignore; runp != NULL;
1092	runp = runp->next)
1093	{
1094	add_locale_uint32 (&file, runp->from);
1095	add_locale_uint32 (&file, runp->to);
1096	add_locale_uint32 (&file, runp->step);
1097	}
1098	}
1099	end_locale_structure (&file);
1100	break;
1101
1102	default:
1103	assert (! "unknown CTYPE element");
1104	}
1105	else
1106	{
1107	/ Handle extra maps. /
1108	size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
1109	if (nr < ctype->nr_charclass)
1110	{
1111	start_locale_prelude (&file);
1112	add_locale_uint32_array (&file, ctype->class_b[nr], `256` / `32`);
1113	end_locale_prelude (&file);
1114	add_locale_wctype_table (&file, &ctype->class_3level[nr]);
1115	}
1116	else
1117	{
1118	nr -= ctype->nr_charclass;
1119	assert (nr < ctype->map_collection_nr);
1120	add_locale_wctrans_table (&file, &ctype->map_3level[nr]);
1121	}
1122	}
1123	}
1124
1125	write_locale_data (output_path, LC_CTYPE, "LC_CTYPE", &file);
1126	}
1127
1128
/* Local functions.  */
1130	static void
1131	ctype_class_new (struct linereader lr, struct* locale_ctype_t *ctype,
1132	const char *name)
1133	{
1134	size_t cnt;
1135
1136	for (cnt = `0`; cnt < ctype->nr_charclass; ++cnt)
1137	if (strcmp (ctype->classnames[cnt], name) == `0`)
1138	break;
1139
1140	if (cnt < ctype->nr_charclass)
1141	{
1142	lr_error (lr, _("character class `%s' already defined"), name);
1143	return;
1144	}
1145
1146	if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1147	/ Exit code 2 is prescribed in P1003.2b. /
1148	WITH_CUR_LOCALE (error (`2`, `0`, _("\
1149	implementation limit: no more than %Zd character classes allowed"),
1150	MAX_NR_CHARCLASS));
1151
1152	ctype->classnames[ctype->nr_charclass++] = name;
1153	}
1154
1155
1156	static void
1157	ctype_map_new (struct linereader lr, struct* locale_ctype_t *ctype,
1158	const char name, const* struct charmap_t *charmap)
1159	{
1160	size_t max_chars = `0`;
1161	size_t cnt;
1162
1163	for (cnt = `0`; cnt < ctype->map_collection_nr; ++cnt)
1164	{
1165	if (strcmp (ctype->mapnames[cnt], name) == `0`)
1166	break;
1167
1168	if (max_chars < ctype->map_collection_max[cnt])
1169	max_chars = ctype->map_collection_max[cnt];
1170	}
1171
1172	if (cnt < ctype->map_collection_nr)
1173	{
1174	lr_error (lr, _("character map `%s' already defined"), name);
1175	return;
1176	}
1177
1178	if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1179	/ Exit code 2 is prescribed in P1003.2b. /
1180	WITH_CUR_LOCALE (error (`2`, `0`, _("\
1181	implementation limit: no more than %d character maps allowed"),
1182	MAX_NR_CHARMAP));
1183
1184	ctype->mapnames[cnt] = name;
1185
1186	if (max_chars == `0`)
1187	ctype->map_collection_max[cnt] = charmap->mb_cur_max == `1` ? `256` : `512`;
1188	else
1189	ctype->map_collection_max[cnt] = max_chars;
1190
1191	ctype->map_collection[cnt] = (uint32_t *)
1192	xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
1193	ctype->map_collection_act[cnt] = `256`;
1194
1195	++ctype->map_collection_nr;
1196	}
1197
1198
1199	/ We have to be prepared that TABLE, MAX, and ACT can be NULL. This*
1200	is possible if we only want to extend the name array. /*
1201	static uint32_t *
1202	find_idx (struct locale_ctype_t ctype, uint32_t table, size_t max,
1203	size_t *act, uint32_t idx)
1204	{
1205	size_t cnt;
1206
1207	if (idx < `256`)
1208	return table == NULL ? NULL : &(*table)[idx];
1209
1210	/ Use the charnames_idx lookup table instead of the slow search loop. /
1211	#if 1
1212	cnt = idx_table_get (&ctype->charnames_idx, idx);
1213	if (cnt == EMPTY)
1214	/ Not found. /
1215	cnt = ctype->charnames_act;
1216	#else
1217	for (cnt = `256`; cnt < ctype->charnames_act; ++cnt)
1218	if (ctype->charnames[cnt] == idx)
1219	break;
1220	#endif
1221
1222	/ We have to distinguish two cases: the name is found or not. /
1223	if (cnt == ctype->charnames_act)
1224	{
1225	/ Extend the name array. /
1226	if (ctype->charnames_act == ctype->charnames_max)
1227	{
1228	ctype->charnames_max *= `2`;
1229	ctype->charnames = (uint32_t *)
1230	xrealloc (ctype->charnames,
1231	sizeof (uint32_t) * ctype->charnames_max);
1232	}
1233	ctype->charnames[ctype->charnames_act++] = idx;
1234	idx_table_add (&ctype->charnames_idx, idx, cnt);
1235	}
1236
1237	if (table == NULL)
1238	/ We have done everything we are asked to do. /
1239	return NULL;
1240
1241	if (max == NULL)
1242	/ The caller does not want to extend the table. /
1243	return (cnt >= act ? NULL : &(table)[cnt]);
1244
1245	if (cnt >= *act)
1246	{
1247	if (cnt >= *max)
1248	{
1249	size_t old_max = *max;
1250	do
1251	max = `2`;
1252	while (*max <= cnt);
1253
1254	*table =
1255	(uint32_t ) xrealloc (table, max sizeof (uint32_t));
1256	memset (&(*table)[old_max], `'\0'`,
1257	(max - old_max) sizeof (uint32_t));
1258	}
1259
1260	*act = cnt + `1`;
1261	}
1262
1263	return &(*table)[cnt];
1264	}
1265
1266
1267	static int
1268	get_character (struct token now, const* struct charmap_t *charmap,
1269	struct repertoire_t *repertoire,
1270	struct charseq *seqp, uint32_t wchp)
1271	{
1272	if (now->tok == tok_bsymbol)
1273	{
1274	/ This will hopefully be the normal case. /
1275	*wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1276	now->val.str.lenmb);
1277	*seqp = charmap_find_value (charmap, now->val.str.startmb,
1278	now->val.str.lenmb);
1279	}
1280	else if (now->tok == tok_ucs4)
1281	{
1282	char utmp[`10`];
1283
1284	snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1285	*seqp = charmap_find_value (charmap, utmp, `9`);
1286
1287	if (*seqp == NULL)
1288	*seqp = repertoire_find_seq (repertoire, now->val.ucs4);
1289
1290	if (*seqp == NULL)
1291	{
1292	/ Compute the value in the charmap from the UCS value. /
1293	const char *symbol = repertoire_find_symbol (repertoire,
1294	now->val.ucs4);
1295
1296	if (symbol == NULL)
1297	*seqp = NULL;
1298	else
1299	*seqp = charmap_find_value (charmap, symbol, strlen (symbol));
1300
1301	if (*seqp == NULL)
1302	{
1303	if (repertoire != NULL)
1304	{
1305	/ Insert a negative entry. /
1306	static const struct charseq negative
1307	= { .ucs4 = ILLEGAL_CHAR_VALUE };
1308	uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1309	sizeof (uint32_t));
1310	*newp = now->val.ucs4;
1311
1312	insert_entry (&repertoire->seq_table, newp,
1313	sizeof (uint32_t), (void *) &negative);
1314	}
1315	}
1316	else
1317	(*seqp)->ucs4 = now->val.ucs4;
1318	}
1319	else if ((*seqp)->ucs4 != now->val.ucs4)
1320	*seqp = NULL;
1321
1322	*wchp = now->val.ucs4;
1323	}
1324	else if (now->tok == tok_charcode)
1325	{
1326	/ We must map from the byte code to UCS4. /
1327	*seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1328	now->val.str.lenmb);
1329
1330	if (*seqp == NULL)
1331	*wchp = ILLEGAL_CHAR_VALUE;
1332	else
1333	{
1334	if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1335	(seqp)->ucs4 = repertoire_find_value (repertoire, (seqp)->name,
1336	strlen ((*seqp)->name));
1337	wchp = (seqp)->ucs4;
1338	}
1339	}
1340	else
1341	return `1`;
1342
1343	return `0`;
1344	}
1345
1346
1347	/ Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and*
1348	the .(2). counterparts. /*
1349	static void
1350	charclass_symbolic_ellipsis (struct linereader *ldfile,
1351	struct locale_ctype_t *ctype,
1352	const struct charmap_t *charmap,
1353	struct repertoire_t *repertoire,
1354	struct token *now,
1355	const char *last_str,
1356	unsigned long int class256_bit,
1357	unsigned long int class_bit, int base,
1358	int ignore_content, int handle_digits, int step)
1359	{
1360	const char *nowstr = now->val.str.startmb;
1361	char tmp[now->val.str.lenmb + `1`];
1362	const char *cp;
1363	char *endp;
1364	unsigned long int from;
1365	unsigned long int to;
1366
1367	/ We have to compute the ellipsis values using the symbolic names. /
1368	assert (last_str != NULL);
1369
1370	if (strlen (last_str) != now->val.str.lenmb)
1371	{
1372	invalid_range:
1373	lr_error (ldfile,
1374	_("`%s' and `%.*s' are not valid names for symbolic range"),
1375	last_str, (int) now->val.str.lenmb, nowstr);
1376	return;
1377	}
1378
1379	if (memcmp (last_str, nowstr, now->val.str.lenmb) == `0`)
1380	/ Nothing to do, the names are the same. /
1381	return;
1382
1383	for (cp = last_str; cp == (nowstr + (cp - last_str)); ++cp)
1384	;
1385
1386	errno = `0`;
1387	from = strtoul (cp, &endp, base);
1388	if ((from == UINT_MAX && errno == ERANGE) \|\| *endp != `'\0'`)
1389	goto invalid_range;
1390
1391	to = strtoul (nowstr + (cp - last_str), &endp, base);
1392	if ((to == UINT_MAX && errno == ERANGE)
1393	\|\| (endp - nowstr) != now->val.str.lenmb \|\| from >= to)
1394	goto invalid_range;
1395
1396	/ OK, we have a range FROM - TO. Now we can create the symbolic names. /
1397	if (!ignore_content)
1398	{
1399	now->val.str.startmb = tmp;
1400	while ((from += step) <= to)
1401	{
1402	struct charseq *seq;
1403	uint32_t wch;
1404
1405	sprintf (tmp, (base == `10` ? "%.s%0ld" : "%.s%0lX"),
1406	(int) (cp - last_str), last_str,
1407	(int) (now->val.str.lenmb - (cp - last_str)),
1408	from);
1409
1410	get_character (now, charmap, repertoire, &seq, &wch);
1411
1412	if (seq != NULL && seq->nbytes == `1`)
1413	/ Yep, we can store information about this byte sequence. /
1414	ctype->class256_collection[seq->bytes[`0`]] \|= class256_bit;
1415
1416	if (wch != ILLEGAL_CHAR_VALUE && class_bit != `0`)
1417	/ We have the UCS4 position. /
1418	*find_idx (ctype, &ctype->class_collection,
1419	&ctype->class_collection_max,
1420	&ctype->class_collection_act, wch) \|= class_bit;
1421
1422	if (handle_digits == `1`)
1423	{
1424	/ We must store the digit values. /
1425	if (ctype->mbdigits_act == ctype->mbdigits_max)
1426	{
1427	ctype->mbdigits_max *= `2`;
1428	ctype->mbdigits = xrealloc (ctype->mbdigits,
1429	(ctype->mbdigits_max
1430	* sizeof (char *)));
1431	ctype->wcdigits_max *= `2`;
1432	ctype->wcdigits = xrealloc (ctype->wcdigits,
1433	(ctype->wcdigits_max
1434	* sizeof (uint32_t)));
1435	}
1436
1437	ctype->mbdigits[ctype->mbdigits_act++] = seq;
1438	ctype->wcdigits[ctype->wcdigits_act++] = wch;
1439	}
1440	else if (handle_digits == `2`)
1441	{
1442	/ We must store the digit values. /
1443	if (ctype->outdigits_act >= `10`)
1444	{
1445	lr_error (ldfile, _("\
1446	%s: field `%s' does not contain exactly ten entries"),
1447	"LC_CTYPE", "outdigit");
1448	return;
1449	}
1450
1451	ctype->mboutdigits[ctype->outdigits_act] = seq;
1452	ctype->wcoutdigits[ctype->outdigits_act] = wch;
1453	++ctype->outdigits_act;
1454	}
1455	}
1456	}
1457	}
1458
1459
1460	/ Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. /
1461	static void
1462	charclass_ucs4_ellipsis (struct linereader *ldfile,
1463	struct locale_ctype_t *ctype,
1464	const struct charmap_t *charmap,
1465	struct repertoire_t *repertoire,
1466	struct token *now, uint32_t last_wch,
1467	unsigned long int class256_bit,
1468	unsigned long int class_bit, int ignore_content,
1469	int handle_digits, int step)
1470	{
1471	if (last_wch > now->val.ucs4)
1472	{
1473	lr_error (ldfile, _("\
1474	to-value <U%0X> of range is smaller than from-value <U%0X>"),
1475	(now->val.ucs4 \| last_wch) < `65536` ? `4` : `8`, now->val.ucs4,
1476	(now->val.ucs4 \| last_wch) < `65536` ? `4` : `8`, last_wch);
1477	return;
1478	}
1479
1480	if (!ignore_content)
1481	while ((last_wch += step) <= now->val.ucs4)
1482	{
1483	/ We have to find out whether there is a byte sequence corresponding*
1484	to this UCS4 value. /*
1485	struct charseq *seq;
1486	char utmp[`10`];
1487
1488	snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1489	seq = charmap_find_value (charmap, utmp, `9`);
1490	if (seq == NULL)
1491	{
1492	snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1493	seq = charmap_find_value (charmap, utmp, `5`);
1494	}
1495
1496	if (seq == NULL)
1497	/ Try looking in the repertoire map. /
1498	seq = repertoire_find_seq (repertoire, last_wch);
1499
1500	/ If this is the first time we look for this sequence create a new*
1501	entry. /*
1502	if (seq == NULL)
1503	{
1504	static const struct charseq negative
1505	= { .ucs4 = ILLEGAL_CHAR_VALUE };
1506
1507	/ Find the symbolic name for this UCS4 value. /
1508	if (repertoire != NULL)
1509	{
1510	const char *symbol = repertoire_find_symbol (repertoire,
1511	last_wch);
1512	uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1513	sizeof (uint32_t));
1514	*newp = last_wch;
1515
1516	if (symbol != NULL)
1517	/ We have a name, now search the multibyte value. /
1518	seq = charmap_find_value (charmap, symbol, strlen (symbol));
1519
1520	if (seq == NULL)
1521	/ We have to create a fake entry. /
1522	seq = (struct charseq *) &negative;
1523	else
1524	seq->ucs4 = last_wch;
1525
1526	insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1527	seq);
1528	}
1529	else
1530	/ We have to create a fake entry. /
1531	seq = (struct charseq *) &negative;
1532	}
1533
1534	/ We have a name, now search the multibyte value. /
1535	if (seq->ucs4 == last_wch && seq->nbytes == `1`)
1536	/ Yep, we can store information about this byte sequence. /
1537	ctype->class256_collection[(size_t) seq->bytes[`0`]]
1538	\|= class256_bit;
1539
1540	/ And of course we have the UCS4 position. /
1541	if (class_bit != `0`)
1542	*find_idx (ctype, &ctype->class_collection,
1543	&ctype->class_collection_max,
1544	&ctype->class_collection_act, last_wch) \|= class_bit;
1545
1546	if (handle_digits == `1`)
1547	{
1548	/ We must store the digit values. /
1549	if (ctype->mbdigits_act == ctype->mbdigits_max)
1550	{
1551	ctype->mbdigits_max *= `2`;
1552	ctype->mbdigits = xrealloc (ctype->mbdigits,
1553	(ctype->mbdigits_max
1554	* sizeof (char *)));
1555	ctype->wcdigits_max *= `2`;
1556	ctype->wcdigits = xrealloc (ctype->wcdigits,
1557	(ctype->wcdigits_max
1558	* sizeof (uint32_t)));
1559	}
1560
1561	ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1562	? seq : NULL);
1563	ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1564	}
1565	else if (handle_digits == `2`)
1566	{
1567	/ We must store the digit values. /
1568	if (ctype->outdigits_act >= `10`)
1569	{
1570	lr_error (ldfile, _("\
1571	%s: field `%s' does not contain exactly ten entries"),
1572	"LC_CTYPE", "outdigit");
1573	return;
1574	}
1575
1576	ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1577	? seq : NULL);
1578	ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1579	++ctype->outdigits_act;
1580	}
1581	}
1582	}
1583
1584
1585	/ Ellipsis as in `/xea/x12.../xea/x34'. /
1586	static void
1587	charclass_charcode_ellipsis (struct linereader *ldfile,
1588	struct locale_ctype_t *ctype,
1589	const struct charmap_t *charmap,
1590	struct repertoire_t *repertoire,
1591	struct token now, char* *last_charcode,
1592	uint32_t last_charcode_len,
1593	unsigned long int class256_bit,
1594	unsigned long int class_bit, int ignore_content,
1595	int handle_digits)
1596	{
1597	/ First check whether the to-value is larger. /
1598	if (now->val.charcode.nbytes != last_charcode_len)
1599	{
1600	lr_error (ldfile, _("\
1601	start and end character sequence of range must have the same length"));
1602	return;
1603	}
1604
1605	if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > `0`)
1606	{
1607	lr_error (ldfile, _("\
1608	to-value character sequence is smaller than from-value sequence"));
1609	return;
1610	}
1611
1612	if (!ignore_content)
1613	{
1614	do
1615	{
1616	/ Increment the byte sequence value. /
1617	struct charseq *seq;
1618	uint32_t wch;
1619	int i;
1620
1621	for (i = last_charcode_len - `1`; i >= `0`; --i)
1622	if (++last_charcode[i] != `0`)
1623	break;
1624
1625	if (last_charcode_len == `1`)
1626	/ Of course we have the charcode value. /
1627	ctype->class256_collection[(size_t) last_charcode[`0`]]
1628	\|= class256_bit;
1629
1630	/ Find the symbolic name. /
1631	seq = charmap_find_symbol (charmap, last_charcode,
1632	last_charcode_len);
1633	if (seq != NULL)
1634	{
1635	if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1636	seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1637	strlen (seq->name));
1638	wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
1639
1640	if (wch != ILLEGAL_CHAR_VALUE && class_bit != `0`)
1641	*find_idx (ctype, &ctype->class_collection,
1642	&ctype->class_collection_max,
1643	&ctype->class_collection_act, wch) \|= class_bit;
1644	}
1645	else
1646	wch = ILLEGAL_CHAR_VALUE;
1647
1648	if (handle_digits == `1`)
1649	{
1650	/ We must store the digit values. /
1651	if (ctype->mbdigits_act == ctype->mbdigits_max)
1652	{
1653	ctype->mbdigits_max *= `2`;
1654	ctype->mbdigits = xrealloc (ctype->mbdigits,
1655	(ctype->mbdigits_max
1656	* sizeof (char *)));
1657	ctype->wcdigits_max *= `2`;
1658	ctype->wcdigits = xrealloc (ctype->wcdigits,
1659	(ctype->wcdigits_max
1660	* sizeof (uint32_t)));
1661	}
1662
1663	seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1664	memcpy ((char *) (seq + `1`), last_charcode, last_charcode_len);
1665	seq->nbytes = last_charcode_len;
1666
1667	ctype->mbdigits[ctype->mbdigits_act++] = seq;
1668	ctype->wcdigits[ctype->wcdigits_act++] = wch;
1669	}
1670	else if (handle_digits == `2`)
1671	{
1672	struct charseq *seq;
1673	/ We must store the digit values. /
1674	if (ctype->outdigits_act >= `10`)
1675	{
1676	lr_error (ldfile, _("\
1677	%s: field `%s' does not contain exactly ten entries"),
1678	"LC_CTYPE", "outdigit");
1679	return;
1680	}
1681
1682	seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1683	memcpy ((char *) (seq + `1`), last_charcode, last_charcode_len);
1684	seq->nbytes = last_charcode_len;
1685
1686	ctype->mboutdigits[ctype->outdigits_act] = seq;
1687	ctype->wcoutdigits[ctype->outdigits_act] = wch;
1688	++ctype->outdigits_act;
1689	}
1690	}
1691	while (memcmp (last_charcode, now->val.charcode.bytes,
1692	last_charcode_len) != `0`);
1693	}
1694	}
1695
1696
1697	static uint32_t *
1698	find_translit2 (struct locale_ctype_t ctype, const* struct charmap_t *charmap,
1699	uint32_t wch)
1700	{
1701	struct translit_t *trunp = ctype->translit;
1702	struct translit_ignore_t *tirunp = ctype->translit_ignore;
1703
1704	while (trunp != NULL)
1705	{
1706	/ XXX We simplify things here. The transliterations we look*
1707	for are only allowed to have one character. /*
1708	if (trunp->from[`0`] == wch && trunp->from[`1`] == `0`)
1709	{
1710	/ Found it. Now look for a transliteration which can be*
1711	represented with the character set. /*
1712	struct translit_to_t *torunp = trunp->to;
1713
1714	while (torunp != NULL)
1715	{
1716	int i;
1717
1718	for (i = `0`; torunp->str[i] != `0`; ++i)
1719	{
1720	char utmp[`10`];
1721
1722	snprintf (utmp, sizeof (utmp), "U%08X", torunp->str[i]);
1723	if (charmap_find_value (charmap, utmp, `9`) == NULL)
1724	/ This character cannot be represented. /
1725	break;
1726	}
1727
1728	if (torunp->str[i] == `0`)
1729	return torunp->str;
1730
1731	torunp = torunp->next;
1732	}
1733
1734	break;
1735	}
1736
1737	trunp = trunp->next;
1738	}
1739
1740	/ Check for ignored chars. /
1741	while (tirunp != NULL)
1742	{
1743	if (tirunp->from <= wch && tirunp->to >= wch)
1744	{
1745	uint32_t wi;
1746
1747	for (wi = tirunp->from; wi <= wch; wi += tirunp->step)
1748	if (wi == wch)
1749	return no_str;
1750	}
1751	}
1752
1753	/ Nothing found. /
1754	return NULL;
1755	}
1756
1757
1758	uint32_t *
1759	find_translit (struct localedef_t locale, const* struct charmap_t *charmap,
1760	uint32_t wch)
1761	{
1762	struct locale_ctype_t *ctype;
1763	uint32_t *result = NULL;
1764
1765	assert (locale != NULL);
1766	ctype = locale->categories[LC_CTYPE].ctype;
1767
1768	if (ctype == NULL)
1769	return NULL;
1770
1771	if (ctype->translit != NULL)
1772	result = find_translit2 (ctype, charmap, wch);
1773
1774	if (result == NULL)
1775	{
1776	struct translit_include_t *irunp = ctype->translit_include;
1777
1778	while (irunp != NULL && result == NULL)
1779	{
1780	result = find_translit (find_locale (CTYPE_LOCALE,
1781	irunp->copy_locale,
1782	irunp->copy_repertoire,
1783	charmap),
1784	charmap, wch);
1785	irunp = irunp->next;
1786	}
1787	}
1788
1789	return result;
1790	}
1791
1792
1793	/ Read one transliteration entry. /
1794	static uint32_t *
1795	read_widestring (struct linereader ldfile, struct* token *now,
1796	const struct charmap_t *charmap,
1797	struct repertoire_t *repertoire)
1798	{
1799	uint32_t *wstr;
1800
1801	if (now->tok == tok_default_missing)
1802	/ The special name "" will denote this case. /
1803	wstr = no_str;
1804	else if (now->tok == tok_bsymbol)
1805	{
1806	/ Get the value from the repertoire. /
1807	wstr = (uint32_t ) xmalloc (`2` sizeof (uint32_t));
1808	wstr[`0`] = repertoire_find_value (repertoire, now->val.str.startmb,
1809	now->val.str.lenmb);
1810	if (wstr[`0`] == ILLEGAL_CHAR_VALUE)
1811	{
1812	/ We cannot proceed, we don't know the UCS4 value. /
1813	free (wstr);
1814	return NULL;
1815	}
1816
1817	wstr[`1`] = `0`;
1818	}
1819	else if (now->tok == tok_ucs4)
1820	{
1821	wstr = (uint32_t ) xmalloc (`2` sizeof (uint32_t));
1822	wstr[`0`] = now->val.ucs4;
1823	wstr[`1`] = `0`;
1824	}
1825	else if (now->tok == tok_charcode)
1826	{
1827	/ Argh, we have to convert to the symbol name first and then to the*
1828	UCS4 value. /*
1829	struct charseq *seq = charmap_find_symbol (charmap,
1830	now->val.str.startmb,
1831	now->val.str.lenmb);
1832	if (seq == NULL)
1833	/ Cannot find the UCS4 value. /
1834	return NULL;
1835
1836	if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1837	seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1838	strlen (seq->name));
1839	if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1840	/ We cannot proceed, we don't know the UCS4 value. /
1841	return NULL;
1842
1843	wstr = (uint32_t ) xmalloc (`2` sizeof (uint32_t));
1844	wstr[`0`] = seq->ucs4;
1845	wstr[`1`] = `0`;
1846	}
1847	else if (now->tok == tok_string)
1848	{
1849	wstr = now->val.str.startwc;
1850	if (wstr == NULL \|\| wstr[`0`] == `0`)
1851	return NULL;
1852	}
1853	else
1854	{
1855	if (now->tok != tok_eol && now->tok != tok_eof)
1856	lr_ignore_rest (ldfile, `0`);
1857	SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1858	return (uint32_t *) -`1l`;
1859	}
1860
1861	return wstr;
1862	}
1863
1864
1865	static void
1866	read_translit_entry (struct linereader ldfile, struct* locale_ctype_t *ctype,
1867	struct token now, const* struct charmap_t *charmap,
1868	struct repertoire_t *repertoire)
1869	{
1870	uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1871	struct translit_t *result;
1872	struct translit_to_t **top;
1873	struct obstack *ob = &ctype->mempool;
1874	int first;
1875	int ignore;
1876
1877	if (from_wstr == NULL)
1878	/ There is no valid from string. /
1879	return;
1880
1881	result = (struct translit_t *) obstack_alloc (ob,
1882	sizeof (struct translit_t));
1883	result->from = from_wstr;
1884	result->fname = ldfile->fname;
1885	result->lineno = ldfile->lineno;
1886	result->next = NULL;
1887	result->to = NULL;
1888	top = &result->to;
1889	first = `1`;
1890	ignore = `0`;
1891
1892	while (`1`)
1893	{
1894	uint32_t *to_wstr;
1895
1896	/ Next we have one or more transliterations. They are*
1897	separated by semicolons. /*
1898	now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
1899
1900	if (!first && (now->tok == tok_semicolon \|\| now->tok == tok_eol))
1901	{
1902	/ One string read. /
1903	const uint32_t zero = `0`;
1904
1905	if (!ignore)
1906	{
1907	obstack_grow (ob, &zero, `4`);
1908	to_wstr = obstack_finish (ob);
1909
1910	top = obstack_alloc (ob, sizeof* (struct translit_to_t));
1911	(*top)->str = to_wstr;
1912	(*top)->next = NULL;
1913	}
1914
1915	if (now->tok == tok_eol)
1916	{
1917	result->next = ctype->translit;
1918	ctype->translit = result;
1919	return;
1920	}
1921
1922	if (!ignore)
1923	top = &(*top)->next;
1924	ignore = `0`;
1925	}
1926	else
1927	{
1928	to_wstr = read_widestring (ldfile, now, charmap, repertoire);
1929	if (to_wstr == (uint32_t *) -`1l`)
1930	{
1931	/ An error occurred. /
1932	obstack_free (ob, result);
1933	return;
1934	}
1935
1936	if (to_wstr == NULL)
1937	ignore = `1`;
1938	else
1939	/ This value is usable. /
1940	obstack_grow (ob, to_wstr, wcslen ((wchar_t ) to_wstr) `4`);
1941
1942	first = `0`;
1943	}
1944	}
1945	}
1946
1947
1948	static void
1949	read_translit_ignore_entry (struct linereader *ldfile,
1950	struct locale_ctype_t *ctype,
1951	const struct charmap_t *charmap,
1952	struct repertoire_t *repertoire)
1953	{
1954	/ We expect a semicolon-separated list of characters we ignore. We are*
1955	only interested in the wide character definitions. These must be
1956	single characters, possibly defining a range when an ellipsis is used. /*
1957	while (`1`)
1958	{
1959	struct token *now = lr_token (ldfile, charmap, NULL, repertoire,
1960	verbose);
1961	struct translit_ignore_t *newp;
1962	uint32_t from;
1963
1964	if (now->tok == tok_eol \|\| now->tok == tok_eof)
1965	{
1966	lr_error (ldfile,
1967	_("premature end of `translit_ignore' definition"));
1968	return;
1969	}
1970
1971	if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
1972	{
1973	lr_error (ldfile, _("syntax error"));
1974	lr_ignore_rest (ldfile, `0`);
1975	return;
1976	}
1977
1978	if (now->tok == tok_ucs4)
1979	from = now->val.ucs4;
1980	else
1981	/ Try to get the value. /
1982	from = repertoire_find_value (repertoire, now->val.str.startmb,
1983	now->val.str.lenmb);
1984
1985	if (from == ILLEGAL_CHAR_VALUE)
1986	{
1987	lr_error (ldfile, "invalid character name");
1988	newp = NULL;
1989	}
1990	else
1991	{
1992	newp = (struct translit_ignore_t *)
1993	obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
1994	newp->from = from;
1995	newp->to = from;
1996	newp->step = `1`;
1997
1998	newp->next = ctype->translit_ignore;
1999	ctype->translit_ignore = newp;
2000	}
2001
2002	/ Now we expect either a semicolon, an ellipsis, or the end of the*
2003	line. /*
2004	now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2005
2006	if (now->tok == tok_ellipsis2 \|\| now->tok == tok_ellipsis2_2)
2007	{
2008	/ XXX Should we bother implementing `....'? `...' certainly*
2009	will not be implemented. /*
2010	uint32_t to;
2011	int step = now->tok == tok_ellipsis2_2 ? `2` : `1`;
2012
2013	now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2014
2015	if (now->tok == tok_eol \|\| now->tok == tok_eof)
2016	{
2017	lr_error (ldfile,
2018	_("premature end of `translit_ignore' definition"));
2019	return;
2020	}
2021
2022	if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2023	{
2024	lr_error (ldfile, _("syntax error"));
2025	lr_ignore_rest (ldfile, `0`);
2026	return;
2027	}
2028
2029	if (now->tok == tok_ucs4)
2030	to = now->val.ucs4;
2031	else
2032	/ Try to get the value. /
2033	to = repertoire_find_value (repertoire, now->val.str.startmb,
2034	now->val.str.lenmb);
2035
2036	if (to == ILLEGAL_CHAR_VALUE)
2037	lr_error (ldfile, "invalid character name");
2038	else
2039	{
2040	/ Make sure the `to'-value is larger. /
2041	if (to >= from)
2042	{
2043	newp->to = to;
2044	newp->step = step;
2045	}
2046	else
2047	lr_error (ldfile, _("\
2048	to-value <U%0X> of range is smaller than from-value <U%0X>"),
2049	(to \| from) < `65536` ? `4` : `8`, to,
2050	(to \| from) < `65536` ? `4` : `8`, from);
2051	}
2052
2053	/ And the next token. /
2054	now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2055	}
2056
2057	if (now->tok == tok_eol \|\| now->tok == tok_eof)
2058	/ We are done. /
2059	return;
2060
2061	if (now->tok == tok_semicolon)
2062	/ Next round. /
2063	continue;
2064
2065	/ If we come here something is wrong. /
2066	lr_error (ldfile, _("syntax error"));
2067	lr_ignore_rest (ldfile, `0`);
2068	return;
2069	}
2070	}
2071
2072
2073	/ The parser for the LC_CTYPE section of the locale definition. /
2074	void
2075	ctype_read (struct linereader ldfile, struct* localedef_t *result,
2076	const struct charmap_t charmap, const* char *repertoire_name,
2077	int ignore_content)
2078	{
2079	struct repertoire_t *repertoire = NULL;
2080	struct locale_ctype_t *ctype;
2081	struct token *now;
2082	enum token_t nowtok;
2083	size_t cnt;
2084	uint32_t last_wch = `0`;
2085	enum token_t last_token;
2086	enum token_t ellipsis_token;
2087	int step;
2088	char last_charcode[`16`];
2089	size_t last_charcode_len = `0`;
2090	const char *last_str = NULL;
2091	int mapidx;
2092	struct localedef_t *copy_locale = NULL;
2093
2094	/ Get the repertoire we have to use. /
2095	if (repertoire_name != NULL)
2096	repertoire = repertoire_read (repertoire_name);
2097
2098	/ The rest of the line containing `LC_CTYPE' must be free. /
2099	lr_ignore_rest (ldfile, `1`);
2100
2101
2102	do
2103	{
2104	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2105	nowtok = now->tok;
2106	}
2107	while (nowtok == tok_eol);
2108
2109	/ If we see `copy' now we are almost done. /
2110	if (nowtok == tok_copy)
2111	{
2112	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2113	if (now->tok != tok_string)
2114	{
2115	SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2116
2117	skip_category:
2118	do
2119	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2120	while (now->tok != tok_eof && now->tok != tok_end);
2121
2122	if (now->tok != tok_eof
2123	\|\| (now = lr_token (ldfile, charmap, NULL, NULL, verbose),
2124	now->tok == tok_eof))
2125	lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2126	else if (now->tok != tok_lc_ctype)
2127	{
2128	lr_error (ldfile, _("\
2129	%1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2130	lr_ignore_rest (ldfile, `0`);
2131	}
2132	else
2133	lr_ignore_rest (ldfile, `1`);
2134
2135	return;
2136	}
2137
2138	if (! ignore_content)
2139	{
2140	/ Get the locale definition. /
2141	copy_locale = load_locale (LC_CTYPE, now->val.str.startmb,
2142	repertoire_name, charmap, NULL);
2143	if ((copy_locale->avail & CTYPE_LOCALE) == `0`)
2144	{
2145	/ Not yet loaded. So do it now. /
2146	if (locfile_read (copy_locale, charmap) != `0`)
2147	goto skip_category;
2148	}
2149
2150	if (copy_locale->categories[LC_CTYPE].ctype == NULL)
2151	return;
2152	}
2153
2154	lr_ignore_rest (ldfile, `1`);
2155
2156	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2157	nowtok = now->tok;
2158	}
2159
2160	/ Prepare the data structures. /
2161	ctype_startup (ldfile, result, charmap, copy_locale, ignore_content);
2162	ctype = result->categories[LC_CTYPE].ctype;
2163
2164	/ Remember the repertoire we use. /
2165	if (!ignore_content)
2166	ctype->repertoire = repertoire;
2167
2168	while (`1`)
2169	{
2170	unsigned long int class_bit = `0`;
2171	unsigned long int class256_bit = `0`;
2172	int handle_digits = `0`;
2173
2174	/ Of course we don't proceed beyond the end of file. /
2175	if (nowtok == tok_eof)
2176	break;
2177
2178	/ Ingore empty lines. /
2179	if (nowtok == tok_eol)
2180	{
2181	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2182	nowtok = now->tok;
2183	continue;
2184	}
2185
2186	switch (nowtok)
2187	{
2188	case tok_charclass:
2189	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2190	while (now->tok == tok_ident \|\| now->tok == tok_string)
2191	{
2192	ctype_class_new (ldfile, ctype, now->val.str.startmb);
2193	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2194	if (now->tok != tok_semicolon)
2195	break;
2196	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2197	}
2198	if (now->tok != tok_eol)
2199	SYNTAX_ERROR (_("\
2200	%s: syntax error in definition of new character class"), "LC_CTYPE");
2201	break;
2202
2203	case tok_charconv:
2204	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2205	while (now->tok == tok_ident \|\| now->tok == tok_string)
2206	{
2207	ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2208	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2209	if (now->tok != tok_semicolon)
2210	break;
2211	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2212	}
2213	if (now->tok != tok_eol)
2214	SYNTAX_ERROR (_("\
2215	%s: syntax error in definition of new character map"), "LC_CTYPE");
2216	break;
2217
2218	case tok_class:
2219	/ Ignore the rest of the line if we don't need the input of*
2220	this line. /*
2221	if (ignore_content)
2222	{
2223	lr_ignore_rest (ldfile, `0`);
2224	break;
2225	}
2226
2227	/ We simply forget the `class' keyword and use the following*
2228	operand to determine the bit. /*
2229	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2230	if (now->tok == tok_ident \|\| now->tok == tok_string)
2231	{
2232	/* This may be one of the already defined class names.  */
2233	for (cnt = `0`; cnt < ctype->nr_charclass; ++cnt)
2234	if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == `0`)
2235	break;
2236	if (cnt >= ctype->nr_charclass)
2237	{
2238	/ OK, it's a new class. /
2239	ctype_class_new (ldfile, ctype, now->val.str.startmb);
2240
2241	class_bit = _ISwbit (ctype->nr_charclass - `1`);
2242	}
2243	else
2244	{
2245	class_bit = _ISwbit (cnt);
2246
2247	free (now->val.str.startmb);
2248	}
2249	}
2250	else if (now->tok == tok_digit)
2251	goto handle_tok_digit;
2252	else if (now->tok < tok_upper \|\| now->tok > tok_blank)
2253	goto err_label;
2254	else
2255	{
2256	class_bit = BITw (now->tok);
2257	class256_bit = BIT (now->tok);
2258	}
2259
2260	/ The next character must be a semicolon. /
2261	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2262	if (now->tok != tok_semicolon)
2263	goto err_label;
2264	goto read_charclass;
2265
2266	case tok_upper:
2267	case tok_lower:
2268	case tok_alpha:
2269	case tok_alnum:
2270	case tok_space:
2271	case tok_cntrl:
2272	case tok_punct:
2273	case tok_graph:
2274	case tok_print:
2275	case tok_xdigit:
2276	case tok_blank:
2277	/ Ignore the rest of the line if we don't need the input of*
2278	this line. /*
2279	if (ignore_content)
2280	{
2281	lr_ignore_rest (ldfile, `0`);
2282	break;
2283	}
2284
2285	class_bit = BITw (now->tok);
2286	class256_bit = BIT (now->tok);
2287	handle_digits = `0`;
2288	read_charclass:
2289	ctype->class_done \|= class_bit;
2290	last_token = tok_none;
2291	ellipsis_token = tok_none;
2292	step = `1`;
2293	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2294	while (now->tok != tok_eol && now->tok != tok_eof)
2295	{
2296	uint32_t wch;
2297	struct charseq *seq;
2298
2299	if (ellipsis_token == tok_none)
2300	{
2301	if (get_character (now, charmap, repertoire, &seq, &wch))
2302	goto err_label;
2303
2304	if (!ignore_content && seq != NULL && seq->nbytes == `1`)
2305	/ Yep, we can store information about this byte*
2306	sequence. /*
2307	ctype->class256_collection[seq->bytes[`0`]] \|= class256_bit;
2308
2309	if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2310	&& class_bit != `0`)
2311	/ We have the UCS4 position. /
2312	*find_idx (ctype, &ctype->class_collection,
2313	&ctype->class_collection_max,
2314	&ctype->class_collection_act, wch) \|= class_bit;
2315
2316	last_token = now->tok;
2317	/ Terminate the string. /
2318	if (last_token == tok_bsymbol)
2319	{
2320	now->val.str.startmb[now->val.str.lenmb] = `'\0'`;
2321	last_str = now->val.str.startmb;
2322	}
2323	else
2324	last_str = NULL;
2325	last_wch = wch;
2326	memcpy (last_charcode, now->val.charcode.bytes, `16`);
2327	last_charcode_len = now->val.charcode.nbytes;
2328
2329	if (!ignore_content && handle_digits == `1`)
2330	{
2331	/ We must store the digit values. /
2332	if (ctype->mbdigits_act == ctype->mbdigits_max)
2333	{
2334	ctype->mbdigits_max += `10`;
2335	ctype->mbdigits = xrealloc (ctype->mbdigits,
2336	(ctype->mbdigits_max
2337	* sizeof (char *)));
2338	ctype->wcdigits_max += `10`;
2339	ctype->wcdigits = xrealloc (ctype->wcdigits,
2340	(ctype->wcdigits_max
2341	* sizeof (uint32_t)));
2342	}
2343
2344	ctype->mbdigits[ctype->mbdigits_act++] = seq;
2345	ctype->wcdigits[ctype->wcdigits_act++] = wch;
2346	}
2347	else if (!ignore_content && handle_digits == `2`)
2348	{
2349	/ We must store the digit values. /
2350	if (ctype->outdigits_act >= `10`)
2351	{
2352	lr_error (ldfile, _("\
2353	%s: field `%s' does not contain exactly ten entries"),
2354	"LC_CTYPE", "outdigit");
2355	lr_ignore_rest (ldfile, `0`);
2356	break;
2357	}
2358
2359	ctype->mboutdigits[ctype->outdigits_act] = seq;
2360	ctype->wcoutdigits[ctype->outdigits_act] = wch;
2361	++ctype->outdigits_act;
2362	}
2363	}
2364	else
2365	{
2366	/ Now it gets complicated. We have to resolve the*
2367	ellipsis problem. First we must distinguish between
2368	the different kind of ellipsis and this must match the
2369	tokens we have seen. /*
2370	assert (last_token != tok_none);
2371
2372	if (last_token != now->tok)
2373	{
2374	lr_error (ldfile, _("\
2375	ellipsis range must be marked by two operands of same type"));
2376	lr_ignore_rest (ldfile, `0`);
2377	break;
2378	}
2379
2380	if (last_token == tok_bsymbol)
2381	{
2382	if (ellipsis_token == tok_ellipsis3)
2383	lr_error (ldfile, _("with symbolic name range values \
2384	the absolute ellipsis `...' must not be used"));
2385
2386	charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2387	repertoire, now, last_str,
2388	class256_bit, class_bit,
2389	(ellipsis_token
2390	== tok_ellipsis4
2391	? `10` : `16`),
2392	ignore_content,
2393	handle_digits, step);
2394	}
2395	else if (last_token == tok_ucs4)
2396	{
2397	if (ellipsis_token != tok_ellipsis2)
2398	lr_error (ldfile, _("\
2399	with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2400
2401	charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2402	repertoire, now, last_wch,
2403	class256_bit, class_bit,
2404	ignore_content, handle_digits,
2405	step);
2406	}
2407	else
2408	{
2409	assert (last_token == tok_charcode);
2410
2411	if (ellipsis_token != tok_ellipsis3)
2412	lr_error (ldfile, _("\
2413	with character code range values one must use the absolute ellipsis `...'"));
2414
2415	charclass_charcode_ellipsis (ldfile, ctype, charmap,
2416	repertoire, now,
2417	last_charcode,
2418	last_charcode_len,
2419	class256_bit, class_bit,
2420	ignore_content,
2421	handle_digits);
2422	}
2423
2424	/ Now we have used the last value. /
2425	last_token = tok_none;
2426	}
2427
2428	/ Next we expect a semicolon or the end of the line. /
2429	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2430	if (now->tok == tok_eol \|\| now->tok == tok_eof)
2431	break;
2432
2433	if (last_token != tok_none
2434	&& now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
2435	{
2436	if (now->tok == tok_ellipsis2_2)
2437	{
2438	now->tok = tok_ellipsis2;
2439	step = `2`;
2440	}
2441	else if (now->tok == tok_ellipsis4_2)
2442	{
2443	now->tok = tok_ellipsis4;
2444	step = `2`;
2445	}
2446
2447	ellipsis_token = now->tok;
2448
2449	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2450	continue;
2451	}
2452
2453	if (now->tok != tok_semicolon)
2454	goto err_label;
2455
2456	/ And get the next character. /
2457	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2458
2459	ellipsis_token = tok_none;
2460	step = `1`;
2461	}
2462	break;
2463
2464	case tok_digit:
2465	/ Ignore the rest of the line if we don't need the input of*
2466	this line. /*
2467	if (ignore_content)
2468	{
2469	lr_ignore_rest (ldfile, `0`);
2470	break;
2471	}
2472
2473	handle_tok_digit:
2474	class_bit = _ISwdigit;
2475	class256_bit = _ISdigit;
2476	handle_digits = `1`;
2477	goto read_charclass;
2478
2479	case tok_outdigit:
2480	/ Ignore the rest of the line if we don't need the input of*
2481	this line. /*
2482	if (ignore_content)
2483	{
2484	lr_ignore_rest (ldfile, `0`);
2485	break;
2486	}
2487
2488	if (ctype->outdigits_act != `0`)
2489	lr_error (ldfile, _("\
2490	%s: field `%s' declared more than once"),
2491	"LC_CTYPE", "outdigit");
2492	class_bit = `0`;
2493	class256_bit = `0`;
2494	handle_digits = `2`;
2495	goto read_charclass;
2496
2497	case tok_toupper:
2498	/ Ignore the rest of the line if we don't need the input of*
2499	this line. /*
2500	if (ignore_content)
2501	{
2502	lr_ignore_rest (ldfile, `0`);
2503	break;
2504	}
2505
2506	mapidx = `0`;
2507	goto read_mapping;
2508
2509	case tok_tolower:
2510	/ Ignore the rest of the line if we don't need the input of*
2511	this line. /*
2512	if (ignore_content)
2513	{
2514	lr_ignore_rest (ldfile, `0`);
2515	break;
2516	}
2517
2518	mapidx = `1`;
2519	goto read_mapping;
2520
2521	case tok_map:
2522	/ Ignore the rest of the line if we don't need the input of*
2523	this line. /*
2524	if (ignore_content)
2525	{
2526	lr_ignore_rest (ldfile, `0`);
2527	break;
2528	}
2529
2530	/ We simply forget the `map' keyword and use the following*
2531	operand to determine the mapping. /*
2532	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2533	if (now->tok == tok_ident \|\| now->tok == tok_string)
2534	{
2535	size_t cnt;
2536
2537	for (cnt = `2`; cnt < ctype->map_collection_nr; ++cnt)
2538	if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == `0`)
2539	break;
2540
2541	if (cnt < ctype->map_collection_nr)
2542	free (now->val.str.startmb);
2543	else
2544	/ OK, it's a new map. /
2545	ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2546
2547	mapidx = cnt;
2548	}
2549	else if (now->tok < tok_toupper \|\| now->tok > tok_tolower)
2550	goto err_label;
2551	else
2552	mapidx = now->tok - tok_toupper;
2553
2554	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2555	/ This better should be a semicolon. /
2556	if (now->tok != tok_semicolon)
2557	goto err_label;
2558
2559	read_mapping:
2560	/ Test whether this mapping was already defined. /
2561	if (ctype->tomap_done[mapidx])
2562	{
2563	lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2564	ctype->mapnames[mapidx]);
2565	lr_ignore_rest (ldfile, `0`);
2566	break;
2567	}
2568	ctype->tomap_done[mapidx] = `1`;
2569
2570	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2571	while (now->tok != tok_eol && now->tok != tok_eof)
2572	{
2573	struct charseq *from_seq;
2574	uint32_t from_wch;
2575	struct charseq *to_seq;
2576	uint32_t to_wch;
2577
2578	/ Every pair starts with an opening brace. /
2579	if (now->tok != tok_open_brace)
2580	goto err_label;
2581
2582	/ Next comes the from-value. /
2583	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2584	if (get_character (now, charmap, repertoire, &from_seq,
2585	&from_wch) != `0`)
2586	goto err_label;
2587
2588	/ The next is a comma. /
2589	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2590	if (now->tok != tok_comma)
2591	goto err_label;
2592
2593	/ And the other value. /
2594	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2595	if (get_character (now, charmap, repertoire, &to_seq,
2596	&to_wch) != `0`)
2597	goto err_label;
2598
2599	/ And the last thing is the closing brace. /
2600	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2601	if (now->tok != tok_close_brace)
2602	goto err_label;
2603
2604	if (!ignore_content)
2605	{
2606	/ Check whether the mapping converts from an ASCII value*
2607	to a non-ASCII value. /*
2608	if (from_seq != NULL && from_seq->nbytes == `1`
2609	&& isascii (from_seq->bytes[`0`])
2610	&& to_seq != NULL && (to_seq->nbytes != `1`
2611	\|\| !isascii (to_seq->bytes[`0`])))
2612	ctype->to_nonascii = `1`;
2613
2614	if (mapidx < `2` && from_seq != NULL && to_seq != NULL
2615	&& from_seq->nbytes == `1` && to_seq->nbytes == `1`)
2616	/ We can use this value. /
2617	ctype->map256_collection[mapidx][from_seq->bytes[`0`]]
2618	= to_seq->bytes[`0`];
2619
2620	if (from_wch != ILLEGAL_CHAR_VALUE
2621	&& to_wch != ILLEGAL_CHAR_VALUE)
2622	/ Both correct values. /
2623	*find_idx (ctype, &ctype->map_collection[mapidx],
2624	&ctype->map_collection_max[mapidx],
2625	&ctype->map_collection_act[mapidx],
2626	from_wch) = to_wch;
2627	}
2628
2629	/ Now comes a semicolon or the end of the line/file. /
2630	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2631	if (now->tok == tok_semicolon)
2632	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2633	}
2634	break;
2635
2636	case tok_translit_start:
2637	/ Ignore the entire translit section with its peculiar syntax*
2638	if we don't need the input. /*
2639	if (ignore_content)
2640	{
2641	do
2642	{
2643	lr_ignore_rest (ldfile, `0`);
2644	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2645	}
2646	while (now->tok != tok_translit_end && now->tok != tok_eof);
2647
2648	if (now->tok == tok_eof)
2649	lr_error (ldfile, _(\
2650	"%s: `translit_start' section does not end with `translit_end'"),
2651	"LC_CTYPE");
2652
2653	break;
2654	}
2655
2656	/ The rest of the line better should be empty. /
2657	lr_ignore_rest (ldfile, `1`);
2658
2659	/ We count here the number of allocated entries in the `translit'*
2660	array. /*
2661	cnt = `0`;
2662
2663	ldfile->translate_strings = `1`;
2664	ldfile->return_widestr = `1`;
2665
2666	/ We proceed until we see the `translit_end' token. /
2667	while (now = lr_token (ldfile, charmap, NULL, repertoire, verbose),
2668	now->tok != tok_translit_end && now->tok != tok_eof)
2669	{
2670	if (now->tok == tok_eol)
2671	/ Ignore empty lines. /
2672	continue;
2673
2674	if (now->tok == tok_include)
2675	{
2676	/ We have to include locale. /
2677	const char *locale_name;
2678	const char *repertoire_name;
2679	struct translit_include_t include_stmt, *include_ptr;
2680
2681	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2682	/ This should be a string or an identifier. In any*
2683	case something to name a locale. /*
2684	if (now->tok != tok_string && now->tok != tok_ident)
2685	{
2686	translit_syntax:
2687	lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2688	lr_ignore_rest (ldfile, `0`);
2689	continue;
2690	}
2691	locale_name = now->val.str.startmb;
2692
2693	/ Next should be a semicolon. /
2694	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2695	if (now->tok != tok_semicolon)
2696	goto translit_syntax;
2697
2698	/ Now the repertoire name. /
2699	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2700	if ((now->tok != tok_string && now->tok != tok_ident)
2701	\|\| now->val.str.startmb == NULL)
2702	goto translit_syntax;
2703	repertoire_name = now->val.str.startmb;
2704	if (repertoire_name[`0`] == `'\0'`)
2705	/ Ignore the empty string. /
2706	repertoire_name = NULL;
2707
2708	/ Save the include statement for later processing. /
2709	include_stmt = (struct translit_include_t *)
2710	xmalloc (sizeof (struct translit_include_t));
2711	include_stmt->copy_locale = locale_name;
2712	include_stmt->copy_repertoire = repertoire_name;
2713	include_stmt->next = NULL;
2714
2715	include_ptr = &ctype->translit_include;
2716	while (*include_ptr != NULL)
2717	include_ptr = &(*include_ptr)->next;
2718	*include_ptr = include_stmt;
2719
2720	/ The rest of the line must be empty. /
2721	lr_ignore_rest (ldfile, `1`);
2722
2723	/ Make sure the locale is read. /
2724	add_to_readlist (LC_CTYPE, locale_name, repertoire_name,
2725	`1`, NULL);
2726	continue;
2727	}
2728	else if (now->tok == tok_default_missing)
2729	{
2730	uint32_t *wstr;
2731
2732	while (`1`)
2733	{
2734	/ We expect a single character or string as the*
2735	argument. /*
2736	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2737	wstr = read_widestring (ldfile, now, charmap,
2738	repertoire);
2739
2740	if (wstr != NULL)
2741	{
2742	if (ctype->default_missing != NULL)
2743	{
2744	lr_error (ldfile, _("\
2745	%s: duplicate `default_missing' definition"), "LC_CTYPE");
2746	WITH_CUR_LOCALE (error_at_line (`0`, `0`,
2747	ctype->default_missing_file,
2748	ctype->default_missing_lineno,
2749	_("\
2750	previous definition was here")));
2751	}
2752	else
2753	{
2754	ctype->default_missing = wstr;
2755	ctype->default_missing_file = ldfile->fname;
2756	ctype->default_missing_lineno = ldfile->lineno;
2757	}
2758	/ We can have more entries, ignore them. /
2759	lr_ignore_rest (ldfile, `0`);
2760	break;
2761	}
2762	else if (wstr == (uint32_t *) -`1l`)
2763	/* This was a syntax error.  */
2764	break;
2765
2766	/ Maybe there is another replacement we can use. /
2767	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2768	if (now->tok == tok_eol \|\| now->tok == tok_eof)
2769	{
2770	/ Nothing found. We tell the user. /
2771	lr_error (ldfile, _("\
2772	%s: no representable `default_missing' definition found"), "LC_CTYPE");
2773	break;
2774	}
2775	if (now->tok != tok_semicolon)
2776	goto translit_syntax;
2777	}
2778
2779	continue;
2780	}
2781	else if (now->tok == tok_translit_ignore)
2782	{
2783	read_translit_ignore_entry (ldfile, ctype, charmap,
2784	repertoire);
2785	continue;
2786	}
2787
2788	read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2789	}
2790	ldfile->return_widestr = `0`;
2791
2792	if (now->tok == tok_eof)
2793	lr_error (ldfile, _(\
2794	"%s: `translit_start' section does not end with `translit_end'"),
2795	"LC_CTYPE");
2796
2797	break;
2798
2799	case tok_ident:
2800	/ Ignore the rest of the line if we don't need the input of*
2801	this line. /*
2802	if (ignore_content)
2803	{
2804	lr_ignore_rest (ldfile, `0`);
2805	break;
2806	}
2807
2808	/ This could mean one of several things. First test whether*
2809	it's a character class name. /*
2810	for (cnt = `0`; cnt < ctype->nr_charclass; ++cnt)
2811	if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == `0`)
2812	break;
2813	if (cnt < ctype->nr_charclass)
2814	{
2815	class_bit = _ISwbit (cnt);
2816	class256_bit = cnt <= `11` ? _ISbit (cnt) : `0`;
2817	free (now->val.str.startmb);
2818	goto read_charclass;
2819	}
2820	for (cnt = `0`; cnt < ctype->map_collection_nr; ++cnt)
2821	if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == `0`)
2822	break;
2823	if (cnt < ctype->map_collection_nr)
2824	{
2825	mapidx = cnt;
2826	free (now->val.str.startmb);
2827	goto read_mapping;
2828	}
2829	break;
2830
2831	case tok_end:
2832	/ Next we assume `LC_CTYPE'. /
2833	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2834	if (now->tok == tok_eof)
2835	break;
2836	if (now->tok == tok_eol)
2837	lr_error (ldfile, _("%s: incomplete `END' line"),
2838	"LC_CTYPE");
2839	else if (now->tok != tok_lc_ctype)
2840	lr_error (ldfile, _("\
2841	%1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2842	lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2843	return;
2844
2845	default:
2846	err_label:
2847	if (now->tok != tok_eof)
2848	SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2849	}
2850
2851	/ Prepare for the next round. /
2852	now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2853	nowtok = now->tok;
2854	}
2855
2856	/ When we come here we reached the end of the file. /
2857	lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2858	}
2859
2860
2861	/ Subroutine of set_class_defaults, below. /
2862	static void
2863	set_one_default (struct locale_ctype_t *ctype,
2864	const struct charmap_t *charmap,
2865	int bitpos, int from, int to)
2866	{
2867	char tmp[`2`];
2868	int ch;
2869	int bit = _ISbit (bitpos);
2870	int bitw = _ISwbit (bitpos);
2871	/ Define string. /
2872	strcpy (tmp, "?");
2873
2874	for (ch = from; ch <= to; ++ch)
2875	{
2876	struct charseq *seq;
2877	tmp[`0`] = ch;
2878
2879	seq = charmap_find_value (charmap, tmp, `1`);
2880	if (seq == NULL)
2881	{
2882	char buf[`10`];
2883	sprintf (buf, "U%08X", ch);
2884	seq = charmap_find_value (charmap, buf, `9`);
2885	}
2886	if (seq == NULL)
2887	{
2888	if (!be_quiet)
2889	WITH_CUR_LOCALE (error (`0`, `0`, _("\
2890	%s: character `%s' not defined while needed as default value"),
2891	"LC_CTYPE", tmp));
2892	}
2893	else if (seq->nbytes != `1`)
2894	WITH_CUR_LOCALE (error (`0`, `0`, _("\
2895	%s: character `%s' in charmap not representable with one byte"),
2896	"LC_CTYPE", tmp));
2897	else
2898	ctype->class256_collection[seq->bytes[`0`]] \|= bit;
2899
2900	/ No need to search here, the ASCII value is also the Unicode*
2901	value. /*
2902	ELEM (ctype, class_collection, , ch) \|= bitw;
2903	}
2904	}
2905
2906	static void
2907	set_class_defaults (struct locale_ctype_t *ctype,
2908	const struct charmap_t *charmap,
2909	struct repertoire_t *repertoire)
2910	{
2911	#define set_default(bitpos, from, to) \
2912	set_one_default (ctype, charmap, bitpos, from, to)
2913
2914	/* This function defines the default values for the classes and conversions
2915	according to POSIX.2 2.5.2.1.
2916	It may seem that the order of these if-blocks is arbitrary but it is NOT.
2917	Don't move them unless you know what you are doing!  */
2918
2919	/ Set default values if keyword was not present. /
2920	if ((ctype->class_done & BITw (tok_upper)) == `0`)
2921	/* "If this keyword [upper] is not specified, the uppercase letters
2922	`A' through `Z', ..., shall automatically belong to this class,
2923	with implementation defined character values." [P1003.2, 2.5.2.1]  */
2924	set_default (BITPOS (tok_upper), `'A'`, `'Z'`);
2925
2926	if ((ctype->class_done & BITw (tok_lower)) == `0`)
2927	/ "If this keyword [lower] is not specified, the lowercase letters*
2928	`a' through `z', ..., shall automatically belong to this class,
2929	with implementation defined character values." [P1003.2, 2.5.2.1] /*
2930	set_default (BITPOS (tok_lower), `'a'`, `'z'`);
2931
2932	if ((ctype->class_done & BITw (tok_alpha)) == `0`)
2933	{
2934	/ Table 2-6 in P1003.2 says that characters in class `upper' or*
2935	class `lower' must* be in class `alpha'. /
2936	unsigned long int mask = BIT (tok_upper) \| BIT (tok_lower);
2937	unsigned long int maskw = BITw (tok_upper) \| BITw (tok_lower);
2938
2939	for (size_t cnt = `0`; cnt < `256`; ++cnt)
2940	if ((ctype->class256_collection[cnt] & mask) != `0`)
2941	ctype->class256_collection[cnt] \|= BIT (tok_alpha);
2942
2943	for (size_t cnt = `0`; cnt < ctype->class_collection_act; ++cnt)
2944	if ((ctype->class_collection[cnt] & maskw) != `0`)
2945	ctype->class_collection[cnt] \|= BITw (tok_alpha);
2946	}
2947
2948	if ((ctype->class_done & BITw (tok_digit)) == `0`)
2949	/ "If this keyword [digit] is not specified, the digits `0' through*
2950	`9', ..., shall automatically belong to this class, with
2951	implementation-defined character values." [P1003.2, 2.5.2.1] /*
2952	set_default (BITPOS (tok_digit), `'0'`, `'9'`);
2953
2954	/ "Only characters specified for the `alpha' and `digit' keyword*
2955	shall be specified. Characters specified for the keyword `alpha'
2956	and `digit' are automatically included in this class. /*
2957	{
2958	unsigned long int mask = BIT (tok_alpha) \| BIT (tok_digit);
2959	unsigned long int maskw = BITw (tok_alpha) \| BITw (tok_digit);
2960
2961	for (size_t cnt = `0`; cnt < `256`; ++cnt)
2962	if ((ctype->class256_collection[cnt] & mask) != `0`)
2963	ctype->class256_collection[cnt] \|= BIT (tok_alnum);
2964
2965	for (size_t cnt = `0`; cnt < ctype->class_collection_act; ++cnt)
2966	if ((ctype->class_collection[cnt] & maskw) != `0`)
2967	ctype->class_collection[cnt] \|= BITw (tok_alnum);
2968	}
2969
2970	if ((ctype->class_done & BITw (tok_space)) == `0`)
2971	/ "If this keyword [space] is not specified, the characters <space>,*
2972	<form-feed>, <newline>, <carriage-return>, <tab>, and
2973	<vertical-tab>, ..., shall automatically belong to this class,
2974	with implementation-defined character values." [P1003.2, 2.5.2.1] /*
2975	{
2976	struct charseq *seq;
2977
2978	seq = charmap_find_value (charmap, "space", `5`);
2979	if (seq == NULL)
2980	seq = charmap_find_value (charmap, "SP", `2`);
2981	if (seq == NULL)
2982	seq = charmap_find_value (charmap, "U00000020", `9`);
2983	if (seq == NULL)
2984	{
2985	if (!be_quiet)
2986	WITH_CUR_LOCALE (error (`0`, `0`, _("\
2987	%s: character `%s' not defined while needed as default value"),
2988	"LC_CTYPE", "<space>"));
2989	}
2990	else if (seq->nbytes != `1`)
2991	WITH_CUR_LOCALE (error (`0`, `0`, _("\
2992	%s: character `%s' in charmap not representable with one byte"),
2993	"LC_CTYPE", "<space>"));
2994	else
2995	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_space);
2996
2997	/ No need to search. /
2998	ELEM (ctype, class_collection, , L`' '`) \|= BITw (tok_space);
2999
3000	seq = charmap_find_value (charmap, "form-feed", `9`);
3001	if (seq == NULL)
3002	seq = charmap_find_value (charmap, "U0000000C", `9`);
3003	if (seq == NULL)
3004	{
3005	if (!be_quiet)
3006	WITH_CUR_LOCALE (error (`0`, `0`, _("\
3007	%s: character `%s' not defined while needed as default value"),
3008	"LC_CTYPE", "<form-feed>"));
3009	}
3010	else if (seq->nbytes != `1`)
3011	WITH_CUR_LOCALE (error (`0`, `0`, _("\
3012	%s: character `%s' in charmap not representable with one byte"),
3013	"LC_CTYPE", "<form-feed>"));
3014	else
3015	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_space);
3016
3017	/ No need to search. /
3018	ELEM (ctype, class_collection, , L`'\f'`) \|= BITw (tok_space);
3019
3020
3021	seq = charmap_find_value (charmap, "newline", `7`);
3022	if (seq == NULL)
3023	seq = charmap_find_value (charmap, "U0000000A", `9`);
3024	if (seq == NULL)
3025	{
3026	if (!be_quiet)
3027	WITH_CUR_LOCALE (error (`0`, `0`, _("\
3028	%s: character `%s' not defined while needed as default value"),
3029	"LC_CTYPE", "<newline>"));
3030	}
3031	else if (seq->nbytes != `1`)
3032	WITH_CUR_LOCALE (error (`0`, `0`, _("\
3033	%s: character `%s' in charmap not representable with one byte"),
3034	"LC_CTYPE", "<newline>"));
3035	else
3036	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_space);
3037
3038	/ No need to search. /
3039	ELEM (ctype, class_collection, , L`'\n'`) \|= BITw (tok_space);
3040
3041
3042	seq = charmap_find_value (charmap, "carriage-return", `15`);
3043	if (seq == NULL)
3044	seq = charmap_find_value (charmap, "U0000000D", `9`);
3045	if (seq == NULL)
3046	{
3047	if (!be_quiet)
3048	WITH_CUR_LOCALE (error (`0`, `0`, _("\
3049	%s: character `%s' not defined while needed as default value"),
3050	"LC_CTYPE", "<carriage-return>"));
3051	}
3052	else if (seq->nbytes != `1`)
3053	WITH_CUR_LOCALE (error (`0`, `0`, _("\
3054	%s: character `%s' in charmap not representable with one byte"),
3055	"LC_CTYPE", "<carriage-return>"));
3056	else
3057	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_space);
3058
3059	/ No need to search. /
3060	ELEM (ctype, class_collection, , L`'\r'`) \|= BITw (tok_space);
3061
3062
3063	seq = charmap_find_value (charmap, "tab", `3`);
3064	if (seq == NULL)
3065	seq = charmap_find_value (charmap, "U00000009", `9`);
3066	if (seq == NULL)
3067	{
3068	if (!be_quiet)
3069	WITH_CUR_LOCALE (error (`0`, `0`, _("\
3070	%s: character `%s' not defined while needed as default value"),
3071	"LC_CTYPE", "<tab>"));
3072	}
3073	else if (seq->nbytes != `1`)
3074	WITH_CUR_LOCALE (error (`0`, `0`, _("\
3075	%s: character `%s' in charmap not representable with one byte"),
3076	"LC_CTYPE", "<tab>"));
3077	else
3078	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_space);
3079
3080	/ No need to search. /
3081	ELEM (ctype, class_collection, , L`'\t'`) \|= BITw (tok_space);
3082
3083
3084	seq = charmap_find_value (charmap, "vertical-tab", `12`);
3085	if (seq == NULL)
3086	seq = charmap_find_value (charmap, "U0000000B", `9`);
3087	if (seq == NULL)
3088	{
3089	if (!be_quiet)
3090	WITH_CUR_LOCALE (error (`0`, `0`, _("\
3091	%s: character `%s' not defined while needed as default value"),
3092	"LC_CTYPE", "<vertical-tab>"));
3093	}
3094	else if (seq->nbytes != `1`)
3095	WITH_CUR_LOCALE (error (`0`, `0`, _("\
3096	%s: character `%s' in charmap not representable with one byte"),
3097	"LC_CTYPE", "<vertical-tab>"));
3098	else
3099	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_space);
3100
3101	/ No need to search. /
3102	ELEM (ctype, class_collection, , L`'\v'`) \|= BITw (tok_space);
3103	}
3104
3105	if ((ctype->class_done & BITw (tok_xdigit)) == `0`)
3106	/* "If this keyword is not specified, the digits `0' to `9', the
3107	uppercase letters `A' through `F', and the lowercase letters `a'
3108	through `f', ..., shall automatically belong to this class, with
3109	implementation defined character values." [P1003.2, 2.5.2.1]  */
3110	{
3111	set_default (BITPOS (tok_xdigit), `'0'`, `'9'`);
3112	set_default (BITPOS (tok_xdigit), `'A'`, `'F'`);
3113	set_default (BITPOS (tok_xdigit), `'a'`, `'f'`);
3114	}
3115
3116	if ((ctype->class_done & BITw (tok_blank)) == `0`)
3117	/ "If this keyword [blank] is unspecified, the characters <space> and*
3118	<tab> shall belong to this character class." [P1003.2, 2.5.2.1] /*
3119	{
3120	struct charseq *seq;
3121
3122	seq = charmap_find_value (charmap, "space", `5`);
3123	if (seq == NULL)
3124	seq = charmap_find_value (charmap, "SP", `2`);
3125	if (seq == NULL)
3126	seq = charmap_find_value (charmap, "U00000020", `9`);
3127	if (seq == NULL)
3128	{
3129	if (!be_quiet)
3130	WITH_CUR_LOCALE (error (`0`, `0`, _("\
3131	%s: character `%s' not defined while needed as default value"),
3132	"LC_CTYPE", "<space>"));
3133	}
3134	else if (seq->nbytes != `1`)
3135	WITH_CUR_LOCALE (error (`0`, `0`, _("\
3136	%s: character `%s' in charmap not representable with one byte"),
3137	"LC_CTYPE", "<space>"));
3138	else
3139	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_blank);
3140
3141	/ No need to search. /
3142	ELEM (ctype, class_collection, , L`' '`) \|= BITw (tok_blank);
3143
3144
3145	seq = charmap_find_value (charmap, "tab", `3`);
3146	if (seq == NULL)
3147	seq = charmap_find_value (charmap, "U00000009", `9`);
3148	if (seq == NULL)
3149	{
3150	if (!be_quiet)
3151	WITH_CUR_LOCALE (error (`0`, `0`, _("\
3152	%s: character `%s' not defined while needed as default value"),
3153	"LC_CTYPE", "<tab>"));
3154	}
3155	else if (seq->nbytes != `1`)
3156	WITH_CUR_LOCALE (error (`0`, `0`, _("\
3157	%s: character `%s' in charmap not representable with one byte"),
3158	"LC_CTYPE", "<tab>"));
3159	else
3160	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_blank);
3161
3162	/ No need to search. /
3163	ELEM (ctype, class_collection, , L`'\t'`) \|= BITw (tok_blank);
3164	}
3165
3166	if ((ctype->class_done & BITw (tok_graph)) == `0`)
3167	/ "If this keyword [graph] is not specified, characters specified for*
3168	the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3169	shall belong to this character class." [P1003.2, 2.5.2.1] /*
3170	{
3171	unsigned long int mask = BIT (tok_upper) \| BIT (tok_lower) \|
3172	BIT (tok_alpha) \| BIT (tok_digit) \| BIT (tok_xdigit) \| BIT (tok_punct);
3173	unsigned long int maskw = BITw (tok_upper) \| BITw (tok_lower) \|
3174	BITw (tok_alpha) \| BITw (tok_digit) \| BITw (tok_xdigit) \|
3175	BITw (tok_punct);
3176
3177	for (size_t cnt = `0`; cnt < ctype->class_collection_act; ++cnt)
3178	if ((ctype->class_collection[cnt] & maskw) != `0`)
3179	ctype->class_collection[cnt] \|= BITw (tok_graph);
3180
3181	for (size_t cnt = `0`; cnt < `256`; ++cnt)
3182	if ((ctype->class256_collection[cnt] & mask) != `0`)
3183	ctype->class256_collection[cnt] \|= BIT (tok_graph);
3184	}
3185
3186	if ((ctype->class_done & BITw (tok_print)) == `0`)
3187	/ "If this keyword [print] is not provided, characters specified for*
3188	the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3189	and the <space> character shall belong to this character class."
3190	[P1003.2, 2.5.2.1] /*
3191	{
3192	unsigned long int mask = BIT (tok_upper) \| BIT (tok_lower) \|
3193	BIT (tok_alpha) \| BIT (tok_digit) \| BIT (tok_xdigit) \| BIT (tok_punct);
3194	unsigned long int maskw = BITw (tok_upper) \| BITw (tok_lower) \|
3195	BITw (tok_alpha) \| BITw (tok_digit) \| BITw (tok_xdigit) \|
3196	BITw (tok_punct);
3197	struct charseq *seq;
3198
3199	for (size_t cnt = `0`; cnt < ctype->class_collection_act; ++cnt)
3200	if ((ctype->class_collection[cnt] & maskw) != `0`)
3201	ctype->class_collection[cnt] \|= BITw (tok_print);
3202
3203	for (size_t cnt = `0`; cnt < `256`; ++cnt)
3204	if ((ctype->class256_collection[cnt] & mask) != `0`)
3205	ctype->class256_collection[cnt] \|= BIT (tok_print);
3206
3207
3208	seq = charmap_find_value (charmap, "space", `5`);
3209	if (seq == NULL)
3210	seq = charmap_find_value (charmap, "SP", `2`);
3211	if (seq == NULL)
3212	seq = charmap_find_value (charmap, "U00000020", `9`);
3213	if (seq == NULL)
3214	{
3215	if (!be_quiet)
3216	WITH_CUR_LOCALE (error (`0`, `0`, _("\
3217	%s: character `%s' not defined while needed as default value"),
3218	"LC_CTYPE", "<space>"));
3219	}
3220	else if (seq->nbytes != `1`)
3221	WITH_CUR_LOCALE (error (`0`, `0`, _("\
3222	%s: character `%s' in charmap not representable with one byte"),
3223	"LC_CTYPE", "<space>"));
3224	else
3225	ctype->class256_collection[seq->bytes[`0`]] \|= BIT (tok_print);
3226
3227	/ No need to search. /
3228	ELEM (ctype, class_collection, , L`' '`) \|= BITw (tok_print);
3229	}
3230
3231	if (ctype->tomap_done[`0`] == `0`)
3232	/ "If this keyword [toupper] is not specified, the lowercase letters*
3233	`a' through `z', and their corresponding uppercase letters `A' to
3234	`Z', ..., shall automatically be included, with implementation-
3235	defined character values." [P1003.2, 2.5.2.1] /*
3236	{
3237	char tmp[`4`];
3238	int ch;
3239
3240	strcpy (tmp, "<?>");
3241
3242	for (ch = `'a'`; ch <= `'z'`; ++ch)
3243	{
3244	struct charseq seq_from, seq_to;
3245
3246	tmp[`1`] = (char) ch;
3247
3248	seq_from = charmap_find_value (charmap, &tmp[`1`], `1`);
3249	if (seq_from == NULL)
3250	{
3251	char buf[`10`];
3252	sprintf (buf, "U%08X", ch);
3253	seq_from = charmap_find_value (charmap, buf, `9`);
3254	}
3255	if (seq_from == NULL)
3256	{
3257	if (!be_quiet)
3258	WITH_CUR_LOCALE (error (`0`, `0`, _("\
3259	%s: character `%s' not defined while needed as default value"),
3260	"LC_CTYPE", tmp));
3261	}
3262	else if (seq_from->nbytes != `1`)
3263	{
3264	if (!be_quiet)
3265	WITH_CUR_LOCALE (error (`0`, `0`, _("\
3266	%s: character `%s' needed as default value not representable with one byte"),
3267	"LC_CTYPE", tmp));
3268	}
3269	else
3270	{
3271	/ This conversion is implementation defined. /
3272	tmp[`1`] = (char) (ch + (`'A'` - `'a'`));
3273	seq_to = charmap_find_value (charmap, &tmp[`1`], `1`);
3274	if (seq_to == NULL)
3275	{
3276	char buf[`10`];
3277	sprintf (buf, "U%08X", ch + (`'A'` - `'a'`));
3278	seq_to = charmap_find_value (charmap, buf, `9`);
3279	}
3280	if (seq_to == NULL)
3281	{
3282	if (!be_quiet)
3283	WITH_CUR_LOCALE (error (`0`, `0`, _("\
3284	%s: character `%s' not defined while needed as default value"),
3285	"LC_CTYPE", tmp));
3286	}
3287	else if (seq_to->nbytes != `1`)
3288	{
3289	if (!be_quiet)
3290	WITH_CUR_LOCALE (error (`0`, `0`, _("\
3291	%s: character `%s' needed as default value not representable with one byte"),
3292	"LC_CTYPE", tmp));
3293	}
3294	else
3295	/ The index [0] is determined by the order of the*
3296	`ctype_map_newP' calls in `ctype_startup'. /*
3297	ctype->map256_collection[`0`][seq_from->bytes[`0`]]
3298	= seq_to->bytes[`0`];
3299	}
3300
3301	/ No need to search. /
3302	ELEM (ctype, map_collection, [`0`], ch) = ch + (`'A'` - `'a'`);
3303	}
3304	}
3305
3306	if (ctype->tomap_done[`1`] == `0`)
3307	/ "If this keyword [tolower] is not specified, the mapping shall be*
3308	the reverse mapping of the one specified to `toupper'." [P1003.2] /*
3309	{
3310	for (size_t cnt = `0`; cnt < ctype->map_collection_act[`0`]; ++cnt)
3311	if (ctype->map_collection[`0`][cnt] != `0`)
3312	ELEM (ctype, map_collection, [`1`],
3313	ctype->map_collection[`0`][cnt])
3314	= ctype->charnames[cnt];
3315
3316	for (size_t cnt = `0`; cnt < `256`; ++cnt)
3317	if (ctype->map256_collection[`0`][cnt] != `0`)
3318	ctype->map256_collection[`1`][ctype->map256_collection[`0`][cnt]] = cnt;
3319	}
3320
3321	if (ctype->outdigits_act != `10`)
3322	{
3323	if (ctype->outdigits_act != `0`)
3324	WITH_CUR_LOCALE (error (`0`, `0`, _("\
3325	%s: field `%s' does not contain exactly ten entries"),
3326	"LC_CTYPE", "outdigit"));
3327
3328	for (size_t cnt = ctype->outdigits_act; cnt < `10`; ++cnt)
3329	{
3330	ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3331	(char *) digits + cnt,
3332	`1`);
3333
3334	if (ctype->mboutdigits[cnt] == NULL)
3335	ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3336	longnames[cnt],
3337	strlen (longnames[cnt]));
3338
3339	if (ctype->mboutdigits[cnt] == NULL)
3340	ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3341	uninames[cnt], `9`);
3342
3343	if (ctype->mboutdigits[cnt] == NULL)
3344	{
3345	/ Provide a replacement. /
3346	WITH_CUR_LOCALE (error (`0`, `0`, _("\
3347	no output digits defined and none of the standard names in the charmap")));
3348
3349	ctype->mboutdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
3350	sizeof (struct charseq)
3351	+ `1`);
3352
3353	/ This is better than nothing. /
3354	ctype->mboutdigits[cnt]->bytes[`0`] = digits[cnt];
3355	ctype->mboutdigits[cnt]->nbytes = `1`;
3356	}
3357
3358	ctype->wcoutdigits[cnt] = L`'0'` + cnt;
3359	}
3360
3361	ctype->outdigits_act = `10`;
3362	}
3363
3364	#undef set_default
3365	}
3366
3367
3368	/ Initialize. Assumes t->p and t->q have already been set. /
3369	static inline void
3370	wctype_table_init (struct wctype_table *t)
3371	{
3372	t->level1 = NULL;
3373	t->level1_alloc = t->level1_size = `0`;
3374	t->level2 = NULL;
3375	t->level2_alloc = t->level2_size = `0`;
3376	t->level3 = NULL;
3377	t->level3_alloc = t->level3_size = `0`;
3378	}
3379
3380	/ Retrieve an entry. /
3381	static inline int
3382	wctype_table_get (struct wctype_table *t, uint32_t wc)
3383	{
3384	uint32_t index1 = wc >> (t->q + t->p + `5`);
3385	if (index1 < t->level1_size)
3386	{
3387	uint32_t lookup1 = t->level1[index1];
3388	if (lookup1 != EMPTY)
3389	{
3390	uint32_t index2 = ((wc >> (t->p + `5`)) & ((`1` << t->q) - `1`))
3391	+ (lookup1 << t->q);
3392	uint32_t lookup2 = t->level2[index2];
3393	if (lookup2 != EMPTY)
3394	{
3395	uint32_t index3 = ((wc >> `5`) & ((`1` << t->p) - `1`))
3396	+ (lookup2 << t->p);
3397	uint32_t lookup3 = t->level3[index3];
3398	uint32_t index4 = wc & `0x1f`;
3399
3400	return (lookup3 >> index4) & `1`;
3401	}
3402	}
3403	}
3404	return `0`;
3405	}
3406
3407	/ Add one entry. /
3408	static void
3409	wctype_table_add (struct wctype_table *t, uint32_t wc)
3410	{
3411	uint32_t index1 = wc >> (t->q + t->p + `5`);
3412	uint32_t index2 = (wc >> (t->p + `5`)) & ((`1` << t->q) - `1`);
3413	uint32_t index3 = (wc >> `5`) & ((`1` << t->p) - `1`);
3414	uint32_t index4 = wc & `0x1f`;
3415	size_t i, i1, i2;
3416
3417	if (index1 >= t->level1_size)
3418	{
3419	if (index1 >= t->level1_alloc)
3420	{
3421	size_t alloc = `2` * t->level1_alloc;
3422	if (alloc <= index1)
3423	alloc = index1 + `1`;
3424	t->level1 = (uint32_t ) xrealloc ((char* *) t->level1,
3425	alloc * sizeof (uint32_t));
3426	t->level1_alloc = alloc;
3427	}
3428	while (index1 >= t->level1_size)
3429	t->level1[t->level1_size++] = EMPTY;
3430	}
3431
3432	if (t->level1[index1] == EMPTY)
3433	{
3434	if (t->level2_size == t->level2_alloc)
3435	{
3436	size_t alloc = `2` * t->level2_alloc + `1`;
3437	t->level2 = (uint32_t ) xrealloc ((char* *) t->level2,
3438	(alloc << t->q) * sizeof (uint32_t));
3439	t->level2_alloc = alloc;
3440	}
3441	i1 = t->level2_size << t->q;
3442	i2 = (t->level2_size + `1`) << t->q;
3443	for (i = i1; i < i2; i++)
3444	t->level2[i] = EMPTY;
3445	t->level1[index1] = t->level2_size++;
3446	}
3447
3448	index2 += t->level1[index1] << t->q;
3449
3450	if (t->level2[index2] == EMPTY)
3451	{
3452	if (t->level3_size == t->level3_alloc)
3453	{
3454	size_t alloc = `2` * t->level3_alloc + `1`;
3455	t->level3 = (uint32_t ) xrealloc ((char* *) t->level3,
3456	(alloc << t->p) * sizeof (uint32_t));
3457	t->level3_alloc = alloc;
3458	}
3459	i1 = t->level3_size << t->p;
3460	i2 = (t->level3_size + `1`) << t->p;
3461	for (i = i1; i < i2; i++)
3462	t->level3[i] = `0`;
3463	t->level2[index2] = t->level3_size++;
3464	}
3465
3466	index3 += t->level2[index2] << t->p;
3467
3468	t->level3[index3] \|= (uint32_t)`1` << index4;
3469	}
3470
3471	/ Finalize and shrink. /
3472	static void
3473	add_locale_wctype_table (struct locale_file file, struct* wctype_table *t)
3474	{
3475	size_t i, j, k;
3476	uint32_t reorder3[t->level3_size];
3477	uint32_t reorder2[t->level2_size];
3478	uint32_t level2_offset, level3_offset;
3479
3480	/ Uniquify level3 blocks. /
3481	k = `0`;
3482	for (j = `0`; j < t->level3_size; j++)
3483	{
3484	for (i = `0`; i < k; i++)
3485	if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3486	(`1` << t->p) * sizeof (uint32_t)) == `0`)
3487	break;
3488	/ Relocate block j to block i. /
3489	reorder3[j] = i;
3490	if (i == k)
3491	{
3492	if (i != j)
3493	memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3494	(`1` << t->p) * sizeof (uint32_t));
3495	k++;
3496	}
3497	}
3498	t->level3_size = k;
3499
3500	for (i = `0`; i < (t->level2_size << t->q); i++)
3501	if (t->level2[i] != EMPTY)
3502	t->level2[i] = reorder3[t->level2[i]];
3503
3504	/ Uniquify level2 blocks. /
3505	k = `0`;
3506	for (j = `0`; j < t->level2_size; j++)
3507	{
3508	for (i = `0`; i < k; i++)
3509	if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3510	(`1` << t->q) * sizeof (uint32_t)) == `0`)
3511	break;
3512	/ Relocate block j to block i. /
3513	reorder2[j] = i;
3514	if (i == k)
3515	{
3516	if (i != j)
3517	memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3518	(`1` << t->q) * sizeof (uint32_t));
3519	k++;
3520	}
3521	}
3522	t->level2_size = k;
3523
3524	for (i = `0`; i < t->level1_size; i++)
3525	if (t->level1[i] != EMPTY)
3526	t->level1[i] = reorder2[t->level1[i]];
3527
3528	t->result_size =
3529	`5` * sizeof (uint32_t)
3530	+ t->level1_size * sizeof (uint32_t)
3531	+ (t->level2_size << t->q) * sizeof (uint32_t)
3532	+ (t->level3_size << t->p) * sizeof (uint32_t);
3533
3534	level2_offset =
3535	`5` * sizeof (uint32_t)
3536	+ t->level1_size * sizeof (uint32_t);
3537	level3_offset =
3538	`5` * sizeof (uint32_t)
3539	+ t->level1_size * sizeof (uint32_t)
3540	+ (t->level2_size << t->q) * sizeof (uint32_t);
3541
3542	start_locale_structure (file);
3543	add_locale_uint32 (file, t->q + t->p + `5`);
3544	add_locale_uint32 (file, t->level1_size);
3545	add_locale_uint32 (file, t->p + `5`);
3546	add_locale_uint32 (file, (`1` << t->q) - `1`);
3547	add_locale_uint32 (file, (`1` << t->p) - `1`);
3548
3549	for (i = `0`; i < t->level1_size; i++)
3550	add_locale_uint32
3551	(file,
3552	t->level1[i] == EMPTY
3553	? `0`
3554	: (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3555
3556	for (i = `0`; i < (t->level2_size << t->q); i++)
3557	add_locale_uint32
3558	(file,
3559	t->level2[i] == EMPTY
3560	? `0`
3561	: (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
3562
3563	add_locale_uint32_array (file, t->level3, t->level3_size << t->p);
3564	end_locale_structure (file);
3565
3566	if (t->level1_alloc > `0`)
3567	free (t->level1);
3568	if (t->level2_alloc > `0`)
3569	free (t->level2);
3570	if (t->level3_alloc > `0`)
3571	free (t->level3);
3572	}
3573
3574	/ Flattens the included transliterations into a translit list.*
3575	Inserts them in the list at `cursor', and returns the new cursor. /*
3576	static struct translit_t **
3577	translit_flatten (struct locale_ctype_t *ctype,
3578	const struct charmap_t *charmap,
3579	struct translit_t **cursor)
3580	{
3581	while (ctype->translit_include != NULL)
3582	{
3583	const char *copy_locale = ctype->translit_include->copy_locale;
3584	const char *copy_repertoire = ctype->translit_include->copy_repertoire;
3585	struct localedef_t *other;
3586
3587	/ Unchain the include statement. During the depth-first traversal*
3588	we don't want to visit any locale more than once. /*
3589	ctype->translit_include = ctype->translit_include->next;
3590
3591	other = find_locale (LC_CTYPE, copy_locale, copy_repertoire, charmap);
3592
3593	if (other == NULL \|\| other->categories[LC_CTYPE].ctype == NULL)
3594	{
3595	WITH_CUR_LOCALE (error (`0`, `0`, _("\
3596	%s: transliteration data from locale `%s' not available"),
3597	"LC_CTYPE", copy_locale));
3598	}
3599	else
3600	{
3601	struct locale_ctype_t *other_ctype =
3602	other->categories[LC_CTYPE].ctype;
3603
3604	cursor = translit_flatten (other_ctype, charmap, cursor);
3605	assert (other_ctype->translit_include == NULL);
3606
3607	if (other_ctype->translit != NULL)
3608	{
3609	/ Insert the other_ctype->translit list at cursor. /*
3610	struct translit_t *endp = other_ctype->translit;
3611	while (endp->next != NULL)
3612	endp = endp->next;
3613
3614	endp->next = *cursor;
3615	*cursor = other_ctype->translit;
3616
3617	/ Avoid any risk of circular lists. /
3618	other_ctype->translit = NULL;
3619
3620	cursor = &endp->next;
3621	}
3622
3623	if (ctype->default_missing == NULL)
3624	ctype->default_missing = other_ctype->default_missing;
3625	}
3626	}
3627
3628	return cursor;
3629	}
3630
3631	static void
3632	allocate_arrays (struct locale_ctype_t ctype, const* struct charmap_t *charmap,
3633	struct repertoire_t *repertoire)
3634	{
3635	size_t idx, nr;
3636	const void *key;
3637	size_t len;
3638	void *vdata;
3639	void *curs;
3640
3641	/ You wonder about this amount of memory? This is only because some*
3642	users do not manage to address the array with unsigned values or
3643	data types with range >= 256. '\200' would result in the array
3644	index -128. To help these poor people we duplicate the entries for
3645	128 up to 255 below the entry for \0. /*
3646	ctype->ctype_b = (char_class_t ) xcalloc (`256` + `128`, sizeof* (char_class_t));
3647	ctype->ctype32_b = (char_class32_t ) xcalloc (`256`, sizeof* (char_class32_t));
3648	ctype->class_b = (uint32_t **)
3649	xmalloc (ctype->nr_charclass * sizeof (uint32_t *));
3650	ctype->class_3level = (struct wctype_table *)
3651	xmalloc (ctype->nr_charclass * sizeof (struct wctype_table));
3652
3653	/ This is the array accessed using the multibyte string elements. /
3654	for (idx = `0`; idx < `256`; ++idx)
3655	ctype->ctype_b[`128` + idx] = ctype->class256_collection[idx];
3656
3657	/ Mirror first 127 entries. We must take care that entry -1 is not*
3658	mirrored because EOF == -1. /*
3659	for (idx = `0`; idx < `127`; ++idx)
3660	ctype->ctype_b[idx] = ctype->ctype_b[`256` + idx];
3661
3662	/ The 32 bit array contains all characters < 0x100. /
3663	for (idx = `0`; idx < ctype->class_collection_act; ++idx)
3664	if (ctype->charnames[idx] < `0x100`)
3665	ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
3666
3667	for (nr = `0`; nr < ctype->nr_charclass; nr++)
3668	{
3669	ctype->class_b[nr] = (uint32_t ) xcalloc (`256` / `32`, sizeof* (uint32_t));
3670
3671	/ We only set CLASS_B for the bits in the ISO C classes, not*
3672	the user defined classes. The number should not change but
3673	who knows. /*
3674	#define LAST_ISO_C_BIT 11
3675	if (nr <= LAST_ISO_C_BIT)
3676	for (idx = `0`; idx < `256`; ++idx)
3677	if (ctype->class256_collection[idx] & _ISbit (nr))
3678	ctype->class_b[nr][idx >> `5`] \|= (uint32_t) `1` << (idx & `0x1f`);
3679	}
3680
3681	for (nr = `0`; nr < ctype->nr_charclass; nr++)
3682	{
3683	struct wctype_table *t;
3684
3685	t = &ctype->class_3level[nr];
3686	t->p = `4`; / or: 5 /
3687	t->q = `7`; / or: 6 /
3688	wctype_table_init (t);
3689
3690	for (idx = `0`; idx < ctype->class_collection_act; ++idx)
3691	if (ctype->class_collection[idx] & _ISwbit (nr))
3692	wctype_table_add (t, ctype->charnames[idx]);
3693
3694	if (verbose)
3695	WITH_CUR_LOCALE (fprintf (stderr, _("\
3696	%s: table for class \"%s\": %lu bytes\n"),
3697	"LC_CTYPE", ctype->classnames[nr],
3698	(unsigned long int) t->result_size));
3699	}
3700
3701	/ Room for table of mappings. /
3702	ctype->map_b = (uint32_t *) xmalloc (`2` sizeof (uint32_t *));
3703	ctype->map32_b = (uint32_t **) xmalloc (ctype->map_collection_nr
3704	* sizeof (uint32_t *));
3705	ctype->map_3level = (struct wctrans_table *)
3706	xmalloc (ctype->map_collection_nr * sizeof (struct wctrans_table));
3707
3708	/ Fill in all mappings. /
3709	for (idx = `0`; idx < `2`; ++idx)
3710	{
3711	unsigned int idx2;
3712
3713	/ Allocate table. /
3714	ctype->map_b[idx] = (uint32_t *)
3715	xmalloc ((`256` + `128`) * sizeof (uint32_t));
3716
3717	/ Copy values from collection. /
3718	for (idx2 = `0`; idx2 < `256`; ++idx2)
3719	ctype->map_b[idx][`128` + idx2] = ctype->map256_collection[idx][idx2];
3720
3721	/ Mirror first 127 entries. We must take care not to map entry*
3722	-1 because EOF == -1. /*
3723	for (idx2 = `0`; idx2 < `127`; ++idx2)
3724	ctype->map_b[idx][idx2] = ctype->map_b[idx][`256` + idx2];
3725
3726	/ EOF must map to EOF. /
3727	ctype->map_b[idx][`127`] = EOF;
3728	}
3729
3730	for (idx = `0`; idx < ctype->map_collection_nr; ++idx)
3731	{
3732	unsigned int idx2;
3733
3734	/ Allocate table. /
3735	ctype->map32_b[idx] = (uint32_t ) xmalloc (`256` sizeof (uint32_t));
3736
3737	/ Copy values from collection. Default is identity mapping. /
3738	for (idx2 = `0`; idx2 < `256`; ++idx2)
3739	ctype->map32_b[idx][idx2] =
3740	(ctype->map_collection[idx][idx2] != `0`
3741	? ctype->map_collection[idx][idx2]
3742	: idx2);
3743	}
3744
3745	for (nr = `0`; nr < ctype->map_collection_nr; nr++)
3746	{
3747	struct wctrans_table *t;
3748
3749	t = &ctype->map_3level[nr];
3750	t->p = `7`;
3751	t->q = `9`;
3752	wctrans_table_init (t);
3753
3754	for (idx = `0`; idx < ctype->map_collection_act[nr]; ++idx)
3755	if (ctype->map_collection[nr][idx] != `0`)
3756	wctrans_table_add (t, ctype->charnames[idx],
3757	ctype->map_collection[nr][idx]);
3758
3759	if (verbose)
3760	WITH_CUR_LOCALE (fprintf (stderr, _("\
3761	%s: table for map \"%s\": %lu bytes\n"),
3762	"LC_CTYPE", ctype->mapnames[nr],
3763	(unsigned long int) t->result_size));
3764	}
3765
3766	/ Extra array for class and map names. /
3767	ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3768	* sizeof (uint32_t));
3769	ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3770	* sizeof (uint32_t));
3771
3772	ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
3773	ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
3774
3775	/ Array for width information. Because the expected widths are very*
3776	small (never larger than 2) we use only one single byte. This
3777	saves space.
3778	We put only printable characters in the table. wcwidth is specified
3779	to return -1 for non-printable characters. Doing the check here
3780	saves a run-time check.
3781	But we put L'\0' in the table. This again saves a run-time check. /*
3782	{
3783	struct wcwidth_table *t;
3784
3785	t = &ctype->width;
3786	t->p = `7`;
3787	t->q = `9`;
3788	wcwidth_table_init (t);
3789
3790	/ First set all the printable characters of the character set to*
3791	the default width. /*
3792	curs = NULL;
3793	while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == `0`)
3794	{
3795	struct charseq data = (struct* charseq *) vdata;
3796
3797	if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
3798	data->ucs4 = repertoire_find_value (ctype->repertoire,
3799	data->name, len);
3800
3801	if (data->ucs4 != ILLEGAL_CHAR_VALUE)
3802	{
3803	uint32_t *class_bits =
3804	find_idx (ctype, &ctype->class_collection, NULL,
3805	&ctype->class_collection_act, data->ucs4);
3806
3807	if (class_bits != NULL && (*class_bits & BITw (tok_print)))
3808	wcwidth_table_add (t, data->ucs4, charmap->width_default);
3809	}
3810	}
3811
3812	/ Now add the explicitly specified widths. /
3813	if (charmap->width_rules != NULL)
3814	for (size_t cnt = `0`; cnt < charmap->nwidth_rules; ++cnt)
3815	{
3816	unsigned char bytes[charmap->mb_cur_max];
3817	int nbytes = charmap->width_rules[cnt].from->nbytes;
3818
3819	/ We have the range of character for which the width is*
3820	specified described using byte sequences of the multibyte
3821	charset. We have to convert this to UCS4 now. And we
3822	cannot simply convert the beginning and the end of the
3823	sequence, we have to iterate over the byte sequence and
3824	convert it for every single character. /*
3825	memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
3826
3827	while (nbytes < charmap->width_rules[cnt].to->nbytes
3828	\|\| memcmp (bytes, charmap->width_rules[cnt].to->bytes,
3829	nbytes) <= `0`)
3830	{
3831	/ Find the UCS value for `bytes'. /
3832	int inner;
3833	uint32_t wch;
3834	struct charseq *seq =
3835	charmap_find_symbol (charmap, (char *) bytes, nbytes);
3836
3837	if (seq == NULL)
3838	wch = ILLEGAL_CHAR_VALUE;
3839	else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
3840	wch = seq->ucs4;
3841	else
3842	wch = repertoire_find_value (ctype->repertoire, seq->name,
3843	strlen (seq->name));
3844
3845	if (wch != ILLEGAL_CHAR_VALUE)
3846	{
3847	/ Store the value. /
3848	uint32_t *class_bits =
3849	find_idx (ctype, &ctype->class_collection, NULL,
3850	&ctype->class_collection_act, wch);
3851
3852	if (class_bits != NULL && (*class_bits & BITw (tok_print)))
3853	wcwidth_table_add (t, wch,
3854	charmap->width_rules[cnt].width);
3855	}
3856
3857	/ "Increment" the bytes sequence. /
3858	inner = nbytes - `1`;
3859	while (inner >= `0` && bytes[inner] == `0xff`)
3860	--inner;
3861
3862	if (inner < `0`)
3863	{
3864	/ We have to extend the byte sequence. /
3865	if (nbytes >= charmap->width_rules[cnt].to->nbytes)
3866	break;
3867
3868	bytes[`0`] = `1`;
3869	memset (&bytes[`1`], `0`, nbytes);
3870	++nbytes;
3871	}
3872	else
3873	{
3874	++bytes[inner];
3875	while (++inner < nbytes)
3876	bytes[inner] = `0`;
3877	}
3878	}
3879	}
3880
3881	/ Set the width of L'\0' to 0. /
3882	wcwidth_table_add (t, `0`, `0`);
3883
3884	if (verbose)
3885	WITH_CUR_LOCALE (fprintf (stderr, _("%s: table for width: %lu bytes\n"),
3886	"LC_CTYPE", (unsigned long int) t->result_size));
3887	}
3888
3889	/ Set MB_CUR_MAX. /
3890	ctype->mb_cur_max = charmap->mb_cur_max;
3891
3892	/ Now determine the table for the transliteration information.*
3893
3894	XXX It is not yet clear to me whether it is worth implementing a
3895	complicated algorithm which uses a hash table to locate the entries.
3896	For now I'll use a simple array which can be searching using binary
3897	search. /*
3898	if (ctype->translit_include != NULL)
3899	/ Traverse the locales mentioned in the `include' statements in a*
3900	depth-first way and fold in their transliteration information. /*
3901	translit_flatten (ctype, charmap, &ctype->translit);
3902
3903	if (ctype->translit != NULL)
3904	{
3905	/ First count how many entries we have. This is the upper limit*
3906	since some entries from the included files might be overwritten. /*
3907	size_t number = `0`;
3908	struct translit_t *runp = ctype->translit;
3909	struct translit_t **sorted;
3910	size_t from_len, to_len;
3911
3912	while (runp != NULL)
3913	{
3914	++number;
3915	runp = runp->next;
3916	}
3917
3918	/ Next we allocate an array large enough and fill in the values. /
3919	sorted = (struct translit_t **) alloca (number
3920	* sizeof (struct translit_t **));
3921	runp = ctype->translit;
3922	number = `0`;
3923	do
3924	{
3925	/ Search for the place where to insert this string.*
3926	XXX Better use a real sorting algorithm later. /*
3927	size_t idx = `0`;
3928	int replace = `0`;
3929
3930	while (idx < number)
3931	{
3932	int res = wcscmp ((const wchar_t *) sorted[idx]->from,
3933	(const wchar_t *) runp->from);
3934	if (res == `0`)
3935	{
3936	replace = `1`;
3937	break;
3938	}
3939	if (res > `0`)
3940	break;
3941	++idx;
3942	}
3943
3944	if (replace)
3945	sorted[idx] = runp;
3946	else
3947	{
3948	memmove (&sorted[idx + `1`], &sorted[idx],
3949	(number - idx) * sizeof (struct translit_t *));
3950	sorted[idx] = runp;
3951	++number;
3952	}
3953
3954	runp = runp->next;
3955	}
3956	while (runp != NULL);
3957
3958	/ The next step is putting all the possible transliteration*
3959	strings in one memory block so that we can write it out.
3960	We need several different blocks:
3961	- index to the from-string array
3962	- from-string array
3963	- index to the to-string array
3964	- to-string array.
3965	*/
3966	from_len = to_len = `0`;
3967	for (size_t cnt = `0`; cnt < number; ++cnt)
3968	{
3969	struct translit_to_t *srunp;
3970	from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + `1`;
3971	srunp = sorted[cnt]->to;
3972	while (srunp != NULL)
3973	{
3974	to_len += wcslen ((const wchar_t *) srunp->str) + `1`;
3975	srunp = srunp->next;
3976	}
3977	/ Plus one for the extra NUL character marking the end of*
3978	the list for the current entry. /*
3979	++to_len;
3980	}
3981
3982	/ We can allocate the arrays for the results. /
3983	ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
3984	ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
3985	ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
3986	ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
3987
3988	from_len = `0`;
3989	to_len = `0`;
3990	for (size_t cnt = `0`; cnt < number; ++cnt)
3991	{
3992	size_t len;
3993	struct translit_to_t *srunp;
3994
3995	ctype->translit_from_idx[cnt] = from_len;
3996	ctype->translit_to_idx[cnt] = to_len;
3997
3998	len = wcslen ((const wchar_t *) sorted[cnt]->from) + `1`;
3999	wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
4000	(const wchar_t *) sorted[cnt]->from, len);
4001	from_len += len;
4002
4003	ctype->translit_to_idx[cnt] = to_len;
4004	srunp = sorted[cnt]->to;
4005	while (srunp != NULL)
4006	{
4007	len = wcslen ((const wchar_t *) srunp->str) + `1`;
4008	wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
4009	(const wchar_t *) srunp->str, len);
4010	to_len += len;
4011	srunp = srunp->next;
4012	}
4013	ctype->translit_to_tbl[to_len++] = L`'\0'`;
4014	}
4015
4016	/ Store the information about the length. /
4017	ctype->translit_idx_size = number;
4018	ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
4019	ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
4020	}
4021	else
4022	{
4023	ctype->translit_from_idx = no_str;
4024	ctype->translit_from_tbl = no_str;
4025	ctype->translit_to_tbl = no_str;
4026	ctype->translit_idx_size = `0`;
4027	ctype->translit_from_tbl_size = `0`;
4028	ctype->translit_to_tbl_size = `0`;
4029	}
4030	}
4031

Browse the source code of glibc_src_2.25/locale/programs/ld-ctype.c