ld-collate.c source code [glibc_src_2.23/locale/programs/ld-collate.c]

/* Copyright (C) 1995-2016 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published
   by the Free Software Foundation; version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.  */
17
18	#ifdef HAVE_CONFIG_H
19	# include <config.h>
20	#endif
21
#include <errno.h>
#include <error.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include <stdint.h>
#include <sys/param.h>
28
29	#include "localedef.h"
30	#include "charmap.h"
31	#include "localeinfo.h"
32	#include "linereader.h"
33	#include "locfile.h"
34	#include "elem-hash.h"
35
/* Uncomment the following line in the production version.  */
/* #define NDEBUG 1 */
38	#include <assert.h>
39
40	#define obstack_chunk_alloc malloc
41	#define obstack_chunk_free free
42
/* Append DATA, after passing it through maybe_swap_uint32, as a 32-bit
   word to the object currently being built in OBSTACK.  The current
   object size is asserted to satisfy LOCFILE_ALIGNED_P.  */
static inline void
__attribute ((always_inline))
obstack_int32_grow (struct obstack *obstack, int32_t data)
{
  int32_t word;

  assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));
  word = maybe_swap_uint32 (data);

  /* Only when int is not exactly 32 bits wide do we have to copy the
     raw bytes; otherwise the int-sized grow primitive does the job.  */
  if (sizeof (int) != sizeof (int32_t))
    obstack_grow (obstack, &word, sizeof (int32_t));
  else
    obstack_int_grow (obstack, word);
}
54
/* Variant of obstack_int32_grow which uses obstack_int_grow_fast when
   int is 32 bits wide.  When it is not, the regular obstack_grow is
   used to append the four bytes.  */
static inline void
__attribute ((always_inline))
obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
{
  int32_t word;

  assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));
  word = maybe_swap_uint32 (data);

  if (sizeof (int) != sizeof (int32_t))
    obstack_grow (obstack, &word, sizeof (int32_t));
  else
    obstack_int_grow_fast (obstack, word);
}
66
/* Forward declaration.  */
struct element_t;

/* Data type for list of strings.  Each node describes one section and
   is a member of two singly linked lists at once (see DEF_NEXT and
   NEXT).  */
struct section_list
{
  /* Successor in the known_sections list.  */
  struct section_list *def_next;
  /* Successor in the sections list.  */
  struct section_list *next;
  /* Name of the section.  */
  const char *name;
  /* First element of this section.  */
  struct element_t *first;
  /* Last element of this section.  */
  struct element_t *last;
  /* These are the rules for this section.  */
  enum coll_sort_rule *rules;
  /* Index of the rule set in the appropriate section of the output file.  */
  int ruleidx;
};
88
struct element_t;

/* Counted array of element pointers.  */
struct element_list_t
{
  /* Number of elements.  */
  int cnt;

  /* Array with CNT element pointers.  */
  struct element_t **w;
};
98
/* Data type for collating element.  */
struct element_t
{
  const char *name;

  /* Multibyte and wide character representation with their lengths.  */
  const char *mbs;
  size_t nmbs;
  const uint32_t *wcs;
  size_t nwcs;
  int *mborder;
  int wcorder;

  /* The following is a bit mask which bits are set if this element is
     used in the appropriate level.  Interesting for the singlebyte
     weight computation.

     XXX The type here restricts the number of levels to 32.  It could
     be changed if necessary but I doubt this is necessary.  */
  unsigned int used_in_level;

  /* One weight list per level.  */
  struct element_list_t *weights;

  /* Nonzero if this is a real character definition.  */
  int is_character;

  /* Order of the character in the sequence.  This information will
     be used in range expressions.  */
  int mbseqorder;
  int wcseqorder;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;

  /* Which section does this belong to.  */
  struct section_list *section;

  /* Predecessor and successor in the order list.  */
  struct element_t *last;
  struct element_t *next;

  /* Next element in multibyte output list.  */
  struct element_t *mbnext;
  struct element_t *mblast;

  /* Next element in wide character output list.  */
  struct element_t *wcnext;
  struct element_t *wclast;
};
148
/* Special element values.  These are small integers cast to element
   pointers and are used as placeholders in weight lists; they must
   never be dereferenced.  */
#define ELEMENT_ELLIPSIS2	((struct element_t *) 1)
#define ELEMENT_ELLIPSIS3	((struct element_t *) 2)
#define ELEMENT_ELLIPSIS4	((struct element_t *) 3)
153
/* Data type for collating symbol.  */
struct symbol_t
{
  const char *name;

  /* Point to place in the order list.  */
  struct element_t *order;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;
};
166
167	/ Sparse table of struct element_t . /*
168	#define TABLE wchead_table
169	#define ELEMENT struct element_t *
170	#define DEFAULT NULL
171	#define ITERATE
172	#define NO_ADD_LOCALE
173	#include "3level.h"
174
175	/ Sparse table of int32_t. /
176	#define TABLE collidx_table
177	#define ELEMENT int32_t
178	#define DEFAULT 0
179	#include "3level.h"
180
181	/ Sparse table of uint32_t. /
182	#define TABLE collseq_table
183	#define ELEMENT uint32_t
184	#define DEFAULT ~((uint32_t) 0)
185	#include "3level.h"
186
187
/* Simple name list for the preprocessor.  The string is stored
   directly behind the node, so nodes must be allocated with room for
   the name and its terminating NUL.  */
struct name_list
{
  struct name_list *next;
  /* C99 flexible array member (replaces the zero-length array).  */
  char str[];
};
194
195
196	/ The real definition of the struct for the LC_COLLATE locale. /
197	struct locale_collate_t
198	{
199	int col_weight_max;
200	int cur_weight_max;
201
202	/ List of known scripts. /
203	struct section_list *known_sections;
204	/ List of used sections. /
205	struct section_list *sections;
206	/ Current section using definition. /
207	struct section_list *current_section;
208	/ There always can be an unnamed section. /
209	struct section_list unnamed_section;
210	/ Flag whether the unnamed section has been defined. /
211	bool unnamed_section_defined;
212	/ To make handling of errors easier we have another section. /
213	struct section_list error_section;
214	/ Sometimes we are defining the values for collating symbols before*
215	the first actual section. /*
216	struct section_list symbol_section;
217
218	/ Start of the order list. /
219	struct element_t *start;
220
221	/ The undefined element. /
222	struct element_t undefined;
223
224	/ This is the cursor for `reorder_after' insertions. /
225	struct element_t *cursor;
226
227	/ This value is used when handling ellipsis. /
228	struct element_t ellipsis_weight;
229
230	/ Known collating elements. /
231	hash_table elem_table;
232
233	/ Known collating symbols. /
234	hash_table sym_table;
235
236	/ Known collation sequences. /
237	hash_table seq_table;
238
239	struct obstack mempool;
240
241	/ The LC_COLLATE category is a bit special as it is sometimes possible*
242	that the definitions from more than one input file contains information.
243	Therefore we keep all relevant input in a list. /*
244	struct locale_collate_t *next;
245
246	/ Arrays with heads of the list for each of the leading bytes in*
247	the multibyte sequences. /*
248	struct element_t *mbheads[`256`];
249
250	/ Arrays with heads of the list for each of the leading bytes in*
251	the multibyte sequences. /*
252	struct wchead_table wcheads;
253
254	/ The arrays with the collation sequence order. /
255	unsigned char mbseqorder[`256`];
256	struct collseq_table wcseqorder;
257
258	/ State of the preprocessor. /
259	enum
260	{
261	else_none = `0`,
262	else_ignore,
263	else_seen
264	}
265	else_action;
266	};
267
268
/* We have a few global variables which are used for reading all
   LC_COLLATE category descriptions in all files.  NRULES is zero until
   the first direction specification has been read.  */
static uint32_t nrules;

/* List of defined preprocessor symbols.  */
static struct name_list *defined;
275
276
/* We need UTF-8 encoding of numbers.

   Store the UTF-8 style encoding of VAL in BUF and return the number
   of bytes written.  Values below 0x80 are stored as a single byte;
   larger values use between two and six bytes, so BUF must provide
   room for six bytes.  No terminating NUL is written.  */
static inline int
__attribute ((always_inline))
utf8_encode (char *buf, int val)
{
  int retval;

  if (val < 0x80)
    {
      *buf = (char) val;
      retval = 1;
    }
  else
    {
      int step;
      unsigned char lead;

      /* Find the shortest form whose payload bits can hold VAL.  */
      for (step = 2; step < 6; ++step)
        if ((val & (~(uint32_t) 0 << (5 * step + 1))) == 0)
          break;
      retval = step;

      /* The lead byte starts with STEP one bits followed by a zero bit.
         Shifting an unsigned constant avoids the implementation-defined
         right shift of the negative value ~0xff.  */
      lead = (unsigned char) (0xff00u >> step);

      /* Fill in the continuation bytes from the end; each one carries
         six bits of the value.  */
      for (int idx = step - 1; idx > 0; --idx)
        {
          buf[idx] = (char) (0x80 | (val & 0x3f));
          val >>= 6;
        }

      /* Whatever is left of VAL goes into the lead byte.  */
      buf[0] = (char) (lead | val);
    }

  return retval;
}
311
312
313	static struct section_list *
314	make_seclist_elem (struct locale_collate_t collate, const* char *string,
315	struct section_list *next)
316	{
317	struct section_list *newp;
318
319	newp = (struct section_list *) obstack_alloc (&collate->mempool,
320	sizeof (*newp));
321	newp->next = next;
322	newp->name = string;
323	newp->first = NULL;
324	newp->last = NULL;
325
326	return newp;
327	}
328
329
330	static struct element_t *
331	new_element (struct locale_collate_t collate, const* char *mbs, size_t mbslen,
332	const uint32_t wcs, const* char *name, size_t namelen,
333	int is_character)
334	{
335	struct element_t *newp;
336
337	newp = (struct element_t *) obstack_alloc (&collate->mempool,
338	sizeof (*newp));
339	newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
340	name, namelen);
341	if (mbs != NULL)
342	{
343	newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
344	newp->nmbs = mbslen;
345	}
346	else
347	{
348	newp->mbs = NULL;
349	newp->nmbs = `0`;
350	}
351	if (wcs != NULL)
352	{
353	size_t nwcs = wcslen ((wchar_t *) wcs);
354	uint32_t zero = `0`;
355	/ Handle <U0000> as a single character. /
356	if (nwcs == `0`)
357	nwcs = `1`;
358	obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
359	obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
360	newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
361	newp->nwcs = nwcs;
362	}
363	else
364	{
365	newp->wcs = NULL;
366	newp->nwcs = `0`;
367	}
368	newp->mborder = NULL;
369	newp->wcorder = `0`;
370	newp->used_in_level = `0`;
371	newp->is_character = is_character;
372
373	/ Will be assigned later. XXX /
374	newp->mbseqorder = `0`;
375	newp->wcseqorder = `0`;
376
377	/ Will be allocated later. /
378	newp->weights = NULL;
379
380	newp->file = NULL;
381	newp->line = `0`;
382
383	newp->section = collate->current_section;
384
385	newp->last = NULL;
386	newp->next = NULL;
387
388	newp->mbnext = NULL;
389	newp->mblast = NULL;
390
391	newp->wcnext = NULL;
392	newp->wclast = NULL;
393
394	return newp;
395	}
396
397
398	static struct symbol_t *
399	new_symbol (struct locale_collate_t collate, const* char *name, size_t len)
400	{
401	struct symbol_t *newp;
402
403	newp = (struct symbol_t ) obstack_alloc (&collate->mempool, sizeof* (*newp));
404
405	newp->name = obstack_copy0 (&collate->mempool, name, len);
406	newp->order = NULL;
407
408	newp->file = NULL;
409	newp->line = `0`;
410
411	return newp;
412	}
413
414
415	/ Test whether this name is already defined somewhere. /
416	static int
417	check_duplicate (struct linereader ldfile, struct* locale_collate_t *collate,
418	const struct charmap_t *charmap,
419	struct repertoire_t repertoire, const* char *symbol,
420	size_t symbol_len)
421	{
422	void *ignore = NULL;
423
424	if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == `0`)
425	{
426	lr_error (ldfile, _("`%.*s' already defined in charmap"),
427	(int) symbol_len, symbol);
428	return `1`;
429	}
430
431	if (repertoire != NULL
432	&& (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore)
433	== `0`))
434	{
435	lr_error (ldfile, _("`%.*s' already defined in repertoire"),
436	(int) symbol_len, symbol);
437	return `1`;
438	}
439
440	if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == `0`)
441	{
442	lr_error (ldfile, _("`%.*s' already defined as collating symbol"),
443	(int) symbol_len, symbol);
444	return `1`;
445	}
446
447	if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == `0`)
448	{
449	lr_error (ldfile, _("`%.*s' already defined as collating element"),
450	(int) symbol_len, symbol);
451	return `1`;
452	}
453
454	return `0`;
455	}
456
457
458	/ Read the direction specification. /
459	static void
460	read_directions (struct linereader ldfile, struct* token *arg,
461	const struct charmap_t *charmap,
462	struct repertoire_t repertoire, struct* localedef_t *result)
463	{
464	int cnt = `0`;
465	int max = nrules ?: `10`;
466	enum coll_sort_rule rules = calloc (max, sizeof* (*rules));
467	int warned = `0`;
468	struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
469
470	while (`1`)
471	{
472	int valid = `0`;
473
474	if (arg->tok == tok_forward)
475	{
476	if (rules[cnt] & sort_backward)
477	{
478	if (! warned)
479	{
480	lr_error (ldfile, _("\
481	%s: `forward' and `backward' are mutually excluding each other"),
482	"LC_COLLATE");
483	warned = `1`;
484	}
485	}
486	else if (rules[cnt] & sort_forward)
487	{
488	if (! warned)
489	{
490	lr_error (ldfile, _("\
491	%s: `%s' mentioned more than once in definition of weight %d"),
492	"LC_COLLATE", "forward", cnt + `1`);
493	}
494	}
495	else
496	rules[cnt] \|= sort_forward;
497
498	valid = `1`;
499	}
500	else if (arg->tok == tok_backward)
501	{
502	if (rules[cnt] & sort_forward)
503	{
504	if (! warned)
505	{
506	lr_error (ldfile, _("\
507	%s: `forward' and `backward' are mutually excluding each other"),
508	"LC_COLLATE");
509	warned = `1`;
510	}
511	}
512	else if (rules[cnt] & sort_backward)
513	{
514	if (! warned)
515	{
516	lr_error (ldfile, _("\
517	%s: `%s' mentioned more than once in definition of weight %d"),
518	"LC_COLLATE", "backward", cnt + `1`);
519	}
520	}
521	else
522	rules[cnt] \|= sort_backward;
523
524	valid = `1`;
525	}
526	else if (arg->tok == tok_position)
527	{
528	if (rules[cnt] & sort_position)
529	{
530	if (! warned)
531	{
532	lr_error (ldfile, _("\
533	%s: `%s' mentioned more than once in definition of weight %d"),
534	"LC_COLLATE", "position", cnt + `1`);
535	}
536	}
537	else
538	rules[cnt] \|= sort_position;
539
540	valid = `1`;
541	}
542
543	if (valid)
544	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
545
546	if (arg->tok == tok_eof \|\| arg->tok == tok_eol \|\| arg->tok == tok_comma
547	\|\| arg->tok == tok_semicolon)
548	{
549	if (! valid && ! warned)
550	{
551	lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
552	warned = `1`;
553	}
554
555	/ See whether we have to increment the counter. /
556	if (arg->tok != tok_comma && rules[cnt] != `0`)
557	{
558	/ Add the default `forward' if we have seen only `position'. /
559	if (rules[cnt] == sort_position)
560	rules[cnt] = sort_position \| sort_forward;
561
562	++cnt;
563	}
564
565	if (arg->tok == tok_eof \|\| arg->tok == tok_eol)
566	/ End of line or file, so we exit the loop. /
567	break;
568
569	if (nrules == `0`)
570	{
571	/ See whether we have enough room in the array. /
572	if (cnt == max)
573	{
574	max += `10`;
575	rules = (enum coll_sort_rule *) xrealloc (rules,
576	max
577	* sizeof (*rules));
578	memset (&rules[cnt], `'\0'`, (max - cnt) * sizeof (*rules));
579	}
580	}
581	else
582	{
583	if (cnt == nrules)
584	{
585	/ There must not be any more rule. /
586	if (! warned)
587	{
588	lr_error (ldfile, _("\
589	%s: too many rules; first entry only had %d"),
590	"LC_COLLATE", nrules);
591	warned = `1`;
592	}
593
594	lr_ignore_rest (ldfile, `0`);
595	break;
596	}
597	}
598	}
599	else
600	{
601	if (! warned)
602	{
603	lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
604	warned = `1`;
605	}
606	}
607
608	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
609	}
610
611	if (nrules == `0`)
612	{
613	/ Now we know how many rules we have. /
614	nrules = cnt;
615	rules = (enum coll_sort_rule *) xrealloc (rules,
616	nrules * sizeof (*rules));
617	}
618	else
619	{
620	if (cnt < nrules)
621	{
622	/ Not enough rules in this specification. /
623	if (! warned)
624	lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
625
626	do
627	rules[cnt] = sort_forward;
628	while (++cnt < nrules);
629	}
630	}
631
632	collate->current_section->rules = rules;
633	}
634
635
636	static struct element_t *
637	find_element (struct linereader ldfile, struct* locale_collate_t *collate,
638	const char *str, size_t len)
639	{
640	void *result = NULL;
641
642	/ Search for the entries among the collation sequences already define. /
643	if (find_entry (&collate->seq_table, str, len, &result) != `0`)
644	{
645	/ Nope, not define yet. So we see whether it is a*
646	collation symbol. /*
647	void *ptr;
648
649	if (find_entry (&collate->sym_table, str, len, &ptr) == `0`)
650	{
651	/ It's a collation symbol. /
652	struct symbol_t sym = (struct* symbol_t *) ptr;
653	result = sym->order;
654
655	if (result == NULL)
656	result = sym->order = new_element (collate, NULL, `0`, NULL,
657	NULL, `0`, `0`);
658	}
659	else if (find_entry (&collate->elem_table, str, len, &result) != `0`)
660	{
661	/ It's also no collation element. So it is a character*
662	element defined later. /*
663	result = new_element (collate, NULL, `0`, NULL, str, len, `1`);
664	/ Insert it into the sequence table. /
665	insert_entry (&collate->seq_table, str, len, result);
666	}
667	}
668
669	return (struct element_t *) result;
670	}
671
672
673	static void
674	unlink_element (struct locale_collate_t *collate)
675	{
676	if (collate->cursor == collate->start)
677	{
678	assert (collate->cursor->next == NULL);
679	assert (collate->cursor->last == NULL);
680	collate->cursor = NULL;
681	}
682	else
683	{
684	if (collate->cursor->next != NULL)
685	collate->cursor->next->last = collate->cursor->last;
686	if (collate->cursor->last != NULL)
687	collate->cursor->last->next = collate->cursor->next;
688	collate->cursor = collate->cursor->last;
689	}
690	}
691
692
693	static void
694	insert_weights (struct linereader ldfile, struct* element_t *elem,
695	const struct charmap_t *charmap,
696	struct repertoire_t repertoire, struct* localedef_t *result,
697	enum token_t ellipsis)
698	{
699	int weight_cnt;
700	struct token *arg;
701	struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
702
703	/ Initialize all the fields. /
704	elem->file = ldfile->fname;
705	elem->line = ldfile->lineno;
706
707	elem->last = collate->cursor;
708	elem->next = collate->cursor ? collate->cursor->next : NULL;
709	if (collate->cursor != NULL && collate->cursor->next != NULL)
710	collate->cursor->next->last = elem;
711	if (collate->cursor != NULL)
712	collate->cursor->next = elem;
713	if (collate->start == NULL)
714	{
715	assert (collate->cursor == NULL);
716	collate->start = elem;
717	}
718
719	elem->section = collate->current_section;
720
721	if (collate->current_section->first == NULL)
722	collate->current_section->first = elem;
723	if (collate->current_section->last == collate->cursor)
724	collate->current_section->last = elem;
725
726	collate->cursor = elem;
727
728	elem->weights = (struct element_list_t *)
729	obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
730	memset (elem->weights, `'\0'`, nrules * sizeof (struct element_list_t));
731
732	weight_cnt = `0`;
733
734	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
735	do
736	{
737	if (arg->tok == tok_eof \|\| arg->tok == tok_eol)
738	break;
739
740	if (arg->tok == tok_ignore)
741	{
742	/ The weight for this level has to be ignored. We use the*
743	null pointer to indicate this. /*
744	elem->weights[weight_cnt].w = (struct element_t **)
745	obstack_alloc (&collate->mempool, sizeof (struct element_t *));
746	elem->weights[weight_cnt].w[`0`] = NULL;
747	elem->weights[weight_cnt].cnt = `1`;
748	}
749	else if (arg->tok == tok_bsymbol \|\| arg->tok == tok_ucs4)
750	{
751	char ucs4str[`10`];
752	struct element_t *val;
753	char *symstr;
754	size_t symlen;
755
756	if (arg->tok == tok_bsymbol)
757	{
758	symstr = arg->val.str.startmb;
759	symlen = arg->val.str.lenmb;
760	}
761	else
762	{
763	snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
764	symstr = ucs4str;
765	symlen = `9`;
766	}
767
768	val = find_element (ldfile, collate, symstr, symlen);
769	if (val == NULL)
770	break;
771
772	elem->weights[weight_cnt].w = (struct element_t **)
773	obstack_alloc (&collate->mempool, sizeof (struct element_t *));
774	elem->weights[weight_cnt].w[`0`] = val;
775	elem->weights[weight_cnt].cnt = `1`;
776	}
777	else if (arg->tok == tok_string)
778	{
779	/ Split the string up in the individual characters and put*
780	the element definitions in the list. /*
781	const char *cp = arg->val.str.startmb;
782	int cnt = `0`;
783	struct element_t *charelem;
784	struct element_t **weights = NULL;
785	int max = `0`;
786
787	if (*cp == `'\0'`)
788	{
789	lr_error (ldfile, _("%s: empty weight string not allowed"),
790	"LC_COLLATE");
791	lr_ignore_rest (ldfile, `0`);
792	break;
793	}
794
795	do
796	{
797	if (*cp == `'<'`)
798	{
799	/ Ahh, it's a bsymbol or an UCS4 value. If it's*
800	the latter we have to unify the name. /*
801	const char *startp = ++cp;
802	size_t len;
803
804	while (*cp != `'>'`)
805	{
806	if (*cp == ldfile->escape_char)
807	++cp;
808	if (*cp == `'\0'`)
809	/ It's a syntax error. /
810	goto syntax;
811
812	++cp;
813	}
814
815	if (cp - startp == `5` && startp[`0`] == `'U'`
816	&& isxdigit (startp[`1`]) && isxdigit (startp[`2`])
817	&& isxdigit (startp[`3`]) && isxdigit (startp[`4`]))
818	{
819	unsigned int ucs4 = strtoul (startp + `1`, NULL, `16`);
820	char *newstr;
821
822	newstr = (char *) xmalloc (`10`);
823	snprintf (newstr, `10`, "U%08X", ucs4);
824	startp = newstr;
825
826	len = `9`;
827	}
828	else
829	len = cp - startp;
830
831	charelem = find_element (ldfile, collate, startp, len);
832	++cp;
833	}
834	else
835	{
836	/ People really shouldn't use characters directly in*
837	the string. Especially since it's not really clear
838	what this means. We interpret all characters in the
839	string as if that would be bsymbols. Otherwise we
840	would have to match back to bsymbols somehow and this
841	is normally not what people normally expect. /*
842	charelem = find_element (ldfile, collate, cp++, `1`);
843	}
844
845	if (charelem == NULL)
846	{
847	/ We ignore the rest of the line. /
848	lr_ignore_rest (ldfile, `0`);
849	break;
850	}
851
852	/ Add the pointer. /
853	if (cnt >= max)
854	{
855	struct element_t **newp;
856	max += `10`;
857	newp = (struct element_t **)
858	alloca (max * sizeof (struct element_t *));
859	memcpy (newp, weights, cnt * sizeof (struct element_t *));
860	weights = newp;
861	}
862	weights[cnt++] = charelem;
863	}
864	while (*cp != `'\0'`);
865
866	/ Now store the information. /
867	elem->weights[weight_cnt].w = (struct element_t **)
868	obstack_alloc (&collate->mempool,
869	cnt * sizeof (struct element_t *));
870	memcpy (elem->weights[weight_cnt].w, weights,
871	cnt * sizeof (struct element_t *));
872	elem->weights[weight_cnt].cnt = cnt;
873
874	/ We don't need the string anymore. /
875	free (arg->val.str.startmb);
876	}
877	else if (ellipsis != tok_none
878	&& (arg->tok == tok_ellipsis2
879	\|\| arg->tok == tok_ellipsis3
880	\|\| arg->tok == tok_ellipsis4))
881	{
882	/ It must be the same ellipsis as used in the initial column. /
883	if (arg->tok != ellipsis)
884	lr_error (ldfile, _("\
885	%s: weights must use the same ellipsis symbol as the name"),
886	"LC_COLLATE");
887
888	/ The weight for this level will depend on the element*
889	iterating over the range. Put a placeholder. /*
890	elem->weights[weight_cnt].w = (struct element_t **)
891	obstack_alloc (&collate->mempool, sizeof (struct element_t *));
892	elem->weights[weight_cnt].w[`0`] = ELEMENT_ELLIPSIS2;
893	elem->weights[weight_cnt].cnt = `1`;
894	}
895	else
896	{
897	syntax:
898	/ It's a syntax error. /
899	lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
900	lr_ignore_rest (ldfile, `0`);
901	break;
902	}
903
904	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
905	/ This better should be the end of the line or a semicolon. /
906	if (arg->tok == tok_semicolon)
907	/ OK, ignore this and read the next token. /
908	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
909	else if (arg->tok != tok_eof && arg->tok != tok_eol)
910	{
911	/ It's a syntax error. /
912	lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
913	lr_ignore_rest (ldfile, `0`);
914	break;
915	}
916	}
917	while (++weight_cnt < nrules);
918
919	if (weight_cnt < nrules)
920	{
921	/ This means the rest of the line uses the current element as*
922	the weight. /*
923	do
924	{
925	elem->weights[weight_cnt].w = (struct element_t **)
926	obstack_alloc (&collate->mempool, sizeof (struct element_t *));
927	if (ellipsis == tok_none)
928	elem->weights[weight_cnt].w[`0`] = elem;
929	else
930	elem->weights[weight_cnt].w[`0`] = ELEMENT_ELLIPSIS2;
931	elem->weights[weight_cnt].cnt = `1`;
932	}
933	while (++weight_cnt < nrules);
934	}
935	else
936	{
937	if (arg->tok == tok_ignore \|\| arg->tok == tok_bsymbol)
938	{
939	/ Too many rule values. /
940	lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
941	lr_ignore_rest (ldfile, `0`);
942	}
943	else
944	lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
945	}
946	}
947
948
949	static int
950	insert_value (struct linereader ldfile, const* char *symstr, size_t symlen,
951	const struct charmap_t charmap, struct* repertoire_t *repertoire,
952	struct localedef_t *result)
953	{
954	/ First find out what kind of symbol this is. /
955	struct charseq *seq;
956	uint32_t wc;
957	struct element_t *elem = NULL;
958	struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
959
960	/ Try to find the character in the charmap. /
961	seq = charmap_find_value (charmap, symstr, symlen);
962
963	/ Determine the wide character. /
964	if (seq == NULL \|\| seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
965	{
966	wc = repertoire_find_value (repertoire, symstr, symlen);
967	if (seq != NULL)
968	seq->ucs4 = wc;
969	}
970	else
971	wc = seq->ucs4;
972
973	if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
974	{
975	/ It's no character, so look through the collation elements and*
976	symbol list. /*
977	void *ptr = elem;
978	if (find_entry (&collate->elem_table, symstr, symlen, &ptr) != `0`)
979	{
980	void *result;
981	struct symbol_t *sym = NULL;
982
983	/ It's also collation element. Therefore it's either a*
984	collating symbol or it's a character which is not
985	supported by the character set. In the later case we
986	simply create a dummy entry. /*
987	if (find_entry (&collate->sym_table, symstr, symlen, &result) == `0`)
988	{
989	/ It's a collation symbol. /
990	sym = (struct symbol_t *) result;
991
992	elem = sym->order;
993	}
994
995	if (elem == NULL)
996	{
997	elem = new_element (collate, NULL, `0`, NULL, symstr, symlen, `0`);
998
999	if (sym != NULL)
1000	sym->order = elem;
1001	else
1002	/ Enter a fake element in the sequence table. This*
1003	won't cause anything in the output since there is
1004	no multibyte or wide character associated with
1005	it. /*
1006	insert_entry (&collate->seq_table, symstr, symlen, elem);
1007	}
1008	}
1009	else
1010	/ Copy the result back. /
1011	elem = ptr;
1012	}
1013	else
1014	{
1015	/ Otherwise the symbols stands for a character. /
1016	void *ptr = elem;
1017	if (find_entry (&collate->seq_table, symstr, symlen, &ptr) != `0`)
1018	{
1019	uint32_t wcs[`2`] = { wc, `0` };
1020
1021	/ We have to allocate an entry. /
1022	elem = new_element (collate,
1023	seq != NULL ? (char *) seq->bytes : NULL,
1024	seq != NULL ? seq->nbytes : `0`,
1025	wc == ILLEGAL_CHAR_VALUE ? NULL : wcs,
1026	symstr, symlen, `1`);
1027
1028	/ And add it to the table. /
1029	if (insert_entry (&collate->seq_table, symstr, symlen, elem) != `0`)
1030	/ This cannot happen. /
1031	assert (! "Internal error");
1032	}
1033	else
1034	{
1035	/ Copy the result back. /
1036	elem = ptr;
1037
1038	/ Maybe the character was used before the definition. In this case*
1039	we have to insert the byte sequences now. /*
1040	if (elem->mbs == NULL && seq != NULL)
1041	{
1042	elem->mbs = obstack_copy0 (&collate->mempool,
1043	seq->bytes, seq->nbytes);
1044	elem->nmbs = seq->nbytes;
1045	}
1046
1047	if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
1048	{
1049	uint32_t wcs[`2`] = { wc, `0` };
1050
1051	elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
1052	elem->nwcs = `1`;
1053	}
1054	}
1055	}
1056
1057	/ Test whether this element is not already in the list. /
1058	if (elem->next != NULL \|\| elem == collate->cursor)
1059	{
1060	lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
1061	(int) symlen, symstr, elem->file, elem->line);
1062	lr_ignore_rest (ldfile, `0`);
1063	return `1`;
1064	}
1065
1066	insert_weights (ldfile, elem, charmap, repertoire, result, tok_none);
1067
1068	return `0`;
1069	}
1070
1071
1072	static void
1073	handle_ellipsis (struct linereader ldfile, const* char *symstr, size_t symlen,
1074	enum token_t ellipsis, const struct charmap_t *charmap,
1075	struct repertoire_t *repertoire,
1076	struct localedef_t *result)
1077	{
1078	struct element_t *startp;
1079	struct element_t *endp;
1080	struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
1081
1082	/ Unlink the entry added for the ellipsis. /
1083	unlink_element (collate);
1084	startp = collate->cursor;
1085
1086	/ Process and add the end-entry. /
1087	if (symstr != NULL
1088	&& insert_value (ldfile, symstr, symlen, charmap, repertoire, result))
1089	/ Something went wrong with inserting the to-value. This means*
1090	we cannot process the ellipsis. /*
1091	return;
1092
1093	/ Reset the cursor. /
1094	collate->cursor = startp;
1095
1096	/ Now we have to handle many different situations:*
1097	- we have to distinguish between the three different ellipsis forms
1098	- the is the ellipsis at the beginning, in the middle, or at the end.
1099	*/
1100	endp = collate->cursor->next;
1101	assert (symstr == NULL \|\| endp != NULL);
1102
1103	/ XXX The following is probably very wrong since also collating symbols*
1104	can appear in ranges. But do we want/can refine the test for that? /*
1105	#if 0
1106	/ Both, the start and the end symbol, must stand for characters. /
1107	if ((startp != NULL && (startp->name == NULL \|\| ! startp->is_character))
1108	\|\| (endp != NULL && (endp->name == NULL\|\| ! endp->is_character)))
1109	{
1110	lr_error (ldfile, _("\
1111	%s: the start and the end symbol of a range must stand for characters"),
1112	"LC_COLLATE");
1113	return;
1114	}
1115	#endif
1116
1117	if (ellipsis == tok_ellipsis3)
1118	{
1119	/ One requirement we make here: the length of the byte*
1120	sequences for the first and end character must be the same.
1121	This is mainly to prevent unwanted effects and this is often
1122	not what is wanted. /*
1123	size_t len = (startp->mbs != NULL ? startp->nmbs
1124	: (endp->mbs != NULL ? endp->nmbs : `0`));
1125	char mbcnt[len + `1`];
1126	char mbend[len + `1`];
1127
1128	/ Well, this should be caught somewhere else already. Just to*
1129	make sure. /*
1130	assert (startp == NULL \|\| startp->wcs == NULL \|\| startp->wcs[`1`] == `0`);
1131	assert (endp == NULL \|\| endp->wcs == NULL \|\| endp->wcs[`1`] == `0`);
1132
1133	if (startp != NULL && endp != NULL
1134	&& startp->mbs != NULL && endp->mbs != NULL
1135	&& startp->nmbs != endp->nmbs)
1136	{
1137	lr_error (ldfile, _("\
1138	%s: byte sequences of first and last character must have the same length"),
1139	"LC_COLLATE");
1140	return;
1141	}
1142
1143	/ Determine whether we have to generate multibyte sequences. /
1144	if ((startp == NULL \|\| startp->mbs != NULL)
1145	&& (endp == NULL \|\| endp->mbs != NULL))
1146	{
1147	int cnt;
1148	int ret;
1149
1150	/ Prepare the beginning byte sequence. This is either from the*
1151	beginning byte sequence or it is all nulls if it was an
1152	initial ellipsis. /*
1153	if (startp == NULL \|\| startp->mbs == NULL)
1154	memset (mbcnt, `'\0'`, len);
1155	else
1156	{
1157	memcpy (mbcnt, startp->mbs, len);
1158
1159	/ And increment it so that the value is the first one we will*
1160	try to insert. /*
1161	for (cnt = len - `1`; cnt >= `0`; --cnt)
1162	if (++mbcnt[cnt] != `'\0'`)
1163	break;
1164	}
1165	mbcnt[len] = `'\0'`;
1166
1167	/ And the end sequence. /
1168	if (endp == NULL \|\| endp->mbs == NULL)
1169	memset (mbend, `'\0'`, len);
1170	else
1171	memcpy (mbend, endp->mbs, len);
1172	mbend[len] = `'\0'`;
1173
1174	/ Test whether we have a correct range. /
1175	ret = memcmp (mbcnt, mbend, len);
1176	if (ret >= `0`)
1177	{
1178	if (ret > `0`)
1179	lr_error (ldfile, _("%s: byte sequence of first character of \
1180	range is not lower than that of the last character"), "LC_COLLATE");
1181	return;
1182	}
1183
1184	/ Generate the byte sequences data. /
1185	while (`1`)
1186	{
1187	struct charseq *seq;
1188
1189	/ Quite a bit of work ahead. We have to find the character*
1190	definition for the byte sequence and then determine the
1191	wide character belonging to it. /*
1192	seq = charmap_find_symbol (charmap, mbcnt, len);
1193	if (seq != NULL)
1194	{
1195	struct element_t *elem;
1196	size_t namelen;
1197
1198	/ I don't think this can ever happen. /
1199	assert (seq->name != NULL);
1200	namelen = strlen (seq->name);
1201
1202	if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1203	seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1204	namelen);
1205
1206	/ Now we are ready to insert the new value in the*
1207	sequence. Find out whether the element is
1208	already known. /*
1209	void *ptr;
1210	if (find_entry (&collate->seq_table, seq->name, namelen,
1211	&ptr) != `0`)
1212	{
1213	uint32_t wcs[`2`] = { seq->ucs4, `0` };
1214
1215	/ We have to allocate an entry. /
1216	elem = new_element (collate, mbcnt, len,
1217	seq->ucs4 == ILLEGAL_CHAR_VALUE
1218	? NULL : wcs, seq->name,
1219	namelen, `1`);
1220
1221	/ And add it to the table. /
1222	if (insert_entry (&collate->seq_table, seq->name,
1223	namelen, elem) != `0`)
1224	/ This cannot happen. /
1225	assert (! "Internal error");
1226	}
1227	else
1228	/ Copy the result. /
1229	elem = ptr;
1230
1231	/ Test whether this element is not already in the list. /
1232	if (elem->next != NULL \|\| (collate->cursor != NULL
1233	&& elem->next == collate->cursor))
1234	{
1235	lr_error (ldfile, _("\
1236	order for `%.*s' already defined at %s:%Zu"),
1237	(int) namelen, seq->name,
1238	elem->file, elem->line);
1239	goto increment;
1240	}
1241
1242	/ Enqueue the new element. /
1243	elem->last = collate->cursor;
1244	if (collate->cursor == NULL)
1245	elem->next = NULL;
1246	else
1247	{
1248	elem->next = collate->cursor->next;
1249	elem->last->next = elem;
1250	if (elem->next != NULL)
1251	elem->next->last = elem;
1252	}
1253	if (collate->start == NULL)
1254	{
1255	assert (collate->cursor == NULL);
1256	collate->start = elem;
1257	}
1258	collate->cursor = elem;
1259
1260	/ Add the weight value. We take them from the*
1261	`ellipsis_weights' member of `collate'. /*
1262	elem->weights = (struct element_list_t *)
1263	obstack_alloc (&collate->mempool,
1264	nrules * sizeof (struct element_list_t));
1265	for (cnt = `0`; cnt < nrules; ++cnt)
1266	if (collate->ellipsis_weight.weights[cnt].cnt == `1`
1267	&& (collate->ellipsis_weight.weights[cnt].w[`0`]
1268	== ELEMENT_ELLIPSIS2))
1269	{
1270	elem->weights[cnt].w = (struct element_t **)
1271	obstack_alloc (&collate->mempool,
1272	sizeof (struct element_t *));
1273	elem->weights[cnt].w[`0`] = elem;
1274	elem->weights[cnt].cnt = `1`;
1275	}
1276	else
1277	{
1278	/ Simply use the weight from `ellipsis_weight'. /
1279	elem->weights[cnt].w =
1280	collate->ellipsis_weight.weights[cnt].w;
1281	elem->weights[cnt].cnt =
1282	collate->ellipsis_weight.weights[cnt].cnt;
1283	}
1284	}
1285
1286	/ Increment for the next round. /
1287	increment:
1288	for (cnt = len - `1`; cnt >= `0`; --cnt)
1289	if (++mbcnt[cnt] != `'\0'`)
1290	break;
1291
1292	/ Find out whether this was all. /
1293	if (cnt < `0` \|\| memcmp (mbcnt, mbend, len) >= `0`)
1294	/ Yep, that's all. /
1295	break;
1296	}
1297	}
1298	}
1299	else
1300	{
1301	/ For symbolic range we naturally must have a beginning and an*
1302	end specified by the user. /*
1303	if (startp == NULL)
1304	lr_error (ldfile, _("\
1305	%s: symbolic range ellipsis must not directly follow `order_start'"),
1306	"LC_COLLATE");
1307	else if (endp == NULL)
1308	lr_error (ldfile, _("\
1309	%s: symbolic range ellipsis must not be directly followed by `order_end'"),
1310	"LC_COLLATE");
1311	else
1312	{
1313	/ Determine the range. To do so we have to determine the*
1314	common prefix of the both names and then the numeric
1315	values of both ends. /*
1316	size_t lenfrom = strlen (startp->name);
1317	size_t lento = strlen (endp->name);
1318	char buf[lento + `1`];
1319	int preflen = `0`;
1320	long int from;
1321	long int to;
1322	char *cp;
1323	int base = ellipsis == tok_ellipsis2 ? `16` : `10`;
1324
1325	if (lenfrom != lento)
1326	{
1327	invalid_range:
1328	lr_error (ldfile, _("\
1329	`%s' and `%.*s' are not valid names for symbolic range"),
1330	startp->name, (int) lento, endp->name);
1331	return;
1332	}
1333
1334	while (startp->name[preflen] == endp->name[preflen])
1335	if (startp->name[preflen] == `'\0'`)
1336	/ Nothing to be done. The start and end point are identical*
1337	and while inserting the end point we have already given
1338	the user an error message. /*
1339	return;
1340	else
1341	++preflen;
1342
1343	errno = `0`;
1344	from = strtol (startp->name + preflen, &cp, base);
1345	if ((from == UINT_MAX && errno == ERANGE) \|\| *cp != `'\0'`)
1346	goto invalid_range;
1347
1348	errno = `0`;
1349	to = strtol (endp->name + preflen, &cp, base);
1350	if ((to == UINT_MAX && errno == ERANGE) \|\| *cp != `'\0'`)
1351	goto invalid_range;
1352
1353	/ Copy the prefix. /
1354	memcpy (buf, startp->name, preflen);
1355
1356	/ Loop over all values. /
1357	for (++from; from < to; ++from)
1358	{
1359	struct element_t *elem = NULL;
1360	struct charseq *seq;
1361	uint32_t wc;
1362	int cnt;
1363
1364	/ Generate the name. /
1365	sprintf (buf + preflen, base == `10` ? "%0ld" : "%0lX",
1366	(int) (lenfrom - preflen), from);
1367
1368	/ Look whether this name is already defined. /
1369	void *ptr;
1370	if (find_entry (&collate->seq_table, buf, symlen, &ptr) == `0`)
1371	{
1372	/ Copy back the result. /
1373	elem = ptr;
1374
1375	if (elem->next != NULL \|\| (collate->cursor != NULL
1376	&& elem->next == collate->cursor))
1377	{
1378	lr_error (ldfile, _("\
1379	%s: order for `%.*s' already defined at %s:%Zu"),
1380	"LC_COLLATE", (int) lenfrom, buf,
1381	elem->file, elem->line);
1382	continue;
1383	}
1384
1385	if (elem->name == NULL)
1386	{
1387	lr_error (ldfile, _("%s: `%s' must be a character"),
1388	"LC_COLLATE", buf);
1389	continue;
1390	}
1391	}
1392
1393	if (elem == NULL \|\| (elem->mbs == NULL && elem->wcs == NULL))
1394	{
1395	/ Search for a character of this name. /
1396	seq = charmap_find_value (charmap, buf, lenfrom);
1397	if (seq == NULL \|\| seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1398	{
1399	wc = repertoire_find_value (repertoire, buf, lenfrom);
1400
1401	if (seq != NULL)
1402	seq->ucs4 = wc;
1403	}
1404	else
1405	wc = seq->ucs4;
1406
1407	if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
1408	/ We don't know anything about a character with this*
1409	name. XXX Should we warn? /*
1410	continue;
1411
1412	if (elem == NULL)
1413	{
1414	uint32_t wcs[`2`] = { wc, `0` };
1415
1416	/ We have to allocate an entry. /
1417	elem = new_element (collate,
1418	seq != NULL
1419	? (char *) seq->bytes : NULL,
1420	seq != NULL ? seq->nbytes : `0`,
1421	wc == ILLEGAL_CHAR_VALUE
1422	? NULL : wcs, buf, lenfrom, `1`);
1423	}
1424	else
1425	{
1426	/ Update the element. /
1427	if (seq != NULL)
1428	{
1429	elem->mbs = obstack_copy0 (&collate->mempool,
1430	seq->bytes, seq->nbytes);
1431	elem->nmbs = seq->nbytes;
1432	}
1433
1434	if (wc != ILLEGAL_CHAR_VALUE)
1435	{
1436	uint32_t zero = `0`;
1437
1438	obstack_grow (&collate->mempool,
1439	&wc, sizeof (uint32_t));
1440	obstack_grow (&collate->mempool,
1441	&zero, sizeof (uint32_t));
1442	elem->wcs = obstack_finish (&collate->mempool);
1443	elem->nwcs = `1`;
1444	}
1445	}
1446
1447	elem->file = ldfile->fname;
1448	elem->line = ldfile->lineno;
1449	elem->section = collate->current_section;
1450	}
1451
1452	/ Enqueue the new element. /
1453	elem->last = collate->cursor;
1454	elem->next = collate->cursor->next;
1455	elem->last->next = elem;
1456	if (elem->next != NULL)
1457	elem->next->last = elem;
1458	collate->cursor = elem;
1459
1460	/ Now add the weights. They come from the `ellipsis_weights'*
1461	member of `collate'. /*
1462	elem->weights = (struct element_list_t *)
1463	obstack_alloc (&collate->mempool,
1464	nrules * sizeof (struct element_list_t));
1465	for (cnt = `0`; cnt < nrules; ++cnt)
1466	if (collate->ellipsis_weight.weights[cnt].cnt == `1`
1467	&& (collate->ellipsis_weight.weights[cnt].w[`0`]
1468	== ELEMENT_ELLIPSIS2))
1469	{
1470	elem->weights[cnt].w = (struct element_t **)
1471	obstack_alloc (&collate->mempool,
1472	sizeof (struct element_t *));
1473	elem->weights[cnt].w[`0`] = elem;
1474	elem->weights[cnt].cnt = `1`;
1475	}
1476	else
1477	{
1478	/ Simly use the weight from `ellipsis_weight'. /
1479	elem->weights[cnt].w =
1480	collate->ellipsis_weight.weights[cnt].w;
1481	elem->weights[cnt].cnt =
1482	collate->ellipsis_weight.weights[cnt].cnt;
1483	}
1484	}
1485	}
1486	}
1487	}
1488
1489
1490	static void
1491	collate_startup (struct linereader ldfile, struct* localedef_t *locale,
1492	struct localedef_t copy_locale, int* ignore_content)
1493	{
1494	if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
1495	{
1496	struct locale_collate_t *collate;
1497
1498	if (copy_locale == NULL)
1499	{
1500	collate = locale->categories[LC_COLLATE].collate =
1501	(struct locale_collate_t *)
1502	xcalloc (`1`, sizeof (struct locale_collate_t));
1503
1504	/ Init the various data structures. /
1505	init_hash (&collate->elem_table, `100`);
1506	init_hash (&collate->sym_table, `100`);
1507	init_hash (&collate->seq_table, `500`);
1508	obstack_init (&collate->mempool);
1509
1510	collate->col_weight_max = -`1`;
1511	}
1512	else
1513	/ Reuse the copy_locale's data structures. /
1514	collate = locale->categories[LC_COLLATE].collate =
1515	copy_locale->categories[LC_COLLATE].collate;
1516	}
1517
1518	ldfile->translate_strings = `0`;
1519	ldfile->return_widestr = `0`;
1520	}
1521
1522
1523	void
1524	collate_finish (struct localedef_t locale, const* struct charmap_t *charmap)
1525	{
1526	/ Now is the time when we can assign the individual collation*
1527	values for all the symbols. We have possibly different values
1528	for the wide- and the multibyte-character symbols. This is done
1529	since it might make a difference in the encoding if there is in
1530	some cases no multibyte-character but there are wide-characters.
1531	(The other way around it is not important since theencoded
1532	collation value in the wide-character case is 32 bits wide and
1533	therefore requires no encoding).
1534
1535	The lowest collation value assigned is 2. Zero is reserved for
1536	the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1537	functions and 1 is used to separate the individual passes for the
1538	different rules.
1539
1540	We also have to construct is list with all the bytes/words which
1541	can come first in a sequence, followed by all the elements which
1542	also start with this byte/word. The order is reverse which has
1543	among others the important effect that longer strings are located
1544	first in the list. This is required for the output data since
1545	the algorithm used in `strcoll' etc depends on this.
1546
1547	The multibyte case is easy. We simply sort into an array with
1548	256 elements. /*
1549	struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1550	int mbact[nrules];
1551	int wcact;
1552	int mbseqact;
1553	int wcseqact;
1554	struct element_t *runp;
1555	int i;
1556	int need_undefined = `0`;
1557	struct section_list *sect;
1558	int ruleidx;
1559	int nr_wide_elems = `0`;
1560
1561	if (collate == NULL)
1562	{
1563	/ No data, no check. /
1564	if (! be_quiet)
1565	WITH_CUR_LOCALE (error (`0`, `0`, _("No definition for %s category found"),
1566	"LC_COLLATE"));
1567	return;
1568	}
1569
1570	/ If this assertion is hit change the type in `element_t'. /
1571	assert (nrules <= sizeof (runp->used_in_level) * `8`);
1572
1573	/ Make sure that the `position' rule is used either in all sections*
1574	or in none. /*
1575	for (i = `0`; i < nrules; ++i)
1576	for (sect = collate->sections; sect != NULL; sect = sect->next)
1577	if (sect != collate->current_section
1578	&& sect->rules != NULL
1579	&& ((sect->rules[i] & sort_position)
1580	!= (collate->current_section->rules[i] & sort_position)))
1581	{
1582	WITH_CUR_LOCALE (error (`0`, `0`, _("\
1583	%s: `position' must be used for a specific level in all sections or none"),
1584	"LC_COLLATE"));
1585	break;
1586	}
1587
1588	/ Find out which elements are used at which level. At the same*
1589	time we find out whether we have any undefined symbols. /*
1590	runp = collate->start;
1591	while (runp != NULL)
1592	{
1593	if (runp->mbs != NULL)
1594	{
1595	for (i = `0`; i < nrules; ++i)
1596	{
1597	int j;
1598
1599	for (j = `0`; j < runp->weights[i].cnt; ++j)
1600	/ A NULL pointer as the weight means IGNORE. /
1601	if (runp->weights[i].w[j] != NULL)
1602	{
1603	if (runp->weights[i].w[j]->weights == NULL)
1604	{
1605	WITH_CUR_LOCALE (error_at_line (`0`, `0`, runp->file,
1606	runp->line,
1607	_("symbol `%s' not defined"),
1608	runp->weights[i].w[j]->name));
1609
1610	need_undefined = `1`;
1611	runp->weights[i].w[j] = &collate->undefined;
1612	}
1613	else
1614	/ Set the bit for the level. /
1615	runp->weights[i].w[j]->used_in_level \|= `1` << i;
1616	}
1617	}
1618	}
1619
1620	/ Up to the next entry. /
1621	runp = runp->next;
1622	}
1623
1624	/ Walk through the list of defined sequences and assign weights. Also*
1625	create the data structure which will allow generating the single byte
1626	character based tables.
1627
1628	Since at each time only the weights for each of the rules are
1629	only compared to other weights for this rule it is possible to
1630	assign more compact weight values than simply counting all
1631	weights in sequence. We can assign weights from 3, one for each
1632	rule individually and only for those elements, which are actually
1633	used for this rule.
1634
1635	Why is this important? It is not for the wide char table. But
1636	it is for the singlebyte output since here larger numbers have to
1637	be encoded to make it possible to emit the value as a byte
1638	string. /*
1639	for (i = `0`; i < nrules; ++i)
1640	mbact[i] = `2`;
1641	wcact = `2`;
1642	mbseqact = `0`;
1643	wcseqact = `0`;
1644	runp = collate->start;
1645	while (runp != NULL)
1646	{
1647	/ Determine the order. /
1648	if (runp->used_in_level != `0`)
1649	{
1650	runp->mborder = (int *) obstack_alloc (&collate->mempool,
1651	nrules * sizeof (int));
1652
1653	for (i = `0`; i < nrules; ++i)
1654	if ((runp->used_in_level & (`1` << i)) != `0`)
1655	runp->mborder[i] = mbact[i]++;
1656	else
1657	runp->mborder[i] = `0`;
1658	}
1659
1660	if (runp->mbs != NULL)
1661	{
1662	struct element_t **eptr;
1663	struct element_t *lastp = NULL;
1664
1665	/ Find the point where to insert in the list. /
1666	eptr = &collate->mbheads[((unsigned char *) runp->mbs)[`0`]];
1667	while (*eptr != NULL)
1668	{
1669	if ((*eptr)->nmbs < runp->nmbs)
1670	break;
1671
1672	if ((*eptr)->nmbs == runp->nmbs)
1673	{
1674	int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
1675
1676	if (c == `0`)
1677	{
1678	/ This should not happen. It means that we have*
1679	to symbols with the same byte sequence. It is
1680	of course an error. /*
1681	WITH_CUR_LOCALE (error_at_line (`0`, `0`, (*eptr)->file,
1682	(*eptr)->line,
1683	_("\
1684	symbol `%s' has the same encoding as"), (*eptr)->name);
1685	error_at_line (`0`, `0`, runp->file,
1686	runp->line,
1687	_("symbol `%s'"),
1688	runp->name));
1689	goto dont_insert;
1690	}
1691	else if (c < `0`)
1692	/ Insert it here. /
1693	break;
1694	}
1695
1696	/ To the next entry. /
1697	lastp = *eptr;
1698	eptr = &(*eptr)->mbnext;
1699	}
1700
1701	/ Set the pointers. /
1702	runp->mbnext = *eptr;
1703	runp->mblast = lastp;
1704	if (*eptr != NULL)
1705	(*eptr)->mblast = runp;
1706	*eptr = runp;
1707	dont_insert:
1708	;
1709	}
1710
1711	if (runp->used_in_level)
1712	{
1713	runp->wcorder = wcact++;
1714
1715	/ We take the opportunity to count the elements which have*
1716	wide characters. /*
1717	++nr_wide_elems;
1718	}
1719
1720	if (runp->is_character)
1721	{
1722	if (runp->nmbs == `1`)
1723	collate->mbseqorder[((unsigned char *) runp->mbs)[`0`]] = mbseqact++;
1724
1725	runp->wcseqorder = wcseqact++;
1726	}
1727	else if (runp->mbs != NULL && runp->weights != NULL)
1728	/ This is for collation elements. /
1729	runp->wcseqorder = wcseqact++;
1730
1731	/ Up to the next entry. /
1732	runp = runp->next;
1733	}
1734
1735	/ Find out whether any of the `mbheads' entries is unset. In this*
1736	case we use the UNDEFINED entry. /*
1737	for (i = `1`; i < `256`; ++i)
1738	if (collate->mbheads[i] == NULL)
1739	{
1740	need_undefined = `1`;
1741	collate->mbheads[i] = &collate->undefined;
1742	}
1743
1744	/ Now to the wide character case. /
1745	collate->wcheads.p = `6`;
1746	collate->wcheads.q = `10`;
1747	wchead_table_init (&collate->wcheads);
1748
1749	collate->wcseqorder.p = `6`;
1750	collate->wcseqorder.q = `10`;
1751	collseq_table_init (&collate->wcseqorder);
1752
1753	/ Start adding. /
1754	runp = collate->start;
1755	while (runp != NULL)
1756	{
1757	if (runp->wcs != NULL)
1758	{
1759	struct element_t *e;
1760	struct element_t **eptr;
1761	struct element_t *lastp;
1762
1763	/ Insert the collation sequence value. /
1764	if (runp->is_character)
1765	collseq_table_add (&collate->wcseqorder, runp->wcs[`0`],
1766	runp->wcseqorder);
1767
1768	/ Find the point where to insert in the list. /
1769	e = wchead_table_get (&collate->wcheads, runp->wcs[`0`]);
1770	eptr = &e;
1771	lastp = NULL;
1772	while (*eptr != NULL)
1773	{
1774	if ((*eptr)->nwcs < runp->nwcs)
1775	break;
1776
1777	if ((*eptr)->nwcs == runp->nwcs)
1778	{
1779	int c = wmemcmp ((wchar_t ) (eptr)->wcs,
1780	(wchar_t *) runp->wcs, runp->nwcs);
1781
1782	if (c == `0`)
1783	{
1784	/ This should not happen. It means that we have*
1785	two symbols with the same byte sequence. It is
1786	of course an error. /*
1787	WITH_CUR_LOCALE (error_at_line (`0`, `0`, (*eptr)->file,
1788	(*eptr)->line,
1789	_("\
1790	symbol `%s' has the same encoding as"), (*eptr)->name);
1791	error_at_line (`0`, `0`, runp->file,
1792	runp->line,
1793	_("symbol `%s'"),
1794	runp->name));
1795	goto dont_insertwc;
1796	}
1797	else if (c < `0`)
1798	/ Insert it here. /
1799	break;
1800	}
1801
1802	/ To the next entry. /
1803	lastp = *eptr;
1804	eptr = &(*eptr)->wcnext;
1805	}
1806
1807	/ Set the pointers. /
1808	runp->wcnext = *eptr;
1809	runp->wclast = lastp;
1810	if (*eptr != NULL)
1811	(*eptr)->wclast = runp;
1812	*eptr = runp;
1813	if (eptr == &e)
1814	wchead_table_add (&collate->wcheads, runp->wcs[`0`], e);
1815	dont_insertwc:
1816	;
1817	}
1818
1819	/ Up to the next entry. /
1820	runp = runp->next;
1821	}
1822
1823	/ Now determine whether the UNDEFINED entry is needed and if yes,*
1824	whether it was defined. /*
1825	collate->undefined.used_in_level = need_undefined ? ~`0ul` : `0`;
1826	if (collate->undefined.file == NULL)
1827	{
1828	if (need_undefined)
1829	{
1830	/ This seems not to be enforced by recent standards. Don't*
1831	emit an error, simply append UNDEFINED at the end. /*
1832	if (`0`)
1833	WITH_CUR_LOCALE (error (`0`, `0`, _("no definition of `UNDEFINED'")));
1834
1835	/ Add UNDEFINED at the end. /
1836	collate->undefined.mborder =
1837	(int ) obstack_alloc (&collate->mempool, nrules sizeof (int));
1838
1839	for (i = `0`; i < nrules; ++i)
1840	collate->undefined.mborder[i] = mbact[i]++;
1841	}
1842
1843	/ In any case we will need the definition for the wide character*
1844	case. But we will not complain that it is missing since the
1845	specification strangely enough does not seem to account for
1846	this. /*
1847	collate->undefined.wcorder = wcact++;
1848	}
1849
1850	/ Finally, try to unify the rules for the sections. Whenever the rules*
1851	for a section are the same as those for another section give the
1852	ruleset the same index. Since there are never many section we can
1853	use an O(n^2) algorithm here. /*
1854	sect = collate->sections;
1855	while (sect != NULL && sect->rules == NULL)
1856	sect = sect->next;
1857
1858	/ Bail out if we have no sections because of earlier errors. /
1859	if (sect == NULL)
1860	{
1861	WITH_CUR_LOCALE (error (EXIT_FAILURE, `0`,
1862	_("too many errors; giving up")));
1863	return;
1864	}
1865
1866	ruleidx = `0`;
1867	do
1868	{
1869	struct section_list *osect = collate->sections;
1870
1871	while (osect != sect)
1872	if (osect->rules != NULL
1873	&& memcmp (osect->rules, sect->rules,
1874	nrules * sizeof (osect->rules[`0`])) == `0`)
1875	break;
1876	else
1877	osect = osect->next;
1878
1879	if (osect == sect)
1880	sect->ruleidx = ruleidx++;
1881	else
1882	sect->ruleidx = osect->ruleidx;
1883
1884	/ Next section. /
1885	do
1886	sect = sect->next;
1887	while (sect != NULL && sect->rules == NULL);
1888	}
1889	while (sect != NULL);
1890	/ We are currently not prepared for more than 128 rulesets. But this*
1891	should never really be a problem. /*
1892	assert (ruleidx <= `128`);
1893	}
1894
1895
1896	static int32_t
1897	output_weight (struct obstack pool, struct* locale_collate_t *collate,
1898	struct element_t *elem)
1899	{
1900	size_t cnt;
1901	int32_t retval;
1902
1903	/ Optimize the use of UNDEFINED. /
1904	if (elem == &collate->undefined)
1905	/ The weights are already inserted. /
1906	return `0`;
1907
1908	/ This byte can start exactly one collation element and this is*
1909	a single byte. We can directly give the index to the weights. /*
1910	retval = obstack_object_size (pool);
1911
1912	/ Construct the weight. /
1913	for (cnt = `0`; cnt < nrules; ++cnt)
1914	{
1915	char buf[elem->weights[cnt].cnt * `7`];
1916	int len = `0`;
1917	int i;
1918
1919	for (i = `0`; i < elem->weights[cnt].cnt; ++i)
1920	/ Encode the weight value. We do nothing for IGNORE entries. /
1921	if (elem->weights[cnt].w[i] != NULL)
1922	len += utf8_encode (&buf[len],
1923	elem->weights[cnt].w[i]->mborder[cnt]);
1924
1925	/ And add the buffer content. /
1926	obstack_1grow (pool, len);
1927	obstack_grow (pool, buf, len);
1928	}
1929
1930	return retval \| ((elem->section->ruleidx & `0x7f`) << `24`);
1931	}
1932
1933
1934	static int32_t
1935	output_weightwc (struct obstack pool, struct* locale_collate_t *collate,
1936	struct element_t *elem)
1937	{
1938	size_t cnt;
1939	int32_t retval;
1940
1941	/ Optimize the use of UNDEFINED. /
1942	if (elem == &collate->undefined)
1943	/ The weights are already inserted. /
1944	return `0`;
1945
1946	/ This byte can start exactly one collation element and this is*
1947	a single byte. We can directly give the index to the weights. /*
1948	retval = obstack_object_size (pool) / sizeof (int32_t);
1949
1950	/ Construct the weight. /
1951	for (cnt = `0`; cnt < nrules; ++cnt)
1952	{
1953	int32_t buf[elem->weights[cnt].cnt];
1954	int i;
1955	int32_t j;
1956
1957	for (i = `0`, j = `0`; i < elem->weights[cnt].cnt; ++i)
1958	if (elem->weights[cnt].w[i] != NULL)
1959	buf[j++] = elem->weights[cnt].w[i]->wcorder;
1960
1961	/ And add the buffer content. /
1962	obstack_int32_grow (pool, j);
1963
1964	obstack_grow (pool, buf, j * sizeof (int32_t));
1965	maybe_swap_uint32_obstack (pool, j);
1966	}
1967
1968	return retval \| ((elem->section->ruleidx & `0x7f`) << `24`);
1969	}
1970
/* State read by `add_to_tablewc': the obstacks receiving the weight,
   extra and indirect data, the collation data being processed and the
   wide character index table being filled.

   If localedef is ever threaded, this would need to be a __thread var.  */
static struct
{
  struct obstack *weightpool;
  struct obstack *extrapool;
  struct obstack *indpool;
  struct locale_collate_t *collate;
  struct collidx_table *tablewc;
} atwc;
1980
1981	static void add_to_tablewc (uint32_t ch, struct element_t *runp);
1982
1983	static void
1984	add_to_tablewc (uint32_t ch, struct element_t *runp)
1985	{
1986	if (runp->wcnext == NULL && runp->nwcs == `1`)
1987	{
1988	int32_t weigthidx = output_weightwc (atwc.weightpool, atwc.collate,
1989	runp);
1990	collidx_table_add (atwc.tablewc, ch, weigthidx);
1991	}
1992	else
1993	{
1994	/ As for the singlebyte table, we recognize sequences and*
1995	compress them. /*
1996
1997	collidx_table_add (atwc.tablewc, ch,
1998	-(obstack_object_size (atwc.extrapool)
1999	/ sizeof (uint32_t)));
2000
2001	do
2002	{
2003	/ Store the current index in the weight table. We know that*
2004	the current position in the `extrapool' is aligned on a
2005	32-bit address. /*
2006	int32_t weightidx;
2007	int added;
2008
2009	/ Find out wether this is a single entry or we have more than*
2010	one consecutive entry. /*
2011	if (runp->wcnext != NULL
2012	&& runp->nwcs == runp->wcnext->nwcs
2013	&& wmemcmp ((wchar_t *) runp->wcs,
2014	(wchar_t *)runp->wcnext->wcs,
2015	runp->nwcs - `1`) == `0`
2016	&& (runp->wcs[runp->nwcs - `1`]
2017	== runp->wcnext->wcs[runp->nwcs - `1`] + `1`))
2018	{
2019	int i;
2020	struct element_t *series_startp = runp;
2021	struct element_t *curp;
2022
2023	/ Now add first the initial byte sequence. /
2024	added = (`1` + `1` + `2` * (runp->nwcs - `1`)) * sizeof (int32_t);
2025	if (sizeof (int32_t) == sizeof (int))
2026	obstack_make_room (atwc.extrapool, added);
2027
2028	/ More than one consecutive entry. We mark this by having*
2029	a negative index into the indirect table. /*
2030	obstack_int32_grow_fast (atwc.extrapool,
2031	-(obstack_object_size (atwc.indpool)
2032	/ sizeof (int32_t)));
2033	obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - `1`);
2034
2035	do
2036	runp = runp->wcnext;
2037	while (runp->wcnext != NULL
2038	&& runp->nwcs == runp->wcnext->nwcs
2039	&& wmemcmp ((wchar_t *) runp->wcs,
2040	(wchar_t *)runp->wcnext->wcs,
2041	runp->nwcs - `1`) == `0`
2042	&& (runp->wcs[runp->nwcs - `1`]
2043	== runp->wcnext->wcs[runp->nwcs - `1`] + `1`));
2044
2045	/ Now walk backward from here to the beginning. /
2046	curp = runp;
2047
2048	for (i = `1`; i < runp->nwcs; ++i)
2049	obstack_int32_grow_fast (atwc.extrapool, curp->wcs[i]);
2050
2051	/ Now find the end of the consecutive sequence and*
2052	add all the indeces in the indirect pool. /*
2053	do
2054	{
2055	weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2056	curp);
2057	obstack_int32_grow (atwc.indpool, weightidx);
2058
2059	curp = curp->wclast;
2060	}
2061	while (curp != series_startp);
2062
2063	/ Add the final weight. /
2064	weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2065	curp);
2066	obstack_int32_grow (atwc.indpool, weightidx);
2067
2068	/ And add the end byte sequence. Without length this*
2069	time. /*
2070	for (i = `1`; i < curp->nwcs; ++i)
2071	obstack_int32_grow (atwc.extrapool, curp->wcs[i]);
2072	}
2073	else
2074	{
2075	/ A single entry. Simply add the index and the length and*
2076	string (except for the first character which is already
2077	tested for). /*
2078	int i;
2079
2080	/ Output the weight info. /
2081	weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2082	runp);
2083
2084	assert (runp->nwcs > `0`);
2085	added = (`1` + `1` + runp->nwcs - `1`) * sizeof (int32_t);
2086	if (sizeof (int) == sizeof (int32_t))
2087	obstack_make_room (atwc.extrapool, added);
2088
2089	obstack_int32_grow_fast (atwc.extrapool, weightidx);
2090	obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - `1`);
2091	for (i = `1`; i < runp->nwcs; ++i)
2092	obstack_int32_grow_fast (atwc.extrapool, runp->wcs[i]);
2093	}
2094
2095	/ Next entry. /
2096	runp = runp->wcnext;
2097	}
2098	while (runp != NULL);
2099	}
2100	}
2101
2102	void
2103	collate_output (struct localedef_t locale, const* struct charmap_t *charmap,
2104	const char *output_path)
2105	{
2106	struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
2107	const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
2108	struct locale_file file;
2109	size_t ch;
2110	int32_t tablemb[`256`];
2111	struct obstack weightpool;
2112	struct obstack extrapool;
2113	struct obstack indirectpool;
2114	struct section_list *sect;
2115	struct collidx_table tablewc;
2116	uint32_t elem_size;
2117	uint32_t *elem_table;
2118	int i;
2119	struct element_t *runp;
2120
2121	init_locale_data (&file, nelems);
2122	add_locale_uint32 (&file, nrules);
2123
2124	/ If we have no LC_COLLATE data emit only the number of rules as zero. /
2125	if (collate == NULL)
2126	{
2127	size_t idx;
2128	for (idx = `1`; idx < nelems; idx++)
2129	{
2130	/ The words have to be handled specially. /
2131	if (idx == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
2132	add_locale_uint32 (&file, `0`);
2133	else
2134	add_locale_empty (&file);
2135	}
2136	write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
2137	return;
2138	}
2139
2140	obstack_init (&weightpool);
2141	obstack_init (&extrapool);
2142	obstack_init (&indirectpool);
2143
2144	/ Since we are using the sign of an integer to mark indirection the*
2145	offsets in the arrays we are indirectly referring to must not be
2146	zero since -0 == 0. Therefore we add a bit of dummy content. /*
2147	obstack_int32_grow (&extrapool, `0`);
2148	obstack_int32_grow (&indirectpool, `0`);
2149
2150	/ Prepare the ruleset table. /
2151	for (sect = collate->sections, i = `0`; sect != NULL; sect = sect->next)
2152	if (sect->rules != NULL && sect->ruleidx == i)
2153	{
2154	int j;
2155
2156	obstack_make_room (&weightpool, nrules);
2157
2158	for (j = `0`; j < nrules; ++j)
2159	obstack_1grow_fast (&weightpool, sect->rules[j]);
2160	++i;
2161	}
2162	/ And align the output. /
2163	i = (nrules * i) % LOCFILE_ALIGN;
2164	if (i > `0`)
2165	do
2166	obstack_1grow (&weightpool, `'\0'`);
2167	while (++i < LOCFILE_ALIGN);
2168
2169	add_locale_raw_obstack (&file, &weightpool);
2170
2171	/ Generate the 8-bit table. Walk through the lists of sequences*
2172	starting with the same byte and add them one after the other to
2173	the table. In case we have more than one sequence starting with
2174	the same byte we have to use extra indirection.
2175
2176	First add a record for the NUL byte. This entry will never be used
2177	so it does not matter. /*
2178	tablemb[`0`] = `0`;
2179
2180	/ Now insert the `UNDEFINED' value if it is used. Since this value*
2181	will probably be used more than once it is good to store the
2182	weights only once. /*
2183	if (collate->undefined.used_in_level != `0`)
2184	output_weight (&weightpool, collate, &collate->undefined);
2185
2186	for (ch = `1`; ch < `256`; ++ch)
2187	if (collate->mbheads[ch]->mbnext == NULL
2188	&& collate->mbheads[ch]->nmbs <= `1`)
2189	{
2190	tablemb[ch] = output_weight (&weightpool, collate,
2191	collate->mbheads[ch]);
2192	}
2193	else
2194	{
2195	/ The entries in the list are sorted by length and then*
2196	alphabetically. This is the order in which we will add the
2197	elements to the collation table. This allows simply walking
2198	the table in sequence and stopping at the first matching
2199	entry. Since the longer sequences are coming first in the
2200	list they have the possibility to match first, just as it
2201	has to be. In the worst case we are walking to the end of
2202	the list where we put, if no singlebyte sequence is defined
2203	in the locale definition, the weights for UNDEFINED.
2204
2205	To reduce the length of the search list we compress them a bit.
2206	This happens by collecting sequences of consecutive byte
2207	sequences in one entry (having and begin and end byte sequence)
2208	and add only one index into the weight table. We can find the
2209	consecutive entries since they are also consecutive in the list. /*
2210	struct element_t *runp = collate->mbheads[ch];
2211	struct element_t *lastp;
2212
2213	assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2214
2215	tablemb[ch] = -obstack_object_size (&extrapool);
2216
2217	do
2218	{
2219	/ Store the current index in the weight table. We know that*
2220	the current position in the `extrapool' is aligned on a
2221	32-bit address. /*
2222	int32_t weightidx;
2223	int added;
2224
2225	/* Find out whether this is a single entry or we have more than
2226	one consecutive entry. */
2227	if (runp->mbnext != NULL
2228	&& runp->nmbs == runp->mbnext->nmbs
2229	&& memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - `1`) == `0`
2230	&& (runp->mbs[runp->nmbs - `1`]
2231	== runp->mbnext->mbs[runp->nmbs - `1`] + `1`))
2232	{
2233	int i;
2234	struct element_t *series_startp = runp;
2235	struct element_t *curp;
2236
2237	/ Compute how much space we will need. /
2238	added = LOCFILE_ALIGN_UP (sizeof (int32_t) + `1`
2239	+ `2` * (runp->nmbs - `1`));
2240	assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2241	obstack_make_room (&extrapool, added);
2242
2243	/ More than one consecutive entry. We mark this by having*
2244	a negative index into the indirect table. /*
2245	obstack_int32_grow_fast (&extrapool,
2246	-(obstack_object_size (&indirectpool)
2247	/ sizeof (int32_t)));
2248
2249	/ Now search first the end of the series. /
2250	do
2251	runp = runp->mbnext;
2252	while (runp->mbnext != NULL
2253	&& runp->nmbs == runp->mbnext->nmbs
2254	&& memcmp (runp->mbs, runp->mbnext->mbs,
2255	runp->nmbs - `1`) == `0`
2256	&& (runp->mbs[runp->nmbs - `1`]
2257	== runp->mbnext->mbs[runp->nmbs - `1`] + `1`));
2258
2259	/ Now walk backward from here to the beginning. /
2260	curp = runp;
2261
2262	assert (runp->nmbs <= `256`);
2263	obstack_1grow_fast (&extrapool, curp->nmbs - `1`);
2264	for (i = `1`; i < curp->nmbs; ++i)
2265	obstack_1grow_fast (&extrapool, curp->mbs[i]);
2266
2267	/* Now find the end of the consecutive sequence and
2268	add all the indices in the indirect pool. */
2269	do
2270	{
2271	weightidx = output_weight (&weightpool, collate, curp);
2272	obstack_int32_grow (&indirectpool, weightidx);
2273
2274	curp = curp->mblast;
2275	}
2276	while (curp != series_startp);
2277
2278	/ Add the final weight. /
2279	weightidx = output_weight (&weightpool, collate, curp);
2280	obstack_int32_grow (&indirectpool, weightidx);
2281
2282	/ And add the end byte sequence. Without length this*
2283	time. /*
2284	for (i = `1`; i < curp->nmbs; ++i)
2285	obstack_1grow_fast (&extrapool, curp->mbs[i]);
2286	}
2287	else
2288	{
2289	/ A single entry. Simply add the index and the length and*
2290	string (except for the first character which is already
2291	tested for). /*
2292	int i;
2293
2294	/ Output the weight info. /
2295	weightidx = output_weight (&weightpool, collate, runp);
2296
2297	added = LOCFILE_ALIGN_UP (sizeof (int32_t) + `1`
2298	+ runp->nmbs - `1`);
2299	assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2300	obstack_make_room (&extrapool, added);
2301
2302	obstack_int32_grow_fast (&extrapool, weightidx);
2303	assert (runp->nmbs <= `256`);
2304	obstack_1grow_fast (&extrapool, runp->nmbs - `1`);
2305
2306	for (i = `1`; i < runp->nmbs; ++i)
2307	obstack_1grow_fast (&extrapool, runp->mbs[i]);
2308	}
2309
2310	/ Add alignment bytes if necessary. /
2311	while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
2312	obstack_1grow_fast (&extrapool, `'\0'`);
2313
2314	/ Next entry. /
2315	lastp = runp;
2316	runp = runp->mbnext;
2317	}
2318	while (runp != NULL);
2319
2320	assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2321
2322	/ If the final entry in the list is not a single character we*
2323	add an UNDEFINED entry here. /*
2324	if (lastp->nmbs != `1`)
2325	{
2326	int added = LOCFILE_ALIGN_UP (sizeof (int32_t) + `1` + `1`);
2327	obstack_make_room (&extrapool, added);
2328
2329	obstack_int32_grow_fast (&extrapool, `0`);
2330	/ XXX What rule? We just pick the first. /
2331	obstack_1grow_fast (&extrapool, `0`);
2332	/ Length is zero. /
2333	obstack_1grow_fast (&extrapool, `0`);
2334
2335	/ Add alignment bytes if necessary. /
2336	while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
2337	obstack_1grow_fast (&extrapool, `'\0'`);
2338	}
2339	}
2340
2341	/ Add padding to the tables if necessary. /
2342	while (!LOCFILE_ALIGNED_P (obstack_object_size (&weightpool)))
2343	obstack_1grow (&weightpool, `0`);
2344
2345	/ Now add the four tables. /
2346	add_locale_uint32_array (&file, (const uint32_t *) tablemb, `256`);
2347	add_locale_raw_obstack (&file, &weightpool);
2348	add_locale_raw_obstack (&file, &extrapool);
2349	add_locale_raw_obstack (&file, &indirectpool);
2350
2351	/ Now the same for the wide character table. We need to store some*
2352	more information here. /*
2353	add_locale_empty (&file);
2354	add_locale_empty (&file);
2355	add_locale_empty (&file);
2356
2357	/ Since we are using the sign of an integer to mark indirection the*
2358	offsets in the arrays we are indirectly referring to must not be
2359	zero since -0 == 0. Therefore we add a bit of dummy content. /*
2360	obstack_int32_grow (&extrapool, `0`);
2361	obstack_int32_grow (&indirectpool, `0`);
2362
2363	/ Now insert the `UNDEFINED' value if it is used. Since this value*
2364	will probably be used more than once it is good to store the
2365	weights only once. /*
2366	if (output_weightwc (&weightpool, collate, &collate->undefined) != `0`)
2367	abort ();
2368
2369	/ Generate the table. Walk through the lists of sequences starting*
2370	with the same wide character and add them one after the other to
2371	the table. In case we have more than one sequence starting with
2372	the same byte we have to use extra indirection. /*
2373	tablewc.p = `6`;
2374	tablewc.q = `10`;
2375	collidx_table_init (&tablewc);
2376
2377	atwc.weightpool = &weightpool;
2378	atwc.extrapool = &extrapool;
2379	atwc.indpool = &indirectpool;
2380	atwc.collate = collate;
2381	atwc.tablewc = &tablewc;
2382
2383	wchead_table_iterate (&collate->wcheads, add_to_tablewc);
2384
2385	memset (&atwc, `0`, sizeof (atwc));
2386
2387	/ Now add the four tables. /
2388	add_locale_collidx_table (&file, &tablewc);
2389	add_locale_raw_obstack (&file, &weightpool);
2390	add_locale_raw_obstack (&file, &extrapool);
2391	add_locale_raw_obstack (&file, &indirectpool);
2392
2393	/ Finally write the table with collation element names out. It is*
2394	a hash table with a simple function which gets the name of the
2395	character as the input. One character might have many names. The
2396	value associated with the name is an index into the weight table
2397	where we are then interested in the first-level weight value.
2398
2399	To determine how large the table should be we are counting the
2400	elements we have to put in. Since we are using internal chaining
2401	using a secondary hash function we have to make the table a bit
2402	larger to avoid extremely long search times. We can achieve
2403	good results with a 40% larger table than there are entries. /*
2404	elem_size = `0`;
2405	runp = collate->start;
2406	while (runp != NULL)
2407	{
2408	if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2409	/ Yep, the element really counts. /
2410	++elem_size;
2411
2412	runp = runp->next;
2413	}
2414	/ Add 40% and find the next prime number. /
2415	elem_size = next_prime (elem_size * `1.4`);
2416
2417	/ Allocate the table. Each entry consists of two words: the hash*
2418	value and an index in a secondary table which provides the index
2419	into the weight table and the string itself (so that a match can
2420	be determined). /*
2421	elem_table = (uint32_t *) obstack_alloc (&extrapool,
2422	elem_size * `2` * sizeof (uint32_t));
2423	memset (elem_table, `'\0'`, elem_size * `2` * sizeof (uint32_t));
2424
2425	/ Now add the elements. /
2426	runp = collate->start;
2427	while (runp != NULL)
2428	{
2429	if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2430	{
2431	/ Compute the hash value of the name. /
2432	uint32_t namelen = strlen (runp->name);
2433	uint32_t hash = elem_hash (runp->name, namelen);
2434	size_t idx = hash % elem_size;
2435	#ifndef NDEBUG
2436	size_t start_idx = idx;
2437	#endif
2438
2439	if (elem_table[idx * `2`] != `0`)
2440	{
2441	/ The spot is already taken. Try iterating using the value*
2442	from the secondary hashing function. /*
2443	size_t iter = hash % (elem_size - `2`) + `1`;
2444
2445	do
2446	{
2447	idx += iter;
2448	if (idx >= elem_size)
2449	idx -= elem_size;
2450	assert (idx != start_idx);
2451	}
2452	while (elem_table[idx * `2`] != `0`);
2453	}
2454	/ This is the spot where we will insert the value. /
2455	elem_table[idx * `2`] = hash;
2456	elem_table[idx * `2` + `1`] = obstack_object_size (&extrapool);
2457
2458	/ The string itself including length. /
2459	obstack_1grow (&extrapool, namelen);
2460	obstack_grow (&extrapool, runp->name, namelen);
2461
2462	/ And the multibyte representation. /
2463	obstack_1grow (&extrapool, runp->nmbs);
2464	obstack_grow (&extrapool, runp->mbs, runp->nmbs);
2465
2466	/ And align again to 32 bits. /
2467	if ((`1` + namelen + `1` + runp->nmbs) % sizeof (int32_t) != `0`)
2468	obstack_grow (&extrapool, "\0\0",
2469	(sizeof (int32_t)
2470	- ((`1` + namelen + `1` + runp->nmbs)
2471	% sizeof (int32_t))));
2472
2473	/ Now some 32-bit values: multibyte collation sequence,*
2474	wide char string (including length), and wide char
2475	collation sequence. /*
2476	obstack_int32_grow (&extrapool, runp->mbseqorder);
2477
2478	obstack_int32_grow (&extrapool, runp->nwcs);
2479	obstack_grow (&extrapool, runp->wcs,
2480	runp->nwcs * sizeof (uint32_t));
2481	maybe_swap_uint32_obstack (&extrapool, runp->nwcs);
2482
2483	obstack_int32_grow (&extrapool, runp->wcseqorder);
2484	}
2485
2486	runp = runp->next;
2487	}
2488
2489	/ Prepare to write out this data. /
2490	add_locale_uint32 (&file, elem_size);
2491	add_locale_uint32_array (&file, elem_table, `2` * elem_size);
2492	add_locale_raw_obstack (&file, &extrapool);
2493	add_locale_raw_data (&file, collate->mbseqorder, `256`);
2494	add_locale_collseq_table (&file, &collate->wcseqorder);
2495	add_locale_string (&file, charmap->code_set_name);
2496	write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
2497
2498	obstack_free (&weightpool, NULL);
2499	obstack_free (&extrapool, NULL);
2500	obstack_free (&indirectpool, NULL);
2501	}
2502
2503
2504	static enum token_t
2505	skip_to (struct linereader ldfile, struct* locale_collate_t *collate,
2506	const struct charmap_t charmap, int* to_endif)
2507	{
2508	while (`1`)
2509	{
2510	struct token *now = lr_token (ldfile, charmap, NULL, NULL, `0`);
2511	enum token_t nowtok = now->tok;
2512
2513	if (nowtok == tok_eof \|\| nowtok == tok_end)
2514	return nowtok;
2515
2516	if (nowtok == tok_ifdef \|\| nowtok == tok_ifndef)
2517	{
2518	lr_error (ldfile, _("%s: nested conditionals not supported"),
2519	"LC_COLLATE");
2520	nowtok = skip_to (ldfile, collate, charmap, tok_endif);
2521	if (nowtok == tok_eof \|\| nowtok == tok_end)
2522	return nowtok;
2523	}
2524	else if (nowtok == tok_endif \|\| (!to_endif && nowtok == tok_else))
2525	{
2526	lr_ignore_rest (ldfile, `1`);
2527	return nowtok;
2528	}
2529	else if (!to_endif && (nowtok == tok_elifdef \|\| nowtok == tok_elifndef))
2530	{
2531	/ Do not read the rest of the line. /
2532	return nowtok;
2533	}
2534	else if (nowtok == tok_else)
2535	{
2536	lr_error (ldfile, _("%s: more than one 'else'"), "LC_COLLATE");
2537	}
2538
2539	lr_ignore_rest (ldfile, `0`);
2540	}
2541	}
2542
2543
2544	void
2545	collate_read (struct linereader ldfile, struct* localedef_t *result,
2546	const struct charmap_t charmap, const* char *repertoire_name,
2547	int ignore_content)
2548	{
2549	struct repertoire_t *repertoire = NULL;
2550	struct locale_collate_t *collate;
2551	struct token *now;
2552	struct token *arg = NULL;
2553	enum token_t nowtok;
2554	enum token_t was_ellipsis = tok_none;
2555	struct localedef_t *copy_locale = NULL;
2556	/ Parsing state:*
2557	0 - start
2558	1 - between `order-start' and `order-end'
2559	2 - after `order-end'
2560	3 - after `reorder-after', waiting for `reorder-end'
2561	4 - after `reorder-end'
2562	5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2563	6 - after `reorder-sections-end'
2564	*/
2565	int state = `0`;
2566
2567	/ Get the repertoire we have to use. /
2568	if (repertoire_name != NULL)
2569	repertoire = repertoire_read (repertoire_name);
2570
2571	/ The rest of the line containing `LC_COLLATE' must be free. /
2572	lr_ignore_rest (ldfile, `1`);
2573
2574	while (`1`)
2575	{
2576	do
2577	{
2578	now = lr_token (ldfile, charmap, result, NULL, verbose);
2579	nowtok = now->tok;
2580	}
2581	while (nowtok == tok_eol);
2582
2583	if (nowtok != tok_define)
2584	break;
2585
2586	if (ignore_content)
2587	lr_ignore_rest (ldfile, `0`);
2588	else
2589	{
2590	arg = lr_token (ldfile, charmap, result, NULL, verbose);
2591	if (arg->tok != tok_ident)
2592	SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2593	else
2594	{
2595	/ Simply add the new symbol. /
2596	struct name_list newsym = xmalloc (sizeof* (*newsym)
2597	+ arg->val.str.lenmb + `1`);
2598	memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
2599	newsym->str[arg->val.str.lenmb] = `'\0'`;
2600	newsym->next = defined;
2601	defined = newsym;
2602
2603	lr_ignore_rest (ldfile, `1`);
2604	}
2605	}
2606	}
2607
2608	if (nowtok == tok_copy)
2609	{
2610	now = lr_token (ldfile, charmap, result, NULL, verbose);
2611	if (now->tok != tok_string)
2612	{
2613	SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2614
2615	skip_category:
2616	do
2617	now = lr_token (ldfile, charmap, result, NULL, verbose);
2618	while (now->tok != tok_eof && now->tok != tok_end);
2619
2620	if (now->tok != tok_eof
2621	\|\| (now = lr_token (ldfile, charmap, result, NULL, verbose),
2622	now->tok == tok_eof))
2623	lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
2624	else if (now->tok != tok_lc_collate)
2625	{
2626	lr_error (ldfile, _("\
2627	%1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2628	lr_ignore_rest (ldfile, `0`);
2629	}
2630	else
2631	lr_ignore_rest (ldfile, `1`);
2632
2633	return;
2634	}
2635
2636	if (! ignore_content)
2637	{
2638	/ Get the locale definition. /
2639	copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
2640	repertoire_name, charmap, NULL);
2641	if ((copy_locale->avail & COLLATE_LOCALE) == `0`)
2642	{
2643	/ Not yet loaded. So do it now. /
2644	if (locfile_read (copy_locale, charmap) != `0`)
2645	goto skip_category;
2646	}
2647
2648	if (copy_locale->categories[LC_COLLATE].collate == NULL)
2649	return;
2650	}
2651
2652	lr_ignore_rest (ldfile, `1`);
2653
2654	now = lr_token (ldfile, charmap, result, NULL, verbose);
2655	nowtok = now->tok;
2656	}
2657
2658	/ Prepare the data structures. /
2659	collate_startup (ldfile, result, copy_locale, ignore_content);
2660	collate = result->categories[LC_COLLATE].collate;
2661
2662	while (`1`)
2663	{
2664	char ucs4buf[`10`];
2665	char *symstr;
2666	size_t symlen;
2667
2668	/ Of course we don't proceed beyond the end of file. /
2669	if (nowtok == tok_eof)
2670	break;
2671
2672	/* Ignore empty lines. */
2673	if (nowtok == tok_eol)
2674	{
2675	now = lr_token (ldfile, charmap, result, NULL, verbose);
2676	nowtok = now->tok;
2677	continue;
2678	}
2679
2680	switch (nowtok)
2681	{
2682	case tok_copy:
2683	/ Allow copying other locales. /
2684	now = lr_token (ldfile, charmap, result, NULL, verbose);
2685	if (now->tok != tok_string)
2686	goto err_label;
2687
2688	if (! ignore_content)
2689	load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
2690	charmap, result);
2691
2692	lr_ignore_rest (ldfile, `1`);
2693	break;
2694
2695	case tok_coll_weight_max:
2696	/ Ignore the rest of the line if we don't need the input of*
2697	this line. /*
2698	if (ignore_content)
2699	{
2700	lr_ignore_rest (ldfile, `0`);
2701	break;
2702	}
2703
2704	if (state != `0`)
2705	goto err_label;
2706
2707	arg = lr_token (ldfile, charmap, result, NULL, verbose);
2708	if (arg->tok != tok_number)
2709	goto err_label;
2710	if (collate->col_weight_max != -`1`)
2711	lr_error (ldfile, _("%s: duplicate definition of `%s'"),
2712	"LC_COLLATE", "col_weight_max");
2713	else
2714	collate->col_weight_max = arg->val.num;
2715	lr_ignore_rest (ldfile, `1`);
2716	break;
2717
2718	case tok_section_symbol:
2719	/ Ignore the rest of the line if we don't need the input of*
2720	this line. /*
2721	if (ignore_content)
2722	{
2723	lr_ignore_rest (ldfile, `0`);
2724	break;
2725	}
2726
2727	if (state != `0`)
2728	goto err_label;
2729
2730	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2731	if (arg->tok != tok_bsymbol)
2732	goto err_label;
2733	else if (!ignore_content)
2734	{
2735	/ Check whether this section is already known. /
2736	struct section_list *known = collate->sections;
2737	while (known != NULL)
2738	{
2739	if (strcmp (known->name, arg->val.str.startmb) == `0`)
2740	break;
2741	known = known->next;
2742	}
2743
2744	if (known != NULL)
2745	{
2746	lr_error (ldfile,
2747	_("%s: duplicate declaration of section `%s'"),
2748	"LC_COLLATE", arg->val.str.startmb);
2749	free (arg->val.str.startmb);
2750	}
2751	else
2752	collate->sections = make_seclist_elem (collate,
2753	arg->val.str.startmb,
2754	collate->sections);
2755
2756	lr_ignore_rest (ldfile, known == NULL);
2757	}
2758	else
2759	{
2760	free (arg->val.str.startmb);
2761	lr_ignore_rest (ldfile, `0`);
2762	}
2763	break;
2764
2765	case tok_collating_element:
2766	/ Ignore the rest of the line if we don't need the input of*
2767	this line. /*
2768	if (ignore_content)
2769	{
2770	lr_ignore_rest (ldfile, `0`);
2771	break;
2772	}
2773
2774	if (state != `0` && state != `2`)
2775	goto err_label;
2776
2777	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2778	if (arg->tok != tok_bsymbol)
2779	goto err_label;
2780	else
2781	{
2782	const char *symbol = arg->val.str.startmb;
2783	size_t symbol_len = arg->val.str.lenmb;
2784
2785	/ Next the `from' keyword. /
2786	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2787	if (arg->tok != tok_from)
2788	{
2789	free ((char *) symbol);
2790	goto err_label;
2791	}
2792
2793	ldfile->return_widestr = `1`;
2794	ldfile->translate_strings = `1`;
2795
2796	/ Finally the string with the replacement. /
2797	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2798
2799	ldfile->return_widestr = `0`;
2800	ldfile->translate_strings = `0`;
2801
2802	if (arg->tok != tok_string)
2803	goto err_label;
2804
2805	if (!ignore_content && symbol != NULL)
2806	{
2807	/ The name is already defined. /
2808	if (check_duplicate (ldfile, collate, charmap,
2809	repertoire, symbol, symbol_len))
2810	goto col_elem_free;
2811
2812	if (arg->val.str.startmb != NULL)
2813	insert_entry (&collate->elem_table, symbol, symbol_len,
2814	new_element (collate,
2815	arg->val.str.startmb,
2816	arg->val.str.lenmb - `1`,
2817	arg->val.str.startwc,
2818	symbol, symbol_len, `0`));
2819	}
2820	else
2821	{
2822	col_elem_free:
2823	free ((char *) symbol);
2824	free (arg->val.str.startmb);
2825	free (arg->val.str.startwc);
2826	}
2827	lr_ignore_rest (ldfile, `1`);
2828	}
2829	break;
2830
2831	case tok_collating_symbol:
2832	/ Ignore the rest of the line if we don't need the input of*
2833	this line. /*
2834	if (ignore_content)
2835	{
2836	lr_ignore_rest (ldfile, `0`);
2837	break;
2838	}
2839
2840	if (state != `0` && state != `2`)
2841	goto err_label;
2842
2843	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2844	if (arg->tok != tok_bsymbol)
2845	goto err_label;
2846	else
2847	{
2848	char *symbol = arg->val.str.startmb;
2849	size_t symbol_len = arg->val.str.lenmb;
2850	char *endsymbol = NULL;
2851	size_t endsymbol_len = `0`;
2852	enum token_t ellipsis = tok_none;
2853
2854	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2855	if (arg->tok == tok_ellipsis2 \|\| arg->tok == tok_ellipsis4)
2856	{
2857	ellipsis = arg->tok;
2858
2859	arg = lr_token (ldfile, charmap, result, repertoire,
2860	verbose);
2861	if (arg->tok != tok_bsymbol)
2862	{
2863	free (symbol);
2864	goto err_label;
2865	}
2866
2867	endsymbol = arg->val.str.startmb;
2868	endsymbol_len = arg->val.str.lenmb;
2869
2870	lr_ignore_rest (ldfile, `1`);
2871	}
2872	else if (arg->tok != tok_eol)
2873	{
2874	free (symbol);
2875	goto err_label;
2876	}
2877
2878	if (!ignore_content)
2879	{
2880	if (symbol == NULL
2881	\|\| (ellipsis != tok_none && endsymbol == NULL))
2882	{
2883	lr_error (ldfile, _("\
2884	%s: unknown character in collating symbol name"),
2885	"LC_COLLATE");
2886	goto col_sym_free;
2887	}
2888	else if (ellipsis == tok_none)
2889	{
2890	/ A single symbol, no ellipsis. /
2891	if (check_duplicate (ldfile, collate, charmap,
2892	repertoire, symbol, symbol_len))
2893	/ The name is already defined. /
2894	goto col_sym_free;
2895
2896	insert_entry (&collate->sym_table, symbol, symbol_len,
2897	new_symbol (collate, symbol, symbol_len));
2898	}
2899	else if (symbol_len != endsymbol_len)
2900	{
2901	col_sym_inv_range:
2902	lr_error (ldfile,
2903	_("invalid names for character range"));
2904	goto col_sym_free;
2905	}
2906	else
2907	{
2908	/ Oh my, we have to handle an ellipsis. First, as*
2909	usual, determine the common prefix and then
2910	convert the rest into a range. /*
2911	size_t prefixlen;
2912	unsigned long int from;
2913	unsigned long int to;
2914	char *endp;
2915
2916	for (prefixlen = `0`; prefixlen < symbol_len; ++prefixlen)
2917	if (symbol[prefixlen] != endsymbol[prefixlen])
2918	break;
2919
2920	/ Convert the rest into numbers. /
2921	symbol[symbol_len] = `'\0'`;
2922	from = strtoul (&symbol[prefixlen], &endp,
2923	ellipsis == tok_ellipsis2 ? `16` : `10`);
2924	if (*endp != `'\0'`)
2925	goto col_sym_inv_range;
2926
2927	endsymbol[symbol_len] = `'\0'`;
2928	to = strtoul (&endsymbol[prefixlen], &endp,
2929	ellipsis == tok_ellipsis2 ? `16` : `10`);
2930	if (*endp != `'\0'`)
2931	goto col_sym_inv_range;
2932
2933	if (from > to)
2934	goto col_sym_inv_range;
2935
2936	/ Now loop over all entries. /
2937	while (from <= to)
2938	{
2939	char *symbuf;
2940
2941	symbuf = (char *) obstack_alloc (&collate->mempool,
2942	symbol_len + `1`);
2943
2944	/ Create the name. /
2945	sprintf (symbuf,
2946	ellipsis == tok_ellipsis2
2947	? "%.s%.lX" : "%.s%.lu",
2948	(int) prefixlen, symbol,
2949	(int) (symbol_len - prefixlen), from);
2950
2951	if (check_duplicate (ldfile, collate, charmap,
2952	repertoire, symbuf, symbol_len))
2953	/ The name is already defined. /
2954	goto col_sym_free;
2955
2956	insert_entry (&collate->sym_table, symbuf,
2957	symbol_len,
2958	new_symbol (collate, symbuf,
2959	symbol_len));
2960
2961	/ Increment the counter. /
2962	++from;
2963	}
2964
2965	goto col_sym_free;
2966	}
2967	}
2968	else
2969	{
2970	col_sym_free:
2971	free (symbol);
2972	free (endsymbol);
2973	}
2974	}
2975	break;
2976
2977	case tok_symbol_equivalence:
2978	/ Ignore the rest of the line if we don't need the input of*
2979	this line. /*
2980	if (ignore_content)
2981	{
2982	lr_ignore_rest (ldfile, `0`);
2983	break;
2984	}
2985
2986	if (state != `0`)
2987	goto err_label;
2988
2989	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2990	if (arg->tok != tok_bsymbol)
2991	goto err_label;
2992	else
2993	{
2994	const char *newname = arg->val.str.startmb;
2995	size_t newname_len = arg->val.str.lenmb;
2996	const char *symname;
2997	size_t symname_len;
2998	void symval; /* Actually struct symbol_t* /
2999
3000	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3001	if (arg->tok != tok_bsymbol)
3002	{
3003	free ((char *) newname);
3004	goto err_label;
3005	}
3006
3007	symname = arg->val.str.startmb;
3008	symname_len = arg->val.str.lenmb;
3009
3010	if (newname == NULL)
3011	{
3012	lr_error (ldfile, _("\
3013	%s: unknown character in equivalent definition name"),
3014	"LC_COLLATE");
3015
3016	sym_equiv_free:
3017	free ((char *) newname);
3018	free ((char *) symname);
3019	break;
3020	}
3021	if (symname == NULL)
3022	{
3023	lr_error (ldfile, _("\
3024	%s: unknown character in equivalent definition value"),
3025	"LC_COLLATE");
3026	goto sym_equiv_free;
3027	}
3028
3029	/ See whether the symbol name is already defined. /
3030	if (find_entry (&collate->sym_table, symname, symname_len,
3031	&symval) != `0`)
3032	{
3033	lr_error (ldfile, _("\
3034	%s: unknown symbol `%s' in equivalent definition"),
3035	"LC_COLLATE", symname);
3036	goto sym_equiv_free;
3037	}
3038
3039	if (insert_entry (&collate->sym_table,
3040	newname, newname_len, symval) < `0`)
3041	{
3042	lr_error (ldfile, _("\
3043	error while adding equivalent collating symbol"));
3044	goto sym_equiv_free;
3045	}
3046
3047	free ((char *) symname);
3048	}
3049	lr_ignore_rest (ldfile, `1`);
3050	break;
3051
3052	case tok_script:
3053	/ Ignore the rest of the line if we don't need the input of*
3054	this line. /*
3055	if (ignore_content)
3056	{
3057	lr_ignore_rest (ldfile, `0`);
3058	break;
3059	}
3060
3061	/ We get told about the scripts we know. /
3062	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3063	if (arg->tok != tok_bsymbol)
3064	goto err_label;
3065	else
3066	{
3067	struct section_list *runp = collate->known_sections;
3068	char *name;
3069
3070	while (runp != NULL)
3071	if (strncmp (runp->name, arg->val.str.startmb,
3072	arg->val.str.lenmb) == `0`
3073	&& runp->name[arg->val.str.lenmb] == `'\0'`)
3074	break;
3075	else
3076	runp = runp->def_next;
3077
3078	if (runp != NULL)
3079	{
3080	lr_error (ldfile, _("duplicate definition of script `%s'"),
3081	runp->name);
3082	lr_ignore_rest (ldfile, `0`);
3083	break;
3084	}
3085
3086	runp = (struct section_list ) xcalloc (`1`, sizeof* (*runp));
3087	name = (char *) xmalloc (arg->val.str.lenmb + `1`);
3088	memcpy (name, arg->val.str.startmb, arg->val.str.lenmb);
3089	name[arg->val.str.lenmb] = `'\0'`;
3090	runp->name = name;
3091
3092	runp->def_next = collate->known_sections;
3093	collate->known_sections = runp;
3094	}
3095	lr_ignore_rest (ldfile, `1`);
3096	break;
3097
3098	case tok_order_start:
3099	/ Ignore the rest of the line if we don't need the input of*
3100	this line. /*
3101	if (ignore_content)
3102	{
3103	lr_ignore_rest (ldfile, `0`);
3104	break;
3105	}
3106
3107	if (state != `0` && state != `1` && state != `2`)
3108	goto err_label;
3109	state = `1`;
3110
3111	/ The 14652 draft does not specify whether all `order_start' lines*
3112	must contain the same number of sort-rules, but 14651 does. So
3113	we require this here as well. /*
3114	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3115	if (arg->tok == tok_bsymbol)
3116	{
3117	/ This better should be a section name. /
3118	struct section_list *sp = collate->known_sections;
3119	while (sp != NULL
3120	&& (sp->name == NULL
3121	\|\| strncmp (sp->name, arg->val.str.startmb,
3122	arg->val.str.lenmb) != `0`
3123	\|\| sp->name[arg->val.str.lenmb] != `'\0'`))
3124	sp = sp->def_next;
3125
3126	if (sp == NULL)
3127	{
3128	lr_error (ldfile, _("\
3129	%s: unknown section name `%.*s'"),
3130	"LC_COLLATE", (int) arg->val.str.lenmb,
3131	arg->val.str.startmb);
3132	/ We use the error section. /
3133	collate->current_section = &collate->error_section;
3134
3135	if (collate->error_section.first == NULL)
3136	{
3137	/ Insert &collate->error_section at the end of*
3138	the collate->sections list. /*
3139	if (collate->sections == NULL)
3140	collate->sections = &collate->error_section;
3141	else
3142	{
3143	sp = collate->sections;
3144	while (sp->next != NULL)
3145	sp = sp->next;
3146
3147	sp->next = &collate->error_section;
3148	}
3149	collate->error_section.next = NULL;
3150	}
3151	}
3152	else
3153	{
3154	/ One should not be allowed to open the same*
3155	section twice. /*
3156	if (sp->first != NULL)
3157	lr_error (ldfile, _("\
3158	%s: multiple order definitions for section `%s'"),
3159	"LC_COLLATE", sp->name);
3160	else
3161	{
3162	/ Insert sp in the collate->sections list,*
3163	right after collate->current_section. /*
3164	if (collate->current_section != NULL)
3165	{
3166	sp->next = collate->current_section->next;
3167	collate->current_section->next = sp;
3168	}
3169	else if (collate->sections == NULL)
3170	/ This is the first section to be defined. /
3171	collate->sections = sp;
3172
3173	collate->current_section = sp;
3174	}
3175
3176	/ Next should come the end of the line or a semicolon. /
3177	arg = lr_token (ldfile, charmap, result, repertoire,
3178	verbose);
3179	if (arg->tok == tok_eol)
3180	{
3181	uint32_t cnt;
3182
3183	/ This means we have exactly one rule: `forward'. /
3184	if (nrules > `1`)
3185	lr_error (ldfile, _("\
3186	%s: invalid number of sorting rules"),
3187	"LC_COLLATE");
3188	else
3189	nrules = `1`;
3190	sp->rules = obstack_alloc (&collate->mempool,
3191	(sizeof (enum coll_sort_rule)
3192	* nrules));
3193	for (cnt = `0`; cnt < nrules; ++cnt)
3194	sp->rules[cnt] = sort_forward;
3195
3196	/ Next line. /
3197	break;
3198	}
3199
3200	/ Get the next token. /
3201	arg = lr_token (ldfile, charmap, result, repertoire,
3202	verbose);
3203	}
3204	}
3205	else
3206	{
3207	/ There is no section symbol. Therefore we use the unnamed*
3208	section. /*
3209	collate->current_section = &collate->unnamed_section;
3210
3211	if (collate->unnamed_section_defined)
3212	lr_error (ldfile, _("\
3213	%s: multiple order definitions for unnamed section"),
3214	"LC_COLLATE");
3215	else
3216	{
3217	/ Insert &collate->unnamed_section at the beginning of*
3218	the collate->sections list. /*
3219	collate->unnamed_section.next = collate->sections;
3220	collate->sections = &collate->unnamed_section;
3221	collate->unnamed_section_defined = true;
3222	}
3223	}
3224
3225	/ Now read the direction names. /
3226	read_directions (ldfile, arg, charmap, repertoire, result);
3227
3228	/ From now we need the strings untranslated. /
3229	ldfile->translate_strings = `0`;
3230	break;
3231
3232	case tok_order_end:
3233	/ Ignore the rest of the line if we don't need the input of*
3234	this line. /*
3235	if (ignore_content)
3236	{
3237	lr_ignore_rest (ldfile, `0`);
3238	break;
3239	}
3240
3241	if (state != `1`)
3242	goto err_label;
3243
3244	/ Handle ellipsis at end of list. /
3245	if (was_ellipsis != tok_none)
3246	{
3247	handle_ellipsis (ldfile, NULL, `0`, was_ellipsis, charmap,
3248	repertoire, result);
3249	was_ellipsis = tok_none;
3250	}
3251
3252	state = `2`;
3253	lr_ignore_rest (ldfile, `1`);
3254	break;
3255
3256	case tok_reorder_after:
3257	/ Ignore the rest of the line if we don't need the input of*
3258	this line. /*
3259	if (ignore_content)
3260	{
3261	lr_ignore_rest (ldfile, `0`);
3262	break;
3263	}
3264
3265	if (state == `1`)
3266	{
3267	lr_error (ldfile, _("%s: missing `order_end' keyword"),
3268	"LC_COLLATE");
3269	state = `2`;
3270
3271	/ Handle ellipsis at end of list. /
3272	if (was_ellipsis != tok_none)
3273	{
3274	handle_ellipsis (ldfile, arg->val.str.startmb,
3275	arg->val.str.lenmb, was_ellipsis, charmap,
3276	repertoire, result);
3277	was_ellipsis = tok_none;
3278	}
3279	}
3280	else if (state == `0` && copy_locale == NULL)
3281	goto err_label;
3282	else if (state != `0` && state != `2` && state != `3`)
3283	goto err_label;
3284	state = `3`;
3285
3286	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3287	if (arg->tok == tok_bsymbol \|\| arg->tok == tok_ucs4)
3288	{
3289	/ Find this symbol in the sequence table. /
3290	char ucsbuf[`10`];
3291	char *startmb;
3292	size_t lenmb;
3293	struct element_t *insp;
3294	int no_error = `1`;
3295	void *ptr;
3296
3297	if (arg->tok == tok_bsymbol)
3298	{
3299	startmb = arg->val.str.startmb;
3300	lenmb = arg->val.str.lenmb;
3301	}
3302	else
3303	{
3304	sprintf (ucsbuf, "U%08X", arg->val.ucs4);
3305	startmb = ucsbuf;
3306	lenmb = `9`;
3307	}
3308
3309	if (find_entry (&collate->seq_table, startmb, lenmb, &ptr) == `0`)
3310	/ Yes, the symbol exists. Simply point the cursor*
3311	to it. /*
3312	collate->cursor = (struct element_t *) ptr;
3313	else
3314	{
3315	struct symbol_t *symbp;
3316	void *ptr;
3317
3318	if (find_entry (&collate->sym_table, startmb, lenmb,
3319	&ptr) == `0`)
3320	{
3321	symbp = ptr;
3322
3323	if (symbp->order->last != NULL
3324	\|\| symbp->order->next != NULL)
3325	collate->cursor = symbp->order;
3326	else
3327	{
3328	/ This is a collating symbol but its position*
3329	is not yet defined. /*
3330	lr_error (ldfile, _("\
3331	%s: order for collating symbol %.*s not yet defined"),
3332	"LC_COLLATE", (int) lenmb, startmb);
3333	collate->cursor = NULL;
3334	no_error = `0`;
3335	}
3336	}
3337	else if (find_entry (&collate->elem_table, startmb, lenmb,
3338	&ptr) == `0`)
3339	{
3340	insp = (struct element_t *) ptr;
3341
3342	if (insp->last != NULL \|\| insp->next != NULL)
3343	collate->cursor = insp;
3344	else
3345	{
3346	/ This is a collating element but its position*
3347	is not yet defined. /*
3348	lr_error (ldfile, _("\
3349	%s: order for collating element %.*s not yet defined"),
3350	"LC_COLLATE", (int) lenmb, startmb);
3351	collate->cursor = NULL;
3352	no_error = `0`;
3353	}
3354	}
3355	else
3356	{
3357	/ This is bad. The symbol after which we have to*
3358	insert does not exist. /*
3359	lr_error (ldfile, _("\
3360	%s: cannot reorder after %.*s: symbol not known"),
3361	"LC_COLLATE", (int) lenmb, startmb);
3362	collate->cursor = NULL;
3363	no_error = `0`;
3364	}
3365	}
3366
3367	lr_ignore_rest (ldfile, no_error);
3368	}
3369	else
3370	/ This must not happen. /
3371	goto err_label;
3372	break;
3373
3374	case tok_reorder_end:
3375	/ Ignore the rest of the line if we don't need the input of*
3376	this line. /*
3377	if (ignore_content)
3378	break;
3379
3380	if (state != `3`)
3381	goto err_label;
3382	state = `4`;
3383	lr_ignore_rest (ldfile, `1`);
3384	break;
3385
3386	case tok_reorder_sections_after:
3387	/ Ignore the rest of the line if we don't need the input of*
3388	this line. /*
3389	if (ignore_content)
3390	{
3391	lr_ignore_rest (ldfile, `0`);
3392	break;
3393	}
3394
3395	if (state == `1`)
3396	{
3397	lr_error (ldfile, _("%s: missing `order_end' keyword"),
3398	"LC_COLLATE");
3399	state = `2`;
3400
3401	/ Handle ellipsis at end of list. /
3402	if (was_ellipsis != tok_none)
3403	{
3404	handle_ellipsis (ldfile, NULL, `0`, was_ellipsis, charmap,
3405	repertoire, result);
3406	was_ellipsis = tok_none;
3407	}
3408	}
3409	else if (state == `3`)
3410	{
3411	WITH_CUR_LOCALE (error (`0`, `0`, _("\
3412	%s: missing `reorder-end' keyword"), "LC_COLLATE"));
3413	state = `4`;
3414	}
3415	else if (state != `2` && state != `4`)
3416	goto err_label;
3417	state = `5`;
3418
3419	/ Get the name of the sections we are adding after. /
3420	arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3421	if (arg->tok == tok_bsymbol)
3422	{
3423	/ Now find a section with this name. /
3424	struct section_list *runp = collate->sections;
3425
3426	while (runp != NULL)
3427	{
3428	if (runp->name != NULL
3429	&& strlen (runp->name) == arg->val.str.lenmb
3430	&& memcmp (runp->name, arg->val.str.startmb,
3431	arg->val.str.lenmb) == `0`)
3432	break;
3433
3434	runp = runp->next;
3435	}
3436
3437	if (runp != NULL)
3438	collate->current_section = runp;
3439	else
3440	{
3441	/ This is bad. The section after which we have to*
3442	reorder does not exist. Therefore we cannot
3443	process the whole rest of this reorder
3444	specification. /*
3445	lr_error (ldfile, _("%s: section `%.*s' not known"),
3446	"LC_COLLATE", (int) arg->val.str.lenmb,
3447	arg->val.str.startmb);
3448
3449	do
3450	{
3451	lr_ignore_rest (ldfile, `0`);
3452
3453	now = lr_token (ldfile, charmap, result, NULL, verbose);
3454	}
3455	while (now->tok == tok_reorder_sections_after
3456	\|\| now->tok == tok_reorder_sections_end
3457	\|\| now->tok == tok_end);
3458
3459	/ Process the token we just saw. /
3460	nowtok = now->tok;
3461	continue;
3462	}
3463	}
3464	else
3465	/ This must not happen. /
3466	goto err_label;
3467	break;
3468
3469	case tok_reorder_sections_end:
3470	/ Ignore the rest of the line if we don't need the input of*
3471	this line. /*
3472	if (ignore_content)
3473	break;
3474
3475	if (state != `5`)
3476	goto err_label;
3477	state = `6`;
3478	lr_ignore_rest (ldfile, `1`);
3479	break;
3480
3481	case tok_bsymbol:
3482	case tok_ucs4:
3483	/ Ignore the rest of the line if we don't need the input of*
3484	this line. /*
3485	if (ignore_content)
3486	{
3487	lr_ignore_rest (ldfile, `0`);
3488	break;
3489	}
3490
3491	if (state != `0` && state != `1` && state != `3` && state != `5`)
3492	goto err_label;
3493
3494	if ((state == `0` \|\| state == `5`) && nowtok == tok_ucs4)
3495	goto err_label;
3496
3497	if (nowtok == tok_ucs4)
3498	{
3499	snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4);
3500	symstr = ucs4buf;
3501	symlen = `9`;
3502	}
3503	else if (arg != NULL)
3504	{
3505	symstr = arg->val.str.startmb;
3506	symlen = arg->val.str.lenmb;
3507	}
3508	else
3509	{
3510	lr_error (ldfile, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3511	(int) ldfile->token.val.str.lenmb,
3512	ldfile->token.val.str.startmb);
3513	break;
3514	}
3515
3516	struct element_t *seqp;
3517	if (state == `0`)
3518	{
3519	/ We are outside an `order_start' region. This means*
3520	we must only accept definitions of values for
3521	collation symbols since these are purely abstract
3522	values and don't need directions associated. /*
3523	void *ptr;
3524
3525	if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == `0`)
3526	{
3527	seqp = ptr;
3528
3529	/ It's already defined. First check whether this*
3530	is really a collating symbol. /*
3531	if (seqp->is_character)
3532	goto err_label;
3533
3534	goto move_entry;
3535	}
3536	else
3537	{
3538	void *result;
3539
3540	if (find_entry (&collate->sym_table, symstr, symlen,
3541	&result) != `0`)
3542	/ No collating symbol, it's an error. /
3543	goto err_label;
3544
3545	/ Maybe this is the first time we define a symbol*
3546	value and it is before the first actual section. /*
3547	if (collate->sections == NULL)
3548	collate->sections = collate->current_section =
3549	&collate->symbol_section;
3550	}
3551
3552	if (was_ellipsis != tok_none)
3553	{
3554	handle_ellipsis (ldfile, symstr, symlen, was_ellipsis,
3555	charmap, repertoire, result);
3556
3557	/ Remember that we processed the ellipsis. /
3558	was_ellipsis = tok_none;
3559
3560	/ And don't add the value a second time. /
3561	break;
3562	}
3563	}
3564	else if (state == `3`)
3565	{
3566	/ It is possible that we already have this collation sequence.*
3567	In this case we move the entry. /*
3568	void *sym;
3569	void *ptr;
3570
3571	/ If the symbol after which we have to insert was not found*
3572	ignore all entries. /*
3573	if (collate->cursor == NULL)
3574	{
3575	lr_ignore_rest (ldfile, `0`);
3576	break;
3577	}
3578
3579	if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == `0`)
3580	{
3581	seqp = (struct element_t *) ptr;
3582	goto move_entry;
3583	}
3584
3585	if (find_entry (&collate->sym_table, symstr, symlen, &sym) == `0`
3586	&& (seqp = ((struct symbol_t *) sym)->order) != NULL)
3587	goto move_entry;
3588
3589	if (find_entry (&collate->elem_table, symstr, symlen, &ptr) == `0`
3590	&& (seqp = (struct element_t *) ptr,
3591	seqp->last != NULL \|\| seqp->next != NULL
3592	\|\| (collate->start != NULL && seqp == collate->start)))
3593	{
3594	move_entry:
3595	/ Remove the entry from the old position. /
3596	if (seqp->last == NULL)
3597	collate->start = seqp->next;
3598	else
3599	seqp->last->next = seqp->next;
3600	if (seqp->next != NULL)
3601	seqp->next->last = seqp->last;
3602
3603	/ We also have to check whether this entry is the*
3604	first or last of a section. /*
3605	if (seqp->section->first == seqp)
3606	{
3607	if (seqp->section->first == seqp->section->last)
3608	/ This section has no content anymore. /
3609	seqp->section->first = seqp->section->last = NULL;
3610	else
3611	seqp->section->first = seqp->next;
3612	}
3613	else if (seqp->section->last == seqp)
3614	seqp->section->last = seqp->last;
3615
3616	/ Now insert it in the new place. /
3617	insert_weights (ldfile, seqp, charmap, repertoire, result,
3618	tok_none);
3619	break;
3620	}
3621
3622	/ Otherwise we just add a new entry. /
3623	}
3624	else if (state == `5`)
3625	{
3626	/ We are reordering sections. Find the named section. /
3627	struct section_list *runp = collate->sections;
3628	struct section_list *prevp = NULL;
3629
3630	while (runp != NULL)
3631	{
3632	if (runp->name != NULL
3633	&& strlen (runp->name) == symlen
3634	&& memcmp (runp->name, symstr, symlen) == `0`)
3635	break;
3636
3637	prevp = runp;
3638	runp = runp->next;
3639	}
3640
3641	if (runp == NULL)
3642	{
3643	lr_error (ldfile, _("%s: section `%.*s' not known"),
3644	"LC_COLLATE", (int) symlen, symstr);
3645	lr_ignore_rest (ldfile, `0`);
3646	}
3647	else
3648	{
3649	if (runp != collate->current_section)
3650	{
3651	/ Remove the named section from the old place and*
3652	insert it in the new one. /*
3653	prevp->next = runp->next;
3654
3655	runp->next = collate->current_section->next;
3656	collate->current_section->next = runp;
3657	collate->current_section = runp;
3658	}
3659
3660	/ Process the rest of the line which might change*
3661	the collation rules. /*
3662	arg = lr_token (ldfile, charmap, result, repertoire,
3663	verbose);
3664	if (arg->tok != tok_eof && arg->tok != tok_eol)
3665	read_directions (ldfile, arg, charmap, repertoire,
3666	result);
3667	}
3668	break;
3669	}
3670	else if (was_ellipsis != tok_none)
3671	{
3672	/ Using the information in the `ellipsis_weight'*
3673	element and this and the last value we have to handle
3674	the ellipsis now. /*
3675	assert (state == `1`);
3676
3677	handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap,
3678	repertoire, result);
3679
3680	/ Remember that we processed the ellipsis. /
3681	was_ellipsis = tok_none;
3682
3683	/ And don't add the value a second time. /
3684	break;
3685	}
3686
3687	/ Now insert in the new place. /
3688	insert_value (ldfile, symstr, symlen, charmap, repertoire, result);
3689	break;
3690
3691	case tok_undefined:
3692	/ Ignore the rest of the line if we don't need the input of*
3693	this line. /*
3694	if (ignore_content)
3695	{
3696	lr_ignore_rest (ldfile, `0`);
3697	break;
3698	}
3699
3700	if (state != `1`)
3701	goto err_label;
3702
3703	if (was_ellipsis != tok_none)
3704	{
3705	lr_error (ldfile,
3706	_("%s: cannot have `%s' as end of ellipsis range"),
3707	"LC_COLLATE", "UNDEFINED");
3708
3709	unlink_element (collate);
3710	was_ellipsis = tok_none;
3711	}
3712
3713	/ See whether UNDEFINED already appeared somewhere. /
3714	if (collate->undefined.next != NULL
3715	\|\| &collate->undefined == collate->cursor)
3716	{
3717	lr_error (ldfile,
3718	_("%s: order for `%.*s' already defined at %s:%Zu"),
3719	"LC_COLLATE", `9`, "UNDEFINED",
3720	collate->undefined.file,
3721	collate->undefined.line);
3722	lr_ignore_rest (ldfile, `0`);
3723	}
3724	else
3725	/ Parse the weights. /
3726	insert_weights (ldfile, &collate->undefined, charmap,
3727	repertoire, result, tok_none);
3728	break;
3729
3730	case tok_ellipsis2: / symbolic hexadecimal ellipsis /
3731	case tok_ellipsis3: / absolute ellipsis /
3732	case tok_ellipsis4: / symbolic decimal ellipsis /
3733	/ This is the symbolic (decimal or hexadecimal) or absolute*
3734	ellipsis. /*
3735	if (was_ellipsis != tok_none)
3736	goto err_label;
3737
3738	if (state != `0` && state != `1` && state != `3`)
3739	goto err_label;
3740
3741	was_ellipsis = nowtok;
3742
3743	insert_weights (ldfile, &collate->ellipsis_weight, charmap,
3744	repertoire, result, nowtok);
3745	break;
3746
3747	case tok_end:
3748	seen_end:
3749	/ Next we assume `LC_COLLATE'. /
3750	if (!ignore_content)
3751	{
3752	if (state == `0` && copy_locale == NULL)
3753	/ We must either see a copy statement or have*
3754	ordering values. /*
3755	lr_error (ldfile,
3756	_("%s: empty category description not allowed"),
3757	"LC_COLLATE");
3758	else if (state == `1`)
3759	{
3760	lr_error (ldfile, _("%s: missing `order_end' keyword"),
3761	"LC_COLLATE");
3762
3763	/ Handle ellipsis at end of list. /
3764	if (was_ellipsis != tok_none)
3765	{
3766	handle_ellipsis (ldfile, NULL, `0`, was_ellipsis, charmap,
3767	repertoire, result);
3768	was_ellipsis = tok_none;
3769	}
3770	}
3771	else if (state == `3`)
3772	WITH_CUR_LOCALE (error (`0`, `0`, _("\
3773	%s: missing `reorder-end' keyword"), "LC_COLLATE"));
3774	else if (state == `5`)
3775	WITH_CUR_LOCALE (error (`0`, `0`, _("\
3776	%s: missing `reorder-sections-end' keyword"), "LC_COLLATE"));
3777	}
3778	arg = lr_token (ldfile, charmap, result, NULL, verbose);
3779	if (arg->tok == tok_eof)
3780	break;
3781	if (arg->tok == tok_eol)
3782	lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
3783	else if (arg->tok != tok_lc_collate)
3784	lr_error (ldfile, _("\
3785	%1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3786	lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
3787	return;
3788
3789	case tok_define:
3790	if (ignore_content)
3791	{
3792	lr_ignore_rest (ldfile, `0`);
3793	break;
3794	}
3795
3796	arg = lr_token (ldfile, charmap, result, NULL, verbose);
3797	if (arg->tok != tok_ident)
3798	goto err_label;
3799
3800	/ Simply add the new symbol. /
3801	struct name_list newsym = xmalloc (sizeof* (*newsym)
3802	+ arg->val.str.lenmb + `1`);
3803	memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
3804	newsym->str[arg->val.str.lenmb] = `'\0'`;
3805	newsym->next = defined;
3806	defined = newsym;
3807
3808	lr_ignore_rest (ldfile, `1`);
3809	break;
3810
3811	case tok_undef:
3812	if (ignore_content)
3813	{
3814	lr_ignore_rest (ldfile, `0`);
3815	break;
3816	}
3817
3818	arg = lr_token (ldfile, charmap, result, NULL, verbose);
3819	if (arg->tok != tok_ident)
3820	goto err_label;
3821
3822	/ Remove _all_ occurrences of the symbol from the list. /
3823	struct name_list *prevdef = NULL;
3824	struct name_list *curdef = defined;
3825	while (curdef != NULL)
3826	if (strncmp (arg->val.str.startmb, curdef->str,
3827	arg->val.str.lenmb) == `0`
3828	&& curdef->str[arg->val.str.lenmb] == `'\0'`)
3829	{
3830	if (prevdef == NULL)
3831	defined = curdef->next;
3832	else
3833	prevdef->next = curdef->next;
3834
3835	struct name_list *olddef = curdef;
3836	curdef = curdef->next;
3837
3838	free (olddef);
3839	}
3840	else
3841	{
3842	prevdef = curdef;
3843	curdef = curdef->next;
3844	}
3845
3846	lr_ignore_rest (ldfile, `1`);
3847	break;
3848
3849	case tok_ifdef:
3850	case tok_ifndef:
3851	if (ignore_content)
3852	{
3853	lr_ignore_rest (ldfile, `0`);
3854	break;
3855	}
3856
3857	found_ifdef:
3858	arg = lr_token (ldfile, charmap, result, NULL, verbose);
3859	if (arg->tok != tok_ident)
3860	goto err_label;
3861	lr_ignore_rest (ldfile, `1`);
3862
3863	if (collate->else_action == else_none)
3864	{
3865	curdef = defined;
3866	while (curdef != NULL)
3867	if (strncmp (arg->val.str.startmb, curdef->str,
3868	arg->val.str.lenmb) == `0`
3869	&& curdef->str[arg->val.str.lenmb] == `'\0'`)
3870	break;
3871	else
3872	curdef = curdef->next;
3873
3874	if ((nowtok == tok_ifdef && curdef != NULL)
3875	\|\| (nowtok == tok_ifndef && curdef == NULL))
3876	{
3877	/ We have to use the if-branch. /
3878	collate->else_action = else_ignore;
3879	}
3880	else
3881	{
3882	/ We have to use the else-branch, if there is one. /
3883	nowtok = skip_to (ldfile, collate, charmap, `0`);
3884	if (nowtok == tok_else)
3885	collate->else_action = else_seen;
3886	else if (nowtok == tok_elifdef)
3887	{
3888	nowtok = tok_ifdef;
3889	goto found_ifdef;
3890	}
3891	else if (nowtok == tok_elifndef)
3892	{
3893	nowtok = tok_ifndef;
3894	goto found_ifdef;
3895	}
3896	else if (nowtok == tok_eof)
3897	goto seen_eof;
3898	else if (nowtok == tok_end)
3899	goto seen_end;
3900	}
3901	}
3902	else
3903	{
3904	/ XXX Should it really become necessary to support nested*
3905	preprocessor handling we will push the state here. /*
3906	lr_error (ldfile, _("%s: nested conditionals not supported"),
3907	"LC_COLLATE");
3908	nowtok = skip_to (ldfile, collate, charmap, `1`);
3909	if (nowtok == tok_eof)
3910	goto seen_eof;
3911	else if (nowtok == tok_end)
3912	goto seen_end;
3913	}
3914	break;
3915
3916	case tok_elifdef:
3917	case tok_elifndef:
3918	case tok_else:
3919	if (ignore_content)
3920	{
3921	lr_ignore_rest (ldfile, `0`);
3922	break;
3923	}
3924
3925	lr_ignore_rest (ldfile, `1`);
3926
3927	if (collate->else_action == else_ignore)
3928	{
3929	/ Ignore everything until the endif. /
3930	nowtok = skip_to (ldfile, collate, charmap, `1`);
3931	if (nowtok == tok_eof)
3932	goto seen_eof;
3933	else if (nowtok == tok_end)
3934	goto seen_end;
3935	}
3936	else
3937	{
3938	assert (collate->else_action == else_none);
3939	lr_error (ldfile, _("\
3940	%s: '%s' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE",
3941	nowtok == tok_else ? "else"
3942	: nowtok == tok_elifdef ? "elifdef" : "elifndef");
3943	}
3944	break;
3945
3946	case tok_endif:
3947	if (ignore_content)
3948	{
3949	lr_ignore_rest (ldfile, `0`);
3950	break;
3951	}
3952
3953	lr_ignore_rest (ldfile, `1`);
3954
3955	if (collate->else_action != else_ignore
3956	&& collate->else_action != else_seen)
3957	lr_error (ldfile, _("\
3958	%s: 'endif' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE");
3959
3960	/ XXX If we support nested preprocessor directives we pop*
3961	the state here. /*
3962	collate->else_action = else_none;
3963	break;
3964
3965	default:
3966	err_label:
3967	SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
3968	}
3969
3970	/ Prepare for the next round. /
3971	now = lr_token (ldfile, charmap, result, NULL, verbose);
3972	nowtok = now->tok;
3973	}
3974
3975	seen_eof:
3976	/ When we come here we reached the end of the file. /
3977	lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
3978	}
3979

Browse the source code of glibc_src_2.23/locale/programs/ld-collate.c