1/* Copyright (C) 1995-2016 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, see <http://www.gnu.org/licenses/>. */
17
18#ifdef HAVE_CONFIG_H
19# include <config.h>
20#endif
21
22#include <errno.h>
23#include <error.h>
24#include <stdlib.h>
25#include <wchar.h>
26#include <stdint.h>
27#include <sys/param.h>
28
29#include "localedef.h"
30#include "charmap.h"
31#include "localeinfo.h"
32#include "linereader.h"
33#include "locfile.h"
34#include "elem-hash.h"
35
36/* Uncomment the following line in the production version. */
37/* #define NDEBUG 1 */
38#include <assert.h>
39
40#define obstack_chunk_alloc malloc
41#define obstack_chunk_free free
42
/* Append the 32-bit value DATA to the object currently being built in
   OBSTACK.  The value is passed through maybe_swap_uint32 first.  The
   object size must satisfy LOCFILE_ALIGNED_P before the call (checked
   with assert).  */
static inline void
__attribute ((always_inline))
obstack_int32_grow (struct obstack *obstack, int32_t data)
{
  assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));

  int32_t swapped = maybe_swap_uint32 (data);

  if (sizeof (int32_t) != sizeof (int))
    {
      /* No int-sized shortcut is available; append the raw bytes.  */
      obstack_grow (obstack, &swapped, sizeof (int32_t));
      return;
    }

  obstack_int_grow (obstack, swapped);
}
54
/* Like obstack_int32_grow, but uses obstack_int_grow_fast when int32_t
   and int have the same size.  When the sizes differ the value is
   appended with the ordinary obstack_grow.  */
static inline void
__attribute ((always_inline))
obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
{
  assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));

  int32_t swapped = maybe_swap_uint32 (data);

  if (sizeof (int32_t) != sizeof (int))
    {
      obstack_grow (obstack, &swapped, sizeof (int32_t));
      return;
    }

  obstack_int_grow_fast (obstack, swapped);
}
66
/* Collating elements are defined further down; only pointers to them
   are needed here.  */
struct element_t;

/* One section of the collation definition.  Each section is a member
   of two singly linked lists at the same time.  */
struct section_list
{
  struct section_list *def_next;   /* Next entry in the known_sections list.  */
  struct section_list *next;       /* Next entry in the sections list.  */

  const char *name;                /* Name of the section.  */

  struct element_t *first;         /* First element of this section.  */
  struct element_t *last;          /* Last element of this section.  */

  enum coll_sort_rule *rules;      /* Sorting rules for this section.  */

  /* Index of the rule set in the appropriate section of the output
     file.  */
  int ruleidx;
};
88
struct element_t;

/* A counted array of pointers to collating elements.  */
struct element_list_t
{
  int cnt;                 /* Number of entries in W.  */
  struct element_t **w;    /* The entries themselves.  */
};
98
/* Description of one collating element.  */
struct element_t
{
  const char *name;

  /* Multibyte representation and its length.  */
  const char *mbs;
  size_t nmbs;
  /* Wide character representation and its length.  */
  const uint32_t *wcs;
  size_t nwcs;
  int *mborder;
  int wcorder;

  /* Bit mask with one bit per level; a bit is set if this element is
     used in that level.  Interesting for the singlebyte weight
     computation.

     XXX An unsigned int limits the number of levels to 32.  The type
     could be changed if that ever became necessary.  */
  unsigned int used_in_level;

  struct element_list_t *weights;

  /* Nonzero if this is a real character definition.  */
  int is_character;

  /* Position of the character in the sequence; used in range
     expressions.  */
  int mbseqorder;
  int wcseqorder;

  /* File and line of the definition.  */
  const char *file;
  size_t line;

  /* The section this element belongs to.  */
  struct section_list *section;

  /* Predecessor (LAST) and successor (NEXT) in the order list.  */
  struct element_t *last;
  struct element_t *next;

  /* Links for the multibyte output list.  */
  struct element_t *mbnext;
  struct element_t *mblast;

  /* Links for the wide character output list.  */
  struct element_t *wcnext;
  struct element_t *wclast;
};

/* Reserved pointer values used as special elements (placeholders for
   the ellipsis forms).  */
#define ELEMENT_ELLIPSIS2 ((struct element_t *) 1)
#define ELEMENT_ELLIPSIS3 ((struct element_t *) 2)
#define ELEMENT_ELLIPSIS4 ((struct element_t *) 3)
153
/* Description of one collating symbol.  */
struct symbol_t
{
  const char *name;

  /* Element marking this symbol's place in the order list.  */
  struct element_t *order;

  /* File and line of the definition.  */
  const char *file;
  size_t line;
};
166
167/* Sparse table of struct element_t *. */
168#define TABLE wchead_table
169#define ELEMENT struct element_t *
170#define DEFAULT NULL
171#define ITERATE
172#define NO_ADD_LOCALE
173#include "3level.h"
174
175/* Sparse table of int32_t. */
176#define TABLE collidx_table
177#define ELEMENT int32_t
178#define DEFAULT 0
179#include "3level.h"
180
181/* Sparse table of uint32_t. */
182#define TABLE collseq_table
183#define ELEMENT uint32_t
184#define DEFAULT ~((uint32_t) 0)
185#include "3level.h"
186
187
/* Simple name list for the preprocessor.  Each node is followed
   directly by the characters of the name, so a node must be allocated
   with room for the string after the fixed part.  */
struct name_list
{
  /* Next name in the list.  */
  struct name_list *next;
  /* The name itself.  Declared as a C99 flexible array member instead
     of the GNU zero-length array extension; size and layout of the
     struct are unchanged.  */
  char str[];
};
194
195
196/* The real definition of the struct for the LC_COLLATE locale. */
197struct locale_collate_t
198{
199 int col_weight_max;
200 int cur_weight_max;
201
202 /* List of known scripts. */
203 struct section_list *known_sections;
204 /* List of used sections. */
205 struct section_list *sections;
206 /* Current section using definition. */
207 struct section_list *current_section;
208 /* There always can be an unnamed section. */
209 struct section_list unnamed_section;
210 /* Flag whether the unnamed section has been defined. */
211 bool unnamed_section_defined;
212 /* To make handling of errors easier we have another section. */
213 struct section_list error_section;
214 /* Sometimes we are defining the values for collating symbols before
215 the first actual section. */
216 struct section_list symbol_section;
217
218 /* Start of the order list. */
219 struct element_t *start;
220
221 /* The undefined element. */
222 struct element_t undefined;
223
224 /* This is the cursor for `reorder_after' insertions. */
225 struct element_t *cursor;
226
227 /* This value is used when handling ellipsis. */
228 struct element_t ellipsis_weight;
229
230 /* Known collating elements. */
231 hash_table elem_table;
232
233 /* Known collating symbols. */
234 hash_table sym_table;
235
236 /* Known collation sequences. */
237 hash_table seq_table;
238
239 struct obstack mempool;
240
241 /* The LC_COLLATE category is a bit special as it is sometimes possible
242 that the definitions from more than one input file contains information.
243 Therefore we keep all relevant input in a list. */
244 struct locale_collate_t *next;
245
246 /* Arrays with heads of the list for each of the leading bytes in
247 the multibyte sequences. */
248 struct element_t *mbheads[256];
249
250 /* Arrays with heads of the list for each of the leading bytes in
251 the multibyte sequences. */
252 struct wchead_table wcheads;
253
254 /* The arrays with the collation sequence order. */
255 unsigned char mbseqorder[256];
256 struct collseq_table wcseqorder;
257
258 /* State of the preprocessor. */
259 enum
260 {
261 else_none = 0,
262 else_ignore,
263 else_seen
264 }
265 else_action;
266};
267
268
/* Globals shared while reading all LC_COLLATE category descriptions in
   all files.  NRULES is the number of sorting rules; zero means it has
   not been determined yet.  */
static uint32_t nrules = 0;

/* Head of the list of defined preprocessor symbols.  */
static struct name_list *defined = NULL;
275
276
/* Store the UTF-8 encoding of VAL in BUF and return the number of bytes
   written (1 to 6).  No terminating NUL byte is stored, so BUF must
   have room for the whole sequence.  Every value below 0x80, negative
   ones included, is stored unchanged as a single byte.  */
static inline int
__attribute ((always_inline))
utf8_encode (char *buf, int val)
{
  int retval;

  if (val < 0x80)
    {
      *buf = (char) val;
      retval = 1;
    }
  else
    {
      int step;

      /* A sequence of STEP bytes carries 5 * STEP + 1 payload bits.
         Pick the shortest length whose payload can hold VAL; anything
         that does not fit into five bytes uses six.  */
      for (step = 2; step < 6; ++step)
        if ((val & (~(uint32_t) 0 << (5 * step + 1))) == 0)
          break;
      retval = step;

      /* The lead byte starts with STEP one bits followed by a zero bit.
         Derive it from an unsigned constant: right-shifting the
         negative int ~0xff would be implementation-defined.  */
      *buf = (char) (unsigned char) (0xff00u >> step);

      /* Fill in the continuation bytes from the last one backwards,
         six payload bits each.  */
      --step;
      do
        {
          buf[step] = (char) (0x80 | (val & 0x3f));
          val >>= 6;
        }
      while (--step > 0);

      /* What is left of VAL belongs in the lead byte.  */
      *buf |= val;
    }

  return retval;
}
311
312
313static struct section_list *
314make_seclist_elem (struct locale_collate_t *collate, const char *string,
315 struct section_list *next)
316{
317 struct section_list *newp;
318
319 newp = (struct section_list *) obstack_alloc (&collate->mempool,
320 sizeof (*newp));
321 newp->next = next;
322 newp->name = string;
323 newp->first = NULL;
324 newp->last = NULL;
325
326 return newp;
327}
328
329
330static struct element_t *
331new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
332 const uint32_t *wcs, const char *name, size_t namelen,
333 int is_character)
334{
335 struct element_t *newp;
336
337 newp = (struct element_t *) obstack_alloc (&collate->mempool,
338 sizeof (*newp));
339 newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
340 name, namelen);
341 if (mbs != NULL)
342 {
343 newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
344 newp->nmbs = mbslen;
345 }
346 else
347 {
348 newp->mbs = NULL;
349 newp->nmbs = 0;
350 }
351 if (wcs != NULL)
352 {
353 size_t nwcs = wcslen ((wchar_t *) wcs);
354 uint32_t zero = 0;
355 /* Handle <U0000> as a single character. */
356 if (nwcs == 0)
357 nwcs = 1;
358 obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
359 obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
360 newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
361 newp->nwcs = nwcs;
362 }
363 else
364 {
365 newp->wcs = NULL;
366 newp->nwcs = 0;
367 }
368 newp->mborder = NULL;
369 newp->wcorder = 0;
370 newp->used_in_level = 0;
371 newp->is_character = is_character;
372
373 /* Will be assigned later. XXX */
374 newp->mbseqorder = 0;
375 newp->wcseqorder = 0;
376
377 /* Will be allocated later. */
378 newp->weights = NULL;
379
380 newp->file = NULL;
381 newp->line = 0;
382
383 newp->section = collate->current_section;
384
385 newp->last = NULL;
386 newp->next = NULL;
387
388 newp->mbnext = NULL;
389 newp->mblast = NULL;
390
391 newp->wcnext = NULL;
392 newp->wclast = NULL;
393
394 return newp;
395}
396
397
398static struct symbol_t *
399new_symbol (struct locale_collate_t *collate, const char *name, size_t len)
400{
401 struct symbol_t *newp;
402
403 newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp));
404
405 newp->name = obstack_copy0 (&collate->mempool, name, len);
406 newp->order = NULL;
407
408 newp->file = NULL;
409 newp->line = 0;
410
411 return newp;
412}
413
414
415/* Test whether this name is already defined somewhere. */
416static int
417check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate,
418 const struct charmap_t *charmap,
419 struct repertoire_t *repertoire, const char *symbol,
420 size_t symbol_len)
421{
422 void *ignore = NULL;
423
424 if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
425 {
426 lr_error (ldfile, _("`%.*s' already defined in charmap"),
427 (int) symbol_len, symbol);
428 return 1;
429 }
430
431 if (repertoire != NULL
432 && (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore)
433 == 0))
434 {
435 lr_error (ldfile, _("`%.*s' already defined in repertoire"),
436 (int) symbol_len, symbol);
437 return 1;
438 }
439
440 if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
441 {
442 lr_error (ldfile, _("`%.*s' already defined as collating symbol"),
443 (int) symbol_len, symbol);
444 return 1;
445 }
446
447 if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
448 {
449 lr_error (ldfile, _("`%.*s' already defined as collating element"),
450 (int) symbol_len, symbol);
451 return 1;
452 }
453
454 return 0;
455}
456
457
458/* Read the direction specification. */
459static void
460read_directions (struct linereader *ldfile, struct token *arg,
461 const struct charmap_t *charmap,
462 struct repertoire_t *repertoire, struct localedef_t *result)
463{
464 int cnt = 0;
465 int max = nrules ?: 10;
466 enum coll_sort_rule *rules = calloc (max, sizeof (*rules));
467 int warned = 0;
468 struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
469
470 while (1)
471 {
472 int valid = 0;
473
474 if (arg->tok == tok_forward)
475 {
476 if (rules[cnt] & sort_backward)
477 {
478 if (! warned)
479 {
480 lr_error (ldfile, _("\
481%s: `forward' and `backward' are mutually excluding each other"),
482 "LC_COLLATE");
483 warned = 1;
484 }
485 }
486 else if (rules[cnt] & sort_forward)
487 {
488 if (! warned)
489 {
490 lr_error (ldfile, _("\
491%s: `%s' mentioned more than once in definition of weight %d"),
492 "LC_COLLATE", "forward", cnt + 1);
493 }
494 }
495 else
496 rules[cnt] |= sort_forward;
497
498 valid = 1;
499 }
500 else if (arg->tok == tok_backward)
501 {
502 if (rules[cnt] & sort_forward)
503 {
504 if (! warned)
505 {
506 lr_error (ldfile, _("\
507%s: `forward' and `backward' are mutually excluding each other"),
508 "LC_COLLATE");
509 warned = 1;
510 }
511 }
512 else if (rules[cnt] & sort_backward)
513 {
514 if (! warned)
515 {
516 lr_error (ldfile, _("\
517%s: `%s' mentioned more than once in definition of weight %d"),
518 "LC_COLLATE", "backward", cnt + 1);
519 }
520 }
521 else
522 rules[cnt] |= sort_backward;
523
524 valid = 1;
525 }
526 else if (arg->tok == tok_position)
527 {
528 if (rules[cnt] & sort_position)
529 {
530 if (! warned)
531 {
532 lr_error (ldfile, _("\
533%s: `%s' mentioned more than once in definition of weight %d"),
534 "LC_COLLATE", "position", cnt + 1);
535 }
536 }
537 else
538 rules[cnt] |= sort_position;
539
540 valid = 1;
541 }
542
543 if (valid)
544 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
545
546 if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma
547 || arg->tok == tok_semicolon)
548 {
549 if (! valid && ! warned)
550 {
551 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
552 warned = 1;
553 }
554
555 /* See whether we have to increment the counter. */
556 if (arg->tok != tok_comma && rules[cnt] != 0)
557 {
558 /* Add the default `forward' if we have seen only `position'. */
559 if (rules[cnt] == sort_position)
560 rules[cnt] = sort_position | sort_forward;
561
562 ++cnt;
563 }
564
565 if (arg->tok == tok_eof || arg->tok == tok_eol)
566 /* End of line or file, so we exit the loop. */
567 break;
568
569 if (nrules == 0)
570 {
571 /* See whether we have enough room in the array. */
572 if (cnt == max)
573 {
574 max += 10;
575 rules = (enum coll_sort_rule *) xrealloc (rules,
576 max
577 * sizeof (*rules));
578 memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
579 }
580 }
581 else
582 {
583 if (cnt == nrules)
584 {
585 /* There must not be any more rule. */
586 if (! warned)
587 {
588 lr_error (ldfile, _("\
589%s: too many rules; first entry only had %d"),
590 "LC_COLLATE", nrules);
591 warned = 1;
592 }
593
594 lr_ignore_rest (ldfile, 0);
595 break;
596 }
597 }
598 }
599 else
600 {
601 if (! warned)
602 {
603 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
604 warned = 1;
605 }
606 }
607
608 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
609 }
610
611 if (nrules == 0)
612 {
613 /* Now we know how many rules we have. */
614 nrules = cnt;
615 rules = (enum coll_sort_rule *) xrealloc (rules,
616 nrules * sizeof (*rules));
617 }
618 else
619 {
620 if (cnt < nrules)
621 {
622 /* Not enough rules in this specification. */
623 if (! warned)
624 lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
625
626 do
627 rules[cnt] = sort_forward;
628 while (++cnt < nrules);
629 }
630 }
631
632 collate->current_section->rules = rules;
633}
634
635
636static struct element_t *
637find_element (struct linereader *ldfile, struct locale_collate_t *collate,
638 const char *str, size_t len)
639{
640 void *result = NULL;
641
642 /* Search for the entries among the collation sequences already define. */
643 if (find_entry (&collate->seq_table, str, len, &result) != 0)
644 {
645 /* Nope, not define yet. So we see whether it is a
646 collation symbol. */
647 void *ptr;
648
649 if (find_entry (&collate->sym_table, str, len, &ptr) == 0)
650 {
651 /* It's a collation symbol. */
652 struct symbol_t *sym = (struct symbol_t *) ptr;
653 result = sym->order;
654
655 if (result == NULL)
656 result = sym->order = new_element (collate, NULL, 0, NULL,
657 NULL, 0, 0);
658 }
659 else if (find_entry (&collate->elem_table, str, len, &result) != 0)
660 {
661 /* It's also no collation element. So it is a character
662 element defined later. */
663 result = new_element (collate, NULL, 0, NULL, str, len, 1);
664 /* Insert it into the sequence table. */
665 insert_entry (&collate->seq_table, str, len, result);
666 }
667 }
668
669 return (struct element_t *) result;
670}
671
672
673static void
674unlink_element (struct locale_collate_t *collate)
675{
676 if (collate->cursor == collate->start)
677 {
678 assert (collate->cursor->next == NULL);
679 assert (collate->cursor->last == NULL);
680 collate->cursor = NULL;
681 }
682 else
683 {
684 if (collate->cursor->next != NULL)
685 collate->cursor->next->last = collate->cursor->last;
686 if (collate->cursor->last != NULL)
687 collate->cursor->last->next = collate->cursor->next;
688 collate->cursor = collate->cursor->last;
689 }
690}
691
692
693static void
694insert_weights (struct linereader *ldfile, struct element_t *elem,
695 const struct charmap_t *charmap,
696 struct repertoire_t *repertoire, struct localedef_t *result,
697 enum token_t ellipsis)
698{
699 int weight_cnt;
700 struct token *arg;
701 struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
702
703 /* Initialize all the fields. */
704 elem->file = ldfile->fname;
705 elem->line = ldfile->lineno;
706
707 elem->last = collate->cursor;
708 elem->next = collate->cursor ? collate->cursor->next : NULL;
709 if (collate->cursor != NULL && collate->cursor->next != NULL)
710 collate->cursor->next->last = elem;
711 if (collate->cursor != NULL)
712 collate->cursor->next = elem;
713 if (collate->start == NULL)
714 {
715 assert (collate->cursor == NULL);
716 collate->start = elem;
717 }
718
719 elem->section = collate->current_section;
720
721 if (collate->current_section->first == NULL)
722 collate->current_section->first = elem;
723 if (collate->current_section->last == collate->cursor)
724 collate->current_section->last = elem;
725
726 collate->cursor = elem;
727
728 elem->weights = (struct element_list_t *)
729 obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
730 memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
731
732 weight_cnt = 0;
733
734 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
735 do
736 {
737 if (arg->tok == tok_eof || arg->tok == tok_eol)
738 break;
739
740 if (arg->tok == tok_ignore)
741 {
742 /* The weight for this level has to be ignored. We use the
743 null pointer to indicate this. */
744 elem->weights[weight_cnt].w = (struct element_t **)
745 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
746 elem->weights[weight_cnt].w[0] = NULL;
747 elem->weights[weight_cnt].cnt = 1;
748 }
749 else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
750 {
751 char ucs4str[10];
752 struct element_t *val;
753 char *symstr;
754 size_t symlen;
755
756 if (arg->tok == tok_bsymbol)
757 {
758 symstr = arg->val.str.startmb;
759 symlen = arg->val.str.lenmb;
760 }
761 else
762 {
763 snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
764 symstr = ucs4str;
765 symlen = 9;
766 }
767
768 val = find_element (ldfile, collate, symstr, symlen);
769 if (val == NULL)
770 break;
771
772 elem->weights[weight_cnt].w = (struct element_t **)
773 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
774 elem->weights[weight_cnt].w[0] = val;
775 elem->weights[weight_cnt].cnt = 1;
776 }
777 else if (arg->tok == tok_string)
778 {
779 /* Split the string up in the individual characters and put
780 the element definitions in the list. */
781 const char *cp = arg->val.str.startmb;
782 int cnt = 0;
783 struct element_t *charelem;
784 struct element_t **weights = NULL;
785 int max = 0;
786
787 if (*cp == '\0')
788 {
789 lr_error (ldfile, _("%s: empty weight string not allowed"),
790 "LC_COLLATE");
791 lr_ignore_rest (ldfile, 0);
792 break;
793 }
794
795 do
796 {
797 if (*cp == '<')
798 {
799 /* Ahh, it's a bsymbol or an UCS4 value. If it's
800 the latter we have to unify the name. */
801 const char *startp = ++cp;
802 size_t len;
803
804 while (*cp != '>')
805 {
806 if (*cp == ldfile->escape_char)
807 ++cp;
808 if (*cp == '\0')
809 /* It's a syntax error. */
810 goto syntax;
811
812 ++cp;
813 }
814
815 if (cp - startp == 5 && startp[0] == 'U'
816 && isxdigit (startp[1]) && isxdigit (startp[2])
817 && isxdigit (startp[3]) && isxdigit (startp[4]))
818 {
819 unsigned int ucs4 = strtoul (startp + 1, NULL, 16);
820 char *newstr;
821
822 newstr = (char *) xmalloc (10);
823 snprintf (newstr, 10, "U%08X", ucs4);
824 startp = newstr;
825
826 len = 9;
827 }
828 else
829 len = cp - startp;
830
831 charelem = find_element (ldfile, collate, startp, len);
832 ++cp;
833 }
834 else
835 {
836 /* People really shouldn't use characters directly in
837 the string. Especially since it's not really clear
838 what this means. We interpret all characters in the
839 string as if that would be bsymbols. Otherwise we
840 would have to match back to bsymbols somehow and this
841 is normally not what people normally expect. */
842 charelem = find_element (ldfile, collate, cp++, 1);
843 }
844
845 if (charelem == NULL)
846 {
847 /* We ignore the rest of the line. */
848 lr_ignore_rest (ldfile, 0);
849 break;
850 }
851
852 /* Add the pointer. */
853 if (cnt >= max)
854 {
855 struct element_t **newp;
856 max += 10;
857 newp = (struct element_t **)
858 alloca (max * sizeof (struct element_t *));
859 memcpy (newp, weights, cnt * sizeof (struct element_t *));
860 weights = newp;
861 }
862 weights[cnt++] = charelem;
863 }
864 while (*cp != '\0');
865
866 /* Now store the information. */
867 elem->weights[weight_cnt].w = (struct element_t **)
868 obstack_alloc (&collate->mempool,
869 cnt * sizeof (struct element_t *));
870 memcpy (elem->weights[weight_cnt].w, weights,
871 cnt * sizeof (struct element_t *));
872 elem->weights[weight_cnt].cnt = cnt;
873
874 /* We don't need the string anymore. */
875 free (arg->val.str.startmb);
876 }
877 else if (ellipsis != tok_none
878 && (arg->tok == tok_ellipsis2
879 || arg->tok == tok_ellipsis3
880 || arg->tok == tok_ellipsis4))
881 {
882 /* It must be the same ellipsis as used in the initial column. */
883 if (arg->tok != ellipsis)
884 lr_error (ldfile, _("\
885%s: weights must use the same ellipsis symbol as the name"),
886 "LC_COLLATE");
887
888 /* The weight for this level will depend on the element
889 iterating over the range. Put a placeholder. */
890 elem->weights[weight_cnt].w = (struct element_t **)
891 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
892 elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
893 elem->weights[weight_cnt].cnt = 1;
894 }
895 else
896 {
897 syntax:
898 /* It's a syntax error. */
899 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
900 lr_ignore_rest (ldfile, 0);
901 break;
902 }
903
904 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
905 /* This better should be the end of the line or a semicolon. */
906 if (arg->tok == tok_semicolon)
907 /* OK, ignore this and read the next token. */
908 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
909 else if (arg->tok != tok_eof && arg->tok != tok_eol)
910 {
911 /* It's a syntax error. */
912 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
913 lr_ignore_rest (ldfile, 0);
914 break;
915 }
916 }
917 while (++weight_cnt < nrules);
918
919 if (weight_cnt < nrules)
920 {
921 /* This means the rest of the line uses the current element as
922 the weight. */
923 do
924 {
925 elem->weights[weight_cnt].w = (struct element_t **)
926 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
927 if (ellipsis == tok_none)
928 elem->weights[weight_cnt].w[0] = elem;
929 else
930 elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
931 elem->weights[weight_cnt].cnt = 1;
932 }
933 while (++weight_cnt < nrules);
934 }
935 else
936 {
937 if (arg->tok == tok_ignore || arg->tok == tok_bsymbol)
938 {
939 /* Too many rule values. */
940 lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
941 lr_ignore_rest (ldfile, 0);
942 }
943 else
944 lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
945 }
946}
947
948
949static int
950insert_value (struct linereader *ldfile, const char *symstr, size_t symlen,
951 const struct charmap_t *charmap, struct repertoire_t *repertoire,
952 struct localedef_t *result)
953{
954 /* First find out what kind of symbol this is. */
955 struct charseq *seq;
956 uint32_t wc;
957 struct element_t *elem = NULL;
958 struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
959
960 /* Try to find the character in the charmap. */
961 seq = charmap_find_value (charmap, symstr, symlen);
962
963 /* Determine the wide character. */
964 if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
965 {
966 wc = repertoire_find_value (repertoire, symstr, symlen);
967 if (seq != NULL)
968 seq->ucs4 = wc;
969 }
970 else
971 wc = seq->ucs4;
972
973 if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
974 {
975 /* It's no character, so look through the collation elements and
976 symbol list. */
977 void *ptr = elem;
978 if (find_entry (&collate->elem_table, symstr, symlen, &ptr) != 0)
979 {
980 void *result;
981 struct symbol_t *sym = NULL;
982
983 /* It's also collation element. Therefore it's either a
984 collating symbol or it's a character which is not
985 supported by the character set. In the later case we
986 simply create a dummy entry. */
987 if (find_entry (&collate->sym_table, symstr, symlen, &result) == 0)
988 {
989 /* It's a collation symbol. */
990 sym = (struct symbol_t *) result;
991
992 elem = sym->order;
993 }
994
995 if (elem == NULL)
996 {
997 elem = new_element (collate, NULL, 0, NULL, symstr, symlen, 0);
998
999 if (sym != NULL)
1000 sym->order = elem;
1001 else
1002 /* Enter a fake element in the sequence table. This
1003 won't cause anything in the output since there is
1004 no multibyte or wide character associated with
1005 it. */
1006 insert_entry (&collate->seq_table, symstr, symlen, elem);
1007 }
1008 }
1009 else
1010 /* Copy the result back. */
1011 elem = ptr;
1012 }
1013 else
1014 {
1015 /* Otherwise the symbols stands for a character. */
1016 void *ptr = elem;
1017 if (find_entry (&collate->seq_table, symstr, symlen, &ptr) != 0)
1018 {
1019 uint32_t wcs[2] = { wc, 0 };
1020
1021 /* We have to allocate an entry. */
1022 elem = new_element (collate,
1023 seq != NULL ? (char *) seq->bytes : NULL,
1024 seq != NULL ? seq->nbytes : 0,
1025 wc == ILLEGAL_CHAR_VALUE ? NULL : wcs,
1026 symstr, symlen, 1);
1027
1028 /* And add it to the table. */
1029 if (insert_entry (&collate->seq_table, symstr, symlen, elem) != 0)
1030 /* This cannot happen. */
1031 assert (! "Internal error");
1032 }
1033 else
1034 {
1035 /* Copy the result back. */
1036 elem = ptr;
1037
1038 /* Maybe the character was used before the definition. In this case
1039 we have to insert the byte sequences now. */
1040 if (elem->mbs == NULL && seq != NULL)
1041 {
1042 elem->mbs = obstack_copy0 (&collate->mempool,
1043 seq->bytes, seq->nbytes);
1044 elem->nmbs = seq->nbytes;
1045 }
1046
1047 if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
1048 {
1049 uint32_t wcs[2] = { wc, 0 };
1050
1051 elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
1052 elem->nwcs = 1;
1053 }
1054 }
1055 }
1056
1057 /* Test whether this element is not already in the list. */
1058 if (elem->next != NULL || elem == collate->cursor)
1059 {
1060 lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
1061 (int) symlen, symstr, elem->file, elem->line);
1062 lr_ignore_rest (ldfile, 0);
1063 return 1;
1064 }
1065
1066 insert_weights (ldfile, elem, charmap, repertoire, result, tok_none);
1067
1068 return 0;
1069}
1070
1071
1072static void
1073handle_ellipsis (struct linereader *ldfile, const char *symstr, size_t symlen,
1074 enum token_t ellipsis, const struct charmap_t *charmap,
1075 struct repertoire_t *repertoire,
1076 struct localedef_t *result)
1077{
1078 struct element_t *startp;
1079 struct element_t *endp;
1080 struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
1081
1082 /* Unlink the entry added for the ellipsis. */
1083 unlink_element (collate);
1084 startp = collate->cursor;
1085
1086 /* Process and add the end-entry. */
1087 if (symstr != NULL
1088 && insert_value (ldfile, symstr, symlen, charmap, repertoire, result))
1089 /* Something went wrong with inserting the to-value. This means
1090 we cannot process the ellipsis. */
1091 return;
1092
1093 /* Reset the cursor. */
1094 collate->cursor = startp;
1095
1096 /* Now we have to handle many different situations:
1097 - we have to distinguish between the three different ellipsis forms
1098 - the is the ellipsis at the beginning, in the middle, or at the end.
1099 */
1100 endp = collate->cursor->next;
1101 assert (symstr == NULL || endp != NULL);
1102
1103 /* XXX The following is probably very wrong since also collating symbols
1104 can appear in ranges. But do we want/can refine the test for that? */
1105#if 0
1106 /* Both, the start and the end symbol, must stand for characters. */
1107 if ((startp != NULL && (startp->name == NULL || ! startp->is_character))
1108 || (endp != NULL && (endp->name == NULL|| ! endp->is_character)))
1109 {
1110 lr_error (ldfile, _("\
1111%s: the start and the end symbol of a range must stand for characters"),
1112 "LC_COLLATE");
1113 return;
1114 }
1115#endif
1116
1117 if (ellipsis == tok_ellipsis3)
1118 {
1119 /* One requirement we make here: the length of the byte
1120 sequences for the first and end character must be the same.
1121 This is mainly to prevent unwanted effects and this is often
1122 not what is wanted. */
1123 size_t len = (startp->mbs != NULL ? startp->nmbs
1124 : (endp->mbs != NULL ? endp->nmbs : 0));
1125 char mbcnt[len + 1];
1126 char mbend[len + 1];
1127
1128 /* Well, this should be caught somewhere else already. Just to
1129 make sure. */
1130 assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0);
1131 assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0);
1132
1133 if (startp != NULL && endp != NULL
1134 && startp->mbs != NULL && endp->mbs != NULL
1135 && startp->nmbs != endp->nmbs)
1136 {
1137 lr_error (ldfile, _("\
1138%s: byte sequences of first and last character must have the same length"),
1139 "LC_COLLATE");
1140 return;
1141 }
1142
1143 /* Determine whether we have to generate multibyte sequences. */
1144 if ((startp == NULL || startp->mbs != NULL)
1145 && (endp == NULL || endp->mbs != NULL))
1146 {
1147 int cnt;
1148 int ret;
1149
1150 /* Prepare the beginning byte sequence. This is either from the
1151 beginning byte sequence or it is all nulls if it was an
1152 initial ellipsis. */
1153 if (startp == NULL || startp->mbs == NULL)
1154 memset (mbcnt, '\0', len);
1155 else
1156 {
1157 memcpy (mbcnt, startp->mbs, len);
1158
1159 /* And increment it so that the value is the first one we will
1160 try to insert. */
1161 for (cnt = len - 1; cnt >= 0; --cnt)
1162 if (++mbcnt[cnt] != '\0')
1163 break;
1164 }
1165 mbcnt[len] = '\0';
1166
1167 /* And the end sequence. */
1168 if (endp == NULL || endp->mbs == NULL)
1169 memset (mbend, '\0', len);
1170 else
1171 memcpy (mbend, endp->mbs, len);
1172 mbend[len] = '\0';
1173
1174 /* Test whether we have a correct range. */
1175 ret = memcmp (mbcnt, mbend, len);
1176 if (ret >= 0)
1177 {
1178 if (ret > 0)
1179 lr_error (ldfile, _("%s: byte sequence of first character of \
1180range is not lower than that of the last character"), "LC_COLLATE");
1181 return;
1182 }
1183
1184 /* Generate the byte sequences data. */
1185 while (1)
1186 {
1187 struct charseq *seq;
1188
1189 /* Quite a bit of work ahead. We have to find the character
1190 definition for the byte sequence and then determine the
1191 wide character belonging to it. */
1192 seq = charmap_find_symbol (charmap, mbcnt, len);
1193 if (seq != NULL)
1194 {
1195 struct element_t *elem;
1196 size_t namelen;
1197
1198 /* I don't think this can ever happen. */
1199 assert (seq->name != NULL);
1200 namelen = strlen (seq->name);
1201
1202 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1203 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1204 namelen);
1205
1206 /* Now we are ready to insert the new value in the
1207 sequence. Find out whether the element is
1208 already known. */
1209 void *ptr;
1210 if (find_entry (&collate->seq_table, seq->name, namelen,
1211 &ptr) != 0)
1212 {
1213 uint32_t wcs[2] = { seq->ucs4, 0 };
1214
1215 /* We have to allocate an entry. */
1216 elem = new_element (collate, mbcnt, len,
1217 seq->ucs4 == ILLEGAL_CHAR_VALUE
1218 ? NULL : wcs, seq->name,
1219 namelen, 1);
1220
1221 /* And add it to the table. */
1222 if (insert_entry (&collate->seq_table, seq->name,
1223 namelen, elem) != 0)
1224 /* This cannot happen. */
1225 assert (! "Internal error");
1226 }
1227 else
1228 /* Copy the result. */
1229 elem = ptr;
1230
1231 /* Test whether this element is not already in the list. */
1232 if (elem->next != NULL || (collate->cursor != NULL
1233 && elem->next == collate->cursor))
1234 {
1235 lr_error (ldfile, _("\
1236order for `%.*s' already defined at %s:%Zu"),
1237 (int) namelen, seq->name,
1238 elem->file, elem->line);
1239 goto increment;
1240 }
1241
1242 /* Enqueue the new element. */
1243 elem->last = collate->cursor;
1244 if (collate->cursor == NULL)
1245 elem->next = NULL;
1246 else
1247 {
1248 elem->next = collate->cursor->next;
1249 elem->last->next = elem;
1250 if (elem->next != NULL)
1251 elem->next->last = elem;
1252 }
1253 if (collate->start == NULL)
1254 {
1255 assert (collate->cursor == NULL);
1256 collate->start = elem;
1257 }
1258 collate->cursor = elem;
1259
1260 /* Add the weight value. We take them from the
1261 `ellipsis_weights' member of `collate'. */
1262 elem->weights = (struct element_list_t *)
1263 obstack_alloc (&collate->mempool,
1264 nrules * sizeof (struct element_list_t));
1265 for (cnt = 0; cnt < nrules; ++cnt)
1266 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1267 && (collate->ellipsis_weight.weights[cnt].w[0]
1268 == ELEMENT_ELLIPSIS2))
1269 {
1270 elem->weights[cnt].w = (struct element_t **)
1271 obstack_alloc (&collate->mempool,
1272 sizeof (struct element_t *));
1273 elem->weights[cnt].w[0] = elem;
1274 elem->weights[cnt].cnt = 1;
1275 }
1276 else
1277 {
1278 /* Simply use the weight from `ellipsis_weight'. */
1279 elem->weights[cnt].w =
1280 collate->ellipsis_weight.weights[cnt].w;
1281 elem->weights[cnt].cnt =
1282 collate->ellipsis_weight.weights[cnt].cnt;
1283 }
1284 }
1285
1286 /* Increment for the next round. */
1287 increment:
1288 for (cnt = len - 1; cnt >= 0; --cnt)
1289 if (++mbcnt[cnt] != '\0')
1290 break;
1291
1292 /* Find out whether this was all. */
1293 if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0)
1294 /* Yep, that's all. */
1295 break;
1296 }
1297 }
1298 }
1299 else
1300 {
1301 /* For symbolic range we naturally must have a beginning and an
1302 end specified by the user. */
1303 if (startp == NULL)
1304 lr_error (ldfile, _("\
1305%s: symbolic range ellipsis must not directly follow `order_start'"),
1306 "LC_COLLATE");
1307 else if (endp == NULL)
1308 lr_error (ldfile, _("\
1309%s: symbolic range ellipsis must not be directly followed by `order_end'"),
1310 "LC_COLLATE");
1311 else
1312 {
1313 /* Determine the range. To do so we have to determine the
1314 common prefix of the both names and then the numeric
1315 values of both ends. */
1316 size_t lenfrom = strlen (startp->name);
1317 size_t lento = strlen (endp->name);
1318 char buf[lento + 1];
1319 int preflen = 0;
1320 long int from;
1321 long int to;
1322 char *cp;
1323 int base = ellipsis == tok_ellipsis2 ? 16 : 10;
1324
1325 if (lenfrom != lento)
1326 {
1327 invalid_range:
1328 lr_error (ldfile, _("\
1329`%s' and `%.*s' are not valid names for symbolic range"),
1330 startp->name, (int) lento, endp->name);
1331 return;
1332 }
1333
1334 while (startp->name[preflen] == endp->name[preflen])
1335 if (startp->name[preflen] == '\0')
1336 /* Nothing to be done. The start and end point are identical
1337 and while inserting the end point we have already given
1338 the user an error message. */
1339 return;
1340 else
1341 ++preflen;
1342
1343 errno = 0;
1344 from = strtol (startp->name + preflen, &cp, base);
1345 if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0')
1346 goto invalid_range;
1347
1348 errno = 0;
1349 to = strtol (endp->name + preflen, &cp, base);
1350 if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0')
1351 goto invalid_range;
1352
1353 /* Copy the prefix. */
1354 memcpy (buf, startp->name, preflen);
1355
1356 /* Loop over all values. */
1357 for (++from; from < to; ++from)
1358 {
1359 struct element_t *elem = NULL;
1360 struct charseq *seq;
1361 uint32_t wc;
1362 int cnt;
1363
1364 /* Generate the name. */
1365 sprintf (buf + preflen, base == 10 ? "%0*ld" : "%0*lX",
1366 (int) (lenfrom - preflen), from);
1367
1368 /* Look whether this name is already defined. */
1369 void *ptr;
1370 if (find_entry (&collate->seq_table, buf, symlen, &ptr) == 0)
1371 {
1372 /* Copy back the result. */
1373 elem = ptr;
1374
1375 if (elem->next != NULL || (collate->cursor != NULL
1376 && elem->next == collate->cursor))
1377 {
1378 lr_error (ldfile, _("\
1379%s: order for `%.*s' already defined at %s:%Zu"),
1380 "LC_COLLATE", (int) lenfrom, buf,
1381 elem->file, elem->line);
1382 continue;
1383 }
1384
1385 if (elem->name == NULL)
1386 {
1387 lr_error (ldfile, _("%s: `%s' must be a character"),
1388 "LC_COLLATE", buf);
1389 continue;
1390 }
1391 }
1392
1393 if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL))
1394 {
1395 /* Search for a character of this name. */
1396 seq = charmap_find_value (charmap, buf, lenfrom);
1397 if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1398 {
1399 wc = repertoire_find_value (repertoire, buf, lenfrom);
1400
1401 if (seq != NULL)
1402 seq->ucs4 = wc;
1403 }
1404 else
1405 wc = seq->ucs4;
1406
1407 if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
1408 /* We don't know anything about a character with this
1409 name. XXX Should we warn? */
1410 continue;
1411
1412 if (elem == NULL)
1413 {
1414 uint32_t wcs[2] = { wc, 0 };
1415
1416 /* We have to allocate an entry. */
1417 elem = new_element (collate,
1418 seq != NULL
1419 ? (char *) seq->bytes : NULL,
1420 seq != NULL ? seq->nbytes : 0,
1421 wc == ILLEGAL_CHAR_VALUE
1422 ? NULL : wcs, buf, lenfrom, 1);
1423 }
1424 else
1425 {
1426 /* Update the element. */
1427 if (seq != NULL)
1428 {
1429 elem->mbs = obstack_copy0 (&collate->mempool,
1430 seq->bytes, seq->nbytes);
1431 elem->nmbs = seq->nbytes;
1432 }
1433
1434 if (wc != ILLEGAL_CHAR_VALUE)
1435 {
1436 uint32_t zero = 0;
1437
1438 obstack_grow (&collate->mempool,
1439 &wc, sizeof (uint32_t));
1440 obstack_grow (&collate->mempool,
1441 &zero, sizeof (uint32_t));
1442 elem->wcs = obstack_finish (&collate->mempool);
1443 elem->nwcs = 1;
1444 }
1445 }
1446
1447 elem->file = ldfile->fname;
1448 elem->line = ldfile->lineno;
1449 elem->section = collate->current_section;
1450 }
1451
1452 /* Enqueue the new element. */
1453 elem->last = collate->cursor;
1454 elem->next = collate->cursor->next;
1455 elem->last->next = elem;
1456 if (elem->next != NULL)
1457 elem->next->last = elem;
1458 collate->cursor = elem;
1459
1460 /* Now add the weights. They come from the `ellipsis_weights'
1461 member of `collate'. */
1462 elem->weights = (struct element_list_t *)
1463 obstack_alloc (&collate->mempool,
1464 nrules * sizeof (struct element_list_t));
1465 for (cnt = 0; cnt < nrules; ++cnt)
1466 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1467 && (collate->ellipsis_weight.weights[cnt].w[0]
1468 == ELEMENT_ELLIPSIS2))
1469 {
1470 elem->weights[cnt].w = (struct element_t **)
1471 obstack_alloc (&collate->mempool,
1472 sizeof (struct element_t *));
1473 elem->weights[cnt].w[0] = elem;
1474 elem->weights[cnt].cnt = 1;
1475 }
1476 else
1477 {
                        /* Simply use the weight from `ellipsis_weight'.  */
1479 elem->weights[cnt].w =
1480 collate->ellipsis_weight.weights[cnt].w;
1481 elem->weights[cnt].cnt =
1482 collate->ellipsis_weight.weights[cnt].cnt;
1483 }
1484 }
1485 }
1486 }
1487}
1488
1489
1490static void
1491collate_startup (struct linereader *ldfile, struct localedef_t *locale,
1492 struct localedef_t *copy_locale, int ignore_content)
1493{
1494 if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
1495 {
1496 struct locale_collate_t *collate;
1497
1498 if (copy_locale == NULL)
1499 {
1500 collate = locale->categories[LC_COLLATE].collate =
1501 (struct locale_collate_t *)
1502 xcalloc (1, sizeof (struct locale_collate_t));
1503
1504 /* Init the various data structures. */
1505 init_hash (&collate->elem_table, 100);
1506 init_hash (&collate->sym_table, 100);
1507 init_hash (&collate->seq_table, 500);
1508 obstack_init (&collate->mempool);
1509
1510 collate->col_weight_max = -1;
1511 }
1512 else
1513 /* Reuse the copy_locale's data structures. */
1514 collate = locale->categories[LC_COLLATE].collate =
1515 copy_locale->categories[LC_COLLATE].collate;
1516 }
1517
1518 ldfile->translate_strings = 0;
1519 ldfile->return_widestr = 0;
1520}
1521
1522
1523void
1524collate_finish (struct localedef_t *locale, const struct charmap_t *charmap)
1525{
1526 /* Now is the time when we can assign the individual collation
1527 values for all the symbols. We have possibly different values
1528 for the wide- and the multibyte-character symbols. This is done
1529 since it might make a difference in the encoding if there is in
1530 some cases no multibyte-character but there are wide-characters.
1531 (The other way around it is not important since theencoded
1532 collation value in the wide-character case is 32 bits wide and
1533 therefore requires no encoding).
1534
1535 The lowest collation value assigned is 2. Zero is reserved for
1536 the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1537 functions and 1 is used to separate the individual passes for the
1538 different rules.
1539
1540 We also have to construct is list with all the bytes/words which
1541 can come first in a sequence, followed by all the elements which
1542 also start with this byte/word. The order is reverse which has
1543 among others the important effect that longer strings are located
1544 first in the list. This is required for the output data since
1545 the algorithm used in `strcoll' etc depends on this.
1546
1547 The multibyte case is easy. We simply sort into an array with
1548 256 elements. */
1549 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1550 int mbact[nrules];
1551 int wcact;
1552 int mbseqact;
1553 int wcseqact;
1554 struct element_t *runp;
1555 int i;
1556 int need_undefined = 0;
1557 struct section_list *sect;
1558 int ruleidx;
1559 int nr_wide_elems = 0;
1560
1561 if (collate == NULL)
1562 {
1563 /* No data, no check. */
1564 if (! be_quiet)
1565 WITH_CUR_LOCALE (error (0, 0, _("No definition for %s category found"),
1566 "LC_COLLATE"));
1567 return;
1568 }
1569
1570 /* If this assertion is hit change the type in `element_t'. */
1571 assert (nrules <= sizeof (runp->used_in_level) * 8);
1572
1573 /* Make sure that the `position' rule is used either in all sections
1574 or in none. */
1575 for (i = 0; i < nrules; ++i)
1576 for (sect = collate->sections; sect != NULL; sect = sect->next)
1577 if (sect != collate->current_section
1578 && sect->rules != NULL
1579 && ((sect->rules[i] & sort_position)
1580 != (collate->current_section->rules[i] & sort_position)))
1581 {
1582 WITH_CUR_LOCALE (error (0, 0, _("\
1583%s: `position' must be used for a specific level in all sections or none"),
1584 "LC_COLLATE"));
1585 break;
1586 }
1587
1588 /* Find out which elements are used at which level. At the same
1589 time we find out whether we have any undefined symbols. */
1590 runp = collate->start;
1591 while (runp != NULL)
1592 {
1593 if (runp->mbs != NULL)
1594 {
1595 for (i = 0; i < nrules; ++i)
1596 {
1597 int j;
1598
1599 for (j = 0; j < runp->weights[i].cnt; ++j)
1600 /* A NULL pointer as the weight means IGNORE. */
1601 if (runp->weights[i].w[j] != NULL)
1602 {
1603 if (runp->weights[i].w[j]->weights == NULL)
1604 {
1605 WITH_CUR_LOCALE (error_at_line (0, 0, runp->file,
1606 runp->line,
1607 _("symbol `%s' not defined"),
1608 runp->weights[i].w[j]->name));
1609
1610 need_undefined = 1;
1611 runp->weights[i].w[j] = &collate->undefined;
1612 }
1613 else
1614 /* Set the bit for the level. */
1615 runp->weights[i].w[j]->used_in_level |= 1 << i;
1616 }
1617 }
1618 }
1619
1620 /* Up to the next entry. */
1621 runp = runp->next;
1622 }
1623
1624 /* Walk through the list of defined sequences and assign weights. Also
1625 create the data structure which will allow generating the single byte
1626 character based tables.
1627
1628 Since at each time only the weights for each of the rules are
1629 only compared to other weights for this rule it is possible to
1630 assign more compact weight values than simply counting all
1631 weights in sequence. We can assign weights from 3, one for each
1632 rule individually and only for those elements, which are actually
1633 used for this rule.
1634
1635 Why is this important? It is not for the wide char table. But
1636 it is for the singlebyte output since here larger numbers have to
1637 be encoded to make it possible to emit the value as a byte
1638 string. */
1639 for (i = 0; i < nrules; ++i)
1640 mbact[i] = 2;
1641 wcact = 2;
1642 mbseqact = 0;
1643 wcseqact = 0;
1644 runp = collate->start;
1645 while (runp != NULL)
1646 {
1647 /* Determine the order. */
1648 if (runp->used_in_level != 0)
1649 {
1650 runp->mborder = (int *) obstack_alloc (&collate->mempool,
1651 nrules * sizeof (int));
1652
1653 for (i = 0; i < nrules; ++i)
1654 if ((runp->used_in_level & (1 << i)) != 0)
1655 runp->mborder[i] = mbact[i]++;
1656 else
1657 runp->mborder[i] = 0;
1658 }
1659
1660 if (runp->mbs != NULL)
1661 {
1662 struct element_t **eptr;
1663 struct element_t *lastp = NULL;
1664
1665 /* Find the point where to insert in the list. */
1666 eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]];
1667 while (*eptr != NULL)
1668 {
1669 if ((*eptr)->nmbs < runp->nmbs)
1670 break;
1671
1672 if ((*eptr)->nmbs == runp->nmbs)
1673 {
1674 int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
1675
1676 if (c == 0)
1677 {
1678 /* This should not happen. It means that we have
1679 to symbols with the same byte sequence. It is
1680 of course an error. */
1681 WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
1682 (*eptr)->line,
1683 _("\
1684symbol `%s' has the same encoding as"), (*eptr)->name);
1685 error_at_line (0, 0, runp->file,
1686 runp->line,
1687 _("symbol `%s'"),
1688 runp->name));
1689 goto dont_insert;
1690 }
1691 else if (c < 0)
1692 /* Insert it here. */
1693 break;
1694 }
1695
1696 /* To the next entry. */
1697 lastp = *eptr;
1698 eptr = &(*eptr)->mbnext;
1699 }
1700
1701 /* Set the pointers. */
1702 runp->mbnext = *eptr;
1703 runp->mblast = lastp;
1704 if (*eptr != NULL)
1705 (*eptr)->mblast = runp;
1706 *eptr = runp;
1707 dont_insert:
1708 ;
1709 }
1710
1711 if (runp->used_in_level)
1712 {
1713 runp->wcorder = wcact++;
1714
1715 /* We take the opportunity to count the elements which have
1716 wide characters. */
1717 ++nr_wide_elems;
1718 }
1719
1720 if (runp->is_character)
1721 {
1722 if (runp->nmbs == 1)
1723 collate->mbseqorder[((unsigned char *) runp->mbs)[0]] = mbseqact++;
1724
1725 runp->wcseqorder = wcseqact++;
1726 }
1727 else if (runp->mbs != NULL && runp->weights != NULL)
1728 /* This is for collation elements. */
1729 runp->wcseqorder = wcseqact++;
1730
1731 /* Up to the next entry. */
1732 runp = runp->next;
1733 }
1734
1735 /* Find out whether any of the `mbheads' entries is unset. In this
1736 case we use the UNDEFINED entry. */
1737 for (i = 1; i < 256; ++i)
1738 if (collate->mbheads[i] == NULL)
1739 {
1740 need_undefined = 1;
1741 collate->mbheads[i] = &collate->undefined;
1742 }
1743
1744 /* Now to the wide character case. */
1745 collate->wcheads.p = 6;
1746 collate->wcheads.q = 10;
1747 wchead_table_init (&collate->wcheads);
1748
1749 collate->wcseqorder.p = 6;
1750 collate->wcseqorder.q = 10;
1751 collseq_table_init (&collate->wcseqorder);
1752
1753 /* Start adding. */
1754 runp = collate->start;
1755 while (runp != NULL)
1756 {
1757 if (runp->wcs != NULL)
1758 {
1759 struct element_t *e;
1760 struct element_t **eptr;
1761 struct element_t *lastp;
1762
1763 /* Insert the collation sequence value. */
1764 if (runp->is_character)
1765 collseq_table_add (&collate->wcseqorder, runp->wcs[0],
1766 runp->wcseqorder);
1767
1768 /* Find the point where to insert in the list. */
1769 e = wchead_table_get (&collate->wcheads, runp->wcs[0]);
1770 eptr = &e;
1771 lastp = NULL;
1772 while (*eptr != NULL)
1773 {
1774 if ((*eptr)->nwcs < runp->nwcs)
1775 break;
1776
1777 if ((*eptr)->nwcs == runp->nwcs)
1778 {
1779 int c = wmemcmp ((wchar_t *) (*eptr)->wcs,
1780 (wchar_t *) runp->wcs, runp->nwcs);
1781
1782 if (c == 0)
1783 {
1784 /* This should not happen. It means that we have
1785 two symbols with the same byte sequence. It is
1786 of course an error. */
1787 WITH_CUR_LOCALE (error_at_line (0, 0, (*eptr)->file,
1788 (*eptr)->line,
1789 _("\
1790symbol `%s' has the same encoding as"), (*eptr)->name);
1791 error_at_line (0, 0, runp->file,
1792 runp->line,
1793 _("symbol `%s'"),
1794 runp->name));
1795 goto dont_insertwc;
1796 }
1797 else if (c < 0)
1798 /* Insert it here. */
1799 break;
1800 }
1801
1802 /* To the next entry. */
1803 lastp = *eptr;
1804 eptr = &(*eptr)->wcnext;
1805 }
1806
1807 /* Set the pointers. */
1808 runp->wcnext = *eptr;
1809 runp->wclast = lastp;
1810 if (*eptr != NULL)
1811 (*eptr)->wclast = runp;
1812 *eptr = runp;
1813 if (eptr == &e)
1814 wchead_table_add (&collate->wcheads, runp->wcs[0], e);
1815 dont_insertwc:
1816 ;
1817 }
1818
1819 /* Up to the next entry. */
1820 runp = runp->next;
1821 }
1822
1823 /* Now determine whether the UNDEFINED entry is needed and if yes,
1824 whether it was defined. */
1825 collate->undefined.used_in_level = need_undefined ? ~0ul : 0;
1826 if (collate->undefined.file == NULL)
1827 {
1828 if (need_undefined)
1829 {
1830 /* This seems not to be enforced by recent standards. Don't
1831 emit an error, simply append UNDEFINED at the end. */
1832 if (0)
1833 WITH_CUR_LOCALE (error (0, 0, _("no definition of `UNDEFINED'")));
1834
1835 /* Add UNDEFINED at the end. */
1836 collate->undefined.mborder =
1837 (int *) obstack_alloc (&collate->mempool, nrules * sizeof (int));
1838
1839 for (i = 0; i < nrules; ++i)
1840 collate->undefined.mborder[i] = mbact[i]++;
1841 }
1842
1843 /* In any case we will need the definition for the wide character
1844 case. But we will not complain that it is missing since the
1845 specification strangely enough does not seem to account for
1846 this. */
1847 collate->undefined.wcorder = wcact++;
1848 }
1849
1850 /* Finally, try to unify the rules for the sections. Whenever the rules
1851 for a section are the same as those for another section give the
1852 ruleset the same index. Since there are never many section we can
1853 use an O(n^2) algorithm here. */
1854 sect = collate->sections;
1855 while (sect != NULL && sect->rules == NULL)
1856 sect = sect->next;
1857
1858 /* Bail out if we have no sections because of earlier errors. */
1859 if (sect == NULL)
1860 {
1861 WITH_CUR_LOCALE (error (EXIT_FAILURE, 0,
1862 _("too many errors; giving up")));
1863 return;
1864 }
1865
1866 ruleidx = 0;
1867 do
1868 {
1869 struct section_list *osect = collate->sections;
1870
1871 while (osect != sect)
1872 if (osect->rules != NULL
1873 && memcmp (osect->rules, sect->rules,
1874 nrules * sizeof (osect->rules[0])) == 0)
1875 break;
1876 else
1877 osect = osect->next;
1878
1879 if (osect == sect)
1880 sect->ruleidx = ruleidx++;
1881 else
1882 sect->ruleidx = osect->ruleidx;
1883
1884 /* Next section. */
1885 do
1886 sect = sect->next;
1887 while (sect != NULL && sect->rules == NULL);
1888 }
1889 while (sect != NULL);
1890 /* We are currently not prepared for more than 128 rulesets. But this
1891 should never really be a problem. */
1892 assert (ruleidx <= 128);
1893}
1894
1895
1896static int32_t
1897output_weight (struct obstack *pool, struct locale_collate_t *collate,
1898 struct element_t *elem)
1899{
1900 size_t cnt;
1901 int32_t retval;
1902
1903 /* Optimize the use of UNDEFINED. */
1904 if (elem == &collate->undefined)
1905 /* The weights are already inserted. */
1906 return 0;
1907
1908 /* This byte can start exactly one collation element and this is
1909 a single byte. We can directly give the index to the weights. */
1910 retval = obstack_object_size (pool);
1911
1912 /* Construct the weight. */
1913 for (cnt = 0; cnt < nrules; ++cnt)
1914 {
1915 char buf[elem->weights[cnt].cnt * 7];
1916 int len = 0;
1917 int i;
1918
1919 for (i = 0; i < elem->weights[cnt].cnt; ++i)
1920 /* Encode the weight value. We do nothing for IGNORE entries. */
1921 if (elem->weights[cnt].w[i] != NULL)
1922 len += utf8_encode (&buf[len],
1923 elem->weights[cnt].w[i]->mborder[cnt]);
1924
1925 /* And add the buffer content. */
1926 obstack_1grow (pool, len);
1927 obstack_grow (pool, buf, len);
1928 }
1929
1930 return retval | ((elem->section->ruleidx & 0x7f) << 24);
1931}
1932
1933
1934static int32_t
1935output_weightwc (struct obstack *pool, struct locale_collate_t *collate,
1936 struct element_t *elem)
1937{
1938 size_t cnt;
1939 int32_t retval;
1940
1941 /* Optimize the use of UNDEFINED. */
1942 if (elem == &collate->undefined)
1943 /* The weights are already inserted. */
1944 return 0;
1945
1946 /* This byte can start exactly one collation element and this is
1947 a single byte. We can directly give the index to the weights. */
1948 retval = obstack_object_size (pool) / sizeof (int32_t);
1949
1950 /* Construct the weight. */
1951 for (cnt = 0; cnt < nrules; ++cnt)
1952 {
1953 int32_t buf[elem->weights[cnt].cnt];
1954 int i;
1955 int32_t j;
1956
1957 for (i = 0, j = 0; i < elem->weights[cnt].cnt; ++i)
1958 if (elem->weights[cnt].w[i] != NULL)
1959 buf[j++] = elem->weights[cnt].w[i]->wcorder;
1960
1961 /* And add the buffer content. */
1962 obstack_int32_grow (pool, j);
1963
1964 obstack_grow (pool, buf, j * sizeof (int32_t));
1965 maybe_swap_uint32_obstack (pool, j);
1966 }
1967
1968 return retval | ((elem->section->ruleidx & 0x7f) << 24);
1969}
1970
/* File-scope state read by add_to_tablewc.  If localedef is ever
   threaded, this would need to be a __thread variable.  */
static struct
{
  /* Obstack handed to output_weightwc as the destination of the
     weights.  */
  struct obstack *weightpool;
  /* Obstack to which add_to_tablewc appends the records describing
     sequences.  */
  struct obstack *extrapool;
  /* Obstack to which add_to_tablewc appends the weight indices of
     runs of consecutive entries.  */
  struct obstack *indpool;
  /* Collation data passed through to output_weightwc.  */
  struct locale_collate_t *collate;
  /* Table filled by add_to_tablewc through collidx_table_add.  */
  struct collidx_table *tablewc;
} atwc;
1980
1981static void add_to_tablewc (uint32_t ch, struct element_t *runp);
1982
1983static void
1984add_to_tablewc (uint32_t ch, struct element_t *runp)
1985{
1986 if (runp->wcnext == NULL && runp->nwcs == 1)
1987 {
1988 int32_t weigthidx = output_weightwc (atwc.weightpool, atwc.collate,
1989 runp);
1990 collidx_table_add (atwc.tablewc, ch, weigthidx);
1991 }
1992 else
1993 {
1994 /* As for the singlebyte table, we recognize sequences and
1995 compress them. */
1996
1997 collidx_table_add (atwc.tablewc, ch,
1998 -(obstack_object_size (atwc.extrapool)
1999 / sizeof (uint32_t)));
2000
2001 do
2002 {
2003 /* Store the current index in the weight table. We know that
2004 the current position in the `extrapool' is aligned on a
2005 32-bit address. */
2006 int32_t weightidx;
2007 int added;
2008
2009 /* Find out wether this is a single entry or we have more than
2010 one consecutive entry. */
2011 if (runp->wcnext != NULL
2012 && runp->nwcs == runp->wcnext->nwcs
2013 && wmemcmp ((wchar_t *) runp->wcs,
2014 (wchar_t *)runp->wcnext->wcs,
2015 runp->nwcs - 1) == 0
2016 && (runp->wcs[runp->nwcs - 1]
2017 == runp->wcnext->wcs[runp->nwcs - 1] + 1))
2018 {
2019 int i;
2020 struct element_t *series_startp = runp;
2021 struct element_t *curp;
2022
2023 /* Now add first the initial byte sequence. */
2024 added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t);
2025 if (sizeof (int32_t) == sizeof (int))
2026 obstack_make_room (atwc.extrapool, added);
2027
2028 /* More than one consecutive entry. We mark this by having
2029 a negative index into the indirect table. */
2030 obstack_int32_grow_fast (atwc.extrapool,
2031 -(obstack_object_size (atwc.indpool)
2032 / sizeof (int32_t)));
2033 obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
2034
2035 do
2036 runp = runp->wcnext;
2037 while (runp->wcnext != NULL
2038 && runp->nwcs == runp->wcnext->nwcs
2039 && wmemcmp ((wchar_t *) runp->wcs,
2040 (wchar_t *)runp->wcnext->wcs,
2041 runp->nwcs - 1) == 0
2042 && (runp->wcs[runp->nwcs - 1]
2043 == runp->wcnext->wcs[runp->nwcs - 1] + 1));
2044
2045 /* Now walk backward from here to the beginning. */
2046 curp = runp;
2047
2048 for (i = 1; i < runp->nwcs; ++i)
2049 obstack_int32_grow_fast (atwc.extrapool, curp->wcs[i]);
2050
2051 /* Now find the end of the consecutive sequence and
2052 add all the indeces in the indirect pool. */
2053 do
2054 {
2055 weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2056 curp);
2057 obstack_int32_grow (atwc.indpool, weightidx);
2058
2059 curp = curp->wclast;
2060 }
2061 while (curp != series_startp);
2062
2063 /* Add the final weight. */
2064 weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2065 curp);
2066 obstack_int32_grow (atwc.indpool, weightidx);
2067
2068 /* And add the end byte sequence. Without length this
2069 time. */
2070 for (i = 1; i < curp->nwcs; ++i)
2071 obstack_int32_grow (atwc.extrapool, curp->wcs[i]);
2072 }
2073 else
2074 {
2075 /* A single entry. Simply add the index and the length and
2076 string (except for the first character which is already
2077 tested for). */
2078 int i;
2079
2080 /* Output the weight info. */
2081 weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2082 runp);
2083
2084 assert (runp->nwcs > 0);
2085 added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t);
2086 if (sizeof (int) == sizeof (int32_t))
2087 obstack_make_room (atwc.extrapool, added);
2088
2089 obstack_int32_grow_fast (atwc.extrapool, weightidx);
2090 obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
2091 for (i = 1; i < runp->nwcs; ++i)
2092 obstack_int32_grow_fast (atwc.extrapool, runp->wcs[i]);
2093 }
2094
2095 /* Next entry. */
2096 runp = runp->wcnext;
2097 }
2098 while (runp != NULL);
2099 }
2100}
2101
2102void
2103collate_output (struct localedef_t *locale, const struct charmap_t *charmap,
2104 const char *output_path)
2105{
2106 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
2107 const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
2108 struct locale_file file;
2109 size_t ch;
2110 int32_t tablemb[256];
2111 struct obstack weightpool;
2112 struct obstack extrapool;
2113 struct obstack indirectpool;
2114 struct section_list *sect;
2115 struct collidx_table tablewc;
2116 uint32_t elem_size;
2117 uint32_t *elem_table;
2118 int i;
2119 struct element_t *runp;
2120
2121 init_locale_data (&file, nelems);
2122 add_locale_uint32 (&file, nrules);
2123
2124 /* If we have no LC_COLLATE data emit only the number of rules as zero. */
2125 if (collate == NULL)
2126 {
2127 size_t idx;
2128 for (idx = 1; idx < nelems; idx++)
2129 {
2130 /* The words have to be handled specially. */
2131 if (idx == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
2132 add_locale_uint32 (&file, 0);
2133 else
2134 add_locale_empty (&file);
2135 }
2136 write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
2137 return;
2138 }
2139
2140 obstack_init (&weightpool);
2141 obstack_init (&extrapool);
2142 obstack_init (&indirectpool);
2143
2144 /* Since we are using the sign of an integer to mark indirection the
2145 offsets in the arrays we are indirectly referring to must not be
2146 zero since -0 == 0. Therefore we add a bit of dummy content. */
2147 obstack_int32_grow (&extrapool, 0);
2148 obstack_int32_grow (&indirectpool, 0);
2149
2150 /* Prepare the ruleset table. */
2151 for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
2152 if (sect->rules != NULL && sect->ruleidx == i)
2153 {
2154 int j;
2155
2156 obstack_make_room (&weightpool, nrules);
2157
2158 for (j = 0; j < nrules; ++j)
2159 obstack_1grow_fast (&weightpool, sect->rules[j]);
2160 ++i;
2161 }
2162 /* And align the output. */
2163 i = (nrules * i) % LOCFILE_ALIGN;
2164 if (i > 0)
2165 do
2166 obstack_1grow (&weightpool, '\0');
2167 while (++i < LOCFILE_ALIGN);
2168
2169 add_locale_raw_obstack (&file, &weightpool);
2170
2171 /* Generate the 8-bit table. Walk through the lists of sequences
2172 starting with the same byte and add them one after the other to
2173 the table. In case we have more than one sequence starting with
2174 the same byte we have to use extra indirection.
2175
2176 First add a record for the NUL byte. This entry will never be used
2177 so it does not matter. */
2178 tablemb[0] = 0;
2179
2180 /* Now insert the `UNDEFINED' value if it is used. Since this value
2181 will probably be used more than once it is good to store the
2182 weights only once. */
2183 if (collate->undefined.used_in_level != 0)
2184 output_weight (&weightpool, collate, &collate->undefined);
2185
2186 for (ch = 1; ch < 256; ++ch)
2187 if (collate->mbheads[ch]->mbnext == NULL
2188 && collate->mbheads[ch]->nmbs <= 1)
2189 {
2190 tablemb[ch] = output_weight (&weightpool, collate,
2191 collate->mbheads[ch]);
2192 }
2193 else
2194 {
2195 /* The entries in the list are sorted by length and then
2196 alphabetically. This is the order in which we will add the
2197 elements to the collation table. This allows simply walking
2198 the table in sequence and stopping at the first matching
2199 entry. Since the longer sequences are coming first in the
2200 list they have the possibility to match first, just as it
2201 has to be. In the worst case we are walking to the end of
2202 the list where we put, if no singlebyte sequence is defined
2203 in the locale definition, the weights for UNDEFINED.
2204
2205 To reduce the length of the search list we compress them a bit.
2206 This happens by collecting sequences of consecutive byte
2207 sequences in one entry (having and begin and end byte sequence)
2208 and add only one index into the weight table. We can find the
2209 consecutive entries since they are also consecutive in the list. */
2210 struct element_t *runp = collate->mbheads[ch];
2211 struct element_t *lastp;
2212
2213 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2214
2215 tablemb[ch] = -obstack_object_size (&extrapool);
2216
2217 do
2218 {
2219 /* Store the current index in the weight table. We know that
2220 the current position in the `extrapool' is aligned on a
2221 32-bit address. */
2222 int32_t weightidx;
2223 int added;
2224
            /* Find out whether this is a single entry or we have more than
               one consecutive entry.  */
2227 if (runp->mbnext != NULL
2228 && runp->nmbs == runp->mbnext->nmbs
2229 && memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0
2230 && (runp->mbs[runp->nmbs - 1]
2231 == runp->mbnext->mbs[runp->nmbs - 1] + 1))
2232 {
2233 int i;
2234 struct element_t *series_startp = runp;
2235 struct element_t *curp;
2236
2237 /* Compute how much space we will need. */
2238 added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
2239 + 2 * (runp->nmbs - 1));
2240 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2241 obstack_make_room (&extrapool, added);
2242
2243 /* More than one consecutive entry. We mark this by having
2244 a negative index into the indirect table. */
2245 obstack_int32_grow_fast (&extrapool,
2246 -(obstack_object_size (&indirectpool)
2247 / sizeof (int32_t)));
2248
2249 /* Now search first the end of the series. */
2250 do
2251 runp = runp->mbnext;
2252 while (runp->mbnext != NULL
2253 && runp->nmbs == runp->mbnext->nmbs
2254 && memcmp (runp->mbs, runp->mbnext->mbs,
2255 runp->nmbs - 1) == 0
2256 && (runp->mbs[runp->nmbs - 1]
2257 == runp->mbnext->mbs[runp->nmbs - 1] + 1));
2258
2259 /* Now walk backward from here to the beginning. */
2260 curp = runp;
2261
2262 assert (runp->nmbs <= 256);
2263 obstack_1grow_fast (&extrapool, curp->nmbs - 1);
2264 for (i = 1; i < curp->nmbs; ++i)
2265 obstack_1grow_fast (&extrapool, curp->mbs[i]);
2266
2267 /* Now find the end of the consecutive sequence and
2268 add all the indeces in the indirect pool. */
2269 do
2270 {
2271 weightidx = output_weight (&weightpool, collate, curp);
2272 obstack_int32_grow (&indirectpool, weightidx);
2273
2274 curp = curp->mblast;
2275 }
2276 while (curp != series_startp);
2277
2278 /* Add the final weight. */
2279 weightidx = output_weight (&weightpool, collate, curp);
2280 obstack_int32_grow (&indirectpool, weightidx);
2281
2282 /* And add the end byte sequence. Without length this
2283 time. */
2284 for (i = 1; i < curp->nmbs; ++i)
2285 obstack_1grow_fast (&extrapool, curp->mbs[i]);
2286 }
2287 else
2288 {
2289 /* A single entry. Simply add the index and the length and
2290 string (except for the first character which is already
2291 tested for). */
2292 int i;
2293
2294 /* Output the weight info. */
2295 weightidx = output_weight (&weightpool, collate, runp);
2296
2297 added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
2298 + runp->nmbs - 1);
2299 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2300 obstack_make_room (&extrapool, added);
2301
2302 obstack_int32_grow_fast (&extrapool, weightidx);
2303 assert (runp->nmbs <= 256);
2304 obstack_1grow_fast (&extrapool, runp->nmbs - 1);
2305
2306 for (i = 1; i < runp->nmbs; ++i)
2307 obstack_1grow_fast (&extrapool, runp->mbs[i]);
2308 }
2309
2310 /* Add alignment bytes if necessary. */
2311 while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
2312 obstack_1grow_fast (&extrapool, '\0');
2313
2314 /* Next entry. */
2315 lastp = runp;
2316 runp = runp->mbnext;
2317 }
2318 while (runp != NULL);
2319
2320 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2321
2322 /* If the final entry in the list is not a single character we
2323 add an UNDEFINED entry here. */
2324 if (lastp->nmbs != 1)
2325 {
2326 int added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1 + 1);
2327 obstack_make_room (&extrapool, added);
2328
2329 obstack_int32_grow_fast (&extrapool, 0);
2330 /* XXX What rule? We just pick the first. */
2331 obstack_1grow_fast (&extrapool, 0);
2332 /* Length is zero. */
2333 obstack_1grow_fast (&extrapool, 0);
2334
2335 /* Add alignment bytes if necessary. */
2336 while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
2337 obstack_1grow_fast (&extrapool, '\0');
2338 }
2339 }
2340
2341 /* Add padding to the tables if necessary. */
2342 while (!LOCFILE_ALIGNED_P (obstack_object_size (&weightpool)))
2343 obstack_1grow (&weightpool, 0);
2344
2345 /* Now add the four tables. */
2346 add_locale_uint32_array (&file, (const uint32_t *) tablemb, 256);
2347 add_locale_raw_obstack (&file, &weightpool);
2348 add_locale_raw_obstack (&file, &extrapool);
2349 add_locale_raw_obstack (&file, &indirectpool);
2350
2351 /* Now the same for the wide character table. We need to store some
2352 more information here. */
2353 add_locale_empty (&file);
2354 add_locale_empty (&file);
2355 add_locale_empty (&file);
2356
2357 /* Since we are using the sign of an integer to mark indirection the
2358 offsets in the arrays we are indirectly referring to must not be
2359 zero since -0 == 0. Therefore we add a bit of dummy content. */
2360 obstack_int32_grow (&extrapool, 0);
2361 obstack_int32_grow (&indirectpool, 0);
2362
2363 /* Now insert the `UNDEFINED' value if it is used. Since this value
2364 will probably be used more than once it is good to store the
2365 weights only once. */
2366 if (output_weightwc (&weightpool, collate, &collate->undefined) != 0)
2367 abort ();
2368
2369 /* Generate the table. Walk through the lists of sequences starting
2370 with the same wide character and add them one after the other to
2371 the table. In case we have more than one sequence starting with
2372 the same byte we have to use extra indirection. */
2373 tablewc.p = 6;
2374 tablewc.q = 10;
2375 collidx_table_init (&tablewc);
2376
2377 atwc.weightpool = &weightpool;
2378 atwc.extrapool = &extrapool;
2379 atwc.indpool = &indirectpool;
2380 atwc.collate = collate;
2381 atwc.tablewc = &tablewc;
2382
2383 wchead_table_iterate (&collate->wcheads, add_to_tablewc);
2384
2385 memset (&atwc, 0, sizeof (atwc));
2386
2387 /* Now add the four tables. */
2388 add_locale_collidx_table (&file, &tablewc);
2389 add_locale_raw_obstack (&file, &weightpool);
2390 add_locale_raw_obstack (&file, &extrapool);
2391 add_locale_raw_obstack (&file, &indirectpool);
2392
2393 /* Finally write the table with collation element names out. It is
2394 a hash table with a simple function which gets the name of the
2395 character as the input. One character might have many names. The
2396 value associated with the name is an index into the weight table
2397 where we are then interested in the first-level weight value.
2398
2399 To determine how large the table should be we are counting the
2400 elements have to put in. Since we are using internal chaining
2401 using a secondary hash function we have to make the table a bit
2402 larger to avoid extremely long search times. We can achieve
2403 good results with a 40% larger table than there are entries. */
2404 elem_size = 0;
2405 runp = collate->start;
2406 while (runp != NULL)
2407 {
2408 if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2409 /* Yep, the element really counts. */
2410 ++elem_size;
2411
2412 runp = runp->next;
2413 }
2414 /* Add 40% and find the next prime number. */
2415 elem_size = next_prime (elem_size * 1.4);
2416
2417 /* Allocate the table. Each entry consists of two words: the hash
2418 value and an index in a secondary table which provides the index
2419 into the weight table and the string itself (so that a match can
2420 be determined). */
2421 elem_table = (uint32_t *) obstack_alloc (&extrapool,
2422 elem_size * 2 * sizeof (uint32_t));
2423 memset (elem_table, '\0', elem_size * 2 * sizeof (uint32_t));
2424
2425 /* Now add the elements. */
2426 runp = collate->start;
2427 while (runp != NULL)
2428 {
2429 if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2430 {
2431 /* Compute the hash value of the name. */
2432 uint32_t namelen = strlen (runp->name);
2433 uint32_t hash = elem_hash (runp->name, namelen);
2434 size_t idx = hash % elem_size;
2435#ifndef NDEBUG
2436 size_t start_idx = idx;
2437#endif
2438
2439 if (elem_table[idx * 2] != 0)
2440 {
2441 /* The spot is already taken. Try iterating using the value
2442 from the secondary hashing function. */
2443 size_t iter = hash % (elem_size - 2) + 1;
2444
2445 do
2446 {
2447 idx += iter;
2448 if (idx >= elem_size)
2449 idx -= elem_size;
2450 assert (idx != start_idx);
2451 }
2452 while (elem_table[idx * 2] != 0);
2453 }
2454 /* This is the spot where we will insert the value. */
2455 elem_table[idx * 2] = hash;
2456 elem_table[idx * 2 + 1] = obstack_object_size (&extrapool);
2457
2458 /* The string itself including length. */
2459 obstack_1grow (&extrapool, namelen);
2460 obstack_grow (&extrapool, runp->name, namelen);
2461
2462 /* And the multibyte representation. */
2463 obstack_1grow (&extrapool, runp->nmbs);
2464 obstack_grow (&extrapool, runp->mbs, runp->nmbs);
2465
2466 /* And align again to 32 bits. */
2467 if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0)
2468 obstack_grow (&extrapool, "\0\0",
2469 (sizeof (int32_t)
2470 - ((1 + namelen + 1 + runp->nmbs)
2471 % sizeof (int32_t))));
2472
2473 /* Now some 32-bit values: multibyte collation sequence,
2474 wide char string (including length), and wide char
2475 collation sequence. */
2476 obstack_int32_grow (&extrapool, runp->mbseqorder);
2477
2478 obstack_int32_grow (&extrapool, runp->nwcs);
2479 obstack_grow (&extrapool, runp->wcs,
2480 runp->nwcs * sizeof (uint32_t));
2481 maybe_swap_uint32_obstack (&extrapool, runp->nwcs);
2482
2483 obstack_int32_grow (&extrapool, runp->wcseqorder);
2484 }
2485
2486 runp = runp->next;
2487 }
2488
2489 /* Prepare to write out this data. */
2490 add_locale_uint32 (&file, elem_size);
2491 add_locale_uint32_array (&file, elem_table, 2 * elem_size);
2492 add_locale_raw_obstack (&file, &extrapool);
2493 add_locale_raw_data (&file, collate->mbseqorder, 256);
2494 add_locale_collseq_table (&file, &collate->wcseqorder);
2495 add_locale_string (&file, charmap->code_set_name);
2496 write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
2497
2498 obstack_free (&weightpool, NULL);
2499 obstack_free (&extrapool, NULL);
2500 obstack_free (&indirectpool, NULL);
2501}
2502
2503
2504static enum token_t
2505skip_to (struct linereader *ldfile, struct locale_collate_t *collate,
2506 const struct charmap_t *charmap, int to_endif)
2507{
2508 while (1)
2509 {
2510 struct token *now = lr_token (ldfile, charmap, NULL, NULL, 0);
2511 enum token_t nowtok = now->tok;
2512
2513 if (nowtok == tok_eof || nowtok == tok_end)
2514 return nowtok;
2515
2516 if (nowtok == tok_ifdef || nowtok == tok_ifndef)
2517 {
2518 lr_error (ldfile, _("%s: nested conditionals not supported"),
2519 "LC_COLLATE");
2520 nowtok = skip_to (ldfile, collate, charmap, tok_endif);
2521 if (nowtok == tok_eof || nowtok == tok_end)
2522 return nowtok;
2523 }
2524 else if (nowtok == tok_endif || (!to_endif && nowtok == tok_else))
2525 {
2526 lr_ignore_rest (ldfile, 1);
2527 return nowtok;
2528 }
2529 else if (!to_endif && (nowtok == tok_elifdef || nowtok == tok_elifndef))
2530 {
2531 /* Do not read the rest of the line. */
2532 return nowtok;
2533 }
2534 else if (nowtok == tok_else)
2535 {
2536 lr_error (ldfile, _("%s: more than one 'else'"), "LC_COLLATE");
2537 }
2538
2539 lr_ignore_rest (ldfile, 0);
2540 }
2541}
2542
2543
2544void
2545collate_read (struct linereader *ldfile, struct localedef_t *result,
2546 const struct charmap_t *charmap, const char *repertoire_name,
2547 int ignore_content)
2548{
2549 struct repertoire_t *repertoire = NULL;
2550 struct locale_collate_t *collate;
2551 struct token *now;
2552 struct token *arg = NULL;
2553 enum token_t nowtok;
2554 enum token_t was_ellipsis = tok_none;
2555 struct localedef_t *copy_locale = NULL;
2556 /* Parsing state:
2557 0 - start
2558 1 - between `order-start' and `order-end'
2559 2 - after `order-end'
2560 3 - after `reorder-after', waiting for `reorder-end'
2561 4 - after `reorder-end'
2562 5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2563 6 - after `reorder-sections-end'
2564 */
2565 int state = 0;
2566
2567 /* Get the repertoire we have to use. */
2568 if (repertoire_name != NULL)
2569 repertoire = repertoire_read (repertoire_name);
2570
2571 /* The rest of the line containing `LC_COLLATE' must be free. */
2572 lr_ignore_rest (ldfile, 1);
2573
2574 while (1)
2575 {
2576 do
2577 {
2578 now = lr_token (ldfile, charmap, result, NULL, verbose);
2579 nowtok = now->tok;
2580 }
2581 while (nowtok == tok_eol);
2582
2583 if (nowtok != tok_define)
2584 break;
2585
2586 if (ignore_content)
2587 lr_ignore_rest (ldfile, 0);
2588 else
2589 {
2590 arg = lr_token (ldfile, charmap, result, NULL, verbose);
2591 if (arg->tok != tok_ident)
2592 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2593 else
2594 {
2595 /* Simply add the new symbol. */
2596 struct name_list *newsym = xmalloc (sizeof (*newsym)
2597 + arg->val.str.lenmb + 1);
2598 memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
2599 newsym->str[arg->val.str.lenmb] = '\0';
2600 newsym->next = defined;
2601 defined = newsym;
2602
2603 lr_ignore_rest (ldfile, 1);
2604 }
2605 }
2606 }
2607
2608 if (nowtok == tok_copy)
2609 {
2610 now = lr_token (ldfile, charmap, result, NULL, verbose);
2611 if (now->tok != tok_string)
2612 {
2613 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2614
2615 skip_category:
2616 do
2617 now = lr_token (ldfile, charmap, result, NULL, verbose);
2618 while (now->tok != tok_eof && now->tok != tok_end);
2619
2620 if (now->tok != tok_eof
2621 || (now = lr_token (ldfile, charmap, result, NULL, verbose),
2622 now->tok == tok_eof))
2623 lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
2624 else if (now->tok != tok_lc_collate)
2625 {
2626 lr_error (ldfile, _("\
2627%1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2628 lr_ignore_rest (ldfile, 0);
2629 }
2630 else
2631 lr_ignore_rest (ldfile, 1);
2632
2633 return;
2634 }
2635
2636 if (! ignore_content)
2637 {
2638 /* Get the locale definition. */
2639 copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
2640 repertoire_name, charmap, NULL);
2641 if ((copy_locale->avail & COLLATE_LOCALE) == 0)
2642 {
2643 /* Not yet loaded. So do it now. */
2644 if (locfile_read (copy_locale, charmap) != 0)
2645 goto skip_category;
2646 }
2647
2648 if (copy_locale->categories[LC_COLLATE].collate == NULL)
2649 return;
2650 }
2651
2652 lr_ignore_rest (ldfile, 1);
2653
2654 now = lr_token (ldfile, charmap, result, NULL, verbose);
2655 nowtok = now->tok;
2656 }
2657
2658 /* Prepare the data structures. */
2659 collate_startup (ldfile, result, copy_locale, ignore_content);
2660 collate = result->categories[LC_COLLATE].collate;
2661
2662 while (1)
2663 {
2664 char ucs4buf[10];
2665 char *symstr;
2666 size_t symlen;
2667
2668 /* Of course we don't proceed beyond the end of file. */
2669 if (nowtok == tok_eof)
2670 break;
2671
2672 /* Ingore empty lines. */
2673 if (nowtok == tok_eol)
2674 {
2675 now = lr_token (ldfile, charmap, result, NULL, verbose);
2676 nowtok = now->tok;
2677 continue;
2678 }
2679
2680 switch (nowtok)
2681 {
2682 case tok_copy:
2683 /* Allow copying other locales. */
2684 now = lr_token (ldfile, charmap, result, NULL, verbose);
2685 if (now->tok != tok_string)
2686 goto err_label;
2687
2688 if (! ignore_content)
2689 load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
2690 charmap, result);
2691
2692 lr_ignore_rest (ldfile, 1);
2693 break;
2694
2695 case tok_coll_weight_max:
2696 /* Ignore the rest of the line if we don't need the input of
2697 this line. */
2698 if (ignore_content)
2699 {
2700 lr_ignore_rest (ldfile, 0);
2701 break;
2702 }
2703
2704 if (state != 0)
2705 goto err_label;
2706
2707 arg = lr_token (ldfile, charmap, result, NULL, verbose);
2708 if (arg->tok != tok_number)
2709 goto err_label;
2710 if (collate->col_weight_max != -1)
2711 lr_error (ldfile, _("%s: duplicate definition of `%s'"),
2712 "LC_COLLATE", "col_weight_max");
2713 else
2714 collate->col_weight_max = arg->val.num;
2715 lr_ignore_rest (ldfile, 1);
2716 break;
2717
2718 case tok_section_symbol:
2719 /* Ignore the rest of the line if we don't need the input of
2720 this line. */
2721 if (ignore_content)
2722 {
2723 lr_ignore_rest (ldfile, 0);
2724 break;
2725 }
2726
2727 if (state != 0)
2728 goto err_label;
2729
2730 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2731 if (arg->tok != tok_bsymbol)
2732 goto err_label;
2733 else if (!ignore_content)
2734 {
2735 /* Check whether this section is already known. */
2736 struct section_list *known = collate->sections;
2737 while (known != NULL)
2738 {
2739 if (strcmp (known->name, arg->val.str.startmb) == 0)
2740 break;
2741 known = known->next;
2742 }
2743
2744 if (known != NULL)
2745 {
2746 lr_error (ldfile,
2747 _("%s: duplicate declaration of section `%s'"),
2748 "LC_COLLATE", arg->val.str.startmb);
2749 free (arg->val.str.startmb);
2750 }
2751 else
2752 collate->sections = make_seclist_elem (collate,
2753 arg->val.str.startmb,
2754 collate->sections);
2755
2756 lr_ignore_rest (ldfile, known == NULL);
2757 }
2758 else
2759 {
2760 free (arg->val.str.startmb);
2761 lr_ignore_rest (ldfile, 0);
2762 }
2763 break;
2764
2765 case tok_collating_element:
2766 /* Ignore the rest of the line if we don't need the input of
2767 this line. */
2768 if (ignore_content)
2769 {
2770 lr_ignore_rest (ldfile, 0);
2771 break;
2772 }
2773
2774 if (state != 0 && state != 2)
2775 goto err_label;
2776
2777 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2778 if (arg->tok != tok_bsymbol)
2779 goto err_label;
2780 else
2781 {
2782 const char *symbol = arg->val.str.startmb;
2783 size_t symbol_len = arg->val.str.lenmb;
2784
2785 /* Next the `from' keyword. */
2786 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2787 if (arg->tok != tok_from)
2788 {
2789 free ((char *) symbol);
2790 goto err_label;
2791 }
2792
2793 ldfile->return_widestr = 1;
2794 ldfile->translate_strings = 1;
2795
2796 /* Finally the string with the replacement. */
2797 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2798
2799 ldfile->return_widestr = 0;
2800 ldfile->translate_strings = 0;
2801
2802 if (arg->tok != tok_string)
2803 goto err_label;
2804
2805 if (!ignore_content && symbol != NULL)
2806 {
2807 /* The name is already defined. */
2808 if (check_duplicate (ldfile, collate, charmap,
2809 repertoire, symbol, symbol_len))
2810 goto col_elem_free;
2811
2812 if (arg->val.str.startmb != NULL)
2813 insert_entry (&collate->elem_table, symbol, symbol_len,
2814 new_element (collate,
2815 arg->val.str.startmb,
2816 arg->val.str.lenmb - 1,
2817 arg->val.str.startwc,
2818 symbol, symbol_len, 0));
2819 }
2820 else
2821 {
2822 col_elem_free:
2823 free ((char *) symbol);
2824 free (arg->val.str.startmb);
2825 free (arg->val.str.startwc);
2826 }
2827 lr_ignore_rest (ldfile, 1);
2828 }
2829 break;
2830
2831 case tok_collating_symbol:
2832 /* Ignore the rest of the line if we don't need the input of
2833 this line. */
2834 if (ignore_content)
2835 {
2836 lr_ignore_rest (ldfile, 0);
2837 break;
2838 }
2839
2840 if (state != 0 && state != 2)
2841 goto err_label;
2842
2843 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2844 if (arg->tok != tok_bsymbol)
2845 goto err_label;
2846 else
2847 {
2848 char *symbol = arg->val.str.startmb;
2849 size_t symbol_len = arg->val.str.lenmb;
2850 char *endsymbol = NULL;
2851 size_t endsymbol_len = 0;
2852 enum token_t ellipsis = tok_none;
2853
2854 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2855 if (arg->tok == tok_ellipsis2 || arg->tok == tok_ellipsis4)
2856 {
2857 ellipsis = arg->tok;
2858
2859 arg = lr_token (ldfile, charmap, result, repertoire,
2860 verbose);
2861 if (arg->tok != tok_bsymbol)
2862 {
2863 free (symbol);
2864 goto err_label;
2865 }
2866
2867 endsymbol = arg->val.str.startmb;
2868 endsymbol_len = arg->val.str.lenmb;
2869
2870 lr_ignore_rest (ldfile, 1);
2871 }
2872 else if (arg->tok != tok_eol)
2873 {
2874 free (symbol);
2875 goto err_label;
2876 }
2877
2878 if (!ignore_content)
2879 {
2880 if (symbol == NULL
2881 || (ellipsis != tok_none && endsymbol == NULL))
2882 {
2883 lr_error (ldfile, _("\
2884%s: unknown character in collating symbol name"),
2885 "LC_COLLATE");
2886 goto col_sym_free;
2887 }
2888 else if (ellipsis == tok_none)
2889 {
2890 /* A single symbol, no ellipsis. */
2891 if (check_duplicate (ldfile, collate, charmap,
2892 repertoire, symbol, symbol_len))
2893 /* The name is already defined. */
2894 goto col_sym_free;
2895
2896 insert_entry (&collate->sym_table, symbol, symbol_len,
2897 new_symbol (collate, symbol, symbol_len));
2898 }
2899 else if (symbol_len != endsymbol_len)
2900 {
2901 col_sym_inv_range:
2902 lr_error (ldfile,
2903 _("invalid names for character range"));
2904 goto col_sym_free;
2905 }
2906 else
2907 {
2908 /* Oh my, we have to handle an ellipsis. First, as
2909 usual, determine the common prefix and then
2910 convert the rest into a range. */
2911 size_t prefixlen;
2912 unsigned long int from;
2913 unsigned long int to;
2914 char *endp;
2915
2916 for (prefixlen = 0; prefixlen < symbol_len; ++prefixlen)
2917 if (symbol[prefixlen] != endsymbol[prefixlen])
2918 break;
2919
2920 /* Convert the rest into numbers. */
2921 symbol[symbol_len] = '\0';
2922 from = strtoul (&symbol[prefixlen], &endp,
2923 ellipsis == tok_ellipsis2 ? 16 : 10);
2924 if (*endp != '\0')
2925 goto col_sym_inv_range;
2926
2927 endsymbol[symbol_len] = '\0';
2928 to = strtoul (&endsymbol[prefixlen], &endp,
2929 ellipsis == tok_ellipsis2 ? 16 : 10);
2930 if (*endp != '\0')
2931 goto col_sym_inv_range;
2932
2933 if (from > to)
2934 goto col_sym_inv_range;
2935
2936 /* Now loop over all entries. */
2937 while (from <= to)
2938 {
2939 char *symbuf;
2940
2941 symbuf = (char *) obstack_alloc (&collate->mempool,
2942 symbol_len + 1);
2943
2944 /* Create the name. */
2945 sprintf (symbuf,
2946 ellipsis == tok_ellipsis2
2947 ? "%.*s%.*lX" : "%.*s%.*lu",
2948 (int) prefixlen, symbol,
2949 (int) (symbol_len - prefixlen), from);
2950
2951 if (check_duplicate (ldfile, collate, charmap,
2952 repertoire, symbuf, symbol_len))
2953 /* The name is already defined. */
2954 goto col_sym_free;
2955
2956 insert_entry (&collate->sym_table, symbuf,
2957 symbol_len,
2958 new_symbol (collate, symbuf,
2959 symbol_len));
2960
2961 /* Increment the counter. */
2962 ++from;
2963 }
2964
2965 goto col_sym_free;
2966 }
2967 }
2968 else
2969 {
2970 col_sym_free:
2971 free (symbol);
2972 free (endsymbol);
2973 }
2974 }
2975 break;
2976
2977 case tok_symbol_equivalence:
2978 /* Ignore the rest of the line if we don't need the input of
2979 this line. */
2980 if (ignore_content)
2981 {
2982 lr_ignore_rest (ldfile, 0);
2983 break;
2984 }
2985
2986 if (state != 0)
2987 goto err_label;
2988
2989 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2990 if (arg->tok != tok_bsymbol)
2991 goto err_label;
2992 else
2993 {
2994 const char *newname = arg->val.str.startmb;
2995 size_t newname_len = arg->val.str.lenmb;
2996 const char *symname;
2997 size_t symname_len;
2998 void *symval; /* Actually struct symbol_t* */
2999
3000 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3001 if (arg->tok != tok_bsymbol)
3002 {
3003 free ((char *) newname);
3004 goto err_label;
3005 }
3006
3007 symname = arg->val.str.startmb;
3008 symname_len = arg->val.str.lenmb;
3009
3010 if (newname == NULL)
3011 {
3012 lr_error (ldfile, _("\
3013%s: unknown character in equivalent definition name"),
3014 "LC_COLLATE");
3015
3016 sym_equiv_free:
3017 free ((char *) newname);
3018 free ((char *) symname);
3019 break;
3020 }
3021 if (symname == NULL)
3022 {
3023 lr_error (ldfile, _("\
3024%s: unknown character in equivalent definition value"),
3025 "LC_COLLATE");
3026 goto sym_equiv_free;
3027 }
3028
3029 /* See whether the symbol name is already defined. */
3030 if (find_entry (&collate->sym_table, symname, symname_len,
3031 &symval) != 0)
3032 {
3033 lr_error (ldfile, _("\
3034%s: unknown symbol `%s' in equivalent definition"),
3035 "LC_COLLATE", symname);
3036 goto sym_equiv_free;
3037 }
3038
3039 if (insert_entry (&collate->sym_table,
3040 newname, newname_len, symval) < 0)
3041 {
3042 lr_error (ldfile, _("\
3043error while adding equivalent collating symbol"));
3044 goto sym_equiv_free;
3045 }
3046
3047 free ((char *) symname);
3048 }
3049 lr_ignore_rest (ldfile, 1);
3050 break;
3051
3052 case tok_script:
3053 /* Ignore the rest of the line if we don't need the input of
3054 this line. */
3055 if (ignore_content)
3056 {
3057 lr_ignore_rest (ldfile, 0);
3058 break;
3059 }
3060
3061 /* We get told about the scripts we know. */
3062 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3063 if (arg->tok != tok_bsymbol)
3064 goto err_label;
3065 else
3066 {
3067 struct section_list *runp = collate->known_sections;
3068 char *name;
3069
3070 while (runp != NULL)
3071 if (strncmp (runp->name, arg->val.str.startmb,
3072 arg->val.str.lenmb) == 0
3073 && runp->name[arg->val.str.lenmb] == '\0')
3074 break;
3075 else
3076 runp = runp->def_next;
3077
3078 if (runp != NULL)
3079 {
3080 lr_error (ldfile, _("duplicate definition of script `%s'"),
3081 runp->name);
3082 lr_ignore_rest (ldfile, 0);
3083 break;
3084 }
3085
3086 runp = (struct section_list *) xcalloc (1, sizeof (*runp));
3087 name = (char *) xmalloc (arg->val.str.lenmb + 1);
3088 memcpy (name, arg->val.str.startmb, arg->val.str.lenmb);
3089 name[arg->val.str.lenmb] = '\0';
3090 runp->name = name;
3091
3092 runp->def_next = collate->known_sections;
3093 collate->known_sections = runp;
3094 }
3095 lr_ignore_rest (ldfile, 1);
3096 break;
3097
3098 case tok_order_start:
3099 /* Ignore the rest of the line if we don't need the input of
3100 this line. */
3101 if (ignore_content)
3102 {
3103 lr_ignore_rest (ldfile, 0);
3104 break;
3105 }
3106
3107 if (state != 0 && state != 1 && state != 2)
3108 goto err_label;
3109 state = 1;
3110
3111 /* The 14652 draft does not specify whether all `order_start' lines
3112 must contain the same number of sort-rules, but 14651 does. So
3113 we require this here as well. */
3114 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3115 if (arg->tok == tok_bsymbol)
3116 {
3117 /* This better should be a section name. */
3118 struct section_list *sp = collate->known_sections;
3119 while (sp != NULL
3120 && (sp->name == NULL
3121 || strncmp (sp->name, arg->val.str.startmb,
3122 arg->val.str.lenmb) != 0
3123 || sp->name[arg->val.str.lenmb] != '\0'))
3124 sp = sp->def_next;
3125
3126 if (sp == NULL)
3127 {
3128 lr_error (ldfile, _("\
3129%s: unknown section name `%.*s'"),
3130 "LC_COLLATE", (int) arg->val.str.lenmb,
3131 arg->val.str.startmb);
3132 /* We use the error section. */
3133 collate->current_section = &collate->error_section;
3134
3135 if (collate->error_section.first == NULL)
3136 {
3137 /* Insert &collate->error_section at the end of
3138 the collate->sections list. */
3139 if (collate->sections == NULL)
3140 collate->sections = &collate->error_section;
3141 else
3142 {
3143 sp = collate->sections;
3144 while (sp->next != NULL)
3145 sp = sp->next;
3146
3147 sp->next = &collate->error_section;
3148 }
3149 collate->error_section.next = NULL;
3150 }
3151 }
3152 else
3153 {
3154 /* One should not be allowed to open the same
3155 section twice. */
3156 if (sp->first != NULL)
3157 lr_error (ldfile, _("\
3158%s: multiple order definitions for section `%s'"),
3159 "LC_COLLATE", sp->name);
3160 else
3161 {
3162 /* Insert sp in the collate->sections list,
3163 right after collate->current_section. */
3164 if (collate->current_section != NULL)
3165 {
3166 sp->next = collate->current_section->next;
3167 collate->current_section->next = sp;
3168 }
3169 else if (collate->sections == NULL)
3170 /* This is the first section to be defined. */
3171 collate->sections = sp;
3172
3173 collate->current_section = sp;
3174 }
3175
3176 /* Next should come the end of the line or a semicolon. */
3177 arg = lr_token (ldfile, charmap, result, repertoire,
3178 verbose);
3179 if (arg->tok == tok_eol)
3180 {
3181 uint32_t cnt;
3182
3183 /* This means we have exactly one rule: `forward'. */
3184 if (nrules > 1)
3185 lr_error (ldfile, _("\
3186%s: invalid number of sorting rules"),
3187 "LC_COLLATE");
3188 else
3189 nrules = 1;
3190 sp->rules = obstack_alloc (&collate->mempool,
3191 (sizeof (enum coll_sort_rule)
3192 * nrules));
3193 for (cnt = 0; cnt < nrules; ++cnt)
3194 sp->rules[cnt] = sort_forward;
3195
3196 /* Next line. */
3197 break;
3198 }
3199
3200 /* Get the next token. */
3201 arg = lr_token (ldfile, charmap, result, repertoire,
3202 verbose);
3203 }
3204 }
3205 else
3206 {
3207 /* There is no section symbol. Therefore we use the unnamed
3208 section. */
3209 collate->current_section = &collate->unnamed_section;
3210
3211 if (collate->unnamed_section_defined)
3212 lr_error (ldfile, _("\
3213%s: multiple order definitions for unnamed section"),
3214 "LC_COLLATE");
3215 else
3216 {
3217 /* Insert &collate->unnamed_section at the beginning of
3218 the collate->sections list. */
3219 collate->unnamed_section.next = collate->sections;
3220 collate->sections = &collate->unnamed_section;
3221 collate->unnamed_section_defined = true;
3222 }
3223 }
3224
3225 /* Now read the direction names. */
3226 read_directions (ldfile, arg, charmap, repertoire, result);
3227
3228 /* From now we need the strings untranslated. */
3229 ldfile->translate_strings = 0;
3230 break;
3231
3232 case tok_order_end:
3233 /* Ignore the rest of the line if we don't need the input of
3234 this line. */
3235 if (ignore_content)
3236 {
3237 lr_ignore_rest (ldfile, 0);
3238 break;
3239 }
3240
3241 if (state != 1)
3242 goto err_label;
3243
3244 /* Handle ellipsis at end of list. */
3245 if (was_ellipsis != tok_none)
3246 {
3247 handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3248 repertoire, result);
3249 was_ellipsis = tok_none;
3250 }
3251
3252 state = 2;
3253 lr_ignore_rest (ldfile, 1);
3254 break;
3255
3256 case tok_reorder_after:
3257 /* Ignore the rest of the line if we don't need the input of
3258 this line. */
3259 if (ignore_content)
3260 {
3261 lr_ignore_rest (ldfile, 0);
3262 break;
3263 }
3264
3265 if (state == 1)
3266 {
3267 lr_error (ldfile, _("%s: missing `order_end' keyword"),
3268 "LC_COLLATE");
3269 state = 2;
3270
3271 /* Handle ellipsis at end of list. */
3272 if (was_ellipsis != tok_none)
3273 {
3274 handle_ellipsis (ldfile, arg->val.str.startmb,
3275 arg->val.str.lenmb, was_ellipsis, charmap,
3276 repertoire, result);
3277 was_ellipsis = tok_none;
3278 }
3279 }
3280 else if (state == 0 && copy_locale == NULL)
3281 goto err_label;
3282 else if (state != 0 && state != 2 && state != 3)
3283 goto err_label;
3284 state = 3;
3285
3286 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3287 if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
3288 {
3289 /* Find this symbol in the sequence table. */
3290 char ucsbuf[10];
3291 char *startmb;
3292 size_t lenmb;
3293 struct element_t *insp;
3294 int no_error = 1;
3295 void *ptr;
3296
3297 if (arg->tok == tok_bsymbol)
3298 {
3299 startmb = arg->val.str.startmb;
3300 lenmb = arg->val.str.lenmb;
3301 }
3302 else
3303 {
3304 sprintf (ucsbuf, "U%08X", arg->val.ucs4);
3305 startmb = ucsbuf;
3306 lenmb = 9;
3307 }
3308
3309 if (find_entry (&collate->seq_table, startmb, lenmb, &ptr) == 0)
3310 /* Yes, the symbol exists. Simply point the cursor
3311 to it. */
3312 collate->cursor = (struct element_t *) ptr;
3313 else
3314 {
3315 struct symbol_t *symbp;
3316 void *ptr;
3317
3318 if (find_entry (&collate->sym_table, startmb, lenmb,
3319 &ptr) == 0)
3320 {
3321 symbp = ptr;
3322
3323 if (symbp->order->last != NULL
3324 || symbp->order->next != NULL)
3325 collate->cursor = symbp->order;
3326 else
3327 {
3328 /* This is a collating symbol but its position
3329 is not yet defined. */
3330 lr_error (ldfile, _("\
3331%s: order for collating symbol %.*s not yet defined"),
3332 "LC_COLLATE", (int) lenmb, startmb);
3333 collate->cursor = NULL;
3334 no_error = 0;
3335 }
3336 }
3337 else if (find_entry (&collate->elem_table, startmb, lenmb,
3338 &ptr) == 0)
3339 {
3340 insp = (struct element_t *) ptr;
3341
3342 if (insp->last != NULL || insp->next != NULL)
3343 collate->cursor = insp;
3344 else
3345 {
3346 /* This is a collating element but its position
3347 is not yet defined. */
3348 lr_error (ldfile, _("\
3349%s: order for collating element %.*s not yet defined"),
3350 "LC_COLLATE", (int) lenmb, startmb);
3351 collate->cursor = NULL;
3352 no_error = 0;
3353 }
3354 }
3355 else
3356 {
3357 /* This is bad. The symbol after which we have to
3358 insert does not exist. */
3359 lr_error (ldfile, _("\
3360%s: cannot reorder after %.*s: symbol not known"),
3361 "LC_COLLATE", (int) lenmb, startmb);
3362 collate->cursor = NULL;
3363 no_error = 0;
3364 }
3365 }
3366
3367 lr_ignore_rest (ldfile, no_error);
3368 }
3369 else
3370 /* This must not happen. */
3371 goto err_label;
3372 break;
3373
3374 case tok_reorder_end:
3375 /* Ignore the rest of the line if we don't need the input of
3376 this line. */
3377 if (ignore_content)
3378 break;
3379
3380 if (state != 3)
3381 goto err_label;
3382 state = 4;
3383 lr_ignore_rest (ldfile, 1);
3384 break;
3385
3386 case tok_reorder_sections_after:
3387 /* Ignore the rest of the line if we don't need the input of
3388 this line. */
3389 if (ignore_content)
3390 {
3391 lr_ignore_rest (ldfile, 0);
3392 break;
3393 }
3394
3395 if (state == 1)
3396 {
3397 lr_error (ldfile, _("%s: missing `order_end' keyword"),
3398 "LC_COLLATE");
3399 state = 2;
3400
3401 /* Handle ellipsis at end of list. */
3402 if (was_ellipsis != tok_none)
3403 {
3404 handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3405 repertoire, result);
3406 was_ellipsis = tok_none;
3407 }
3408 }
3409 else if (state == 3)
3410 {
3411 WITH_CUR_LOCALE (error (0, 0, _("\
3412%s: missing `reorder-end' keyword"), "LC_COLLATE"));
3413 state = 4;
3414 }
3415 else if (state != 2 && state != 4)
3416 goto err_label;
3417 state = 5;
3418
3419 /* Get the name of the sections we are adding after. */
3420 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3421 if (arg->tok == tok_bsymbol)
3422 {
3423 /* Now find a section with this name. */
3424 struct section_list *runp = collate->sections;
3425
3426 while (runp != NULL)
3427 {
3428 if (runp->name != NULL
3429 && strlen (runp->name) == arg->val.str.lenmb
3430 && memcmp (runp->name, arg->val.str.startmb,
3431 arg->val.str.lenmb) == 0)
3432 break;
3433
3434 runp = runp->next;
3435 }
3436
3437 if (runp != NULL)
3438 collate->current_section = runp;
3439 else
3440 {
3441 /* This is bad. The section after which we have to
3442 reorder does not exist. Therefore we cannot
3443 process the whole rest of this reorder
3444 specification. */
3445 lr_error (ldfile, _("%s: section `%.*s' not known"),
3446 "LC_COLLATE", (int) arg->val.str.lenmb,
3447 arg->val.str.startmb);
3448
3449 do
3450 {
3451 lr_ignore_rest (ldfile, 0);
3452
3453 now = lr_token (ldfile, charmap, result, NULL, verbose);
3454 }
3455 while (now->tok == tok_reorder_sections_after
3456 || now->tok == tok_reorder_sections_end
3457 || now->tok == tok_end);
3458
3459 /* Process the token we just saw. */
3460 nowtok = now->tok;
3461 continue;
3462 }
3463 }
3464 else
3465 /* This must not happen. */
3466 goto err_label;
3467 break;
3468
3469 case tok_reorder_sections_end:
3470 /* Ignore the rest of the line if we don't need the input of
3471 this line. */
3472 if (ignore_content)
3473 break;
3474
3475 if (state != 5)
3476 goto err_label;
3477 state = 6;
3478 lr_ignore_rest (ldfile, 1);
3479 break;
3480
3481 case tok_bsymbol:
3482 case tok_ucs4:
3483 /* Ignore the rest of the line if we don't need the input of
3484 this line. */
3485 if (ignore_content)
3486 {
3487 lr_ignore_rest (ldfile, 0);
3488 break;
3489 }
3490
3491 if (state != 0 && state != 1 && state != 3 && state != 5)
3492 goto err_label;
3493
3494 if ((state == 0 || state == 5) && nowtok == tok_ucs4)
3495 goto err_label;
3496
3497 if (nowtok == tok_ucs4)
3498 {
3499 snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4);
3500 symstr = ucs4buf;
3501 symlen = 9;
3502 }
3503 else if (arg != NULL)
3504 {
3505 symstr = arg->val.str.startmb;
3506 symlen = arg->val.str.lenmb;
3507 }
3508 else
3509 {
3510 lr_error (ldfile, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3511 (int) ldfile->token.val.str.lenmb,
3512 ldfile->token.val.str.startmb);
3513 break;
3514 }
3515
3516 struct element_t *seqp;
3517 if (state == 0)
3518 {
3519 /* We are outside an `order_start' region. This means
3520 we must only accept definitions of values for
3521 collation symbols since these are purely abstract
3522 values and don't need directions associated. */
3523 void *ptr;
3524
3525 if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3526 {
3527 seqp = ptr;
3528
3529 /* It's already defined. First check whether this
3530 is really a collating symbol. */
3531 if (seqp->is_character)
3532 goto err_label;
3533
3534 goto move_entry;
3535 }
3536 else
3537 {
3538 void *result;
3539
3540 if (find_entry (&collate->sym_table, symstr, symlen,
3541 &result) != 0)
3542 /* No collating symbol, it's an error. */
3543 goto err_label;
3544
3545 /* Maybe this is the first time we define a symbol
3546 value and it is before the first actual section. */
3547 if (collate->sections == NULL)
3548 collate->sections = collate->current_section =
3549 &collate->symbol_section;
3550 }
3551
3552 if (was_ellipsis != tok_none)
3553 {
3554 handle_ellipsis (ldfile, symstr, symlen, was_ellipsis,
3555 charmap, repertoire, result);
3556
3557 /* Remember that we processed the ellipsis. */
3558 was_ellipsis = tok_none;
3559
3560 /* And don't add the value a second time. */
3561 break;
3562 }
3563 }
3564 else if (state == 3)
3565 {
3566 /* It is possible that we already have this collation sequence.
3567 In this case we move the entry. */
3568 void *sym;
3569 void *ptr;
3570
3571 /* If the symbol after which we have to insert was not found
3572 ignore all entries. */
3573 if (collate->cursor == NULL)
3574 {
3575 lr_ignore_rest (ldfile, 0);
3576 break;
3577 }
3578
3579 if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3580 {
3581 seqp = (struct element_t *) ptr;
3582 goto move_entry;
3583 }
3584
3585 if (find_entry (&collate->sym_table, symstr, symlen, &sym) == 0
3586 && (seqp = ((struct symbol_t *) sym)->order) != NULL)
3587 goto move_entry;
3588
3589 if (find_entry (&collate->elem_table, symstr, symlen, &ptr) == 0
3590 && (seqp = (struct element_t *) ptr,
3591 seqp->last != NULL || seqp->next != NULL
3592 || (collate->start != NULL && seqp == collate->start)))
3593 {
3594 move_entry:
3595 /* Remove the entry from the old position. */
3596 if (seqp->last == NULL)
3597 collate->start = seqp->next;
3598 else
3599 seqp->last->next = seqp->next;
3600 if (seqp->next != NULL)
3601 seqp->next->last = seqp->last;
3602
3603 /* We also have to check whether this entry is the
3604 first or last of a section. */
3605 if (seqp->section->first == seqp)
3606 {
3607 if (seqp->section->first == seqp->section->last)
3608 /* This section has no content anymore. */
3609 seqp->section->first = seqp->section->last = NULL;
3610 else
3611 seqp->section->first = seqp->next;
3612 }
3613 else if (seqp->section->last == seqp)
3614 seqp->section->last = seqp->last;
3615
3616 /* Now insert it in the new place. */
3617 insert_weights (ldfile, seqp, charmap, repertoire, result,
3618 tok_none);
3619 break;
3620 }
3621
3622 /* Otherwise we just add a new entry. */
3623 }
3624 else if (state == 5)
3625 {
3626 /* We are reordering sections. Find the named section. */
3627 struct section_list *runp = collate->sections;
3628 struct section_list *prevp = NULL;
3629
3630 while (runp != NULL)
3631 {
3632 if (runp->name != NULL
3633 && strlen (runp->name) == symlen
3634 && memcmp (runp->name, symstr, symlen) == 0)
3635 break;
3636
3637 prevp = runp;
3638 runp = runp->next;
3639 }
3640
3641 if (runp == NULL)
3642 {
3643 lr_error (ldfile, _("%s: section `%.*s' not known"),
3644 "LC_COLLATE", (int) symlen, symstr);
3645 lr_ignore_rest (ldfile, 0);
3646 }
3647 else
3648 {
3649 if (runp != collate->current_section)
3650 {
3651 /* Remove the named section from the old place and
3652 insert it in the new one. */
3653 prevp->next = runp->next;
3654
3655 runp->next = collate->current_section->next;
3656 collate->current_section->next = runp;
3657 collate->current_section = runp;
3658 }
3659
3660 /* Process the rest of the line which might change
3661 the collation rules. */
3662 arg = lr_token (ldfile, charmap, result, repertoire,
3663 verbose);
3664 if (arg->tok != tok_eof && arg->tok != tok_eol)
3665 read_directions (ldfile, arg, charmap, repertoire,
3666 result);
3667 }
3668 break;
3669 }
3670 else if (was_ellipsis != tok_none)
3671 {
3672 /* Using the information in the `ellipsis_weight'
3673 element and this and the last value we have to handle
3674 the ellipsis now. */
3675 assert (state == 1);
3676
3677 handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap,
3678 repertoire, result);
3679
3680 /* Remember that we processed the ellipsis. */
3681 was_ellipsis = tok_none;
3682
3683 /* And don't add the value a second time. */
3684 break;
3685 }
3686
3687 /* Now insert in the new place. */
3688 insert_value (ldfile, symstr, symlen, charmap, repertoire, result);
3689 break;
3690
3691 case tok_undefined:
3692 /* Ignore the rest of the line if we don't need the input of
3693 this line. */
3694 if (ignore_content)
3695 {
3696 lr_ignore_rest (ldfile, 0);
3697 break;
3698 }
3699
3700 if (state != 1)
3701 goto err_label;
3702
3703 if (was_ellipsis != tok_none)
3704 {
3705 lr_error (ldfile,
3706 _("%s: cannot have `%s' as end of ellipsis range"),
3707 "LC_COLLATE", "UNDEFINED");
3708
3709 unlink_element (collate);
3710 was_ellipsis = tok_none;
3711 }
3712
3713 /* See whether UNDEFINED already appeared somewhere. */
3714 if (collate->undefined.next != NULL
3715 || &collate->undefined == collate->cursor)
3716 {
3717 lr_error (ldfile,
3718 _("%s: order for `%.*s' already defined at %s:%Zu"),
3719 "LC_COLLATE", 9, "UNDEFINED",
3720 collate->undefined.file,
3721 collate->undefined.line);
3722 lr_ignore_rest (ldfile, 0);
3723 }
3724 else
3725 /* Parse the weights. */
3726 insert_weights (ldfile, &collate->undefined, charmap,
3727 repertoire, result, tok_none);
3728 break;
3729
3730 case tok_ellipsis2: /* symbolic hexadecimal ellipsis */
3731 case tok_ellipsis3: /* absolute ellipsis */
3732 case tok_ellipsis4: /* symbolic decimal ellipsis */
3733 /* This is the symbolic (decimal or hexadecimal) or absolute
3734 ellipsis. */
3735 if (was_ellipsis != tok_none)
3736 goto err_label;
3737
3738 if (state != 0 && state != 1 && state != 3)
3739 goto err_label;
3740
3741 was_ellipsis = nowtok;
3742
3743 insert_weights (ldfile, &collate->ellipsis_weight, charmap,
3744 repertoire, result, nowtok);
3745 break;
3746
3747 case tok_end:
3748 seen_end:
3749 /* Next we assume `LC_COLLATE'. */
3750 if (!ignore_content)
3751 {
3752 if (state == 0 && copy_locale == NULL)
3753 /* We must either see a copy statement or have
3754 ordering values. */
3755 lr_error (ldfile,
3756 _("%s: empty category description not allowed"),
3757 "LC_COLLATE");
3758 else if (state == 1)
3759 {
3760 lr_error (ldfile, _("%s: missing `order_end' keyword"),
3761 "LC_COLLATE");
3762
3763 /* Handle ellipsis at end of list. */
3764 if (was_ellipsis != tok_none)
3765 {
3766 handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3767 repertoire, result);
3768 was_ellipsis = tok_none;
3769 }
3770 }
3771 else if (state == 3)
3772 WITH_CUR_LOCALE (error (0, 0, _("\
3773%s: missing `reorder-end' keyword"), "LC_COLLATE"));
3774 else if (state == 5)
3775 WITH_CUR_LOCALE (error (0, 0, _("\
3776%s: missing `reorder-sections-end' keyword"), "LC_COLLATE"));
3777 }
3778 arg = lr_token (ldfile, charmap, result, NULL, verbose);
3779 if (arg->tok == tok_eof)
3780 break;
3781 if (arg->tok == tok_eol)
3782 lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
3783 else if (arg->tok != tok_lc_collate)
3784 lr_error (ldfile, _("\
3785%1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3786 lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
3787 return;
3788
3789 case tok_define:
3790 if (ignore_content)
3791 {
3792 lr_ignore_rest (ldfile, 0);
3793 break;
3794 }
3795
3796 arg = lr_token (ldfile, charmap, result, NULL, verbose);
3797 if (arg->tok != tok_ident)
3798 goto err_label;
3799
3800 /* Simply add the new symbol. */
3801 struct name_list *newsym = xmalloc (sizeof (*newsym)
3802 + arg->val.str.lenmb + 1);
3803 memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
3804 newsym->str[arg->val.str.lenmb] = '\0';
3805 newsym->next = defined;
3806 defined = newsym;
3807
3808 lr_ignore_rest (ldfile, 1);
3809 break;
3810
3811 case tok_undef:
3812 if (ignore_content)
3813 {
3814 lr_ignore_rest (ldfile, 0);
3815 break;
3816 }
3817
3818 arg = lr_token (ldfile, charmap, result, NULL, verbose);
3819 if (arg->tok != tok_ident)
3820 goto err_label;
3821
3822 /* Remove _all_ occurrences of the symbol from the list. */
3823 struct name_list *prevdef = NULL;
3824 struct name_list *curdef = defined;
3825 while (curdef != NULL)
3826 if (strncmp (arg->val.str.startmb, curdef->str,
3827 arg->val.str.lenmb) == 0
3828 && curdef->str[arg->val.str.lenmb] == '\0')
3829 {
3830 if (prevdef == NULL)
3831 defined = curdef->next;
3832 else
3833 prevdef->next = curdef->next;
3834
3835 struct name_list *olddef = curdef;
3836 curdef = curdef->next;
3837
3838 free (olddef);
3839 }
3840 else
3841 {
3842 prevdef = curdef;
3843 curdef = curdef->next;
3844 }
3845
3846 lr_ignore_rest (ldfile, 1);
3847 break;
3848
3849 case tok_ifdef:
3850 case tok_ifndef:
3851 if (ignore_content)
3852 {
3853 lr_ignore_rest (ldfile, 0);
3854 break;
3855 }
3856
3857 found_ifdef:
3858 arg = lr_token (ldfile, charmap, result, NULL, verbose);
3859 if (arg->tok != tok_ident)
3860 goto err_label;
3861 lr_ignore_rest (ldfile, 1);
3862
3863 if (collate->else_action == else_none)
3864 {
3865 curdef = defined;
3866 while (curdef != NULL)
3867 if (strncmp (arg->val.str.startmb, curdef->str,
3868 arg->val.str.lenmb) == 0
3869 && curdef->str[arg->val.str.lenmb] == '\0')
3870 break;
3871 else
3872 curdef = curdef->next;
3873
3874 if ((nowtok == tok_ifdef && curdef != NULL)
3875 || (nowtok == tok_ifndef && curdef == NULL))
3876 {
3877 /* We have to use the if-branch. */
3878 collate->else_action = else_ignore;
3879 }
3880 else
3881 {
3882 /* We have to use the else-branch, if there is one. */
3883 nowtok = skip_to (ldfile, collate, charmap, 0);
3884 if (nowtok == tok_else)
3885 collate->else_action = else_seen;
3886 else if (nowtok == tok_elifdef)
3887 {
3888 nowtok = tok_ifdef;
3889 goto found_ifdef;
3890 }
3891 else if (nowtok == tok_elifndef)
3892 {
3893 nowtok = tok_ifndef;
3894 goto found_ifdef;
3895 }
3896 else if (nowtok == tok_eof)
3897 goto seen_eof;
3898 else if (nowtok == tok_end)
3899 goto seen_end;
3900 }
3901 }
3902 else
3903 {
3904 /* XXX Should it really become necessary to support nested
3905 preprocessor handling we will push the state here. */
3906 lr_error (ldfile, _("%s: nested conditionals not supported"),
3907 "LC_COLLATE");
3908 nowtok = skip_to (ldfile, collate, charmap, 1);
3909 if (nowtok == tok_eof)
3910 goto seen_eof;
3911 else if (nowtok == tok_end)
3912 goto seen_end;
3913 }
3914 break;
3915
3916 case tok_elifdef:
3917 case tok_elifndef:
3918 case tok_else:
3919 if (ignore_content)
3920 {
3921 lr_ignore_rest (ldfile, 0);
3922 break;
3923 }
3924
3925 lr_ignore_rest (ldfile, 1);
3926
3927 if (collate->else_action == else_ignore)
3928 {
3929 /* Ignore everything until the endif. */
3930 nowtok = skip_to (ldfile, collate, charmap, 1);
3931 if (nowtok == tok_eof)
3932 goto seen_eof;
3933 else if (nowtok == tok_end)
3934 goto seen_end;
3935 }
3936 else
3937 {
3938 assert (collate->else_action == else_none);
3939 lr_error (ldfile, _("\
3940%s: '%s' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE",
3941 nowtok == tok_else ? "else"
3942 : nowtok == tok_elifdef ? "elifdef" : "elifndef");
3943 }
3944 break;
3945
3946 case tok_endif:
3947 if (ignore_content)
3948 {
3949 lr_ignore_rest (ldfile, 0);
3950 break;
3951 }
3952
3953 lr_ignore_rest (ldfile, 1);
3954
3955 if (collate->else_action != else_ignore
3956 && collate->else_action != else_seen)
3957 lr_error (ldfile, _("\
3958%s: 'endif' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE");
3959
3960 /* XXX If we support nested preprocessor directives we pop
3961 the state here. */
3962 collate->else_action = else_none;
3963 break;
3964
3965 default:
3966 err_label:
3967 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
3968 }
3969
3970 /* Prepare for the next round. */
3971 now = lr_token (ldfile, charmap, result, NULL, verbose);
3972 nowtok = now->tok;
3973 }
3974
3975 seen_eof:
3976 /* When we come here we reached the end of the file. */
3977 lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
3978}
3979