1/* Copyright (C) 1995-2019 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, see <http://www.gnu.org/licenses/>. */
17
18#ifdef HAVE_CONFIG_H
19# include <config.h>
20#endif
21
22#include <errno.h>
23#include <stdlib.h>
24#include <wchar.h>
25#include <stdint.h>
26#include <sys/param.h>
27
28#include "localedef.h"
29#include "charmap.h"
30#include "localeinfo.h"
31#include "linereader.h"
32#include "locfile.h"
33#include "elem-hash.h"
34
35/* Uncomment the following line in the production version. */
36/* #define NDEBUG 1 */
37#include <assert.h>
38
39#define obstack_chunk_alloc malloc
40#define obstack_chunk_free free
41
/* Append the 32-bit value DATA to the object currently being built in
   OBSTACK.  The value is passed through maybe_swap_uint32 before it is
   stored.  The object built so far must satisfy LOCFILE_ALIGNED_P.  */
static inline void
__attribute ((always_inline))
obstack_int32_grow (struct obstack *obstack, int32_t data)
{
  assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));

  data = maybe_swap_uint32 (data);

  /* When `int' is not exactly as wide as int32_t the dedicated int
     primitive cannot be used; append the raw bytes instead.  */
  if (sizeof (int32_t) != sizeof (int))
    obstack_grow (obstack, &data, sizeof (int32_t));
  else
    obstack_int_grow (obstack, data);
}
53
/* Like obstack_int32_grow, but uses obstack_int_grow_fast when `int' is
   exactly 32 bits wide.  The value is passed through maybe_swap_uint32
   before it is stored and the object built so far must satisfy
   LOCFILE_ALIGNED_P.  */
static inline void
__attribute ((always_inline))
obstack_int32_grow_fast (struct obstack *obstack, int32_t data)
{
  assert (LOCFILE_ALIGNED_P (obstack_object_size (obstack)));

  data = maybe_swap_uint32 (data);

  /* Fall back to the generic byte-wise append when `int' has a
     different width than int32_t.  */
  if (sizeof (int32_t) != sizeof (int))
    obstack_grow (obstack, &data, sizeof (int32_t));
  else
    obstack_int_grow_fast (obstack, data);
}
65
66/* Forward declaration. */
67struct element_t;
68
/* Data type for a node in the lists of sections.  Each node is chained
   into two lists at once: the list of known sections (via DEF_NEXT) and
   the list of used sections (via NEXT).  */
struct section_list
{
  /* Successor in the known_sections list.  */
  struct section_list *def_next;
  /* Successor in the sections list.  */
  struct section_list *next;
  /* Name of the section.  */
  const char *name;
  /* First element of this section.  */
  struct element_t *first;
  /* Last element of this section.  */
  struct element_t *last;
  /* These are the rules for this section.  The array is installed by
     read_directions.  */
  enum coll_sort_rule *rules;
  /* Index of the rule set in the appropriate section of the output file.  */
  int ruleidx;
};
87
88struct element_t;
89
/* The weight of an element for one level: a counted array of pointers
   to the elements which make up the weight.  */
struct element_list_t
{
  /* Number of elements.  */
  int cnt;

  /* Array with CNT entries.  insert_weights stores a null pointer for
     an ignored level and one of the ELEMENT_ELLIPSIS* placeholder
     values for levels which depend on an ellipsis range.  */
  struct element_t **w;
};
97
/* Data type for collating element.  Objects are created by new_element
   and linked into the order list by insert_weights.  */
struct element_t
{
  /* Name of the element; a null pointer if it has no name.  */
  const char *name;

  /* Multibyte sequence and its length in bytes; MBS is a null pointer
     (and NMBS zero) if no byte sequence is known.  */
  const char *mbs;
  size_t nmbs;
  /* Zero-terminated wide character sequence and its length; WCS is a
     null pointer (and NWCS zero) if no wide character is known.  */
  const uint32_t *wcs;
  size_t nwcs;
  int *mborder;
  int wcorder;

  /* The following is a bit mask whose bits are set if this element is
     used in the appropriate level.  Interesting for the singlebyte
     weight computation.

     XXX The type here restricts the number of levels to 32.  It could
     be changed if necessary but I doubt this is necessary.  */
  unsigned int used_in_level;

  /* Array of weights, one entry per sorting rule (allocated with
     NRULES entries by insert_weights); a null pointer until then.  */
  struct element_list_t *weights;

  /* Nonzero if this is a real character definition.  */
  int is_character;

  /* Order of the character in the sequence.  This information will
     be used in range expressions.  */
  int mbseqorder;
  int wcseqorder;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;

  /* Which section does this belong to.  */
  struct section_list *section;

  /* Predecessor and successor in the order list.  */
  struct element_t *last;
  struct element_t *next;

  /* Next element in multibyte output list.  */
  struct element_t *mbnext;
  struct element_t *mblast;

  /* Next element in wide character output list.  */
  struct element_t *wcnext;
  struct element_t *wclast;
};
147
148/* Special element value. */
149#define ELEMENT_ELLIPSIS2 ((struct element_t *) 1)
150#define ELEMENT_ELLIPSIS3 ((struct element_t *) 2)
151#define ELEMENT_ELLIPSIS4 ((struct element_t *) 3)
152
/* Data type for collating symbol.  Objects are created by new_symbol.  */
struct symbol_t
{
  /* Name of the symbol.  */
  const char *name;

  /* Point to place in the order list.  A null pointer until an element
     has been associated with the symbol.  */
  struct element_t *order;

  /* Where does the definition come from.  */
  const char *file;
  size_t line;
};
165
166/* Sparse table of struct element_t *. */
167#define TABLE wchead_table
168#define ELEMENT struct element_t *
169#define DEFAULT NULL
170#define ITERATE
171#define NO_ADD_LOCALE
172#include "3level.h"
173
174/* Sparse table of int32_t. */
175#define TABLE collidx_table
176#define ELEMENT int32_t
177#define DEFAULT 0
178#include "3level.h"
179
180/* Sparse table of uint32_t. */
181#define TABLE collseq_table
182#define ELEMENT uint32_t
183#define DEFAULT ~((uint32_t) 0)
184#include "3level.h"
185
186
/* Simple name list for the preprocessor.  Every node is allocated with
   enough trailing room to hold the name itself in STR.  */
struct name_list
{
  /* Next entry in the list.  */
  struct name_list *next;
  /* The name, stored directly behind the node.  Declared as a C99
     flexible array member rather than a GNU zero-length array; the
     size and layout of the structure are the same.  */
  char str[];
};
193
194
/* The real definition of the struct for the LC_COLLATE locale.  */
struct locale_collate_t
{
  int col_weight_max;
  int cur_weight_max;

  /* List of known scripts.  */
  struct section_list *known_sections;
  /* List of used sections.  */
  struct section_list *sections;
  /* Current section using definition.  */
  struct section_list *current_section;
  /* There always can be an unnamed section.  */
  struct section_list unnamed_section;
  /* Flag whether the unnamed section has been defined.  */
  bool unnamed_section_defined;
  /* To make handling of errors easier we have another section.  */
  struct section_list error_section;
  /* Sometimes we are defining the values for collating symbols before
     the first actual section.  */
  struct section_list symbol_section;

  /* Start of the order list.  */
  struct element_t *start;

  /* The undefined element.  */
  struct element_t undefined;

  /* This is the cursor for `reorder_after' insertions.  New elements
     are linked into the order list directly behind it.  */
  struct element_t *cursor;

  /* This value is used when handling ellipsis.  */
  struct element_t ellipsis_weight;

  /* Known collating elements.  */
  hash_table elem_table;

  /* Known collating symbols.  */
  hash_table sym_table;

  /* Known collation sequences.  */
  hash_table seq_table;

  /* Memory pool for the elements, symbols, sections, and the strings
     they refer to.  */
  struct obstack mempool;

  /* The LC_COLLATE category is a bit special as it is sometimes possible
     that the definitions from more than one input file contain
     information.  Therefore we keep all relevant input in a list.  */
  struct locale_collate_t *next;

  /* Arrays with heads of the list for each of the leading bytes in
     the multibyte sequences.  */
  struct element_t *mbheads[256];

  /* Sparse table with the heads of the lists for the wide character
     sequences.  NOTE(review): presumably indexed by the first wide
     character, analogous to MBHEADS -- confirm against the users.  */
  struct wchead_table wcheads;

  /* The arrays with the collation sequence order.  */
  unsigned char mbseqorder[256];
  struct collseq_table wcseqorder;

  /* State of the preprocessor.  */
  enum
    {
      else_none = 0,
      else_ignore,
      else_seen
    }
    else_action;
};
266
267
/* We have a few global variables which are used for reading all
   LC_COLLATE category descriptions in all files.  */

/* Number of sorting rules (levels).  Zero until read_directions has
   processed the first direction specification; every later
   specification must provide the same number of rules.  */
static uint32_t nrules;

/* List of defined preprocessor symbols.  */
static struct name_list *defined;
274
275
/* We need UTF-8 encoding of numbers.  Store the encoding of VAL in BUF
   (which must have room for up to six bytes) and return the number of
   bytes written.  No terminating NUL byte is added.  */
static inline int
__attribute ((always_inline))
utf8_encode (char *buf, int val)
{
  int nbytes;
  int idx;

  /* Values below 0x80 are represented by themselves.  */
  if (val < 0x80)
    {
      buf[0] = (char) val;
      return 1;
    }

  /* A sequence of N bytes can carry 5 * N + 1 payload bits.  Find the
     shortest sequence that is long enough; six bytes is the maximum.  */
  nbytes = 2;
  while (nbytes < 6 && (val & (~(uint32_t)0 << (5 * nbytes + 1))) != 0)
    ++nbytes;

  /* The lead byte starts with NBYTES one bits followed by a zero bit.  */
  buf[0] = (unsigned char) (~0xff >> nbytes);

  /* Fill in the continuation bytes from the back, six bits at a time.  */
  for (idx = nbytes - 1; idx > 0; --idx)
    {
      buf[idx] = 0x80 | (val & 0x3f);
      val >>= 6;
    }

  /* Whatever is left over belongs into the lead byte.  */
  buf[0] |= val;

  return nbytes;
}
310
311
312static struct section_list *
313make_seclist_elem (struct locale_collate_t *collate, const char *string,
314 struct section_list *next)
315{
316 struct section_list *newp;
317
318 newp = (struct section_list *) obstack_alloc (&collate->mempool,
319 sizeof (*newp));
320 newp->next = next;
321 newp->name = string;
322 newp->first = NULL;
323 newp->last = NULL;
324
325 return newp;
326}
327
328
329static struct element_t *
330new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
331 const uint32_t *wcs, const char *name, size_t namelen,
332 int is_character)
333{
334 struct element_t *newp;
335
336 newp = (struct element_t *) obstack_alloc (&collate->mempool,
337 sizeof (*newp));
338 newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
339 name, namelen);
340 if (mbs != NULL)
341 {
342 newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
343 newp->nmbs = mbslen;
344 }
345 else
346 {
347 newp->mbs = NULL;
348 newp->nmbs = 0;
349 }
350 if (wcs != NULL)
351 {
352 size_t nwcs = wcslen ((wchar_t *) wcs);
353 uint32_t zero = 0;
354 /* Handle <U0000> as a single character. */
355 if (nwcs == 0)
356 nwcs = 1;
357 obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
358 obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
359 newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
360 newp->nwcs = nwcs;
361 }
362 else
363 {
364 newp->wcs = NULL;
365 newp->nwcs = 0;
366 }
367 newp->mborder = NULL;
368 newp->wcorder = 0;
369 newp->used_in_level = 0;
370 newp->is_character = is_character;
371
372 /* Will be assigned later. XXX */
373 newp->mbseqorder = 0;
374 newp->wcseqorder = 0;
375
376 /* Will be allocated later. */
377 newp->weights = NULL;
378
379 newp->file = NULL;
380 newp->line = 0;
381
382 newp->section = collate->current_section;
383
384 newp->last = NULL;
385 newp->next = NULL;
386
387 newp->mbnext = NULL;
388 newp->mblast = NULL;
389
390 newp->wcnext = NULL;
391 newp->wclast = NULL;
392
393 return newp;
394}
395
396
397static struct symbol_t *
398new_symbol (struct locale_collate_t *collate, const char *name, size_t len)
399{
400 struct symbol_t *newp;
401
402 newp = (struct symbol_t *) obstack_alloc (&collate->mempool, sizeof (*newp));
403
404 newp->name = obstack_copy0 (&collate->mempool, name, len);
405 newp->order = NULL;
406
407 newp->file = NULL;
408 newp->line = 0;
409
410 return newp;
411}
412
413
414/* Test whether this name is already defined somewhere. */
415static int
416check_duplicate (struct linereader *ldfile, struct locale_collate_t *collate,
417 const struct charmap_t *charmap,
418 struct repertoire_t *repertoire, const char *symbol,
419 size_t symbol_len)
420{
421 void *ignore = NULL;
422
423 if (find_entry (&charmap->char_table, symbol, symbol_len, &ignore) == 0)
424 {
425 lr_error (ldfile, _("`%.*s' already defined in charmap"),
426 (int) symbol_len, symbol);
427 return 1;
428 }
429
430 if (repertoire != NULL
431 && (find_entry (&repertoire->char_table, symbol, symbol_len, &ignore)
432 == 0))
433 {
434 lr_error (ldfile, _("`%.*s' already defined in repertoire"),
435 (int) symbol_len, symbol);
436 return 1;
437 }
438
439 if (find_entry (&collate->sym_table, symbol, symbol_len, &ignore) == 0)
440 {
441 lr_error (ldfile, _("`%.*s' already defined as collating symbol"),
442 (int) symbol_len, symbol);
443 return 1;
444 }
445
446 if (find_entry (&collate->elem_table, symbol, symbol_len, &ignore) == 0)
447 {
448 lr_error (ldfile, _("`%.*s' already defined as collating element"),
449 (int) symbol_len, symbol);
450 return 1;
451 }
452
453 return 0;
454}
455
456
457/* Read the direction specification. */
458static void
459read_directions (struct linereader *ldfile, struct token *arg,
460 const struct charmap_t *charmap,
461 struct repertoire_t *repertoire, struct localedef_t *result)
462{
463 int cnt = 0;
464 int max = nrules ?: 10;
465 enum coll_sort_rule *rules = calloc (max, sizeof (*rules));
466 int warned = 0;
467 struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
468
469 while (1)
470 {
471 int valid = 0;
472
473 if (arg->tok == tok_forward)
474 {
475 if (rules[cnt] & sort_backward)
476 {
477 if (! warned)
478 {
479 lr_error (ldfile, _("\
480%s: `forward' and `backward' are mutually excluding each other"),
481 "LC_COLLATE");
482 warned = 1;
483 }
484 }
485 else if (rules[cnt] & sort_forward)
486 {
487 if (! warned)
488 {
489 lr_error (ldfile, _("\
490%s: `%s' mentioned more than once in definition of weight %d"),
491 "LC_COLLATE", "forward", cnt + 1);
492 }
493 }
494 else
495 rules[cnt] |= sort_forward;
496
497 valid = 1;
498 }
499 else if (arg->tok == tok_backward)
500 {
501 if (rules[cnt] & sort_forward)
502 {
503 if (! warned)
504 {
505 lr_error (ldfile, _("\
506%s: `forward' and `backward' are mutually excluding each other"),
507 "LC_COLLATE");
508 warned = 1;
509 }
510 }
511 else if (rules[cnt] & sort_backward)
512 {
513 if (! warned)
514 {
515 lr_error (ldfile, _("\
516%s: `%s' mentioned more than once in definition of weight %d"),
517 "LC_COLLATE", "backward", cnt + 1);
518 }
519 }
520 else
521 rules[cnt] |= sort_backward;
522
523 valid = 1;
524 }
525 else if (arg->tok == tok_position)
526 {
527 if (rules[cnt] & sort_position)
528 {
529 if (! warned)
530 {
531 lr_error (ldfile, _("\
532%s: `%s' mentioned more than once in definition of weight %d"),
533 "LC_COLLATE", "position", cnt + 1);
534 }
535 }
536 else
537 rules[cnt] |= sort_position;
538
539 valid = 1;
540 }
541
542 if (valid)
543 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
544
545 if (arg->tok == tok_eof || arg->tok == tok_eol || arg->tok == tok_comma
546 || arg->tok == tok_semicolon)
547 {
548 if (! valid && ! warned)
549 {
550 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
551 warned = 1;
552 }
553
554 /* See whether we have to increment the counter. */
555 if (arg->tok != tok_comma && rules[cnt] != 0)
556 {
557 /* Add the default `forward' if we have seen only `position'. */
558 if (rules[cnt] == sort_position)
559 rules[cnt] = sort_position | sort_forward;
560
561 ++cnt;
562 }
563
564 if (arg->tok == tok_eof || arg->tok == tok_eol)
565 /* End of line or file, so we exit the loop. */
566 break;
567
568 if (nrules == 0)
569 {
570 /* See whether we have enough room in the array. */
571 if (cnt == max)
572 {
573 max += 10;
574 rules = (enum coll_sort_rule *) xrealloc (rules,
575 max
576 * sizeof (*rules));
577 memset (&rules[cnt], '\0', (max - cnt) * sizeof (*rules));
578 }
579 }
580 else
581 {
582 if (cnt == nrules)
583 {
584 /* There must not be any more rule. */
585 if (! warned)
586 {
587 lr_error (ldfile, _("\
588%s: too many rules; first entry only had %d"),
589 "LC_COLLATE", nrules);
590 warned = 1;
591 }
592
593 lr_ignore_rest (ldfile, 0);
594 break;
595 }
596 }
597 }
598 else
599 {
600 if (! warned)
601 {
602 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
603 warned = 1;
604 }
605 }
606
607 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
608 }
609
610 if (nrules == 0)
611 {
612 /* Now we know how many rules we have. */
613 nrules = cnt;
614 rules = (enum coll_sort_rule *) xrealloc (rules,
615 nrules * sizeof (*rules));
616 }
617 else
618 {
619 if (cnt < nrules)
620 {
621 /* Not enough rules in this specification. */
622 if (! warned)
623 lr_error (ldfile, _("%s: not enough sorting rules"), "LC_COLLATE");
624
625 do
626 rules[cnt] = sort_forward;
627 while (++cnt < nrules);
628 }
629 }
630
631 collate->current_section->rules = rules;
632}
633
634
635static struct element_t *
636find_element (struct linereader *ldfile, struct locale_collate_t *collate,
637 const char *str, size_t len)
638{
639 void *result = NULL;
640
641 /* Search for the entries among the collation sequences already define. */
642 if (find_entry (&collate->seq_table, str, len, &result) != 0)
643 {
644 /* Nope, not define yet. So we see whether it is a
645 collation symbol. */
646 void *ptr;
647
648 if (find_entry (&collate->sym_table, str, len, &ptr) == 0)
649 {
650 /* It's a collation symbol. */
651 struct symbol_t *sym = (struct symbol_t *) ptr;
652 result = sym->order;
653
654 if (result == NULL)
655 result = sym->order = new_element (collate, NULL, 0, NULL,
656 NULL, 0, 0);
657 }
658 else if (find_entry (&collate->elem_table, str, len, &result) != 0)
659 {
660 /* It's also no collation element. So it is a character
661 element defined later. */
662 result = new_element (collate, NULL, 0, NULL, str, len, 1);
663 /* Insert it into the sequence table. */
664 insert_entry (&collate->seq_table, str, len, result);
665 }
666 }
667
668 return (struct element_t *) result;
669}
670
671
672static void
673unlink_element (struct locale_collate_t *collate)
674{
675 if (collate->cursor == collate->start)
676 {
677 assert (collate->cursor->next == NULL);
678 assert (collate->cursor->last == NULL);
679 collate->cursor = NULL;
680 }
681 else
682 {
683 if (collate->cursor->next != NULL)
684 collate->cursor->next->last = collate->cursor->last;
685 if (collate->cursor->last != NULL)
686 collate->cursor->last->next = collate->cursor->next;
687 collate->cursor = collate->cursor->last;
688 }
689}
690
691
692static void
693insert_weights (struct linereader *ldfile, struct element_t *elem,
694 const struct charmap_t *charmap,
695 struct repertoire_t *repertoire, struct localedef_t *result,
696 enum token_t ellipsis)
697{
698 int weight_cnt;
699 struct token *arg;
700 struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
701
702 /* Initialize all the fields. */
703 elem->file = ldfile->fname;
704 elem->line = ldfile->lineno;
705
706 elem->last = collate->cursor;
707 elem->next = collate->cursor ? collate->cursor->next : NULL;
708 if (collate->cursor != NULL && collate->cursor->next != NULL)
709 collate->cursor->next->last = elem;
710 if (collate->cursor != NULL)
711 collate->cursor->next = elem;
712 if (collate->start == NULL)
713 {
714 assert (collate->cursor == NULL);
715 collate->start = elem;
716 }
717
718 elem->section = collate->current_section;
719
720 if (collate->current_section->first == NULL)
721 collate->current_section->first = elem;
722 if (collate->current_section->last == collate->cursor)
723 collate->current_section->last = elem;
724
725 collate->cursor = elem;
726
727 elem->weights = (struct element_list_t *)
728 obstack_alloc (&collate->mempool, nrules * sizeof (struct element_list_t));
729 memset (elem->weights, '\0', nrules * sizeof (struct element_list_t));
730
731 weight_cnt = 0;
732
733 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
734 do
735 {
736 if (arg->tok == tok_eof || arg->tok == tok_eol)
737 break;
738
739 if (arg->tok == tok_ignore)
740 {
741 /* The weight for this level has to be ignored. We use the
742 null pointer to indicate this. */
743 elem->weights[weight_cnt].w = (struct element_t **)
744 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
745 elem->weights[weight_cnt].w[0] = NULL;
746 elem->weights[weight_cnt].cnt = 1;
747 }
748 else if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
749 {
750 char ucs4str[10];
751 struct element_t *val;
752 char *symstr;
753 size_t symlen;
754
755 if (arg->tok == tok_bsymbol)
756 {
757 symstr = arg->val.str.startmb;
758 symlen = arg->val.str.lenmb;
759 }
760 else
761 {
762 snprintf (ucs4str, sizeof (ucs4str), "U%08X", arg->val.ucs4);
763 symstr = ucs4str;
764 symlen = 9;
765 }
766
767 val = find_element (ldfile, collate, symstr, symlen);
768 if (val == NULL)
769 break;
770
771 elem->weights[weight_cnt].w = (struct element_t **)
772 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
773 elem->weights[weight_cnt].w[0] = val;
774 elem->weights[weight_cnt].cnt = 1;
775 }
776 else if (arg->tok == tok_string)
777 {
778 /* Split the string up in the individual characters and put
779 the element definitions in the list. */
780 const char *cp = arg->val.str.startmb;
781 int cnt = 0;
782 struct element_t *charelem;
783 struct element_t **weights = NULL;
784 int max = 0;
785
786 if (*cp == '\0')
787 {
788 lr_error (ldfile, _("%s: empty weight string not allowed"),
789 "LC_COLLATE");
790 lr_ignore_rest (ldfile, 0);
791 break;
792 }
793
794 do
795 {
796 if (*cp == '<')
797 {
798 /* Ahh, it's a bsymbol or an UCS4 value. If it's
799 the latter we have to unify the name. */
800 const char *startp = ++cp;
801 size_t len;
802
803 while (*cp != '>')
804 {
805 if (*cp == ldfile->escape_char)
806 ++cp;
807 if (*cp == '\0')
808 /* It's a syntax error. */
809 goto syntax;
810
811 ++cp;
812 }
813
814 if (cp - startp == 5 && startp[0] == 'U'
815 && isxdigit (startp[1]) && isxdigit (startp[2])
816 && isxdigit (startp[3]) && isxdigit (startp[4]))
817 {
818 unsigned int ucs4 = strtoul (startp + 1, NULL, 16);
819 char *newstr;
820
821 newstr = (char *) xmalloc (10);
822 snprintf (newstr, 10, "U%08X", ucs4);
823 startp = newstr;
824
825 len = 9;
826 }
827 else
828 len = cp - startp;
829
830 charelem = find_element (ldfile, collate, startp, len);
831 ++cp;
832 }
833 else
834 {
835 /* People really shouldn't use characters directly in
836 the string. Especially since it's not really clear
837 what this means. We interpret all characters in the
838 string as if that would be bsymbols. Otherwise we
839 would have to match back to bsymbols somehow and this
840 is normally not what people normally expect. */
841 charelem = find_element (ldfile, collate, cp++, 1);
842 }
843
844 if (charelem == NULL)
845 {
846 /* We ignore the rest of the line. */
847 lr_ignore_rest (ldfile, 0);
848 break;
849 }
850
851 /* Add the pointer. */
852 if (cnt >= max)
853 {
854 struct element_t **newp;
855 max += 10;
856 newp = (struct element_t **)
857 alloca (max * sizeof (struct element_t *));
858 memcpy (newp, weights, cnt * sizeof (struct element_t *));
859 weights = newp;
860 }
861 weights[cnt++] = charelem;
862 }
863 while (*cp != '\0');
864
865 /* Now store the information. */
866 elem->weights[weight_cnt].w = (struct element_t **)
867 obstack_alloc (&collate->mempool,
868 cnt * sizeof (struct element_t *));
869 memcpy (elem->weights[weight_cnt].w, weights,
870 cnt * sizeof (struct element_t *));
871 elem->weights[weight_cnt].cnt = cnt;
872
873 /* We don't need the string anymore. */
874 free (arg->val.str.startmb);
875 }
876 else if (ellipsis != tok_none
877 && (arg->tok == tok_ellipsis2
878 || arg->tok == tok_ellipsis3
879 || arg->tok == tok_ellipsis4))
880 {
881 /* It must be the same ellipsis as used in the initial column. */
882 if (arg->tok != ellipsis)
883 lr_error (ldfile, _("\
884%s: weights must use the same ellipsis symbol as the name"),
885 "LC_COLLATE");
886
887 /* The weight for this level will depend on the element
888 iterating over the range. Put a placeholder. */
889 elem->weights[weight_cnt].w = (struct element_t **)
890 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
891 elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
892 elem->weights[weight_cnt].cnt = 1;
893 }
894 else
895 {
896 syntax:
897 /* It's a syntax error. */
898 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
899 lr_ignore_rest (ldfile, 0);
900 break;
901 }
902
903 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
904 /* This better should be the end of the line or a semicolon. */
905 if (arg->tok == tok_semicolon)
906 /* OK, ignore this and read the next token. */
907 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
908 else if (arg->tok != tok_eof && arg->tok != tok_eol)
909 {
910 /* It's a syntax error. */
911 lr_error (ldfile, _("%s: syntax error"), "LC_COLLATE");
912 lr_ignore_rest (ldfile, 0);
913 break;
914 }
915 }
916 while (++weight_cnt < nrules);
917
918 if (weight_cnt < nrules)
919 {
920 /* This means the rest of the line uses the current element as
921 the weight. */
922 do
923 {
924 elem->weights[weight_cnt].w = (struct element_t **)
925 obstack_alloc (&collate->mempool, sizeof (struct element_t *));
926 if (ellipsis == tok_none)
927 elem->weights[weight_cnt].w[0] = elem;
928 else
929 elem->weights[weight_cnt].w[0] = ELEMENT_ELLIPSIS2;
930 elem->weights[weight_cnt].cnt = 1;
931 }
932 while (++weight_cnt < nrules);
933 }
934 else
935 {
936 if (arg->tok == tok_ignore || arg->tok == tok_bsymbol)
937 {
938 /* Too many rule values. */
939 lr_error (ldfile, _("%s: too many values"), "LC_COLLATE");
940 lr_ignore_rest (ldfile, 0);
941 }
942 else
943 lr_ignore_rest (ldfile, arg->tok != tok_eol && arg->tok != tok_eof);
944 }
945}
946
947
948static int
949insert_value (struct linereader *ldfile, const char *symstr, size_t symlen,
950 const struct charmap_t *charmap, struct repertoire_t *repertoire,
951 struct localedef_t *result)
952{
953 /* First find out what kind of symbol this is. */
954 struct charseq *seq;
955 uint32_t wc;
956 struct element_t *elem = NULL;
957 struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
958
959 /* Try to find the character in the charmap. */
960 seq = charmap_find_value (charmap, symstr, symlen);
961
962 /* Determine the wide character. */
963 if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
964 {
965 wc = repertoire_find_value (repertoire, symstr, symlen);
966 if (seq != NULL)
967 seq->ucs4 = wc;
968 }
969 else
970 wc = seq->ucs4;
971
972 if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
973 {
974 /* It's no character, so look through the collation elements and
975 symbol list. */
976 void *ptr = elem;
977 if (find_entry (&collate->elem_table, symstr, symlen, &ptr) != 0)
978 {
979 void *result;
980 struct symbol_t *sym = NULL;
981
982 /* It's also collation element. Therefore it's either a
983 collating symbol or it's a character which is not
984 supported by the character set. In the later case we
985 simply create a dummy entry. */
986 if (find_entry (&collate->sym_table, symstr, symlen, &result) == 0)
987 {
988 /* It's a collation symbol. */
989 sym = (struct symbol_t *) result;
990
991 elem = sym->order;
992 }
993
994 if (elem == NULL)
995 {
996 elem = new_element (collate, NULL, 0, NULL, symstr, symlen, 0);
997
998 if (sym != NULL)
999 sym->order = elem;
1000 else
1001 /* Enter a fake element in the sequence table. This
1002 won't cause anything in the output since there is
1003 no multibyte or wide character associated with
1004 it. */
1005 insert_entry (&collate->seq_table, symstr, symlen, elem);
1006 }
1007 }
1008 else
1009 /* Copy the result back. */
1010 elem = ptr;
1011 }
1012 else
1013 {
1014 /* Otherwise the symbols stands for a character. */
1015 void *ptr = elem;
1016 if (find_entry (&collate->seq_table, symstr, symlen, &ptr) != 0)
1017 {
1018 uint32_t wcs[2] = { wc, 0 };
1019
1020 /* We have to allocate an entry. */
1021 elem = new_element (collate,
1022 seq != NULL ? (char *) seq->bytes : NULL,
1023 seq != NULL ? seq->nbytes : 0,
1024 wc == ILLEGAL_CHAR_VALUE ? NULL : wcs,
1025 symstr, symlen, 1);
1026
1027 /* And add it to the table. */
1028 if (insert_entry (&collate->seq_table, symstr, symlen, elem) != 0)
1029 /* This cannot happen. */
1030 assert (! "Internal error");
1031 }
1032 else
1033 {
1034 /* Copy the result back. */
1035 elem = ptr;
1036
1037 /* Maybe the character was used before the definition. In this case
1038 we have to insert the byte sequences now. */
1039 if (elem->mbs == NULL && seq != NULL)
1040 {
1041 elem->mbs = obstack_copy0 (&collate->mempool,
1042 seq->bytes, seq->nbytes);
1043 elem->nmbs = seq->nbytes;
1044 }
1045
1046 if (elem->wcs == NULL && wc != ILLEGAL_CHAR_VALUE)
1047 {
1048 uint32_t wcs[2] = { wc, 0 };
1049
1050 elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
1051 elem->nwcs = 1;
1052 }
1053 }
1054 }
1055
1056 /* Test whether this element is not already in the list. */
1057 if (elem->next != NULL || elem == collate->cursor)
1058 {
1059 lr_error (ldfile, _("order for `%.*s' already defined at %s:%Zu"),
1060 (int) symlen, symstr, elem->file, elem->line);
1061 lr_ignore_rest (ldfile, 0);
1062 return 1;
1063 }
1064
1065 insert_weights (ldfile, elem, charmap, repertoire, result, tok_none);
1066
1067 return 0;
1068}
1069
1070
1071static void
1072handle_ellipsis (struct linereader *ldfile, const char *symstr, size_t symlen,
1073 enum token_t ellipsis, const struct charmap_t *charmap,
1074 struct repertoire_t *repertoire,
1075 struct localedef_t *result)
1076{
1077 struct element_t *startp;
1078 struct element_t *endp;
1079 struct locale_collate_t *collate = result->categories[LC_COLLATE].collate;
1080
1081 /* Unlink the entry added for the ellipsis. */
1082 unlink_element (collate);
1083 startp = collate->cursor;
1084
1085 /* Process and add the end-entry. */
1086 if (symstr != NULL
1087 && insert_value (ldfile, symstr, symlen, charmap, repertoire, result))
1088 /* Something went wrong with inserting the to-value. This means
1089 we cannot process the ellipsis. */
1090 return;
1091
1092 /* Reset the cursor. */
1093 collate->cursor = startp;
1094
1095 /* Now we have to handle many different situations:
1096 - we have to distinguish between the three different ellipsis forms
1097 - the is the ellipsis at the beginning, in the middle, or at the end.
1098 */
1099 endp = collate->cursor->next;
1100 assert (symstr == NULL || endp != NULL);
1101
1102 /* XXX The following is probably very wrong since also collating symbols
1103 can appear in ranges. But do we want/can refine the test for that? */
1104#if 0
1105 /* Both, the start and the end symbol, must stand for characters. */
1106 if ((startp != NULL && (startp->name == NULL || ! startp->is_character))
1107 || (endp != NULL && (endp->name == NULL|| ! endp->is_character)))
1108 {
1109 lr_error (ldfile, _("\
1110%s: the start and the end symbol of a range must stand for characters"),
1111 "LC_COLLATE");
1112 return;
1113 }
1114#endif
1115
1116 if (ellipsis == tok_ellipsis3)
1117 {
1118 /* One requirement we make here: the length of the byte
1119 sequences for the first and end character must be the same.
1120 This is mainly to prevent unwanted effects and this is often
1121 not what is wanted. */
1122 size_t len = (startp->mbs != NULL ? startp->nmbs
1123 : (endp->mbs != NULL ? endp->nmbs : 0));
1124 char mbcnt[len + 1];
1125 char mbend[len + 1];
1126
1127 /* Well, this should be caught somewhere else already. Just to
1128 make sure. */
1129 assert (startp == NULL || startp->wcs == NULL || startp->wcs[1] == 0);
1130 assert (endp == NULL || endp->wcs == NULL || endp->wcs[1] == 0);
1131
1132 if (startp != NULL && endp != NULL
1133 && startp->mbs != NULL && endp->mbs != NULL
1134 && startp->nmbs != endp->nmbs)
1135 {
1136 lr_error (ldfile, _("\
1137%s: byte sequences of first and last character must have the same length"),
1138 "LC_COLLATE");
1139 return;
1140 }
1141
1142 /* Determine whether we have to generate multibyte sequences. */
1143 if ((startp == NULL || startp->mbs != NULL)
1144 && (endp == NULL || endp->mbs != NULL))
1145 {
1146 int cnt;
1147 int ret;
1148
1149 /* Prepare the beginning byte sequence. This is either from the
1150 beginning byte sequence or it is all nulls if it was an
1151 initial ellipsis. */
1152 if (startp == NULL || startp->mbs == NULL)
1153 memset (mbcnt, '\0', len);
1154 else
1155 {
1156 memcpy (mbcnt, startp->mbs, len);
1157
1158 /* And increment it so that the value is the first one we will
1159 try to insert. */
1160 for (cnt = len - 1; cnt >= 0; --cnt)
1161 if (++mbcnt[cnt] != '\0')
1162 break;
1163 }
1164 mbcnt[len] = '\0';
1165
1166 /* And the end sequence. */
1167 if (endp == NULL || endp->mbs == NULL)
1168 memset (mbend, '\0', len);
1169 else
1170 memcpy (mbend, endp->mbs, len);
1171 mbend[len] = '\0';
1172
1173 /* Test whether we have a correct range. */
1174 ret = memcmp (mbcnt, mbend, len);
1175 if (ret >= 0)
1176 {
1177 if (ret > 0)
1178 lr_error (ldfile, _("%s: byte sequence of first character of \
1179range is not lower than that of the last character"), "LC_COLLATE");
1180 return;
1181 }
1182
1183 /* Generate the byte sequences data. */
1184 while (1)
1185 {
1186 struct charseq *seq;
1187
1188 /* Quite a bit of work ahead. We have to find the character
1189 definition for the byte sequence and then determine the
1190 wide character belonging to it. */
1191 seq = charmap_find_symbol (charmap, mbcnt, len);
1192 if (seq != NULL)
1193 {
1194 struct element_t *elem;
1195 size_t namelen;
1196
1197 /* I don't think this can ever happen. */
1198 assert (seq->name != NULL);
1199 namelen = strlen (seq->name);
1200
1201 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1202 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1203 namelen);
1204
1205 /* Now we are ready to insert the new value in the
1206 sequence. Find out whether the element is
1207 already known. */
1208 void *ptr;
1209 if (find_entry (&collate->seq_table, seq->name, namelen,
1210 &ptr) != 0)
1211 {
1212 uint32_t wcs[2] = { seq->ucs4, 0 };
1213
1214 /* We have to allocate an entry. */
1215 elem = new_element (collate, mbcnt, len,
1216 seq->ucs4 == ILLEGAL_CHAR_VALUE
1217 ? NULL : wcs, seq->name,
1218 namelen, 1);
1219
1220 /* And add it to the table. */
1221 if (insert_entry (&collate->seq_table, seq->name,
1222 namelen, elem) != 0)
1223 /* This cannot happen. */
1224 assert (! "Internal error");
1225 }
1226 else
1227 /* Copy the result. */
1228 elem = ptr;
1229
1230 /* Test whether this element is not already in the list. */
1231 if (elem->next != NULL || (collate->cursor != NULL
1232 && elem->next == collate->cursor))
1233 {
1234 lr_error (ldfile, _("\
1235order for `%.*s' already defined at %s:%Zu"),
1236 (int) namelen, seq->name,
1237 elem->file, elem->line);
1238 goto increment;
1239 }
1240
1241 /* Enqueue the new element. */
1242 elem->last = collate->cursor;
1243 if (collate->cursor == NULL)
1244 elem->next = NULL;
1245 else
1246 {
1247 elem->next = collate->cursor->next;
1248 elem->last->next = elem;
1249 if (elem->next != NULL)
1250 elem->next->last = elem;
1251 }
1252 if (collate->start == NULL)
1253 {
1254 assert (collate->cursor == NULL);
1255 collate->start = elem;
1256 }
1257 collate->cursor = elem;
1258
1259 /* Add the weight value. We take them from the
1260 `ellipsis_weights' member of `collate'. */
1261 elem->weights = (struct element_list_t *)
1262 obstack_alloc (&collate->mempool,
1263 nrules * sizeof (struct element_list_t));
1264 for (cnt = 0; cnt < nrules; ++cnt)
1265 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1266 && (collate->ellipsis_weight.weights[cnt].w[0]
1267 == ELEMENT_ELLIPSIS2))
1268 {
1269 elem->weights[cnt].w = (struct element_t **)
1270 obstack_alloc (&collate->mempool,
1271 sizeof (struct element_t *));
1272 elem->weights[cnt].w[0] = elem;
1273 elem->weights[cnt].cnt = 1;
1274 }
1275 else
1276 {
1277 /* Simply use the weight from `ellipsis_weight'. */
1278 elem->weights[cnt].w =
1279 collate->ellipsis_weight.weights[cnt].w;
1280 elem->weights[cnt].cnt =
1281 collate->ellipsis_weight.weights[cnt].cnt;
1282 }
1283 }
1284
1285 /* Increment for the next round. */
1286 increment:
1287 for (cnt = len - 1; cnt >= 0; --cnt)
1288 if (++mbcnt[cnt] != '\0')
1289 break;
1290
1291 /* Find out whether this was all. */
1292 if (cnt < 0 || memcmp (mbcnt, mbend, len) >= 0)
1293 /* Yep, that's all. */
1294 break;
1295 }
1296 }
1297 }
1298 else
1299 {
1300 /* For symbolic range we naturally must have a beginning and an
1301 end specified by the user. */
1302 if (startp == NULL)
1303 lr_error (ldfile, _("\
1304%s: symbolic range ellipsis must not directly follow `order_start'"),
1305 "LC_COLLATE");
1306 else if (endp == NULL)
1307 lr_error (ldfile, _("\
1308%s: symbolic range ellipsis must not be directly followed by `order_end'"),
1309 "LC_COLLATE");
1310 else
1311 {
1312 /* Determine the range. To do so we have to determine the
1313 common prefix of the both names and then the numeric
1314 values of both ends. */
1315 size_t lenfrom = strlen (startp->name);
1316 size_t lento = strlen (endp->name);
1317 char buf[lento + 1];
1318 int preflen = 0;
1319 long int from;
1320 long int to;
1321 char *cp;
1322 int base = ellipsis == tok_ellipsis2 ? 16 : 10;
1323
1324 if (lenfrom != lento)
1325 {
1326 invalid_range:
1327 lr_error (ldfile, _("\
1328`%s' and `%.*s' are not valid names for symbolic range"),
1329 startp->name, (int) lento, endp->name);
1330 return;
1331 }
1332
1333 while (startp->name[preflen] == endp->name[preflen])
1334 if (startp->name[preflen] == '\0')
1335 /* Nothing to be done. The start and end point are identical
1336 and while inserting the end point we have already given
1337 the user an error message. */
1338 return;
1339 else
1340 ++preflen;
1341
1342 errno = 0;
1343 from = strtol (startp->name + preflen, &cp, base);
1344 if ((from == UINT_MAX && errno == ERANGE) || *cp != '\0')
1345 goto invalid_range;
1346
1347 errno = 0;
1348 to = strtol (endp->name + preflen, &cp, base);
1349 if ((to == UINT_MAX && errno == ERANGE) || *cp != '\0')
1350 goto invalid_range;
1351
1352 /* Copy the prefix. */
1353 memcpy (buf, startp->name, preflen);
1354
1355 /* Loop over all values. */
1356 for (++from; from < to; ++from)
1357 {
1358 struct element_t *elem = NULL;
1359 struct charseq *seq;
1360 uint32_t wc;
1361 int cnt;
1362
1363 /* Generate the name. */
1364 sprintf (buf + preflen, base == 10 ? "%0*ld" : "%0*lX",
1365 (int) (lenfrom - preflen), from);
1366
1367 /* Look whether this name is already defined. */
1368 void *ptr;
1369 if (find_entry (&collate->seq_table, buf, symlen, &ptr) == 0)
1370 {
1371 /* Copy back the result. */
1372 elem = ptr;
1373
1374 if (elem->next != NULL || (collate->cursor != NULL
1375 && elem->next == collate->cursor))
1376 {
1377 lr_error (ldfile, _("\
1378%s: order for `%.*s' already defined at %s:%Zu"),
1379 "LC_COLLATE", (int) lenfrom, buf,
1380 elem->file, elem->line);
1381 continue;
1382 }
1383
1384 if (elem->name == NULL)
1385 {
1386 lr_error (ldfile, _("%s: `%s' must be a character"),
1387 "LC_COLLATE", buf);
1388 continue;
1389 }
1390 }
1391
1392 if (elem == NULL || (elem->mbs == NULL && elem->wcs == NULL))
1393 {
1394 /* Search for a character of this name. */
1395 seq = charmap_find_value (charmap, buf, lenfrom);
1396 if (seq == NULL || seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1397 {
1398 wc = repertoire_find_value (repertoire, buf, lenfrom);
1399
1400 if (seq != NULL)
1401 seq->ucs4 = wc;
1402 }
1403 else
1404 wc = seq->ucs4;
1405
1406 if (wc == ILLEGAL_CHAR_VALUE && seq == NULL)
1407 /* We don't know anything about a character with this
1408 name. XXX Should we warn? */
1409 continue;
1410
1411 if (elem == NULL)
1412 {
1413 uint32_t wcs[2] = { wc, 0 };
1414
1415 /* We have to allocate an entry. */
1416 elem = new_element (collate,
1417 seq != NULL
1418 ? (char *) seq->bytes : NULL,
1419 seq != NULL ? seq->nbytes : 0,
1420 wc == ILLEGAL_CHAR_VALUE
1421 ? NULL : wcs, buf, lenfrom, 1);
1422 }
1423 else
1424 {
1425 /* Update the element. */
1426 if (seq != NULL)
1427 {
1428 elem->mbs = obstack_copy0 (&collate->mempool,
1429 seq->bytes, seq->nbytes);
1430 elem->nmbs = seq->nbytes;
1431 }
1432
1433 if (wc != ILLEGAL_CHAR_VALUE)
1434 {
1435 uint32_t zero = 0;
1436
1437 obstack_grow (&collate->mempool,
1438 &wc, sizeof (uint32_t));
1439 obstack_grow (&collate->mempool,
1440 &zero, sizeof (uint32_t));
1441 elem->wcs = obstack_finish (&collate->mempool);
1442 elem->nwcs = 1;
1443 }
1444 }
1445
1446 elem->file = ldfile->fname;
1447 elem->line = ldfile->lineno;
1448 elem->section = collate->current_section;
1449 }
1450
1451 /* Enqueue the new element. */
1452 elem->last = collate->cursor;
1453 elem->next = collate->cursor->next;
1454 elem->last->next = elem;
1455 if (elem->next != NULL)
1456 elem->next->last = elem;
1457 collate->cursor = elem;
1458
1459 /* Now add the weights. They come from the `ellipsis_weights'
1460 member of `collate'. */
1461 elem->weights = (struct element_list_t *)
1462 obstack_alloc (&collate->mempool,
1463 nrules * sizeof (struct element_list_t));
1464 for (cnt = 0; cnt < nrules; ++cnt)
1465 if (collate->ellipsis_weight.weights[cnt].cnt == 1
1466 && (collate->ellipsis_weight.weights[cnt].w[0]
1467 == ELEMENT_ELLIPSIS2))
1468 {
1469 elem->weights[cnt].w = (struct element_t **)
1470 obstack_alloc (&collate->mempool,
1471 sizeof (struct element_t *));
1472 elem->weights[cnt].w[0] = elem;
1473 elem->weights[cnt].cnt = 1;
1474 }
1475 else
1476 {
1477 /* Simly use the weight from `ellipsis_weight'. */
1478 elem->weights[cnt].w =
1479 collate->ellipsis_weight.weights[cnt].w;
1480 elem->weights[cnt].cnt =
1481 collate->ellipsis_weight.weights[cnt].cnt;
1482 }
1483 }
1484 }
1485 }
1486}
1487
1488
1489static void
1490collate_startup (struct linereader *ldfile, struct localedef_t *locale,
1491 struct localedef_t *copy_locale, int ignore_content)
1492{
1493 if (!ignore_content && locale->categories[LC_COLLATE].collate == NULL)
1494 {
1495 struct locale_collate_t *collate;
1496
1497 if (copy_locale == NULL)
1498 {
1499 collate = locale->categories[LC_COLLATE].collate =
1500 (struct locale_collate_t *)
1501 xcalloc (1, sizeof (struct locale_collate_t));
1502
1503 /* Init the various data structures. */
1504 init_hash (&collate->elem_table, 100);
1505 init_hash (&collate->sym_table, 100);
1506 init_hash (&collate->seq_table, 500);
1507 obstack_init (&collate->mempool);
1508
1509 collate->col_weight_max = -1;
1510 }
1511 else
1512 /* Reuse the copy_locale's data structures. */
1513 collate = locale->categories[LC_COLLATE].collate =
1514 copy_locale->categories[LC_COLLATE].collate;
1515 }
1516
1517 ldfile->translate_strings = 0;
1518 ldfile->return_widestr = 0;
1519}
1520
1521
1522void
1523collate_finish (struct localedef_t *locale, const struct charmap_t *charmap)
1524{
1525 /* Now is the time when we can assign the individual collation
1526 values for all the symbols. We have possibly different values
1527 for the wide- and the multibyte-character symbols. This is done
1528 since it might make a difference in the encoding if there is in
1529 some cases no multibyte-character but there are wide-characters.
1530 (The other way around it is not important since theencoded
1531 collation value in the wide-character case is 32 bits wide and
1532 therefore requires no encoding).
1533
1534 The lowest collation value assigned is 2. Zero is reserved for
1535 the NUL byte terminating the strings in the `strxfrm'/`wcsxfrm'
1536 functions and 1 is used to separate the individual passes for the
1537 different rules.
1538
1539 We also have to construct is list with all the bytes/words which
1540 can come first in a sequence, followed by all the elements which
1541 also start with this byte/word. The order is reverse which has
1542 among others the important effect that longer strings are located
1543 first in the list. This is required for the output data since
1544 the algorithm used in `strcoll' etc depends on this.
1545
1546 The multibyte case is easy. We simply sort into an array with
1547 256 elements. */
1548 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
1549 int mbact[nrules];
1550 int wcact;
1551 int mbseqact;
1552 int wcseqact;
1553 struct element_t *runp;
1554 int i;
1555 int need_undefined = 0;
1556 struct section_list *sect;
1557 int ruleidx;
1558 int nr_wide_elems = 0;
1559
1560 if (collate == NULL)
1561 {
1562 /* No data, no check. Issue a warning. */
1563 record_warning (_("No definition for %s category found"),
1564 "LC_COLLATE");
1565 return;
1566 }
1567
1568 /* If this assertion is hit change the type in `element_t'. */
1569 assert (nrules <= sizeof (runp->used_in_level) * 8);
1570
1571 /* Make sure that the `position' rule is used either in all sections
1572 or in none. */
1573 for (i = 0; i < nrules; ++i)
1574 for (sect = collate->sections; sect != NULL; sect = sect->next)
1575 if (sect != collate->current_section
1576 && sect->rules != NULL
1577 && ((sect->rules[i] & sort_position)
1578 != (collate->current_section->rules[i] & sort_position)))
1579 {
1580 record_error (0, 0, _("\
1581%s: `position' must be used for a specific level in all sections or none"),
1582 "LC_COLLATE");
1583 break;
1584 }
1585
1586 /* Find out which elements are used at which level. At the same
1587 time we find out whether we have any undefined symbols. */
1588 runp = collate->start;
1589 while (runp != NULL)
1590 {
1591 if (runp->mbs != NULL)
1592 {
1593 for (i = 0; i < nrules; ++i)
1594 {
1595 int j;
1596
1597 for (j = 0; j < runp->weights[i].cnt; ++j)
1598 /* A NULL pointer as the weight means IGNORE. */
1599 if (runp->weights[i].w[j] != NULL)
1600 {
1601 if (runp->weights[i].w[j]->weights == NULL)
1602 {
1603 record_error_at_line (0, 0, runp->file, runp->line,
1604 _("symbol `%s' not defined"),
1605 runp->weights[i].w[j]->name);
1606
1607 need_undefined = 1;
1608 runp->weights[i].w[j] = &collate->undefined;
1609 }
1610 else
1611 /* Set the bit for the level. */
1612 runp->weights[i].w[j]->used_in_level |= 1 << i;
1613 }
1614 }
1615 }
1616
1617 /* Up to the next entry. */
1618 runp = runp->next;
1619 }
1620
1621 /* Walk through the list of defined sequences and assign weights. Also
1622 create the data structure which will allow generating the single byte
1623 character based tables.
1624
1625 Since at each time only the weights for each of the rules are
1626 only compared to other weights for this rule it is possible to
1627 assign more compact weight values than simply counting all
1628 weights in sequence. We can assign weights from 3, one for each
1629 rule individually and only for those elements, which are actually
1630 used for this rule.
1631
1632 Why is this important? It is not for the wide char table. But
1633 it is for the singlebyte output since here larger numbers have to
1634 be encoded to make it possible to emit the value as a byte
1635 string. */
1636 for (i = 0; i < nrules; ++i)
1637 mbact[i] = 2;
1638 wcact = 2;
1639 mbseqact = 0;
1640 wcseqact = 0;
1641 runp = collate->start;
1642 while (runp != NULL)
1643 {
1644 /* Determine the order. */
1645 if (runp->used_in_level != 0)
1646 {
1647 runp->mborder = (int *) obstack_alloc (&collate->mempool,
1648 nrules * sizeof (int));
1649
1650 for (i = 0; i < nrules; ++i)
1651 if ((runp->used_in_level & (1 << i)) != 0)
1652 runp->mborder[i] = mbact[i]++;
1653 else
1654 runp->mborder[i] = 0;
1655 }
1656
1657 if (runp->mbs != NULL)
1658 {
1659 struct element_t **eptr;
1660 struct element_t *lastp = NULL;
1661
1662 /* Find the point where to insert in the list. */
1663 eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]];
1664 while (*eptr != NULL)
1665 {
1666 if ((*eptr)->nmbs < runp->nmbs)
1667 break;
1668
1669 if ((*eptr)->nmbs == runp->nmbs)
1670 {
1671 int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
1672
1673 if (c == 0)
1674 {
1675 /* This should not happen. It means that we have
1676 to symbols with the same byte sequence. It is
1677 of course an error. */
1678 record_error_at_line (0, 0, (*eptr)->file,
1679 (*eptr)->line,
1680 _("\
1681symbol `%s' has the same encoding as"), (*eptr)->name);
1682
1683 record_error_at_line (0, 0, runp->file, runp->line,
1684 _("symbol `%s'"), runp->name);
1685 goto dont_insert;
1686 }
1687 else if (c < 0)
1688 /* Insert it here. */
1689 break;
1690 }
1691
1692 /* To the next entry. */
1693 lastp = *eptr;
1694 eptr = &(*eptr)->mbnext;
1695 }
1696
1697 /* Set the pointers. */
1698 runp->mbnext = *eptr;
1699 runp->mblast = lastp;
1700 if (*eptr != NULL)
1701 (*eptr)->mblast = runp;
1702 *eptr = runp;
1703 dont_insert:
1704 ;
1705 }
1706
1707 if (runp->used_in_level)
1708 {
1709 runp->wcorder = wcact++;
1710
1711 /* We take the opportunity to count the elements which have
1712 wide characters. */
1713 ++nr_wide_elems;
1714 }
1715
1716 if (runp->is_character)
1717 {
1718 if (runp->nmbs == 1)
1719 collate->mbseqorder[((unsigned char *) runp->mbs)[0]] = mbseqact++;
1720
1721 runp->wcseqorder = wcseqact++;
1722 }
1723 else if (runp->mbs != NULL && runp->weights != NULL)
1724 /* This is for collation elements. */
1725 runp->wcseqorder = wcseqact++;
1726
1727 /* Up to the next entry. */
1728 runp = runp->next;
1729 }
1730
1731 /* Find out whether any of the `mbheads' entries is unset. In this
1732 case we use the UNDEFINED entry. */
1733 for (i = 1; i < 256; ++i)
1734 if (collate->mbheads[i] == NULL)
1735 {
1736 need_undefined = 1;
1737 collate->mbheads[i] = &collate->undefined;
1738 }
1739
1740 /* Now to the wide character case. */
1741 collate->wcheads.p = 6;
1742 collate->wcheads.q = 10;
1743 wchead_table_init (&collate->wcheads);
1744
1745 collate->wcseqorder.p = 6;
1746 collate->wcseqorder.q = 10;
1747 collseq_table_init (&collate->wcseqorder);
1748
1749 /* Start adding. */
1750 runp = collate->start;
1751 while (runp != NULL)
1752 {
1753 if (runp->wcs != NULL)
1754 {
1755 struct element_t *e;
1756 struct element_t **eptr;
1757 struct element_t *lastp;
1758
1759 /* Insert the collation sequence value. */
1760 if (runp->is_character)
1761 collseq_table_add (&collate->wcseqorder, runp->wcs[0],
1762 runp->wcseqorder);
1763
1764 /* Find the point where to insert in the list. */
1765 e = wchead_table_get (&collate->wcheads, runp->wcs[0]);
1766 eptr = &e;
1767 lastp = NULL;
1768 while (*eptr != NULL)
1769 {
1770 if ((*eptr)->nwcs < runp->nwcs)
1771 break;
1772
1773 if ((*eptr)->nwcs == runp->nwcs)
1774 {
1775 int c = wmemcmp ((wchar_t *) (*eptr)->wcs,
1776 (wchar_t *) runp->wcs, runp->nwcs);
1777
1778 if (c == 0)
1779 {
1780 /* This should not happen. It means that we have
1781 two symbols with the same byte sequence. It is
1782 of course an error. */
1783 record_error_at_line (0, 0, (*eptr)->file,
1784 (*eptr)->line,
1785 _("\
1786symbol `%s' has the same encoding as"), (*eptr)->name);
1787
1788 record_error_at_line (0, 0, runp->file, runp->line,
1789 _("symbol `%s'"), runp->name);
1790 goto dont_insertwc;
1791 }
1792 else if (c < 0)
1793 /* Insert it here. */
1794 break;
1795 }
1796
1797 /* To the next entry. */
1798 lastp = *eptr;
1799 eptr = &(*eptr)->wcnext;
1800 }
1801
1802 /* Set the pointers. */
1803 runp->wcnext = *eptr;
1804 runp->wclast = lastp;
1805 if (*eptr != NULL)
1806 (*eptr)->wclast = runp;
1807 *eptr = runp;
1808 if (eptr == &e)
1809 wchead_table_add (&collate->wcheads, runp->wcs[0], e);
1810 dont_insertwc:
1811 ;
1812 }
1813
1814 /* Up to the next entry. */
1815 runp = runp->next;
1816 }
1817
1818 /* Now determine whether the UNDEFINED entry is needed and if yes,
1819 whether it was defined. */
1820 collate->undefined.used_in_level = need_undefined ? ~0ul : 0;
1821 if (collate->undefined.file == NULL)
1822 {
1823 if (need_undefined)
1824 {
1825 /* This seems not to be enforced by recent standards. Don't
1826 emit an error, simply append UNDEFINED at the end. */
1827 collate->undefined.mborder =
1828 (int *) obstack_alloc (&collate->mempool, nrules * sizeof (int));
1829
1830 for (i = 0; i < nrules; ++i)
1831 collate->undefined.mborder[i] = mbact[i]++;
1832 }
1833
1834 /* In any case we will need the definition for the wide character
1835 case. But we will not complain that it is missing since the
1836 specification strangely enough does not seem to account for
1837 this. */
1838 collate->undefined.wcorder = wcact++;
1839 }
1840
1841 /* Finally, try to unify the rules for the sections. Whenever the rules
1842 for a section are the same as those for another section give the
1843 ruleset the same index. Since there are never many section we can
1844 use an O(n^2) algorithm here. */
1845 sect = collate->sections;
1846 while (sect != NULL && sect->rules == NULL)
1847 sect = sect->next;
1848
1849 /* Bail out if we have no sections because of earlier errors. */
1850 if (sect == NULL)
1851 {
1852 record_error (EXIT_FAILURE, 0, _("too many errors; giving up"));
1853 return;
1854 }
1855
1856 ruleidx = 0;
1857 do
1858 {
1859 struct section_list *osect = collate->sections;
1860
1861 while (osect != sect)
1862 if (osect->rules != NULL
1863 && memcmp (osect->rules, sect->rules,
1864 nrules * sizeof (osect->rules[0])) == 0)
1865 break;
1866 else
1867 osect = osect->next;
1868
1869 if (osect == sect)
1870 sect->ruleidx = ruleidx++;
1871 else
1872 sect->ruleidx = osect->ruleidx;
1873
1874 /* Next section. */
1875 do
1876 sect = sect->next;
1877 while (sect != NULL && sect->rules == NULL);
1878 }
1879 while (sect != NULL);
1880 /* We are currently not prepared for more than 128 rulesets. But this
1881 should never really be a problem. */
1882 assert (ruleidx <= 128);
1883}
1884
1885
1886static int32_t
1887output_weight (struct obstack *pool, struct locale_collate_t *collate,
1888 struct element_t *elem)
1889{
1890 size_t cnt;
1891 int32_t retval;
1892
1893 /* Optimize the use of UNDEFINED. */
1894 if (elem == &collate->undefined)
1895 /* The weights are already inserted. */
1896 return 0;
1897
1898 /* This byte can start exactly one collation element and this is
1899 a single byte. We can directly give the index to the weights. */
1900 retval = obstack_object_size (pool);
1901
1902 /* Construct the weight. */
1903 for (cnt = 0; cnt < nrules; ++cnt)
1904 {
1905 char buf[elem->weights[cnt].cnt * 7];
1906 int len = 0;
1907 int i;
1908
1909 for (i = 0; i < elem->weights[cnt].cnt; ++i)
1910 /* Encode the weight value. We do nothing for IGNORE entries. */
1911 if (elem->weights[cnt].w[i] != NULL)
1912 len += utf8_encode (&buf[len],
1913 elem->weights[cnt].w[i]->mborder[cnt]);
1914
1915 /* And add the buffer content. */
1916 obstack_1grow (pool, len);
1917 obstack_grow (pool, buf, len);
1918 }
1919
1920 return retval | ((elem->section->ruleidx & 0x7f) << 24);
1921}
1922
1923
1924static int32_t
1925output_weightwc (struct obstack *pool, struct locale_collate_t *collate,
1926 struct element_t *elem)
1927{
1928 size_t cnt;
1929 int32_t retval;
1930
1931 /* Optimize the use of UNDEFINED. */
1932 if (elem == &collate->undefined)
1933 /* The weights are already inserted. */
1934 return 0;
1935
1936 /* This byte can start exactly one collation element and this is
1937 a single byte. We can directly give the index to the weights. */
1938 retval = obstack_object_size (pool) / sizeof (int32_t);
1939
1940 /* Construct the weight. */
1941 for (cnt = 0; cnt < nrules; ++cnt)
1942 {
1943 int32_t buf[elem->weights[cnt].cnt];
1944 int i;
1945 int32_t j;
1946
1947 for (i = 0, j = 0; i < elem->weights[cnt].cnt; ++i)
1948 if (elem->weights[cnt].w[i] != NULL)
1949 buf[j++] = elem->weights[cnt].w[i]->wcorder;
1950
1951 /* And add the buffer content. */
1952 obstack_int32_grow (pool, j);
1953
1954 obstack_grow (pool, buf, j * sizeof (int32_t));
1955 maybe_swap_uint32_obstack (pool, j);
1956 }
1957
1958 return retval | ((elem->section->ruleidx & 0x7f) << 24);
1959}
1960
/* State shared with `add_to_tablewc', which takes only a character and
   an element as arguments and reads everything else from here.
   If localedef is ever threaded, this would need to be a __thread var.  */
static struct
{
  /* Pool handed to `output_weightwc' to receive the weights.  */
  struct obstack *weightpool;
  /* Pool receiving the per-sequence records (index, length and the
     remaining characters).  */
  struct obstack *extrapool;
  /* Pool receiving the weight indices of consecutive series.  */
  struct obstack *indpool;
  /* Collation data handed to `output_weightwc'.  */
  struct locale_collate_t *collate;
  /* Table that gets one entry per character via `collidx_table_add'.  */
  struct collidx_table *tablewc;
} atwc;
1970
1971static void add_to_tablewc (uint32_t ch, struct element_t *runp);
1972
1973static void
1974add_to_tablewc (uint32_t ch, struct element_t *runp)
1975{
1976 if (runp->wcnext == NULL && runp->nwcs == 1)
1977 {
1978 int32_t weigthidx = output_weightwc (atwc.weightpool, atwc.collate,
1979 runp);
1980 collidx_table_add (atwc.tablewc, ch, weigthidx);
1981 }
1982 else
1983 {
1984 /* As for the singlebyte table, we recognize sequences and
1985 compress them. */
1986
1987 collidx_table_add (atwc.tablewc, ch,
1988 -(obstack_object_size (atwc.extrapool)
1989 / sizeof (uint32_t)));
1990
1991 do
1992 {
1993 /* Store the current index in the weight table. We know that
1994 the current position in the `extrapool' is aligned on a
1995 32-bit address. */
1996 int32_t weightidx;
1997 int added;
1998
1999 /* Find out wether this is a single entry or we have more than
2000 one consecutive entry. */
2001 if (runp->wcnext != NULL
2002 && runp->nwcs == runp->wcnext->nwcs
2003 && wmemcmp ((wchar_t *) runp->wcs,
2004 (wchar_t *)runp->wcnext->wcs,
2005 runp->nwcs - 1) == 0
2006 && (runp->wcs[runp->nwcs - 1]
2007 == runp->wcnext->wcs[runp->nwcs - 1] + 1))
2008 {
2009 int i;
2010 struct element_t *series_startp = runp;
2011 struct element_t *curp;
2012
2013 /* Now add first the initial byte sequence. */
2014 added = (1 + 1 + 2 * (runp->nwcs - 1)) * sizeof (int32_t);
2015 if (sizeof (int32_t) == sizeof (int))
2016 obstack_make_room (atwc.extrapool, added);
2017
2018 /* More than one consecutive entry. We mark this by having
2019 a negative index into the indirect table. */
2020 obstack_int32_grow_fast (atwc.extrapool,
2021 -(obstack_object_size (atwc.indpool)
2022 / sizeof (int32_t)));
2023 obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
2024
2025 do
2026 runp = runp->wcnext;
2027 while (runp->wcnext != NULL
2028 && runp->nwcs == runp->wcnext->nwcs
2029 && wmemcmp ((wchar_t *) runp->wcs,
2030 (wchar_t *)runp->wcnext->wcs,
2031 runp->nwcs - 1) == 0
2032 && (runp->wcs[runp->nwcs - 1]
2033 == runp->wcnext->wcs[runp->nwcs - 1] + 1));
2034
2035 /* Now walk backward from here to the beginning. */
2036 curp = runp;
2037
2038 for (i = 1; i < runp->nwcs; ++i)
2039 obstack_int32_grow_fast (atwc.extrapool, curp->wcs[i]);
2040
2041 /* Now find the end of the consecutive sequence and
2042 add all the indeces in the indirect pool. */
2043 do
2044 {
2045 weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2046 curp);
2047 obstack_int32_grow (atwc.indpool, weightidx);
2048
2049 curp = curp->wclast;
2050 }
2051 while (curp != series_startp);
2052
2053 /* Add the final weight. */
2054 weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2055 curp);
2056 obstack_int32_grow (atwc.indpool, weightidx);
2057
2058 /* And add the end byte sequence. Without length this
2059 time. */
2060 for (i = 1; i < curp->nwcs; ++i)
2061 obstack_int32_grow (atwc.extrapool, curp->wcs[i]);
2062 }
2063 else
2064 {
2065 /* A single entry. Simply add the index and the length and
2066 string (except for the first character which is already
2067 tested for). */
2068 int i;
2069
2070 /* Output the weight info. */
2071 weightidx = output_weightwc (atwc.weightpool, atwc.collate,
2072 runp);
2073
2074 assert (runp->nwcs > 0);
2075 added = (1 + 1 + runp->nwcs - 1) * sizeof (int32_t);
2076 if (sizeof (int) == sizeof (int32_t))
2077 obstack_make_room (atwc.extrapool, added);
2078
2079 obstack_int32_grow_fast (atwc.extrapool, weightidx);
2080 obstack_int32_grow_fast (atwc.extrapool, runp->nwcs - 1);
2081 for (i = 1; i < runp->nwcs; ++i)
2082 obstack_int32_grow_fast (atwc.extrapool, runp->wcs[i]);
2083 }
2084
2085 /* Next entry. */
2086 runp = runp->wcnext;
2087 }
2088 while (runp != NULL);
2089 }
2090}
2091
2092void
2093collate_output (struct localedef_t *locale, const struct charmap_t *charmap,
2094 const char *output_path)
2095{
2096 struct locale_collate_t *collate = locale->categories[LC_COLLATE].collate;
2097 const size_t nelems = _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE);
2098 struct locale_file file;
2099 size_t ch;
2100 int32_t tablemb[256];
2101 struct obstack weightpool;
2102 struct obstack extrapool;
2103 struct obstack indirectpool;
2104 struct section_list *sect;
2105 struct collidx_table tablewc;
2106 uint32_t elem_size;
2107 uint32_t *elem_table;
2108 int i;
2109 struct element_t *runp;
2110
2111 init_locale_data (&file, nelems);
2112 add_locale_uint32 (&file, nrules);
2113
2114 /* If we have no LC_COLLATE data emit only the number of rules as zero. */
2115 if (collate == NULL)
2116 {
2117 size_t idx;
2118 for (idx = 1; idx < nelems; idx++)
2119 {
2120 /* The words have to be handled specially. */
2121 if (idx == _NL_ITEM_INDEX (_NL_COLLATE_SYMB_HASH_SIZEMB))
2122 add_locale_uint32 (&file, 0);
2123 else
2124 add_locale_empty (&file);
2125 }
2126 write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
2127 return;
2128 }
2129
2130 obstack_init (&weightpool);
2131 obstack_init (&extrapool);
2132 obstack_init (&indirectpool);
2133
2134 /* Since we are using the sign of an integer to mark indirection the
2135 offsets in the arrays we are indirectly referring to must not be
2136 zero since -0 == 0. Therefore we add a bit of dummy content. */
2137 obstack_int32_grow (&extrapool, 0);
2138 obstack_int32_grow (&indirectpool, 0);
2139
2140 /* Prepare the ruleset table. */
2141 for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
2142 if (sect->rules != NULL && sect->ruleidx == i)
2143 {
2144 int j;
2145
2146 obstack_make_room (&weightpool, nrules);
2147
2148 for (j = 0; j < nrules; ++j)
2149 obstack_1grow_fast (&weightpool, sect->rules[j]);
2150 ++i;
2151 }
2152 /* And align the output. */
2153 i = (nrules * i) % LOCFILE_ALIGN;
2154 if (i > 0)
2155 do
2156 obstack_1grow (&weightpool, '\0');
2157 while (++i < LOCFILE_ALIGN);
2158
2159 add_locale_raw_obstack (&file, &weightpool);
2160
2161 /* Generate the 8-bit table. Walk through the lists of sequences
2162 starting with the same byte and add them one after the other to
2163 the table. In case we have more than one sequence starting with
2164 the same byte we have to use extra indirection.
2165
2166 First add a record for the NUL byte. This entry will never be used
2167 so it does not matter. */
2168 tablemb[0] = 0;
2169
2170 /* Now insert the `UNDEFINED' value if it is used. Since this value
2171 will probably be used more than once it is good to store the
2172 weights only once. */
2173 if (collate->undefined.used_in_level != 0)
2174 output_weight (&weightpool, collate, &collate->undefined);
2175
2176 for (ch = 1; ch < 256; ++ch)
2177 if (collate->mbheads[ch]->mbnext == NULL
2178 && collate->mbheads[ch]->nmbs <= 1)
2179 {
2180 tablemb[ch] = output_weight (&weightpool, collate,
2181 collate->mbheads[ch]);
2182 }
2183 else
2184 {
2185 /* The entries in the list are sorted by length and then
2186 alphabetically. This is the order in which we will add the
2187 elements to the collation table. This allows simply walking
2188 the table in sequence and stopping at the first matching
2189 entry. Since the longer sequences are coming first in the
2190 list they have the possibility to match first, just as it
2191 has to be. In the worst case we are walking to the end of
2192 the list where we put, if no singlebyte sequence is defined
2193 in the locale definition, the weights for UNDEFINED.
2194
2195 To reduce the length of the search list we compress them a bit.
2196 This happens by collecting sequences of consecutive byte
2197 sequences in one entry (having and begin and end byte sequence)
2198 and add only one index into the weight table. We can find the
2199 consecutive entries since they are also consecutive in the list. */
2200 struct element_t *runp = collate->mbheads[ch];
2201 struct element_t *lastp;
2202
2203 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2204
2205 tablemb[ch] = -obstack_object_size (&extrapool);
2206
2207 do
2208 {
2209 /* Store the current index in the weight table. We know that
2210 the current position in the `extrapool' is aligned on a
2211 32-bit address. */
2212 int32_t weightidx;
2213 int added;
2214
2215 /* Find out wether this is a single entry or we have more than
2216 one consecutive entry. */
2217 if (runp->mbnext != NULL
2218 && runp->nmbs == runp->mbnext->nmbs
2219 && memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0
2220 && (runp->mbs[runp->nmbs - 1]
2221 == runp->mbnext->mbs[runp->nmbs - 1] + 1))
2222 {
2223 int i;
2224 struct element_t *series_startp = runp;
2225 struct element_t *curp;
2226
2227 /* Compute how much space we will need. */
2228 added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
2229 + 2 * (runp->nmbs - 1));
2230 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2231 obstack_make_room (&extrapool, added);
2232
2233 /* More than one consecutive entry. We mark this by having
2234 a negative index into the indirect table. */
2235 obstack_int32_grow_fast (&extrapool,
2236 -(obstack_object_size (&indirectpool)
2237 / sizeof (int32_t)));
2238
2239 /* Now search first the end of the series. */
2240 do
2241 runp = runp->mbnext;
2242 while (runp->mbnext != NULL
2243 && runp->nmbs == runp->mbnext->nmbs
2244 && memcmp (runp->mbs, runp->mbnext->mbs,
2245 runp->nmbs - 1) == 0
2246 && (runp->mbs[runp->nmbs - 1]
2247 == runp->mbnext->mbs[runp->nmbs - 1] + 1));
2248
2249 /* Now walk backward from here to the beginning. */
2250 curp = runp;
2251
2252 assert (runp->nmbs <= 256);
2253 obstack_1grow_fast (&extrapool, curp->nmbs - 1);
2254 for (i = 1; i < curp->nmbs; ++i)
2255 obstack_1grow_fast (&extrapool, curp->mbs[i]);
2256
2257 /* Now find the end of the consecutive sequence and
2258 add all the indeces in the indirect pool. */
2259 do
2260 {
2261 weightidx = output_weight (&weightpool, collate, curp);
2262 obstack_int32_grow (&indirectpool, weightidx);
2263
2264 curp = curp->mblast;
2265 }
2266 while (curp != series_startp);
2267
2268 /* Add the final weight. */
2269 weightidx = output_weight (&weightpool, collate, curp);
2270 obstack_int32_grow (&indirectpool, weightidx);
2271
2272 /* And add the end byte sequence. Without length this
2273 time. */
2274 for (i = 1; i < curp->nmbs; ++i)
2275 obstack_1grow_fast (&extrapool, curp->mbs[i]);
2276 }
2277 else
2278 {
2279 /* A single entry. Simply add the index and the length and
2280 string (except for the first character which is already
2281 tested for). */
2282 int i;
2283
2284 /* Output the weight info. */
2285 weightidx = output_weight (&weightpool, collate, runp);
2286
2287 added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1
2288 + runp->nmbs - 1);
2289 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2290 obstack_make_room (&extrapool, added);
2291
2292 obstack_int32_grow_fast (&extrapool, weightidx);
2293 assert (runp->nmbs <= 256);
2294 obstack_1grow_fast (&extrapool, runp->nmbs - 1);
2295
2296 for (i = 1; i < runp->nmbs; ++i)
2297 obstack_1grow_fast (&extrapool, runp->mbs[i]);
2298 }
2299
2300 /* Add alignment bytes if necessary. */
2301 while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
2302 obstack_1grow_fast (&extrapool, '\0');
2303
2304 /* Next entry. */
2305 lastp = runp;
2306 runp = runp->mbnext;
2307 }
2308 while (runp != NULL);
2309
2310 assert (LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)));
2311
2312 /* If the final entry in the list is not a single character we
2313 add an UNDEFINED entry here. */
2314 if (lastp->nmbs != 1)
2315 {
2316 int added = LOCFILE_ALIGN_UP (sizeof (int32_t) + 1 + 1);
2317 obstack_make_room (&extrapool, added);
2318
2319 obstack_int32_grow_fast (&extrapool, 0);
2320 /* XXX What rule? We just pick the first. */
2321 obstack_1grow_fast (&extrapool, 0);
2322 /* Length is zero. */
2323 obstack_1grow_fast (&extrapool, 0);
2324
2325 /* Add alignment bytes if necessary. */
2326 while (!LOCFILE_ALIGNED_P (obstack_object_size (&extrapool)))
2327 obstack_1grow_fast (&extrapool, '\0');
2328 }
2329 }
2330
2331 /* Add padding to the tables if necessary. */
2332 while (!LOCFILE_ALIGNED_P (obstack_object_size (&weightpool)))
2333 obstack_1grow (&weightpool, 0);
2334
2335 /* Now add the four tables. */
2336 add_locale_uint32_array (&file, (const uint32_t *) tablemb, 256);
2337 add_locale_raw_obstack (&file, &weightpool);
2338 add_locale_raw_obstack (&file, &extrapool);
2339 add_locale_raw_obstack (&file, &indirectpool);
2340
2341 /* Now the same for the wide character table. We need to store some
2342 more information here. */
2343 add_locale_empty (&file);
2344 add_locale_empty (&file);
2345 add_locale_empty (&file);
2346
2347 /* Since we are using the sign of an integer to mark indirection the
2348 offsets in the arrays we are indirectly referring to must not be
2349 zero since -0 == 0. Therefore we add a bit of dummy content. */
2350 obstack_int32_grow (&extrapool, 0);
2351 obstack_int32_grow (&indirectpool, 0);
2352
2353 /* Now insert the `UNDEFINED' value if it is used. Since this value
2354 will probably be used more than once it is good to store the
2355 weights only once. */
2356 if (output_weightwc (&weightpool, collate, &collate->undefined) != 0)
2357 abort ();
2358
2359 /* Generate the table. Walk through the lists of sequences starting
2360 with the same wide character and add them one after the other to
2361 the table. In case we have more than one sequence starting with
2362 the same byte we have to use extra indirection. */
2363 tablewc.p = 6;
2364 tablewc.q = 10;
2365 collidx_table_init (&tablewc);
2366
2367 atwc.weightpool = &weightpool;
2368 atwc.extrapool = &extrapool;
2369 atwc.indpool = &indirectpool;
2370 atwc.collate = collate;
2371 atwc.tablewc = &tablewc;
2372
2373 wchead_table_iterate (&collate->wcheads, add_to_tablewc);
2374
2375 memset (&atwc, 0, sizeof (atwc));
2376
2377 /* Now add the four tables. */
2378 add_locale_collidx_table (&file, &tablewc);
2379 add_locale_raw_obstack (&file, &weightpool);
2380 add_locale_raw_obstack (&file, &extrapool);
2381 add_locale_raw_obstack (&file, &indirectpool);
2382
2383 /* Finally write the table with collation element names out. It is
2384 a hash table with a simple function which gets the name of the
2385 character as the input. One character might have many names. The
2386 value associated with the name is an index into the weight table
2387 where we are then interested in the first-level weight value.
2388
2389 To determine how large the table should be we are counting the
2390 elements have to put in. Since we are using internal chaining
2391 using a secondary hash function we have to make the table a bit
2392 larger to avoid extremely long search times. We can achieve
2393 good results with a 40% larger table than there are entries. */
2394 elem_size = 0;
2395 runp = collate->start;
2396 while (runp != NULL)
2397 {
2398 if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2399 /* Yep, the element really counts. */
2400 ++elem_size;
2401
2402 runp = runp->next;
2403 }
2404 /* Add 50% and find the next prime number. */
2405 elem_size = next_prime (elem_size + (elem_size >> 1));
2406
2407 /* Allocate the table. Each entry consists of two words: the hash
2408 value and an index in a secondary table which provides the index
2409 into the weight table and the string itself (so that a match can
2410 be determined). */
2411 elem_table = (uint32_t *) obstack_alloc (&extrapool,
2412 elem_size * 2 * sizeof (uint32_t));
2413 memset (elem_table, '\0', elem_size * 2 * sizeof (uint32_t));
2414
2415 /* Now add the elements. */
2416 runp = collate->start;
2417 while (runp != NULL)
2418 {
2419 if (runp->mbs != NULL && runp->weights != NULL && !runp->is_character)
2420 {
2421 /* Compute the hash value of the name. */
2422 uint32_t namelen = strlen (runp->name);
2423 uint32_t hash = elem_hash (runp->name, namelen);
2424 size_t idx = hash % elem_size;
2425#ifndef NDEBUG
2426 size_t start_idx = idx;
2427#endif
2428
2429 if (elem_table[idx * 2] != 0)
2430 {
2431 /* The spot is already taken. Try iterating using the value
2432 from the secondary hashing function. */
2433 size_t iter = hash % (elem_size - 2) + 1;
2434
2435 do
2436 {
2437 idx += iter;
2438 if (idx >= elem_size)
2439 idx -= elem_size;
2440 assert (idx != start_idx);
2441 }
2442 while (elem_table[idx * 2] != 0);
2443 }
2444 /* This is the spot where we will insert the value. */
2445 elem_table[idx * 2] = hash;
2446 elem_table[idx * 2 + 1] = obstack_object_size (&extrapool);
2447
2448 /* The string itself including length. */
2449 obstack_1grow (&extrapool, namelen);
2450 obstack_grow (&extrapool, runp->name, namelen);
2451
2452 /* And the multibyte representation. */
2453 obstack_1grow (&extrapool, runp->nmbs);
2454 obstack_grow (&extrapool, runp->mbs, runp->nmbs);
2455
2456 /* And align again to 32 bits. */
2457 if ((1 + namelen + 1 + runp->nmbs) % sizeof (int32_t) != 0)
2458 obstack_grow (&extrapool, "\0\0",
2459 (sizeof (int32_t)
2460 - ((1 + namelen + 1 + runp->nmbs)
2461 % sizeof (int32_t))));
2462
2463 /* Now some 32-bit values: multibyte collation sequence,
2464 wide char string (including length), and wide char
2465 collation sequence. */
2466 obstack_int32_grow (&extrapool, runp->mbseqorder);
2467
2468 obstack_int32_grow (&extrapool, runp->nwcs);
2469 obstack_grow (&extrapool, runp->wcs,
2470 runp->nwcs * sizeof (uint32_t));
2471 maybe_swap_uint32_obstack (&extrapool, runp->nwcs);
2472
2473 obstack_int32_grow (&extrapool, runp->wcseqorder);
2474 }
2475
2476 runp = runp->next;
2477 }
2478
2479 /* Prepare to write out this data. */
2480 add_locale_uint32 (&file, elem_size);
2481 add_locale_uint32_array (&file, elem_table, 2 * elem_size);
2482 add_locale_raw_obstack (&file, &extrapool);
2483 add_locale_raw_data (&file, collate->mbseqorder, 256);
2484 add_locale_collseq_table (&file, &collate->wcseqorder);
2485 add_locale_string (&file, charmap->code_set_name);
2486 write_locale_data (output_path, LC_COLLATE, "LC_COLLATE", &file);
2487
2488 obstack_free (&weightpool, NULL);
2489 obstack_free (&extrapool, NULL);
2490 obstack_free (&indirectpool, NULL);
2491}
2492
2493
2494static enum token_t
2495skip_to (struct linereader *ldfile, struct locale_collate_t *collate,
2496 const struct charmap_t *charmap, int to_endif)
2497{
2498 while (1)
2499 {
2500 struct token *now = lr_token (ldfile, charmap, NULL, NULL, 0);
2501 enum token_t nowtok = now->tok;
2502
2503 if (nowtok == tok_eof || nowtok == tok_end)
2504 return nowtok;
2505
2506 if (nowtok == tok_ifdef || nowtok == tok_ifndef)
2507 {
2508 lr_error (ldfile, _("%s: nested conditionals not supported"),
2509 "LC_COLLATE");
2510 nowtok = skip_to (ldfile, collate, charmap, tok_endif);
2511 if (nowtok == tok_eof || nowtok == tok_end)
2512 return nowtok;
2513 }
2514 else if (nowtok == tok_endif || (!to_endif && nowtok == tok_else))
2515 {
2516 lr_ignore_rest (ldfile, 1);
2517 return nowtok;
2518 }
2519 else if (!to_endif && (nowtok == tok_elifdef || nowtok == tok_elifndef))
2520 {
2521 /* Do not read the rest of the line. */
2522 return nowtok;
2523 }
2524 else if (nowtok == tok_else)
2525 {
2526 lr_error (ldfile, _("%s: more than one 'else'"), "LC_COLLATE");
2527 }
2528
2529 lr_ignore_rest (ldfile, 0);
2530 }
2531}
2532
2533
2534void
2535collate_read (struct linereader *ldfile, struct localedef_t *result,
2536 const struct charmap_t *charmap, const char *repertoire_name,
2537 int ignore_content)
2538{
2539 struct repertoire_t *repertoire = NULL;
2540 struct locale_collate_t *collate;
2541 struct token *now;
2542 struct token *arg = NULL;
2543 enum token_t nowtok;
2544 enum token_t was_ellipsis = tok_none;
2545 struct localedef_t *copy_locale = NULL;
2546 /* Parsing state:
2547 0 - start
2548 1 - between `order-start' and `order-end'
2549 2 - after `order-end'
2550 3 - after `reorder-after', waiting for `reorder-end'
2551 4 - after `reorder-end'
2552 5 - after `reorder-sections-after', waiting for `reorder-sections-end'
2553 6 - after `reorder-sections-end'
2554 */
2555 int state = 0;
2556
2557 /* Get the repertoire we have to use. */
2558 if (repertoire_name != NULL)
2559 repertoire = repertoire_read (repertoire_name);
2560
2561 /* The rest of the line containing `LC_COLLATE' must be free. */
2562 lr_ignore_rest (ldfile, 1);
2563
2564 while (1)
2565 {
2566 do
2567 {
2568 now = lr_token (ldfile, charmap, result, NULL, verbose);
2569 nowtok = now->tok;
2570 }
2571 while (nowtok == tok_eol);
2572
2573 if (nowtok != tok_define)
2574 break;
2575
2576 if (ignore_content)
2577 lr_ignore_rest (ldfile, 0);
2578 else
2579 {
2580 arg = lr_token (ldfile, charmap, result, NULL, verbose);
2581 if (arg->tok != tok_ident)
2582 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2583 else
2584 {
2585 /* Simply add the new symbol. */
2586 struct name_list *newsym = xmalloc (sizeof (*newsym)
2587 + arg->val.str.lenmb + 1);
2588 memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
2589 newsym->str[arg->val.str.lenmb] = '\0';
2590 newsym->next = defined;
2591 defined = newsym;
2592
2593 lr_ignore_rest (ldfile, 1);
2594 }
2595 }
2596 }
2597
2598 if (nowtok == tok_copy)
2599 {
2600 now = lr_token (ldfile, charmap, result, NULL, verbose);
2601 if (now->tok != tok_string)
2602 {
2603 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
2604
2605 skip_category:
2606 do
2607 now = lr_token (ldfile, charmap, result, NULL, verbose);
2608 while (now->tok != tok_eof && now->tok != tok_end);
2609
2610 if (now->tok != tok_eof
2611 || (now = lr_token (ldfile, charmap, result, NULL, verbose),
2612 now->tok == tok_eof))
2613 lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
2614 else if (now->tok != tok_lc_collate)
2615 {
2616 lr_error (ldfile, _("\
2617%1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
2618 lr_ignore_rest (ldfile, 0);
2619 }
2620 else
2621 lr_ignore_rest (ldfile, 1);
2622
2623 return;
2624 }
2625
2626 if (! ignore_content)
2627 {
2628 /* Get the locale definition. */
2629 copy_locale = load_locale (LC_COLLATE, now->val.str.startmb,
2630 repertoire_name, charmap, NULL);
2631 if ((copy_locale->avail & COLLATE_LOCALE) == 0)
2632 {
2633 /* Not yet loaded. So do it now. */
2634 if (locfile_read (copy_locale, charmap) != 0)
2635 goto skip_category;
2636 }
2637
2638 if (copy_locale->categories[LC_COLLATE].collate == NULL)
2639 return;
2640 }
2641
2642 lr_ignore_rest (ldfile, 1);
2643
2644 now = lr_token (ldfile, charmap, result, NULL, verbose);
2645 nowtok = now->tok;
2646 }
2647
2648 /* Prepare the data structures. */
2649 collate_startup (ldfile, result, copy_locale, ignore_content);
2650 collate = result->categories[LC_COLLATE].collate;
2651
2652 while (1)
2653 {
2654 char ucs4buf[10];
2655 char *symstr;
2656 size_t symlen;
2657
2658 /* Of course we don't proceed beyond the end of file. */
2659 if (nowtok == tok_eof)
2660 break;
2661
2662 /* Ingore empty lines. */
2663 if (nowtok == tok_eol)
2664 {
2665 now = lr_token (ldfile, charmap, result, NULL, verbose);
2666 nowtok = now->tok;
2667 continue;
2668 }
2669
2670 switch (nowtok)
2671 {
2672 case tok_copy:
2673 /* Allow copying other locales. */
2674 now = lr_token (ldfile, charmap, result, NULL, verbose);
2675 if (now->tok != tok_string)
2676 goto err_label;
2677
2678 if (! ignore_content)
2679 load_locale (LC_COLLATE, now->val.str.startmb, repertoire_name,
2680 charmap, result);
2681
2682 lr_ignore_rest (ldfile, 1);
2683 break;
2684
2685 case tok_coll_weight_max:
2686 /* Ignore the rest of the line if we don't need the input of
2687 this line. */
2688 if (ignore_content)
2689 {
2690 lr_ignore_rest (ldfile, 0);
2691 break;
2692 }
2693
2694 if (state != 0)
2695 goto err_label;
2696
2697 arg = lr_token (ldfile, charmap, result, NULL, verbose);
2698 if (arg->tok != tok_number)
2699 goto err_label;
2700 if (collate->col_weight_max != -1)
2701 lr_error (ldfile, _("%s: duplicate definition of `%s'"),
2702 "LC_COLLATE", "col_weight_max");
2703 else
2704 collate->col_weight_max = arg->val.num;
2705 lr_ignore_rest (ldfile, 1);
2706 break;
2707
2708 case tok_section_symbol:
2709 /* Ignore the rest of the line if we don't need the input of
2710 this line. */
2711 if (ignore_content)
2712 {
2713 lr_ignore_rest (ldfile, 0);
2714 break;
2715 }
2716
2717 if (state != 0)
2718 goto err_label;
2719
2720 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2721 if (arg->tok != tok_bsymbol)
2722 goto err_label;
2723 else if (!ignore_content)
2724 {
2725 /* Check whether this section is already known. */
2726 struct section_list *known = collate->sections;
2727 while (known != NULL)
2728 {
2729 if (strcmp (known->name, arg->val.str.startmb) == 0)
2730 break;
2731 known = known->next;
2732 }
2733
2734 if (known != NULL)
2735 {
2736 lr_error (ldfile,
2737 _("%s: duplicate declaration of section `%s'"),
2738 "LC_COLLATE", arg->val.str.startmb);
2739 free (arg->val.str.startmb);
2740 }
2741 else
2742 collate->sections = make_seclist_elem (collate,
2743 arg->val.str.startmb,
2744 collate->sections);
2745
2746 lr_ignore_rest (ldfile, known == NULL);
2747 }
2748 else
2749 {
2750 free (arg->val.str.startmb);
2751 lr_ignore_rest (ldfile, 0);
2752 }
2753 break;
2754
2755 case tok_collating_element:
2756 /* Ignore the rest of the line if we don't need the input of
2757 this line. */
2758 if (ignore_content)
2759 {
2760 lr_ignore_rest (ldfile, 0);
2761 break;
2762 }
2763
2764 if (state != 0 && state != 2)
2765 goto err_label;
2766
2767 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2768 if (arg->tok != tok_bsymbol)
2769 goto err_label;
2770 else
2771 {
2772 const char *symbol = arg->val.str.startmb;
2773 size_t symbol_len = arg->val.str.lenmb;
2774
2775 /* Next the `from' keyword. */
2776 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2777 if (arg->tok != tok_from)
2778 {
2779 free ((char *) symbol);
2780 goto err_label;
2781 }
2782
2783 ldfile->return_widestr = 1;
2784 ldfile->translate_strings = 1;
2785
2786 /* Finally the string with the replacement. */
2787 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2788
2789 ldfile->return_widestr = 0;
2790 ldfile->translate_strings = 0;
2791
2792 if (arg->tok != tok_string)
2793 goto err_label;
2794
2795 if (!ignore_content && symbol != NULL)
2796 {
2797 /* The name is already defined. */
2798 if (check_duplicate (ldfile, collate, charmap,
2799 repertoire, symbol, symbol_len))
2800 goto col_elem_free;
2801
2802 if (arg->val.str.startmb != NULL)
2803 insert_entry (&collate->elem_table, symbol, symbol_len,
2804 new_element (collate,
2805 arg->val.str.startmb,
2806 arg->val.str.lenmb - 1,
2807 arg->val.str.startwc,
2808 symbol, symbol_len, 0));
2809 }
2810 else
2811 {
2812 col_elem_free:
2813 free ((char *) symbol);
2814 free (arg->val.str.startmb);
2815 free (arg->val.str.startwc);
2816 }
2817 lr_ignore_rest (ldfile, 1);
2818 }
2819 break;
2820
2821 case tok_collating_symbol:
2822 /* Ignore the rest of the line if we don't need the input of
2823 this line. */
2824 if (ignore_content)
2825 {
2826 lr_ignore_rest (ldfile, 0);
2827 break;
2828 }
2829
2830 if (state != 0 && state != 2)
2831 goto err_label;
2832
2833 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2834 if (arg->tok != tok_bsymbol)
2835 goto err_label;
2836 else
2837 {
2838 char *symbol = arg->val.str.startmb;
2839 size_t symbol_len = arg->val.str.lenmb;
2840 char *endsymbol = NULL;
2841 size_t endsymbol_len = 0;
2842 enum token_t ellipsis = tok_none;
2843
2844 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2845 if (arg->tok == tok_ellipsis2 || arg->tok == tok_ellipsis4)
2846 {
2847 ellipsis = arg->tok;
2848
2849 arg = lr_token (ldfile, charmap, result, repertoire,
2850 verbose);
2851 if (arg->tok != tok_bsymbol)
2852 {
2853 free (symbol);
2854 goto err_label;
2855 }
2856
2857 endsymbol = arg->val.str.startmb;
2858 endsymbol_len = arg->val.str.lenmb;
2859
2860 lr_ignore_rest (ldfile, 1);
2861 }
2862 else if (arg->tok != tok_eol)
2863 {
2864 free (symbol);
2865 goto err_label;
2866 }
2867
2868 if (!ignore_content)
2869 {
2870 if (symbol == NULL
2871 || (ellipsis != tok_none && endsymbol == NULL))
2872 {
2873 lr_error (ldfile, _("\
2874%s: unknown character in collating symbol name"),
2875 "LC_COLLATE");
2876 goto col_sym_free;
2877 }
2878 else if (ellipsis == tok_none)
2879 {
2880 /* A single symbol, no ellipsis. */
2881 if (check_duplicate (ldfile, collate, charmap,
2882 repertoire, symbol, symbol_len))
2883 /* The name is already defined. */
2884 goto col_sym_free;
2885
2886 insert_entry (&collate->sym_table, symbol, symbol_len,
2887 new_symbol (collate, symbol, symbol_len));
2888 }
2889 else if (symbol_len != endsymbol_len)
2890 {
2891 col_sym_inv_range:
2892 lr_error (ldfile,
2893 _("invalid names for character range"));
2894 goto col_sym_free;
2895 }
2896 else
2897 {
2898 /* Oh my, we have to handle an ellipsis. First, as
2899 usual, determine the common prefix and then
2900 convert the rest into a range. */
2901 size_t prefixlen;
2902 unsigned long int from;
2903 unsigned long int to;
2904 char *endp;
2905
2906 for (prefixlen = 0; prefixlen < symbol_len; ++prefixlen)
2907 if (symbol[prefixlen] != endsymbol[prefixlen])
2908 break;
2909
2910 /* Convert the rest into numbers. */
2911 symbol[symbol_len] = '\0';
2912 from = strtoul (&symbol[prefixlen], &endp,
2913 ellipsis == tok_ellipsis2 ? 16 : 10);
2914 if (*endp != '\0')
2915 goto col_sym_inv_range;
2916
2917 endsymbol[symbol_len] = '\0';
2918 to = strtoul (&endsymbol[prefixlen], &endp,
2919 ellipsis == tok_ellipsis2 ? 16 : 10);
2920 if (*endp != '\0')
2921 goto col_sym_inv_range;
2922
2923 if (from > to)
2924 goto col_sym_inv_range;
2925
2926 /* Now loop over all entries. */
2927 while (from <= to)
2928 {
2929 char *symbuf;
2930
2931 symbuf = (char *) obstack_alloc (&collate->mempool,
2932 symbol_len + 1);
2933
2934 /* Create the name. */
2935 sprintf (symbuf,
2936 ellipsis == tok_ellipsis2
2937 ? "%.*s%.*lX" : "%.*s%.*lu",
2938 (int) prefixlen, symbol,
2939 (int) (symbol_len - prefixlen), from);
2940
2941 if (check_duplicate (ldfile, collate, charmap,
2942 repertoire, symbuf, symbol_len))
2943 /* The name is already defined. */
2944 goto col_sym_free;
2945
2946 insert_entry (&collate->sym_table, symbuf,
2947 symbol_len,
2948 new_symbol (collate, symbuf,
2949 symbol_len));
2950
2951 /* Increment the counter. */
2952 ++from;
2953 }
2954
2955 goto col_sym_free;
2956 }
2957 }
2958 else
2959 {
2960 col_sym_free:
2961 free (symbol);
2962 free (endsymbol);
2963 }
2964 }
2965 break;
2966
2967 case tok_symbol_equivalence:
2968 /* Ignore the rest of the line if we don't need the input of
2969 this line. */
2970 if (ignore_content)
2971 {
2972 lr_ignore_rest (ldfile, 0);
2973 break;
2974 }
2975
2976 if (state != 0)
2977 goto err_label;
2978
2979 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2980 if (arg->tok != tok_bsymbol)
2981 goto err_label;
2982 else
2983 {
2984 const char *newname = arg->val.str.startmb;
2985 size_t newname_len = arg->val.str.lenmb;
2986 const char *symname;
2987 size_t symname_len;
2988 void *symval; /* Actually struct symbol_t* */
2989
2990 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
2991 if (arg->tok != tok_bsymbol)
2992 {
2993 free ((char *) newname);
2994 goto err_label;
2995 }
2996
2997 symname = arg->val.str.startmb;
2998 symname_len = arg->val.str.lenmb;
2999
3000 if (newname == NULL)
3001 {
3002 lr_error (ldfile, _("\
3003%s: unknown character in equivalent definition name"),
3004 "LC_COLLATE");
3005
3006 sym_equiv_free:
3007 free ((char *) newname);
3008 free ((char *) symname);
3009 break;
3010 }
3011 if (symname == NULL)
3012 {
3013 lr_error (ldfile, _("\
3014%s: unknown character in equivalent definition value"),
3015 "LC_COLLATE");
3016 goto sym_equiv_free;
3017 }
3018
3019 /* See whether the symbol name is already defined. */
3020 if (find_entry (&collate->sym_table, symname, symname_len,
3021 &symval) != 0)
3022 {
3023 lr_error (ldfile, _("\
3024%s: unknown symbol `%s' in equivalent definition"),
3025 "LC_COLLATE", symname);
3026 goto sym_equiv_free;
3027 }
3028
3029 if (insert_entry (&collate->sym_table,
3030 newname, newname_len, symval) < 0)
3031 {
3032 lr_error (ldfile, _("\
3033error while adding equivalent collating symbol"));
3034 goto sym_equiv_free;
3035 }
3036
3037 free ((char *) symname);
3038 }
3039 lr_ignore_rest (ldfile, 1);
3040 break;
3041
3042 case tok_script:
3043 /* Ignore the rest of the line if we don't need the input of
3044 this line. */
3045 if (ignore_content)
3046 {
3047 lr_ignore_rest (ldfile, 0);
3048 break;
3049 }
3050
3051 /* We get told about the scripts we know. */
3052 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3053 if (arg->tok != tok_bsymbol)
3054 goto err_label;
3055 else
3056 {
3057 struct section_list *runp = collate->known_sections;
3058 char *name;
3059
3060 while (runp != NULL)
3061 if (strncmp (runp->name, arg->val.str.startmb,
3062 arg->val.str.lenmb) == 0
3063 && runp->name[arg->val.str.lenmb] == '\0')
3064 break;
3065 else
3066 runp = runp->def_next;
3067
3068 if (runp != NULL)
3069 {
3070 lr_error (ldfile, _("duplicate definition of script `%s'"),
3071 runp->name);
3072 lr_ignore_rest (ldfile, 0);
3073 break;
3074 }
3075
3076 runp = (struct section_list *) xcalloc (1, sizeof (*runp));
3077 name = (char *) xmalloc (arg->val.str.lenmb + 1);
3078 memcpy (name, arg->val.str.startmb, arg->val.str.lenmb);
3079 name[arg->val.str.lenmb] = '\0';
3080 runp->name = name;
3081
3082 runp->def_next = collate->known_sections;
3083 collate->known_sections = runp;
3084 }
3085 lr_ignore_rest (ldfile, 1);
3086 break;
3087
3088 case tok_order_start:
3089 /* Ignore the rest of the line if we don't need the input of
3090 this line. */
3091 if (ignore_content)
3092 {
3093 lr_ignore_rest (ldfile, 0);
3094 break;
3095 }
3096
3097 if (state != 0 && state != 1 && state != 2)
3098 goto err_label;
3099 state = 1;
3100
3101 /* The 14652 draft does not specify whether all `order_start' lines
3102 must contain the same number of sort-rules, but 14651 does. So
3103 we require this here as well. */
3104 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3105 if (arg->tok == tok_bsymbol)
3106 {
3107 /* This better should be a section name. */
3108 struct section_list *sp = collate->known_sections;
3109 while (sp != NULL
3110 && (sp->name == NULL
3111 || strncmp (sp->name, arg->val.str.startmb,
3112 arg->val.str.lenmb) != 0
3113 || sp->name[arg->val.str.lenmb] != '\0'))
3114 sp = sp->def_next;
3115
3116 if (sp == NULL)
3117 {
3118 lr_error (ldfile, _("\
3119%s: unknown section name `%.*s'"),
3120 "LC_COLLATE", (int) arg->val.str.lenmb,
3121 arg->val.str.startmb);
3122 /* We use the error section. */
3123 collate->current_section = &collate->error_section;
3124
3125 if (collate->error_section.first == NULL)
3126 {
3127 /* Insert &collate->error_section at the end of
3128 the collate->sections list. */
3129 if (collate->sections == NULL)
3130 collate->sections = &collate->error_section;
3131 else
3132 {
3133 sp = collate->sections;
3134 while (sp->next != NULL)
3135 sp = sp->next;
3136
3137 sp->next = &collate->error_section;
3138 }
3139 collate->error_section.next = NULL;
3140 }
3141 }
3142 else
3143 {
3144 /* One should not be allowed to open the same
3145 section twice. */
3146 if (sp->first != NULL)
3147 lr_error (ldfile, _("\
3148%s: multiple order definitions for section `%s'"),
3149 "LC_COLLATE", sp->name);
3150 else
3151 {
3152 /* Insert sp in the collate->sections list,
3153 right after collate->current_section. */
3154 if (collate->current_section != NULL)
3155 {
3156 sp->next = collate->current_section->next;
3157 collate->current_section->next = sp;
3158 }
3159 else if (collate->sections == NULL)
3160 /* This is the first section to be defined. */
3161 collate->sections = sp;
3162
3163 collate->current_section = sp;
3164 }
3165
3166 /* Next should come the end of the line or a semicolon. */
3167 arg = lr_token (ldfile, charmap, result, repertoire,
3168 verbose);
3169 if (arg->tok == tok_eol)
3170 {
3171 uint32_t cnt;
3172
3173 /* This means we have exactly one rule: `forward'. */
3174 if (nrules > 1)
3175 lr_error (ldfile, _("\
3176%s: invalid number of sorting rules"),
3177 "LC_COLLATE");
3178 else
3179 nrules = 1;
3180 sp->rules = obstack_alloc (&collate->mempool,
3181 (sizeof (enum coll_sort_rule)
3182 * nrules));
3183 for (cnt = 0; cnt < nrules; ++cnt)
3184 sp->rules[cnt] = sort_forward;
3185
3186 /* Next line. */
3187 break;
3188 }
3189
3190 /* Get the next token. */
3191 arg = lr_token (ldfile, charmap, result, repertoire,
3192 verbose);
3193 }
3194 }
3195 else
3196 {
3197 /* There is no section symbol. Therefore we use the unnamed
3198 section. */
3199 collate->current_section = &collate->unnamed_section;
3200
3201 if (collate->unnamed_section_defined)
3202 lr_error (ldfile, _("\
3203%s: multiple order definitions for unnamed section"),
3204 "LC_COLLATE");
3205 else
3206 {
3207 /* Insert &collate->unnamed_section at the beginning of
3208 the collate->sections list. */
3209 collate->unnamed_section.next = collate->sections;
3210 collate->sections = &collate->unnamed_section;
3211 collate->unnamed_section_defined = true;
3212 }
3213 }
3214
3215 /* Now read the direction names. */
3216 read_directions (ldfile, arg, charmap, repertoire, result);
3217
3218 /* From now we need the strings untranslated. */
3219 ldfile->translate_strings = 0;
3220 break;
3221
3222 case tok_order_end:
3223 /* Ignore the rest of the line if we don't need the input of
3224 this line. */
3225 if (ignore_content)
3226 {
3227 lr_ignore_rest (ldfile, 0);
3228 break;
3229 }
3230
3231 if (state != 1)
3232 goto err_label;
3233
3234 /* Handle ellipsis at end of list. */
3235 if (was_ellipsis != tok_none)
3236 {
3237 handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3238 repertoire, result);
3239 was_ellipsis = tok_none;
3240 }
3241
3242 state = 2;
3243 lr_ignore_rest (ldfile, 1);
3244 break;
3245
3246 case tok_reorder_after:
3247 /* Ignore the rest of the line if we don't need the input of
3248 this line. */
3249 if (ignore_content)
3250 {
3251 lr_ignore_rest (ldfile, 0);
3252 break;
3253 }
3254
3255 if (state == 1)
3256 {
3257 lr_error (ldfile, _("%s: missing `order_end' keyword"),
3258 "LC_COLLATE");
3259 state = 2;
3260
3261 /* Handle ellipsis at end of list. */
3262 if (was_ellipsis != tok_none)
3263 {
3264 handle_ellipsis (ldfile, arg->val.str.startmb,
3265 arg->val.str.lenmb, was_ellipsis, charmap,
3266 repertoire, result);
3267 was_ellipsis = tok_none;
3268 }
3269 }
3270 else if (state == 0 && copy_locale == NULL)
3271 goto err_label;
3272 else if (state != 0 && state != 2 && state != 3)
3273 goto err_label;
3274 state = 3;
3275
3276 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3277 if (arg->tok == tok_bsymbol || arg->tok == tok_ucs4)
3278 {
3279 /* Find this symbol in the sequence table. */
3280 char ucsbuf[10];
3281 char *startmb;
3282 size_t lenmb;
3283 struct element_t *insp;
3284 int no_error = 1;
3285 void *ptr;
3286
3287 if (arg->tok == tok_bsymbol)
3288 {
3289 startmb = arg->val.str.startmb;
3290 lenmb = arg->val.str.lenmb;
3291 }
3292 else
3293 {
3294 sprintf (ucsbuf, "U%08X", arg->val.ucs4);
3295 startmb = ucsbuf;
3296 lenmb = 9;
3297 }
3298
3299 if (find_entry (&collate->seq_table, startmb, lenmb, &ptr) == 0)
3300 /* Yes, the symbol exists. Simply point the cursor
3301 to it. */
3302 collate->cursor = (struct element_t *) ptr;
3303 else
3304 {
3305 struct symbol_t *symbp;
3306 void *ptr;
3307
3308 if (find_entry (&collate->sym_table, startmb, lenmb,
3309 &ptr) == 0)
3310 {
3311 symbp = ptr;
3312
3313 if (symbp->order->last != NULL
3314 || symbp->order->next != NULL)
3315 collate->cursor = symbp->order;
3316 else
3317 {
3318 /* This is a collating symbol but its position
3319 is not yet defined. */
3320 lr_error (ldfile, _("\
3321%s: order for collating symbol %.*s not yet defined"),
3322 "LC_COLLATE", (int) lenmb, startmb);
3323 collate->cursor = NULL;
3324 no_error = 0;
3325 }
3326 }
3327 else if (find_entry (&collate->elem_table, startmb, lenmb,
3328 &ptr) == 0)
3329 {
3330 insp = (struct element_t *) ptr;
3331
3332 if (insp->last != NULL || insp->next != NULL)
3333 collate->cursor = insp;
3334 else
3335 {
3336 /* This is a collating element but its position
3337 is not yet defined. */
3338 lr_error (ldfile, _("\
3339%s: order for collating element %.*s not yet defined"),
3340 "LC_COLLATE", (int) lenmb, startmb);
3341 collate->cursor = NULL;
3342 no_error = 0;
3343 }
3344 }
3345 else
3346 {
3347 /* This is bad. The symbol after which we have to
3348 insert does not exist. */
3349 lr_error (ldfile, _("\
3350%s: cannot reorder after %.*s: symbol not known"),
3351 "LC_COLLATE", (int) lenmb, startmb);
3352 collate->cursor = NULL;
3353 no_error = 0;
3354 }
3355 }
3356
3357 lr_ignore_rest (ldfile, no_error);
3358 }
3359 else
3360 /* This must not happen. */
3361 goto err_label;
3362 break;
3363
3364 case tok_reorder_end:
3365 /* Ignore the rest of the line if we don't need the input of
3366 this line. */
3367 if (ignore_content)
3368 break;
3369
3370 if (state != 3)
3371 goto err_label;
3372 state = 4;
3373 lr_ignore_rest (ldfile, 1);
3374 break;
3375
3376 case tok_reorder_sections_after:
3377 /* Ignore the rest of the line if we don't need the input of
3378 this line. */
3379 if (ignore_content)
3380 {
3381 lr_ignore_rest (ldfile, 0);
3382 break;
3383 }
3384
3385 if (state == 1)
3386 {
3387 lr_error (ldfile, _("%s: missing `order_end' keyword"),
3388 "LC_COLLATE");
3389 state = 2;
3390
3391 /* Handle ellipsis at end of list. */
3392 if (was_ellipsis != tok_none)
3393 {
3394 handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3395 repertoire, result);
3396 was_ellipsis = tok_none;
3397 }
3398 }
3399 else if (state == 3)
3400 {
3401 record_error (0, 0, _("\
3402%s: missing `reorder-end' keyword"), "LC_COLLATE");
3403 state = 4;
3404 }
3405 else if (state != 2 && state != 4)
3406 goto err_label;
3407 state = 5;
3408
3409 /* Get the name of the sections we are adding after. */
3410 arg = lr_token (ldfile, charmap, result, repertoire, verbose);
3411 if (arg->tok == tok_bsymbol)
3412 {
3413 /* Now find a section with this name. */
3414 struct section_list *runp = collate->sections;
3415
3416 while (runp != NULL)
3417 {
3418 if (runp->name != NULL
3419 && strlen (runp->name) == arg->val.str.lenmb
3420 && memcmp (runp->name, arg->val.str.startmb,
3421 arg->val.str.lenmb) == 0)
3422 break;
3423
3424 runp = runp->next;
3425 }
3426
3427 if (runp != NULL)
3428 collate->current_section = runp;
3429 else
3430 {
3431 /* This is bad. The section after which we have to
3432 reorder does not exist. Therefore we cannot
3433 process the whole rest of this reorder
3434 specification. */
3435 lr_error (ldfile, _("%s: section `%.*s' not known"),
3436 "LC_COLLATE", (int) arg->val.str.lenmb,
3437 arg->val.str.startmb);
3438
3439 do
3440 {
3441 lr_ignore_rest (ldfile, 0);
3442
3443 now = lr_token (ldfile, charmap, result, NULL, verbose);
3444 }
3445 while (now->tok == tok_reorder_sections_after
3446 || now->tok == tok_reorder_sections_end
3447 || now->tok == tok_end);
3448
3449 /* Process the token we just saw. */
3450 nowtok = now->tok;
3451 continue;
3452 }
3453 }
3454 else
3455 /* This must not happen. */
3456 goto err_label;
3457 break;
3458
3459 case tok_reorder_sections_end:
3460 /* Ignore the rest of the line if we don't need the input of
3461 this line. */
3462 if (ignore_content)
3463 break;
3464
3465 if (state != 5)
3466 goto err_label;
3467 state = 6;
3468 lr_ignore_rest (ldfile, 1);
3469 break;
3470
3471 case tok_bsymbol:
3472 case tok_ucs4:
3473 /* Ignore the rest of the line if we don't need the input of
3474 this line. */
3475 if (ignore_content)
3476 {
3477 lr_ignore_rest (ldfile, 0);
3478 break;
3479 }
3480
3481 if (state != 0 && state != 1 && state != 3 && state != 5)
3482 goto err_label;
3483
3484 if ((state == 0 || state == 5) && nowtok == tok_ucs4)
3485 goto err_label;
3486
3487 if (nowtok == tok_ucs4)
3488 {
3489 snprintf (ucs4buf, sizeof (ucs4buf), "U%08X", now->val.ucs4);
3490 symstr = ucs4buf;
3491 symlen = 9;
3492 }
3493 else if (arg != NULL)
3494 {
3495 symstr = arg->val.str.startmb;
3496 symlen = arg->val.str.lenmb;
3497 }
3498 else
3499 {
3500 lr_error (ldfile, _("%s: bad symbol <%.*s>"), "LC_COLLATE",
3501 (int) ldfile->token.val.str.lenmb,
3502 ldfile->token.val.str.startmb);
3503 break;
3504 }
3505
3506 struct element_t *seqp;
3507 if (state == 0)
3508 {
3509 /* We are outside an `order_start' region. This means
3510 we must only accept definitions of values for
3511 collation symbols since these are purely abstract
3512 values and don't need directions associated. */
3513 void *ptr;
3514
3515 if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3516 {
3517 seqp = ptr;
3518
3519 /* It's already defined. First check whether this
3520 is really a collating symbol. */
3521 if (seqp->is_character)
3522 goto err_label;
3523
3524 goto move_entry;
3525 }
3526 else
3527 {
3528 void *result;
3529
3530 if (find_entry (&collate->sym_table, symstr, symlen,
3531 &result) != 0)
3532 /* No collating symbol, it's an error. */
3533 goto err_label;
3534
3535 /* Maybe this is the first time we define a symbol
3536 value and it is before the first actual section. */
3537 if (collate->sections == NULL)
3538 collate->sections = collate->current_section =
3539 &collate->symbol_section;
3540 }
3541
3542 if (was_ellipsis != tok_none)
3543 {
3544 handle_ellipsis (ldfile, symstr, symlen, was_ellipsis,
3545 charmap, repertoire, result);
3546
3547 /* Remember that we processed the ellipsis. */
3548 was_ellipsis = tok_none;
3549
3550 /* And don't add the value a second time. */
3551 break;
3552 }
3553 }
3554 else if (state == 3)
3555 {
3556 /* It is possible that we already have this collation sequence.
3557 In this case we move the entry. */
3558 void *sym;
3559 void *ptr;
3560
3561 /* If the symbol after which we have to insert was not found
3562 ignore all entries. */
3563 if (collate->cursor == NULL)
3564 {
3565 lr_ignore_rest (ldfile, 0);
3566 break;
3567 }
3568
3569 if (find_entry (&collate->seq_table, symstr, symlen, &ptr) == 0)
3570 {
3571 seqp = (struct element_t *) ptr;
3572 goto move_entry;
3573 }
3574
3575 if (find_entry (&collate->sym_table, symstr, symlen, &sym) == 0
3576 && (seqp = ((struct symbol_t *) sym)->order) != NULL)
3577 goto move_entry;
3578
3579 if (find_entry (&collate->elem_table, symstr, symlen, &ptr) == 0
3580 && (seqp = (struct element_t *) ptr,
3581 seqp->last != NULL || seqp->next != NULL
3582 || (collate->start != NULL && seqp == collate->start)))
3583 {
3584 move_entry:
3585 /* Remove the entry from the old position. */
3586 if (seqp->last == NULL)
3587 collate->start = seqp->next;
3588 else
3589 seqp->last->next = seqp->next;
3590 if (seqp->next != NULL)
3591 seqp->next->last = seqp->last;
3592
3593 /* We also have to check whether this entry is the
3594 first or last of a section. */
3595 if (seqp->section->first == seqp)
3596 {
3597 if (seqp->section->first == seqp->section->last)
3598 /* This section has no content anymore. */
3599 seqp->section->first = seqp->section->last = NULL;
3600 else
3601 seqp->section->first = seqp->next;
3602 }
3603 else if (seqp->section->last == seqp)
3604 seqp->section->last = seqp->last;
3605
3606 /* Now insert it in the new place. */
3607 insert_weights (ldfile, seqp, charmap, repertoire, result,
3608 tok_none);
3609 break;
3610 }
3611
3612 /* Otherwise we just add a new entry. */
3613 }
3614 else if (state == 5)
3615 {
3616 /* We are reordering sections. Find the named section. */
3617 struct section_list *runp = collate->sections;
3618 struct section_list *prevp = NULL;
3619
3620 while (runp != NULL)
3621 {
3622 if (runp->name != NULL
3623 && strlen (runp->name) == symlen
3624 && memcmp (runp->name, symstr, symlen) == 0)
3625 break;
3626
3627 prevp = runp;
3628 runp = runp->next;
3629 }
3630
3631 if (runp == NULL)
3632 {
3633 lr_error (ldfile, _("%s: section `%.*s' not known"),
3634 "LC_COLLATE", (int) symlen, symstr);
3635 lr_ignore_rest (ldfile, 0);
3636 }
3637 else
3638 {
3639 if (runp != collate->current_section)
3640 {
3641 /* Remove the named section from the old place and
3642 insert it in the new one. */
3643 prevp->next = runp->next;
3644
3645 runp->next = collate->current_section->next;
3646 collate->current_section->next = runp;
3647 collate->current_section = runp;
3648 }
3649
3650 /* Process the rest of the line which might change
3651 the collation rules. */
3652 arg = lr_token (ldfile, charmap, result, repertoire,
3653 verbose);
3654 if (arg->tok != tok_eof && arg->tok != tok_eol)
3655 read_directions (ldfile, arg, charmap, repertoire,
3656 result);
3657 }
3658 break;
3659 }
3660 else if (was_ellipsis != tok_none)
3661 {
3662 /* Using the information in the `ellipsis_weight'
3663 element and this and the last value we have to handle
3664 the ellipsis now. */
3665 assert (state == 1);
3666
3667 handle_ellipsis (ldfile, symstr, symlen, was_ellipsis, charmap,
3668 repertoire, result);
3669
3670 /* Remember that we processed the ellipsis. */
3671 was_ellipsis = tok_none;
3672
3673 /* And don't add the value a second time. */
3674 break;
3675 }
3676
3677 /* Now insert in the new place. */
3678 insert_value (ldfile, symstr, symlen, charmap, repertoire, result);
3679 break;
3680
3681 case tok_undefined:
3682 /* Ignore the rest of the line if we don't need the input of
3683 this line. */
3684 if (ignore_content)
3685 {
3686 lr_ignore_rest (ldfile, 0);
3687 break;
3688 }
3689
3690 if (state != 1)
3691 goto err_label;
3692
3693 if (was_ellipsis != tok_none)
3694 {
3695 lr_error (ldfile,
3696 _("%s: cannot have `%s' as end of ellipsis range"),
3697 "LC_COLLATE", "UNDEFINED");
3698
3699 unlink_element (collate);
3700 was_ellipsis = tok_none;
3701 }
3702
3703 /* See whether UNDEFINED already appeared somewhere. */
3704 if (collate->undefined.next != NULL
3705 || &collate->undefined == collate->cursor)
3706 {
3707 lr_error (ldfile,
3708 _("%s: order for `%.*s' already defined at %s:%Zu"),
3709 "LC_COLLATE", 9, "UNDEFINED",
3710 collate->undefined.file,
3711 collate->undefined.line);
3712 lr_ignore_rest (ldfile, 0);
3713 }
3714 else
3715 /* Parse the weights. */
3716 insert_weights (ldfile, &collate->undefined, charmap,
3717 repertoire, result, tok_none);
3718 break;
3719
3720 case tok_ellipsis2: /* symbolic hexadecimal ellipsis */
3721 case tok_ellipsis3: /* absolute ellipsis */
3722 case tok_ellipsis4: /* symbolic decimal ellipsis */
3723 /* This is the symbolic (decimal or hexadecimal) or absolute
3724 ellipsis. */
3725 if (was_ellipsis != tok_none)
3726 goto err_label;
3727
3728 if (state != 0 && state != 1 && state != 3)
3729 goto err_label;
3730
3731 was_ellipsis = nowtok;
3732
3733 insert_weights (ldfile, &collate->ellipsis_weight, charmap,
3734 repertoire, result, nowtok);
3735 break;
3736
3737 case tok_end:
3738 seen_end:
3739 /* Next we assume `LC_COLLATE'. */
3740 if (!ignore_content)
3741 {
3742 if (state == 0 && copy_locale == NULL)
3743 /* We must either see a copy statement or have
3744 ordering values. */
3745 lr_error (ldfile,
3746 _("%s: empty category description not allowed"),
3747 "LC_COLLATE");
3748 else if (state == 1)
3749 {
3750 lr_error (ldfile, _("%s: missing `order_end' keyword"),
3751 "LC_COLLATE");
3752
3753 /* Handle ellipsis at end of list. */
3754 if (was_ellipsis != tok_none)
3755 {
3756 handle_ellipsis (ldfile, NULL, 0, was_ellipsis, charmap,
3757 repertoire, result);
3758 was_ellipsis = tok_none;
3759 }
3760 }
3761 else if (state == 3)
3762 record_error (0, 0, _("\
3763%s: missing `reorder-end' keyword"), "LC_COLLATE");
3764 else if (state == 5)
3765 record_error (0, 0, _("\
3766%s: missing `reorder-sections-end' keyword"), "LC_COLLATE");
3767 }
3768 arg = lr_token (ldfile, charmap, result, NULL, verbose);
3769 if (arg->tok == tok_eof)
3770 break;
3771 if (arg->tok == tok_eol)
3772 lr_error (ldfile, _("%s: incomplete `END' line"), "LC_COLLATE");
3773 else if (arg->tok != tok_lc_collate)
3774 lr_error (ldfile, _("\
3775%1$s: definition does not end with `END %1$s'"), "LC_COLLATE");
3776 lr_ignore_rest (ldfile, arg->tok == tok_lc_collate);
3777 return;
3778
3779 case tok_define:
3780 if (ignore_content)
3781 {
3782 lr_ignore_rest (ldfile, 0);
3783 break;
3784 }
3785
3786 arg = lr_token (ldfile, charmap, result, NULL, verbose);
3787 if (arg->tok != tok_ident)
3788 goto err_label;
3789
3790 /* Simply add the new symbol. */
3791 struct name_list *newsym = xmalloc (sizeof (*newsym)
3792 + arg->val.str.lenmb + 1);
3793 memcpy (newsym->str, arg->val.str.startmb, arg->val.str.lenmb);
3794 newsym->str[arg->val.str.lenmb] = '\0';
3795 newsym->next = defined;
3796 defined = newsym;
3797
3798 lr_ignore_rest (ldfile, 1);
3799 break;
3800
3801 case tok_undef:
3802 if (ignore_content)
3803 {
3804 lr_ignore_rest (ldfile, 0);
3805 break;
3806 }
3807
3808 arg = lr_token (ldfile, charmap, result, NULL, verbose);
3809 if (arg->tok != tok_ident)
3810 goto err_label;
3811
3812 /* Remove _all_ occurrences of the symbol from the list. */
3813 struct name_list *prevdef = NULL;
3814 struct name_list *curdef = defined;
3815 while (curdef != NULL)
3816 if (strncmp (arg->val.str.startmb, curdef->str,
3817 arg->val.str.lenmb) == 0
3818 && curdef->str[arg->val.str.lenmb] == '\0')
3819 {
3820 if (prevdef == NULL)
3821 defined = curdef->next;
3822 else
3823 prevdef->next = curdef->next;
3824
3825 struct name_list *olddef = curdef;
3826 curdef = curdef->next;
3827
3828 free (olddef);
3829 }
3830 else
3831 {
3832 prevdef = curdef;
3833 curdef = curdef->next;
3834 }
3835
3836 lr_ignore_rest (ldfile, 1);
3837 break;
3838
3839 case tok_ifdef:
3840 case tok_ifndef:
3841 if (ignore_content)
3842 {
3843 lr_ignore_rest (ldfile, 0);
3844 break;
3845 }
3846
3847 found_ifdef:
3848 arg = lr_token (ldfile, charmap, result, NULL, verbose);
3849 if (arg->tok != tok_ident)
3850 goto err_label;
3851 lr_ignore_rest (ldfile, 1);
3852
3853 if (collate->else_action == else_none)
3854 {
3855 curdef = defined;
3856 while (curdef != NULL)
3857 if (strncmp (arg->val.str.startmb, curdef->str,
3858 arg->val.str.lenmb) == 0
3859 && curdef->str[arg->val.str.lenmb] == '\0')
3860 break;
3861 else
3862 curdef = curdef->next;
3863
3864 if ((nowtok == tok_ifdef && curdef != NULL)
3865 || (nowtok == tok_ifndef && curdef == NULL))
3866 {
3867 /* We have to use the if-branch. */
3868 collate->else_action = else_ignore;
3869 }
3870 else
3871 {
3872 /* We have to use the else-branch, if there is one. */
3873 nowtok = skip_to (ldfile, collate, charmap, 0);
3874 if (nowtok == tok_else)
3875 collate->else_action = else_seen;
3876 else if (nowtok == tok_elifdef)
3877 {
3878 nowtok = tok_ifdef;
3879 goto found_ifdef;
3880 }
3881 else if (nowtok == tok_elifndef)
3882 {
3883 nowtok = tok_ifndef;
3884 goto found_ifdef;
3885 }
3886 else if (nowtok == tok_eof)
3887 goto seen_eof;
3888 else if (nowtok == tok_end)
3889 goto seen_end;
3890 }
3891 }
3892 else
3893 {
3894 /* XXX Should it really become necessary to support nested
3895 preprocessor handling we will push the state here. */
3896 lr_error (ldfile, _("%s: nested conditionals not supported"),
3897 "LC_COLLATE");
3898 nowtok = skip_to (ldfile, collate, charmap, 1);
3899 if (nowtok == tok_eof)
3900 goto seen_eof;
3901 else if (nowtok == tok_end)
3902 goto seen_end;
3903 }
3904 break;
3905
3906 case tok_elifdef:
3907 case tok_elifndef:
3908 case tok_else:
3909 if (ignore_content)
3910 {
3911 lr_ignore_rest (ldfile, 0);
3912 break;
3913 }
3914
3915 lr_ignore_rest (ldfile, 1);
3916
3917 if (collate->else_action == else_ignore)
3918 {
3919 /* Ignore everything until the endif. */
3920 nowtok = skip_to (ldfile, collate, charmap, 1);
3921 if (nowtok == tok_eof)
3922 goto seen_eof;
3923 else if (nowtok == tok_end)
3924 goto seen_end;
3925 }
3926 else
3927 {
3928 assert (collate->else_action == else_none);
3929 lr_error (ldfile, _("\
3930%s: '%s' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE",
3931 nowtok == tok_else ? "else"
3932 : nowtok == tok_elifdef ? "elifdef" : "elifndef");
3933 }
3934 break;
3935
3936 case tok_endif:
3937 if (ignore_content)
3938 {
3939 lr_ignore_rest (ldfile, 0);
3940 break;
3941 }
3942
3943 lr_ignore_rest (ldfile, 1);
3944
3945 if (collate->else_action != else_ignore
3946 && collate->else_action != else_seen)
3947 lr_error (ldfile, _("\
3948%s: 'endif' without matching 'ifdef' or 'ifndef'"), "LC_COLLATE");
3949
3950 /* XXX If we support nested preprocessor directives we pop
3951 the state here. */
3952 collate->else_action = else_none;
3953 break;
3954
3955 default:
3956 err_label:
3957 SYNTAX_ERROR (_("%s: syntax error"), "LC_COLLATE");
3958 }
3959
3960 /* Prepare for the next round. */
3961 now = lr_token (ldfile, charmap, result, NULL, verbose);
3962 nowtok = now->tok;
3963 }
3964
3965 seen_eof:
3966 /* When we come here we reached the end of the file. */
3967 lr_error (ldfile, _("%s: premature end of file"), "LC_COLLATE");
3968}
3969