1/* Copyright (C) 1995-2017 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published
7 by the Free Software Foundation; version 2 of the License, or
8 (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, see <http://www.gnu.org/licenses/>. */
17
18#ifdef HAVE_CONFIG_H
19# include <config.h>
20#endif
21
22#include <alloca.h>
23#include <byteswap.h>
24#include <endian.h>
25#include <errno.h>
26#include <limits.h>
27#include <obstack.h>
28#include <stdlib.h>
29#include <string.h>
30#include <wchar.h>
31#include <wctype.h>
32#include <stdint.h>
33#include <sys/uio.h>
34
35#include "localedef.h"
36#include "charmap.h"
37#include "localeinfo.h"
38#include "langinfo.h"
39#include "linereader.h"
40#include "locfile-token.h"
41#include "locfile.h"
42
43#include <assert.h>
44
45
/* The bit used for representing a special class.  BITPOS turns a class
   token into a zero-based position by subtracting tok_upper; BIT and BITw
   feed that position to _ISbit and _ISwbit respectively.  In this file BIT
   is applied to the class256_collection entries and BITw to the
   class_collection entries.  */
#define BITPOS(class) ((class) - tok_upper)
#define BIT(class) (_ISbit (BITPOS (class)))
#define BITw(class) (_ISwbit (BITPOS (class)))

/* Expand to the dereferenced result of find_idx for VALUE in the table
   CTYPE->COLLECTION, together with its COLLECTION_max and COLLECTION_act
   bookkeeping members.  IDX is pasted textually after each member and may
   be left empty.  The expansion is an lvalue, so callers can both read it
   and update it with `|='.  */
#define ELEM(ctype, collection, idx, value)				      \
  *find_idx (ctype, &ctype->collection idx, &ctype->collection##_max idx,  \
	     &ctype->collection##_act idx, value)


/* To be compatible with former implementations we for now restrict
   the number of bits for character classes to 16.  When compatibility
   is not necessary anymore increase the number to 32.  */
#define char_class_t uint16_t
#define char_class32_t uint32_t
61
62
/* Type to describe a transliteration action.  We have a possibly
   multiple character from-string and a set of multiple character
   to-strings.  All are 32bit values since this is what is used in
   the gconv functions.  */

/* One to-string of a transliteration entry.  */
struct translit_to_t
{
  /* The replacement text as 32-bit values (presumably zero-terminated;
     confirm against the code that builds these entries).  */
  uint32_t *str;

  /* Link to another to-string node (presumably NULL at the end of the
     chain; confirm against the parser).  */
  struct translit_to_t *next;
};
73
/* One transliteration rule: a from-string and its chain of to-strings.  */
struct translit_t
{
  /* The text to be replaced, as 32-bit values.  */
  uint32_t *from;

  /* NOTE(review): presumably the file and line where the rule was
     defined; the code filling these in is not shown here.  */
  const char *fname;
  size_t lineno;

  /* Head of the list of to-strings for this rule.  */
  struct translit_to_t *to;

  /* Link to another rule.  */
  struct translit_t *next;
};
85
/* One entry of the translit_ignore list.  ctype_finish orders the list by
   the FROM member and ctype_output writes FROM, TO and STEP of every entry
   to the locale file as three consecutive 32-bit values.  */
struct translit_ignore_t
{
  /* NOTE(review): presumably the first and last character of an ignored
     range and the increment between members; confirm against the parser.  */
  uint32_t from;
  uint32_t to;
  uint32_t step;

  /* NOTE(review): presumably the file and line where the entry was
     defined; the code filling these in is not shown here.  */
  const char *fname;
  size_t lineno;

  /* Next entry in the list, NULL for the last one.  */
  struct translit_ignore_t *next;
};
97
98
/* Type to describe a transliteration include statement.  */
struct translit_include_t
{
  /* NOTE(review): presumably the names of the locale and of the repertoire
     map given in the include statement; confirm against the parser.  */
  const char *copy_locale;
  const char *copy_repertoire;

  /* Link to another include statement.  */
  struct translit_include_t *next;
};
107
/* Provide some dummy pointer for empty string.  The array holds a single
   zero element, i.e. it is an empty 32-bit string.  */
static uint32_t no_str[] = { 0 };
110
111
/* Each inclusion of "3level.h" below is configured through the TABLE,
   ELEMENT and DEFAULT macros and provides a sparse table type named after
   TABLE together with functions whose names start with that name (this
   file uses idx_table_init, wctrans_table_add_internal,
   add_locale_wcwidth_table and add_locale_wctrans_table).  */

/* Sparse table of uint32_t.  NOTE(review): NO_ADD_LOCALE presumably
   suppresses the add_locale_ output function for this table; confirm in
   3level.h.  */
#define TABLE idx_table
#define ELEMENT uint32_t
#define DEFAULT ((uint32_t) ~0)
#define NO_ADD_LOCALE
#include "3level.h"

/* Sparse table of uint8_t, used for the character widths.  */
#define TABLE wcwidth_table
#define ELEMENT uint8_t
#define DEFAULT 0xff
#include "3level.h"

/* Sparse table of int32_t, used for the character mappings.  The add
   function coming from 3level.h is renamed to wctrans_table_add_internal
   for the duration of the inclusion so that this file can define its own
   wctrans_table_add afterwards.  */
#define TABLE wctrans_table
#define ELEMENT int32_t
#define DEFAULT 0
#define wctrans_table_add wctrans_table_add_internal
#include "3level.h"
#undef wctrans_table_add
/* Record in T that WC maps to MAPPED_WC.  The table does not hold the
   mapped character itself but its distance from the argument, so that is
   what gets handed to the generated add function.  */
static inline void
wctrans_table_add (struct wctrans_table *t, uint32_t wc, uint32_t mapped_wc)
{
  /* Unsigned subtraction; wraps around when MAPPED_WC is below WC.  */
  uint32_t delta = mapped_wc - wc;

  wctrans_table_add_internal (t, wc, delta);
}
137
/* Construction of sparse 3-level tables.
   See wchar-lookup.h for their structure and the meaning of p and q.  */

struct wctype_table
{
  /* Parameters.  */
  unsigned int p;
  unsigned int q;
  /* Working representation.  Every level has an array together with its
     allocation and size counters (NOTE(review): presumably allocated
     capacity and number of used entries; confirm against the table
     code).  */
  size_t level1_alloc;
  size_t level1_size;
  uint32_t *level1;
  size_t level2_alloc;
  size_t level2_size;
  uint32_t *level2;
  size_t level3_alloc;
  size_t level3_size;
  uint32_t *level3;
  /* NOTE(review): presumably the size of the finished table; the code
     setting it is not shown here.  */
  size_t result_size;
};

/* Append table T to the locale data collected in FILE.  ctype_output calls
   this once for every character class.  */
static void add_locale_wctype_table (struct locale_file *file,
				     struct wctype_table *t);
161
/* The real definition of the struct for the LC_CTYPE locale.  */
struct locale_ctype_t
{
  /* Wide character values known so far.  ctype_startup fills the first
     256 slots with the values 0..255; entry N names the character whose
     class bits are kept in class_collection[N].  */
  uint32_t *charnames;
  size_t charnames_max;
  size_t charnames_act;
  /* An index lookup table, to speedup find_idx.  */
  struct idx_table charnames_idx;

  /* The repertoire map in use; ctype_finish loads it with repertoire_read
     when a repertoire name is available.  */
  struct repertoire_t *repertoire;

  /* We will allow up to 8 * sizeof (uint32_t) character classes.  */
#define MAX_NR_CHARCLASS (8 * sizeof (uint32_t))
  /* Number of registered classes and their names.  */
  size_t nr_charclass;
  const char *classnames[MAX_NR_CHARCLASS];
  /* Initialized to ILLEGAL_CHAR_VALUE by ctype_startup.  */
  uint32_t last_class_char;
  /* Class bits for the 256 single-byte values (tested with _ISbit).  */
  uint32_t class256_collection[256];
  /* Class bits for the wide characters (tested with _ISwbit).  */
  uint32_t *class_collection;
  size_t class_collection_max;
  size_t class_collection_act;
  uint32_t class_done;
  /* Written to the locale file as _NL_CTYPE_CLASS_OFFSET.  */
  uint32_t class_offset;

  /* The input digits in multibyte and wide character form.  ctype_finish
     makes sure both hold complete groups of ten.  */
  struct charseq **mbdigits;
  size_t mbdigits_act;
  size_t mbdigits_max;
  uint32_t *wcdigits;
  size_t wcdigits_act;
  size_t wcdigits_max;

  /* The ten output digits in multibyte and wide character form.  */
  struct charseq *mboutdigits[10];
  uint32_t wcoutdigits[10];
  size_t outdigits_act;

  /* If the following number ever turns out to be too small simply
     increase it.  But I doubt it will.  --drepper@gnu */
#define MAX_NR_CHARMAP 16
  /* Names and contents of the character mappings.  ctype_startup creates
     "toupper" and "tolower" first and initializes entries 0 and 1 of
     map_collection and map256_collection to the identity mapping.  */
  const char *mapnames[MAX_NR_CHARMAP];
  uint32_t *map_collection[MAX_NR_CHARMAP];
  uint32_t map256_collection[2][256];
  size_t map_collection_max[MAX_NR_CHARMAP];
  size_t map_collection_act[MAX_NR_CHARMAP];
  size_t map_collection_nr;
  /* Initialized to MAX_NR_CHARMAP by ctype_startup.  */
  size_t last_map_idx;
  int tomap_done[MAX_NR_CHARMAP];
  /* Written to the locale file as _NL_CTYPE_MAP_OFFSET.  */
  uint32_t map_offset;

  /* Transliteration information.  */
  struct translit_include_t *translit_include;
  struct translit_t *translit;
  /* List of ignore entries and its length as counted by ctype_finish.  */
  struct translit_ignore_t *translit_ignore;
  uint32_t ntranslit_ignore;

  /* The default_missing string.  ctype_output measures it with wcslen.
     NOTE(review): the two members after it are presumably the place of
     its definition.  */
  uint32_t *default_missing;
  const char *default_missing_file;
  size_t default_missing_lineno;

  /* Set to 1 by ctype_startup when enc_not_ascii_compatible is set.  */
  uint32_t to_nonascii;
  /* Set to 1 by ctype_finish when the single-byte case mappings differ
     from the ASCII rules.  */
  uint32_t nonascii_case;

  /* The arrays for the binary representation.  ctype_output calls
     allocate_arrays before writing them to the locale file.  */
  char_class_t *ctype_b;
  char_class32_t *ctype32_b;
  uint32_t **map_b;
  uint32_t **map32_b;
  uint32_t **class_b;
  struct wctype_table *class_3level;
  struct wctrans_table *map_3level;
  uint32_t *class_name_ptr;
  uint32_t *map_name_ptr;
  struct wcwidth_table width;
  uint32_t mb_cur_max;
  /* Name of the character set, taken from the charmap in ctype_finish.  */
  const char *codeset_name;
  /* Transliteration tables.  translit_idx_size counts elements of the two
     index arrays; the two _tbl_size members are divided by
     sizeof (uint32_t) when the tables are written out.  */
  uint32_t *translit_from_idx;
  uint32_t *translit_from_tbl;
  uint32_t *translit_to_idx;
  uint32_t *translit_to_tbl;
  uint32_t translit_idx_size;
  size_t translit_from_tbl_size;
  size_t translit_to_tbl_size;

  /* Obstack initialized by ctype_startup.  */
  struct obstack mempool;
};
245
246
/* Marker for an empty slot.  This has the value 0xFFFFFFFF, regardless
   whether 'int' is 16 bit, 32 bit, or 64 bit.  */
#define EMPTY ((uint32_t) ~0)


/* Memory management hooks picked up by the obstack macros: chunks are
   obtained with xmalloc and released with free.  */
#define obstack_chunk_alloc xmalloc
#define obstack_chunk_free free
254
255
/* Prototypes for local functions.  */

/* Create or share the LC_CTYPE data of LOCALE.  */
static void ctype_startup (struct linereader *lr, struct localedef_t *locale,
			   const struct charmap_t *charmap,
			   struct localedef_t *copy_locale,
			   int ignore_content);
/* Register the character class NAME in CTYPE.  */
static void ctype_class_new (struct linereader *lr,
			     struct locale_ctype_t *ctype, const char *name);
/* Register the character mapping NAME in CTYPE.  */
static void ctype_map_new (struct linereader *lr,
			   struct locale_ctype_t *ctype,
			   const char *name, const struct charmap_t *charmap);
/* Return a pointer to the slot for character IDX in *TABLE.  TABLE, MAX
   and ACT may all be NULL when the caller only wants IDX to be entered
   into the name array.  */
static uint32_t *find_idx (struct locale_ctype_t *ctype, uint32_t **table,
			   size_t *max, size_t *act, uint32_t idx);
/* Provide default values for classes not specified.  */
static void set_class_defaults (struct locale_ctype_t *ctype,
				const struct charmap_t *charmap,
				struct repertoire_t *repertoire);
/* Prepare the arrays of the binary representation for output.  */
static void allocate_arrays (struct locale_ctype_t *ctype,
			     const struct charmap_t *charmap,
			     struct repertoire_t *repertoire);
274
275
/* Charmap symbol names of the ten decimal digits, spelled out.
   ctype_finish falls back to these when the one-character names from
   `digits' are not found in the charmap.  */
static const char *longnames[] =
{
  "zero", "one", "two", "three", "four",
  "five", "six", "seven", "eight", "nine"
};
/* The ten decimal digits named by their UCS code points
   (U+0030 ... U+0039).  */
static const char *uninames[] =
{
  "U00000030", "U00000031", "U00000032", "U00000033", "U00000034",
  "U00000035", "U00000036", "U00000037", "U00000038", "U00000039"
};
/* The ten decimal digits as characters.  Entry N serves both as the
   one-character charmap symbol name and as the byte value of digit N.  */
static const unsigned char digits[] = "0123456789";
287
288
289static void
290ctype_startup (struct linereader *lr, struct localedef_t *locale,
291 const struct charmap_t *charmap,
292 struct localedef_t *copy_locale, int ignore_content)
293{
294 unsigned int cnt;
295 struct locale_ctype_t *ctype;
296
297 if (!ignore_content && locale->categories[LC_CTYPE].ctype == NULL)
298 {
299 if (copy_locale == NULL)
300 {
301 /* Allocate the needed room. */
302 locale->categories[LC_CTYPE].ctype = ctype =
303 (struct locale_ctype_t *) xcalloc (1,
304 sizeof (struct locale_ctype_t));
305
306 /* We have seen no names yet. */
307 ctype->charnames_max = charmap->mb_cur_max == 1 ? 256 : 512;
308 ctype->charnames = (uint32_t *) xmalloc (ctype->charnames_max
309 * sizeof (uint32_t));
310 for (cnt = 0; cnt < 256; ++cnt)
311 ctype->charnames[cnt] = cnt;
312 ctype->charnames_act = 256;
313 idx_table_init (&ctype->charnames_idx);
314
315 /* Fill character class information. */
316 ctype->last_class_char = ILLEGAL_CHAR_VALUE;
317 /* The order of the following instructions determines the bit
318 positions! */
319 ctype_class_new (lr, ctype, "upper");
320 ctype_class_new (lr, ctype, "lower");
321 ctype_class_new (lr, ctype, "alpha");
322 ctype_class_new (lr, ctype, "digit");
323 ctype_class_new (lr, ctype, "xdigit");
324 ctype_class_new (lr, ctype, "space");
325 ctype_class_new (lr, ctype, "print");
326 ctype_class_new (lr, ctype, "graph");
327 ctype_class_new (lr, ctype, "blank");
328 ctype_class_new (lr, ctype, "cntrl");
329 ctype_class_new (lr, ctype, "punct");
330 ctype_class_new (lr, ctype, "alnum");
331
332 ctype->class_collection_max = charmap->mb_cur_max == 1 ? 256 : 512;
333 ctype->class_collection
334 = (uint32_t *) xcalloc (sizeof (unsigned long int),
335 ctype->class_collection_max);
336 ctype->class_collection_act = 256;
337
338 /* Fill character map information. */
339 ctype->last_map_idx = MAX_NR_CHARMAP;
340 ctype_map_new (lr, ctype, "toupper", charmap);
341 ctype_map_new (lr, ctype, "tolower", charmap);
342
343 /* Fill first 256 entries in `toXXX' arrays. */
344 for (cnt = 0; cnt < 256; ++cnt)
345 {
346 ctype->map_collection[0][cnt] = cnt;
347 ctype->map_collection[1][cnt] = cnt;
348
349 ctype->map256_collection[0][cnt] = cnt;
350 ctype->map256_collection[1][cnt] = cnt;
351 }
352
353 if (enc_not_ascii_compatible)
354 ctype->to_nonascii = 1;
355
356 obstack_init (&ctype->mempool);
357 }
358 else
359 ctype = locale->categories[LC_CTYPE].ctype =
360 copy_locale->categories[LC_CTYPE].ctype;
361 }
362}
363
364
365void
366ctype_finish (struct localedef_t *locale, const struct charmap_t *charmap)
367{
368 /* See POSIX.2, table 2-6 for the meaning of the following table. */
369#define NCLASS 12
370 static const struct
371 {
372 const char *name;
373 const char allow[NCLASS];
374 }
375 valid_table[NCLASS] =
376 {
377 /* The order is important. See token.h for more information.
378 M = Always, D = Default, - = Permitted, X = Mutually exclusive */
379 { "upper", "--MX-XDDXXX-" },
380 { "lower", "--MX-XDDXXX-" },
381 { "alpha", "---X-XDDXXX-" },
382 { "digit", "XXX--XDDXXX-" },
383 { "xdigit", "-----XDDXXX-" },
384 { "space", "XXXXX------X" },
385 { "print", "---------X--" },
386 { "graph", "---------X--" },
387 { "blank", "XXXXXM-----X" },
388 { "cntrl", "XXXXX-XX--XX" },
389 { "punct", "XXXXX-DD-X-X" },
390 { "alnum", "-----XDDXXX-" }
391 };
392 size_t cnt;
393 int cls1, cls2;
394 uint32_t space_value;
395 struct charseq *space_seq;
396 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
397 int warned;
398 const void *key;
399 size_t len;
400 void *vdata;
401 void *curs;
402
403 /* Now resolve copying and also handle completely missing definitions. */
404 if (ctype == NULL)
405 {
406 const char *repertoire_name;
407
408 /* First see whether we were supposed to copy. If yes, find the
409 actual definition. */
410 if (locale->copy_name[LC_CTYPE] != NULL)
411 {
412 /* Find the copying locale. This has to happen transitively since
413 the locale we are copying from might also copying another one. */
414 struct localedef_t *from = locale;
415
416 do
417 from = find_locale (LC_CTYPE, from->copy_name[LC_CTYPE],
418 from->repertoire_name, charmap);
419 while (from->categories[LC_CTYPE].ctype == NULL
420 && from->copy_name[LC_CTYPE] != NULL);
421
422 ctype = locale->categories[LC_CTYPE].ctype
423 = from->categories[LC_CTYPE].ctype;
424 }
425
426 /* If there is still no definition issue an warning and create an
427 empty one. */
428 if (ctype == NULL)
429 {
430 if (! be_quiet)
431 WITH_CUR_LOCALE (error (0, 0, _("\
432No definition for %s category found"), "LC_CTYPE"));
433 ctype_startup (NULL, locale, charmap, NULL, 0);
434 ctype = locale->categories[LC_CTYPE].ctype;
435 }
436
437 /* Get the repertoire we have to use. */
438 repertoire_name = locale->repertoire_name ?: repertoire_global;
439 if (repertoire_name != NULL)
440 ctype->repertoire = repertoire_read (repertoire_name);
441 }
442
443 /* We need the name of the currently used 8-bit character set to
444 make correct conversion between this 8-bit representation and the
445 ISO 10646 character set used internally for wide characters. */
446 ctype->codeset_name = charmap->code_set_name;
447 if (ctype->codeset_name == NULL)
448 {
449 if (! be_quiet)
450 WITH_CUR_LOCALE (error (0, 0, _("\
451No character set name specified in charmap")));
452 ctype->codeset_name = "//UNKNOWN//";
453 }
454
455 /* Set default value for classes not specified. */
456 set_class_defaults (ctype, charmap, ctype->repertoire);
457
458 /* Check according to table. */
459 for (cnt = 0; cnt < ctype->class_collection_act; ++cnt)
460 {
461 uint32_t tmp = ctype->class_collection[cnt];
462
463 if (tmp != 0)
464 {
465 for (cls1 = 0; cls1 < NCLASS; ++cls1)
466 if ((tmp & _ISwbit (cls1)) != 0)
467 for (cls2 = 0; cls2 < NCLASS; ++cls2)
468 if (valid_table[cls1].allow[cls2] != '-')
469 {
470 int eq = (tmp & _ISwbit (cls2)) != 0;
471 switch (valid_table[cls1].allow[cls2])
472 {
473 case 'M':
474 if (!eq)
475 {
476 uint32_t value = ctype->charnames[cnt];
477
478 if (!be_quiet)
479 WITH_CUR_LOCALE (error (0, 0, _("\
480character L'\\u%0*x' in class `%s' must be in class `%s'"),
481 value > 0xffff ? 8 : 4,
482 value,
483 valid_table[cls1].name,
484 valid_table[cls2].name));
485 }
486 break;
487
488 case 'X':
489 if (eq)
490 {
491 uint32_t value = ctype->charnames[cnt];
492
493 if (!be_quiet)
494 WITH_CUR_LOCALE (error (0, 0, _("\
495character L'\\u%0*x' in class `%s' must not be in class `%s'"),
496 value > 0xffff ? 8 : 4,
497 value,
498 valid_table[cls1].name,
499 valid_table[cls2].name));
500 }
501 break;
502
503 case 'D':
504 ctype->class_collection[cnt] |= _ISwbit (cls2);
505 break;
506
507 default:
508 WITH_CUR_LOCALE (error (5, 0, _("\
509internal error in %s, line %u"), __FUNCTION__, __LINE__));
510 }
511 }
512 }
513 }
514
515 for (cnt = 0; cnt < 256; ++cnt)
516 {
517 uint32_t tmp = ctype->class256_collection[cnt];
518
519 if (tmp != 0)
520 {
521 for (cls1 = 0; cls1 < NCLASS; ++cls1)
522 if ((tmp & _ISbit (cls1)) != 0)
523 for (cls2 = 0; cls2 < NCLASS; ++cls2)
524 if (valid_table[cls1].allow[cls2] != '-')
525 {
526 int eq = (tmp & _ISbit (cls2)) != 0;
527 switch (valid_table[cls1].allow[cls2])
528 {
529 case 'M':
530 if (!eq)
531 {
532 char buf[17];
533
534 snprintf (buf, sizeof buf, "\\%Zo", cnt);
535
536 if (!be_quiet)
537 WITH_CUR_LOCALE (error (0, 0, _("\
538character '%s' in class `%s' must be in class `%s'"),
539 buf,
540 valid_table[cls1].name,
541 valid_table[cls2].name));
542 }
543 break;
544
545 case 'X':
546 if (eq)
547 {
548 char buf[17];
549
550 snprintf (buf, sizeof buf, "\\%Zo", cnt);
551
552 if (!be_quiet)
553 WITH_CUR_LOCALE (error (0, 0, _("\
554character '%s' in class `%s' must not be in class `%s'"),
555 buf,
556 valid_table[cls1].name,
557 valid_table[cls2].name));
558 }
559 break;
560
561 case 'D':
562 ctype->class256_collection[cnt] |= _ISbit (cls2);
563 break;
564
565 default:
566 WITH_CUR_LOCALE (error (5, 0, _("\
567internal error in %s, line %u"), __FUNCTION__, __LINE__));
568 }
569 }
570 }
571 }
572
573 /* ... and now test <SP> as a special case. */
574 space_value = 32;
575 if (((cnt = BITPOS (tok_space),
576 (ELEM (ctype, class_collection, , space_value)
577 & BITw (tok_space)) == 0)
578 || (cnt = BITPOS (tok_blank),
579 (ELEM (ctype, class_collection, , space_value)
580 & BITw (tok_blank)) == 0)))
581 {
582 if (!be_quiet)
583 WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
584 valid_table[cnt].name));
585 }
586 else if (((cnt = BITPOS (tok_punct),
587 (ELEM (ctype, class_collection, , space_value)
588 & BITw (tok_punct)) != 0)
589 || (cnt = BITPOS (tok_graph),
590 (ELEM (ctype, class_collection, , space_value)
591 & BITw (tok_graph))
592 != 0)))
593 {
594 if (!be_quiet)
595 WITH_CUR_LOCALE (error (0, 0, _("\
596<SP> character must not be in class `%s'"),
597 valid_table[cnt].name));
598 }
599 else
600 ELEM (ctype, class_collection, , space_value) |= BITw (tok_print);
601
602 space_seq = charmap_find_value (charmap, "SP", 2);
603 if (space_seq == NULL)
604 space_seq = charmap_find_value (charmap, "space", 5);
605 if (space_seq == NULL)
606 space_seq = charmap_find_value (charmap, "U00000020", 9);
607 if (space_seq == NULL || space_seq->nbytes != 1)
608 {
609 if (!be_quiet)
610 WITH_CUR_LOCALE (error (0, 0, _("\
611character <SP> not defined in character map")));
612 }
613 else if (((cnt = BITPOS (tok_space),
614 (ctype->class256_collection[space_seq->bytes[0]]
615 & BIT (tok_space)) == 0)
616 || (cnt = BITPOS (tok_blank),
617 (ctype->class256_collection[space_seq->bytes[0]]
618 & BIT (tok_blank)) == 0)))
619 {
620 if (!be_quiet)
621 WITH_CUR_LOCALE (error (0, 0, _("<SP> character not in class `%s'"),
622 valid_table[cnt].name));
623 }
624 else if (((cnt = BITPOS (tok_punct),
625 (ctype->class256_collection[space_seq->bytes[0]]
626 & BIT (tok_punct)) != 0)
627 || (cnt = BITPOS (tok_graph),
628 (ctype->class256_collection[space_seq->bytes[0]]
629 & BIT (tok_graph)) != 0)))
630 {
631 if (!be_quiet)
632 WITH_CUR_LOCALE (error (0, 0, _("\
633<SP> character must not be in class `%s'"),
634 valid_table[cnt].name));
635 }
636 else
637 ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
638
639 /* Check whether all single-byte characters make to their upper/lowercase
640 equivalent according to the ASCII rules. */
641 for (cnt = 'A'; cnt <= 'Z'; ++cnt)
642 {
643 uint32_t uppval = ctype->map256_collection[0][cnt];
644 uint32_t lowval = ctype->map256_collection[1][cnt];
645 uint32_t lowuppval = ctype->map256_collection[0][lowval];
646 uint32_t lowlowval = ctype->map256_collection[1][lowval];
647
648 if (uppval != cnt
649 || lowval != cnt + 0x20
650 || lowuppval != cnt
651 || lowlowval != cnt + 0x20)
652 ctype->nonascii_case = 1;
653 }
654 for (cnt = 0; cnt < 256; ++cnt)
655 if (cnt < 'A' || (cnt > 'Z' && cnt < 'a') || cnt > 'z')
656 if (ctype->map256_collection[0][cnt] != cnt
657 || ctype->map256_collection[1][cnt] != cnt)
658 ctype->nonascii_case = 1;
659
660 /* Now that the tests are done make sure the name array contains all
661 characters which are handled in the WIDTH section of the
662 character set definition file. */
663 if (charmap->width_rules != NULL)
664 for (cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
665 {
666 unsigned char bytes[charmap->mb_cur_max];
667 int nbytes = charmap->width_rules[cnt].from->nbytes;
668
669 /* We have the range of character for which the width is
670 specified described using byte sequences of the multibyte
671 charset. We have to convert this to UCS4 now. And we
672 cannot simply convert the beginning and the end of the
673 sequence, we have to iterate over the byte sequence and
674 convert it for every single character. */
675 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
676
677 while (nbytes < charmap->width_rules[cnt].to->nbytes
678 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
679 nbytes) <= 0)
680 {
681 /* Find the UCS value for `bytes'. */
682 int inner;
683 uint32_t wch;
684 struct charseq *seq
685 = charmap_find_symbol (charmap, (char *) bytes, nbytes);
686
687 if (seq == NULL)
688 wch = ILLEGAL_CHAR_VALUE;
689 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
690 wch = seq->ucs4;
691 else
692 wch = repertoire_find_value (ctype->repertoire, seq->name,
693 strlen (seq->name));
694
695 if (wch != ILLEGAL_CHAR_VALUE)
696 /* We are only interested in the side-effects of the
697 `find_idx' call. It will add appropriate entries in
698 the name array if this is necessary. */
699 (void) find_idx (ctype, NULL, NULL, NULL, wch);
700
701 /* "Increment" the bytes sequence. */
702 inner = nbytes - 1;
703 while (inner >= 0 && bytes[inner] == 0xff)
704 --inner;
705
706 if (inner < 0)
707 {
708 /* We have to extend the byte sequence. */
709 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
710 break;
711
712 bytes[0] = 1;
713 memset (&bytes[1], 0, nbytes);
714 ++nbytes;
715 }
716 else
717 {
718 ++bytes[inner];
719 while (++inner < nbytes)
720 bytes[inner] = 0;
721 }
722 }
723 }
724
725 /* Now set all the other characters of the character set to the
726 default width. */
727 curs = NULL;
728 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
729 {
730 struct charseq *data = (struct charseq *) vdata;
731
732 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
733 data->ucs4 = repertoire_find_value (ctype->repertoire,
734 data->name, len);
735
736 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
737 (void) find_idx (ctype, NULL, NULL, NULL, data->ucs4);
738 }
739
740 /* There must be a multiple of 10 digits. */
741 if (ctype->mbdigits_act % 10 != 0)
742 {
743 assert (ctype->mbdigits_act == ctype->wcdigits_act);
744 ctype->wcdigits_act -= ctype->mbdigits_act % 10;
745 ctype->mbdigits_act -= ctype->mbdigits_act % 10;
746 WITH_CUR_LOCALE (error (0, 0, _("\
747`digit' category has not entries in groups of ten")));
748 }
749
750 /* Check the input digits. There must be a multiple of ten available.
751 In each group it could be that one or the other character is missing.
752 In this case the whole group must be removed. */
753 cnt = 0;
754 while (cnt < ctype->mbdigits_act)
755 {
756 size_t inner;
757 for (inner = 0; inner < 10; ++inner)
758 if (ctype->mbdigits[cnt + inner] == NULL)
759 break;
760
761 if (inner == 10)
762 cnt += 10;
763 else
764 {
765 /* Remove the group. */
766 memmove (&ctype->mbdigits[cnt], &ctype->mbdigits[cnt + 10],
767 ((ctype->wcdigits_act - cnt - 10)
768 * sizeof (ctype->mbdigits[0])));
769 ctype->mbdigits_act -= 10;
770 }
771 }
772
773 /* If no input digits are given use the default. */
774 if (ctype->mbdigits_act == 0)
775 {
776 if (ctype->mbdigits_max == 0)
777 {
778 ctype->mbdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
779 10 * sizeof (struct charseq *));
780 ctype->mbdigits_max = 10;
781 }
782
783 for (cnt = 0; cnt < 10; ++cnt)
784 {
785 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
786 (char *) digits + cnt, 1);
787 if (ctype->mbdigits[cnt] == NULL)
788 {
789 ctype->mbdigits[cnt] = charmap_find_symbol (charmap,
790 longnames[cnt],
791 strlen (longnames[cnt]));
792 if (ctype->mbdigits[cnt] == NULL)
793 {
794 /* Hum, this ain't good. */
795 WITH_CUR_LOCALE (error (0, 0, _("\
796no input digits defined and none of the standard names in the charmap")));
797
798 ctype->mbdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
799 sizeof (struct charseq) + 1);
800
801 /* This is better than nothing. */
802 ctype->mbdigits[cnt]->bytes[0] = digits[cnt];
803 ctype->mbdigits[cnt]->nbytes = 1;
804 }
805 }
806 }
807
808 ctype->mbdigits_act = 10;
809 }
810
811 /* Check the wide character input digits. There must be a multiple
812 of ten available. In each group it could be that one or the other
813 character is missing. In this case the whole group must be
814 removed. */
815 cnt = 0;
816 while (cnt < ctype->wcdigits_act)
817 {
818 size_t inner;
819 for (inner = 0; inner < 10; ++inner)
820 if (ctype->wcdigits[cnt + inner] == ILLEGAL_CHAR_VALUE)
821 break;
822
823 if (inner == 10)
824 cnt += 10;
825 else
826 {
827 /* Remove the group. */
828 memmove (&ctype->wcdigits[cnt], &ctype->wcdigits[cnt + 10],
829 ((ctype->wcdigits_act - cnt - 10)
830 * sizeof (ctype->wcdigits[0])));
831 ctype->wcdigits_act -= 10;
832 }
833 }
834
835 /* If no input digits are given use the default. */
836 if (ctype->wcdigits_act == 0)
837 {
838 if (ctype->wcdigits_max == 0)
839 {
840 ctype->wcdigits = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
841 10 * sizeof (uint32_t));
842 ctype->wcdigits_max = 10;
843 }
844
845 for (cnt = 0; cnt < 10; ++cnt)
846 ctype->wcdigits[cnt] = L'0' + cnt;
847
848 ctype->mbdigits_act = 10;
849 }
850
851 /* Check the outdigits. */
852 warned = 0;
853 for (cnt = 0; cnt < 10; ++cnt)
854 if (ctype->mboutdigits[cnt] == NULL)
855 {
856 static struct charseq replace[2];
857
858 if (!warned)
859 {
860 WITH_CUR_LOCALE (error (0, 0, _("\
861not all characters used in `outdigit' are available in the charmap")));
862 warned = 1;
863 }
864
865 replace[0].nbytes = 1;
866 replace[0].bytes[0] = '?';
867 replace[0].bytes[1] = '\0';
868 ctype->mboutdigits[cnt] = &replace[0];
869 }
870
871 warned = 0;
872 for (cnt = 0; cnt < 10; ++cnt)
873 if (ctype->wcoutdigits[cnt] == 0)
874 {
875 if (!warned)
876 {
877 WITH_CUR_LOCALE (error (0, 0, _("\
878not all characters used in `outdigit' are available in the repertoire")));
879 warned = 1;
880 }
881
882 ctype->wcoutdigits[cnt] = L'?';
883 }
884
885 /* Sort the entries in the translit_ignore list. */
886 if (ctype->translit_ignore != NULL)
887 {
888 struct translit_ignore_t *firstp = ctype->translit_ignore;
889 struct translit_ignore_t *runp;
890
891 ctype->ntranslit_ignore = 1;
892
893 for (runp = firstp->next; runp != NULL; runp = runp->next)
894 {
895 struct translit_ignore_t *lastp = NULL;
896 struct translit_ignore_t *cmpp;
897
898 ++ctype->ntranslit_ignore;
899
900 for (cmpp = firstp; cmpp != NULL; lastp = cmpp, cmpp = cmpp->next)
901 if (runp->from < cmpp->from)
902 break;
903
904 runp->next = lastp;
905 if (lastp == NULL)
906 firstp = runp;
907 }
908
909 ctype->translit_ignore = firstp;
910 }
911}
912
913
914void
915ctype_output (struct localedef_t *locale, const struct charmap_t *charmap,
916 const char *output_path)
917{
918 struct locale_ctype_t *ctype = locale->categories[LC_CTYPE].ctype;
919 const size_t nelems = (_NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1)
920 + ctype->nr_charclass + ctype->map_collection_nr);
921 struct locale_file file;
922 uint32_t default_missing_len;
923 size_t elem, cnt;
924
925 /* Now prepare the output: Find the sizes of the table we can use. */
926 allocate_arrays (ctype, charmap, ctype->repertoire);
927
928 default_missing_len = (ctype->default_missing
929 ? wcslen ((wchar_t *) ctype->default_missing)
930 : 0);
931
932 init_locale_data (&file, nelems);
933 for (elem = 0; elem < nelems; ++elem)
934 {
935 if (elem < _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1))
936 switch (elem)
937 {
938#define CTYPE_EMPTY(name) \
939 case name: \
940 add_locale_empty (&file); \
941 break
942
943 CTYPE_EMPTY(_NL_CTYPE_GAP1);
944 CTYPE_EMPTY(_NL_CTYPE_GAP2);
945 CTYPE_EMPTY(_NL_CTYPE_GAP3);
946 CTYPE_EMPTY(_NL_CTYPE_GAP4);
947 CTYPE_EMPTY(_NL_CTYPE_GAP5);
948 CTYPE_EMPTY(_NL_CTYPE_GAP6);
949
950#define CTYPE_RAW_DATA(name, base, size) \
951 case _NL_ITEM_INDEX (name): \
952 add_locale_raw_data (&file, base, size); \
953 break
954
955 CTYPE_RAW_DATA (_NL_CTYPE_CLASS,
956 ctype->ctype_b,
957 (256 + 128) * sizeof (char_class_t));
958
959#define CTYPE_UINT32_ARRAY(name, base, n_elems) \
960 case _NL_ITEM_INDEX (name): \
961 add_locale_uint32_array (&file, base, n_elems); \
962 break
963
964 CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER, ctype->map_b[0], 256 + 128);
965 CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER, ctype->map_b[1], 256 + 128);
966 CTYPE_UINT32_ARRAY (_NL_CTYPE_TOUPPER32, ctype->map32_b[0], 256);
967 CTYPE_UINT32_ARRAY (_NL_CTYPE_TOLOWER32, ctype->map32_b[1], 256);
968 CTYPE_RAW_DATA (_NL_CTYPE_CLASS32,
969 ctype->ctype32_b,
970 256 * sizeof (char_class32_t));
971
972#define CTYPE_UINT32(name, value) \
973 case _NL_ITEM_INDEX (name): \
974 add_locale_uint32 (&file, value); \
975 break
976
977 CTYPE_UINT32 (_NL_CTYPE_CLASS_OFFSET, ctype->class_offset);
978 CTYPE_UINT32 (_NL_CTYPE_MAP_OFFSET, ctype->map_offset);
979 CTYPE_UINT32 (_NL_CTYPE_TRANSLIT_TAB_SIZE, ctype->translit_idx_size);
980
981 CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_IDX,
982 ctype->translit_from_idx,
983 ctype->translit_idx_size);
984
985 CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_FROM_TBL,
986 ctype->translit_from_tbl,
987 ctype->translit_from_tbl_size
988 / sizeof (uint32_t));
989
990 CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_IDX,
991 ctype->translit_to_idx,
992 ctype->translit_idx_size);
993
994 CTYPE_UINT32_ARRAY (_NL_CTYPE_TRANSLIT_TO_TBL,
995 ctype->translit_to_tbl,
996 ctype->translit_to_tbl_size / sizeof (uint32_t));
997
998 case _NL_ITEM_INDEX (_NL_CTYPE_CLASS_NAMES):
999 /* The class name array. */
1000 start_locale_structure (&file);
1001 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1002 add_locale_string (&file, ctype->classnames[cnt]);
1003 add_locale_char (&file, 0);
1004 align_locale_data (&file, LOCFILE_ALIGN);
1005 end_locale_structure (&file);
1006 break;
1007
1008 case _NL_ITEM_INDEX (_NL_CTYPE_MAP_NAMES):
1009 /* The class name array. */
1010 start_locale_structure (&file);
1011 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
1012 add_locale_string (&file, ctype->mapnames[cnt]);
1013 add_locale_char (&file, 0);
1014 align_locale_data (&file, LOCFILE_ALIGN);
1015 end_locale_structure (&file);
1016 break;
1017
1018 case _NL_ITEM_INDEX (_NL_CTYPE_WIDTH):
1019 add_locale_wcwidth_table (&file, &ctype->width);
1020 break;
1021
1022 CTYPE_UINT32 (_NL_CTYPE_MB_CUR_MAX, ctype->mb_cur_max);
1023
1024 case _NL_ITEM_INDEX (_NL_CTYPE_CODESET_NAME):
1025 add_locale_string (&file, ctype->codeset_name);
1026 break;
1027
1028 CTYPE_UINT32 (_NL_CTYPE_MAP_TO_NONASCII, ctype->to_nonascii);
1029
1030 CTYPE_UINT32 (_NL_CTYPE_NONASCII_CASE, ctype->nonascii_case);
1031
1032 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
1033 add_locale_uint32 (&file, ctype->mbdigits_act / 10);
1034 break;
1035
1036 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_WC_LEN):
1037 add_locale_uint32 (&file, ctype->wcdigits_act / 10);
1038 break;
1039
1040 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_MB):
1041 start_locale_structure (&file);
1042 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_MB);
1043 cnt < ctype->mbdigits_act; cnt += 10)
1044 {
1045 add_locale_raw_data (&file, ctype->mbdigits[cnt]->bytes,
1046 ctype->mbdigits[cnt]->nbytes);
1047 add_locale_char (&file, 0);
1048 }
1049 end_locale_structure (&file);
1050 break;
1051
1052 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_MB):
1053 start_locale_structure (&file);
1054 cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_MB);
1055 add_locale_raw_data (&file, ctype->mboutdigits[cnt]->bytes,
1056 ctype->mboutdigits[cnt]->nbytes);
1057 add_locale_char (&file, 0);
1058 end_locale_structure (&file);
1059 break;
1060
1061 case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS9_WC):
1062 start_locale_structure (&file);
1063 for (cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS0_WC);
1064 cnt < ctype->wcdigits_act; cnt += 10)
1065 add_locale_uint32 (&file, ctype->wcdigits[cnt]);
1066 end_locale_structure (&file);
1067 break;
1068
1069 case _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC) ... _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT9_WC):
1070 cnt = elem - _NL_ITEM_INDEX (_NL_CTYPE_OUTDIGIT0_WC);
1071 add_locale_uint32 (&file, ctype->wcoutdigits[cnt]);
1072 break;
1073
1074 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING_LEN):
1075 add_locale_uint32 (&file, default_missing_len);
1076 break;
1077
1078 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_DEFAULT_MISSING):
1079 add_locale_uint32_array (&file, ctype->default_missing,
1080 default_missing_len);
1081 break;
1082
1083 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE_LEN):
1084 add_locale_uint32 (&file, ctype->ntranslit_ignore);
1085 break;
1086
1087 case _NL_ITEM_INDEX(_NL_CTYPE_TRANSLIT_IGNORE):
1088 start_locale_structure (&file);
1089 {
1090 struct translit_ignore_t *runp;
1091 for (runp = ctype->translit_ignore; runp != NULL;
1092 runp = runp->next)
1093 {
1094 add_locale_uint32 (&file, runp->from);
1095 add_locale_uint32 (&file, runp->to);
1096 add_locale_uint32 (&file, runp->step);
1097 }
1098 }
1099 end_locale_structure (&file);
1100 break;
1101
1102 default:
1103 assert (! "unknown CTYPE element");
1104 }
1105 else
1106 {
1107 /* Handle extra maps. */
1108 size_t nr = elem - _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
1109 if (nr < ctype->nr_charclass)
1110 {
1111 start_locale_prelude (&file);
1112 add_locale_uint32_array (&file, ctype->class_b[nr], 256 / 32);
1113 end_locale_prelude (&file);
1114 add_locale_wctype_table (&file, &ctype->class_3level[nr]);
1115 }
1116 else
1117 {
1118 nr -= ctype->nr_charclass;
1119 assert (nr < ctype->map_collection_nr);
1120 add_locale_wctrans_table (&file, &ctype->map_3level[nr]);
1121 }
1122 }
1123 }
1124
1125 write_locale_data (output_path, LC_CTYPE, "LC_CTYPE", &file);
1126}
1127
1128
1129/* Local functions. */
1130static void
1131ctype_class_new (struct linereader *lr, struct locale_ctype_t *ctype,
1132 const char *name)
1133{
1134 size_t cnt;
1135
1136 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
1137 if (strcmp (ctype->classnames[cnt], name) == 0)
1138 break;
1139
1140 if (cnt < ctype->nr_charclass)
1141 {
1142 lr_error (lr, _("character class `%s' already defined"), name);
1143 return;
1144 }
1145
1146 if (ctype->nr_charclass == MAX_NR_CHARCLASS)
1147 /* Exit code 2 is prescribed in P1003.2b. */
1148 WITH_CUR_LOCALE (error (2, 0, _("\
1149implementation limit: no more than %Zd character classes allowed"),
1150 MAX_NR_CHARCLASS));
1151
1152 ctype->classnames[ctype->nr_charclass++] = name;
1153}
1154
1155
1156static void
1157ctype_map_new (struct linereader *lr, struct locale_ctype_t *ctype,
1158 const char *name, const struct charmap_t *charmap)
1159{
1160 size_t max_chars = 0;
1161 size_t cnt;
1162
1163 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
1164 {
1165 if (strcmp (ctype->mapnames[cnt], name) == 0)
1166 break;
1167
1168 if (max_chars < ctype->map_collection_max[cnt])
1169 max_chars = ctype->map_collection_max[cnt];
1170 }
1171
1172 if (cnt < ctype->map_collection_nr)
1173 {
1174 lr_error (lr, _("character map `%s' already defined"), name);
1175 return;
1176 }
1177
1178 if (ctype->map_collection_nr == MAX_NR_CHARMAP)
1179 /* Exit code 2 is prescribed in P1003.2b. */
1180 WITH_CUR_LOCALE (error (2, 0, _("\
1181implementation limit: no more than %d character maps allowed"),
1182 MAX_NR_CHARMAP));
1183
1184 ctype->mapnames[cnt] = name;
1185
1186 if (max_chars == 0)
1187 ctype->map_collection_max[cnt] = charmap->mb_cur_max == 1 ? 256 : 512;
1188 else
1189 ctype->map_collection_max[cnt] = max_chars;
1190
1191 ctype->map_collection[cnt] = (uint32_t *)
1192 xcalloc (sizeof (uint32_t), ctype->map_collection_max[cnt]);
1193 ctype->map_collection_act[cnt] = 256;
1194
1195 ++ctype->map_collection_nr;
1196}
1197
1198
1199/* We have to be prepared that TABLE, MAX, and ACT can be NULL. This
1200 is possible if we only want to extend the name array. */
1201static uint32_t *
1202find_idx (struct locale_ctype_t *ctype, uint32_t **table, size_t *max,
1203 size_t *act, uint32_t idx)
1204{
1205 size_t cnt;
1206
1207 if (idx < 256)
1208 return table == NULL ? NULL : &(*table)[idx];
1209
1210 /* Use the charnames_idx lookup table instead of the slow search loop. */
1211#if 1
1212 cnt = idx_table_get (&ctype->charnames_idx, idx);
1213 if (cnt == EMPTY)
1214 /* Not found. */
1215 cnt = ctype->charnames_act;
1216#else
1217 for (cnt = 256; cnt < ctype->charnames_act; ++cnt)
1218 if (ctype->charnames[cnt] == idx)
1219 break;
1220#endif
1221
1222 /* We have to distinguish two cases: the name is found or not. */
1223 if (cnt == ctype->charnames_act)
1224 {
1225 /* Extend the name array. */
1226 if (ctype->charnames_act == ctype->charnames_max)
1227 {
1228 ctype->charnames_max *= 2;
1229 ctype->charnames = (uint32_t *)
1230 xrealloc (ctype->charnames,
1231 sizeof (uint32_t) * ctype->charnames_max);
1232 }
1233 ctype->charnames[ctype->charnames_act++] = idx;
1234 idx_table_add (&ctype->charnames_idx, idx, cnt);
1235 }
1236
1237 if (table == NULL)
1238 /* We have done everything we are asked to do. */
1239 return NULL;
1240
1241 if (max == NULL)
1242 /* The caller does not want to extend the table. */
1243 return (cnt >= *act ? NULL : &(*table)[cnt]);
1244
1245 if (cnt >= *act)
1246 {
1247 if (cnt >= *max)
1248 {
1249 size_t old_max = *max;
1250 do
1251 *max *= 2;
1252 while (*max <= cnt);
1253
1254 *table =
1255 (uint32_t *) xrealloc (*table, *max * sizeof (uint32_t));
1256 memset (&(*table)[old_max], '\0',
1257 (*max - old_max) * sizeof (uint32_t));
1258 }
1259
1260 *act = cnt + 1;
1261 }
1262
1263 return &(*table)[cnt];
1264}
1265
1266
1267static int
1268get_character (struct token *now, const struct charmap_t *charmap,
1269 struct repertoire_t *repertoire,
1270 struct charseq **seqp, uint32_t *wchp)
1271{
1272 if (now->tok == tok_bsymbol)
1273 {
1274 /* This will hopefully be the normal case. */
1275 *wchp = repertoire_find_value (repertoire, now->val.str.startmb,
1276 now->val.str.lenmb);
1277 *seqp = charmap_find_value (charmap, now->val.str.startmb,
1278 now->val.str.lenmb);
1279 }
1280 else if (now->tok == tok_ucs4)
1281 {
1282 char utmp[10];
1283
1284 snprintf (utmp, sizeof (utmp), "U%08X", now->val.ucs4);
1285 *seqp = charmap_find_value (charmap, utmp, 9);
1286
1287 if (*seqp == NULL)
1288 *seqp = repertoire_find_seq (repertoire, now->val.ucs4);
1289
1290 if (*seqp == NULL)
1291 {
1292 /* Compute the value in the charmap from the UCS value. */
1293 const char *symbol = repertoire_find_symbol (repertoire,
1294 now->val.ucs4);
1295
1296 if (symbol == NULL)
1297 *seqp = NULL;
1298 else
1299 *seqp = charmap_find_value (charmap, symbol, strlen (symbol));
1300
1301 if (*seqp == NULL)
1302 {
1303 if (repertoire != NULL)
1304 {
1305 /* Insert a negative entry. */
1306 static const struct charseq negative
1307 = { .ucs4 = ILLEGAL_CHAR_VALUE };
1308 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1309 sizeof (uint32_t));
1310 *newp = now->val.ucs4;
1311
1312 insert_entry (&repertoire->seq_table, newp,
1313 sizeof (uint32_t), (void *) &negative);
1314 }
1315 }
1316 else
1317 (*seqp)->ucs4 = now->val.ucs4;
1318 }
1319 else if ((*seqp)->ucs4 != now->val.ucs4)
1320 *seqp = NULL;
1321
1322 *wchp = now->val.ucs4;
1323 }
1324 else if (now->tok == tok_charcode)
1325 {
1326 /* We must map from the byte code to UCS4. */
1327 *seqp = charmap_find_symbol (charmap, now->val.str.startmb,
1328 now->val.str.lenmb);
1329
1330 if (*seqp == NULL)
1331 *wchp = ILLEGAL_CHAR_VALUE;
1332 else
1333 {
1334 if ((*seqp)->ucs4 == UNINITIALIZED_CHAR_VALUE)
1335 (*seqp)->ucs4 = repertoire_find_value (repertoire, (*seqp)->name,
1336 strlen ((*seqp)->name));
1337 *wchp = (*seqp)->ucs4;
1338 }
1339 }
1340 else
1341 return 1;
1342
1343 return 0;
1344}
1345
1346
1347/* Ellipsis like in `<foo123>..<foo12a>' or `<j1234>....<j1245>' and
1348 the .(2). counterparts. */
1349static void
1350charclass_symbolic_ellipsis (struct linereader *ldfile,
1351 struct locale_ctype_t *ctype,
1352 const struct charmap_t *charmap,
1353 struct repertoire_t *repertoire,
1354 struct token *now,
1355 const char *last_str,
1356 unsigned long int class256_bit,
1357 unsigned long int class_bit, int base,
1358 int ignore_content, int handle_digits, int step)
1359{
1360 const char *nowstr = now->val.str.startmb;
1361 char tmp[now->val.str.lenmb + 1];
1362 const char *cp;
1363 char *endp;
1364 unsigned long int from;
1365 unsigned long int to;
1366
1367 /* We have to compute the ellipsis values using the symbolic names. */
1368 assert (last_str != NULL);
1369
1370 if (strlen (last_str) != now->val.str.lenmb)
1371 {
1372 invalid_range:
1373 lr_error (ldfile,
1374 _("`%s' and `%.*s' are not valid names for symbolic range"),
1375 last_str, (int) now->val.str.lenmb, nowstr);
1376 return;
1377 }
1378
1379 if (memcmp (last_str, nowstr, now->val.str.lenmb) == 0)
1380 /* Nothing to do, the names are the same. */
1381 return;
1382
1383 for (cp = last_str; *cp == *(nowstr + (cp - last_str)); ++cp)
1384 ;
1385
1386 errno = 0;
1387 from = strtoul (cp, &endp, base);
1388 if ((from == UINT_MAX && errno == ERANGE) || *endp != '\0')
1389 goto invalid_range;
1390
1391 to = strtoul (nowstr + (cp - last_str), &endp, base);
1392 if ((to == UINT_MAX && errno == ERANGE)
1393 || (endp - nowstr) != now->val.str.lenmb || from >= to)
1394 goto invalid_range;
1395
1396 /* OK, we have a range FROM - TO. Now we can create the symbolic names. */
1397 if (!ignore_content)
1398 {
1399 now->val.str.startmb = tmp;
1400 while ((from += step) <= to)
1401 {
1402 struct charseq *seq;
1403 uint32_t wch;
1404
1405 sprintf (tmp, (base == 10 ? "%.*s%0*ld" : "%.*s%0*lX"),
1406 (int) (cp - last_str), last_str,
1407 (int) (now->val.str.lenmb - (cp - last_str)),
1408 from);
1409
1410 get_character (now, charmap, repertoire, &seq, &wch);
1411
1412 if (seq != NULL && seq->nbytes == 1)
1413 /* Yep, we can store information about this byte sequence. */
1414 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
1415
1416 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1417 /* We have the UCS4 position. */
1418 *find_idx (ctype, &ctype->class_collection,
1419 &ctype->class_collection_max,
1420 &ctype->class_collection_act, wch) |= class_bit;
1421
1422 if (handle_digits == 1)
1423 {
1424 /* We must store the digit values. */
1425 if (ctype->mbdigits_act == ctype->mbdigits_max)
1426 {
1427 ctype->mbdigits_max *= 2;
1428 ctype->mbdigits = xrealloc (ctype->mbdigits,
1429 (ctype->mbdigits_max
1430 * sizeof (char *)));
1431 ctype->wcdigits_max *= 2;
1432 ctype->wcdigits = xrealloc (ctype->wcdigits,
1433 (ctype->wcdigits_max
1434 * sizeof (uint32_t)));
1435 }
1436
1437 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1438 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1439 }
1440 else if (handle_digits == 2)
1441 {
1442 /* We must store the digit values. */
1443 if (ctype->outdigits_act >= 10)
1444 {
1445 lr_error (ldfile, _("\
1446%s: field `%s' does not contain exactly ten entries"),
1447 "LC_CTYPE", "outdigit");
1448 return;
1449 }
1450
1451 ctype->mboutdigits[ctype->outdigits_act] = seq;
1452 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1453 ++ctype->outdigits_act;
1454 }
1455 }
1456 }
1457}
1458
1459
1460/* Ellipsis like in `<U1234>..<U2345>' or `<U1234>..(2)..<U2345>'. */
1461static void
1462charclass_ucs4_ellipsis (struct linereader *ldfile,
1463 struct locale_ctype_t *ctype,
1464 const struct charmap_t *charmap,
1465 struct repertoire_t *repertoire,
1466 struct token *now, uint32_t last_wch,
1467 unsigned long int class256_bit,
1468 unsigned long int class_bit, int ignore_content,
1469 int handle_digits, int step)
1470{
1471 if (last_wch > now->val.ucs4)
1472 {
1473 lr_error (ldfile, _("\
1474to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
1475 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, now->val.ucs4,
1476 (now->val.ucs4 | last_wch) < 65536 ? 4 : 8, last_wch);
1477 return;
1478 }
1479
1480 if (!ignore_content)
1481 while ((last_wch += step) <= now->val.ucs4)
1482 {
1483 /* We have to find out whether there is a byte sequence corresponding
1484 to this UCS4 value. */
1485 struct charseq *seq;
1486 char utmp[10];
1487
1488 snprintf (utmp, sizeof (utmp), "U%08X", last_wch);
1489 seq = charmap_find_value (charmap, utmp, 9);
1490 if (seq == NULL)
1491 {
1492 snprintf (utmp, sizeof (utmp), "U%04X", last_wch);
1493 seq = charmap_find_value (charmap, utmp, 5);
1494 }
1495
1496 if (seq == NULL)
1497 /* Try looking in the repertoire map. */
1498 seq = repertoire_find_seq (repertoire, last_wch);
1499
1500 /* If this is the first time we look for this sequence create a new
1501 entry. */
1502 if (seq == NULL)
1503 {
1504 static const struct charseq negative
1505 = { .ucs4 = ILLEGAL_CHAR_VALUE };
1506
1507 /* Find the symbolic name for this UCS4 value. */
1508 if (repertoire != NULL)
1509 {
1510 const char *symbol = repertoire_find_symbol (repertoire,
1511 last_wch);
1512 uint32_t *newp = obstack_alloc (&repertoire->mem_pool,
1513 sizeof (uint32_t));
1514 *newp = last_wch;
1515
1516 if (symbol != NULL)
1517 /* We have a name, now search the multibyte value. */
1518 seq = charmap_find_value (charmap, symbol, strlen (symbol));
1519
1520 if (seq == NULL)
1521 /* We have to create a fake entry. */
1522 seq = (struct charseq *) &negative;
1523 else
1524 seq->ucs4 = last_wch;
1525
1526 insert_entry (&repertoire->seq_table, newp, sizeof (uint32_t),
1527 seq);
1528 }
1529 else
1530 /* We have to create a fake entry. */
1531 seq = (struct charseq *) &negative;
1532 }
1533
1534 /* We have a name, now search the multibyte value. */
1535 if (seq->ucs4 == last_wch && seq->nbytes == 1)
1536 /* Yep, we can store information about this byte sequence. */
1537 ctype->class256_collection[(size_t) seq->bytes[0]]
1538 |= class256_bit;
1539
1540 /* And of course we have the UCS4 position. */
1541 if (class_bit != 0)
1542 *find_idx (ctype, &ctype->class_collection,
1543 &ctype->class_collection_max,
1544 &ctype->class_collection_act, last_wch) |= class_bit;
1545
1546 if (handle_digits == 1)
1547 {
1548 /* We must store the digit values. */
1549 if (ctype->mbdigits_act == ctype->mbdigits_max)
1550 {
1551 ctype->mbdigits_max *= 2;
1552 ctype->mbdigits = xrealloc (ctype->mbdigits,
1553 (ctype->mbdigits_max
1554 * sizeof (char *)));
1555 ctype->wcdigits_max *= 2;
1556 ctype->wcdigits = xrealloc (ctype->wcdigits,
1557 (ctype->wcdigits_max
1558 * sizeof (uint32_t)));
1559 }
1560
1561 ctype->mbdigits[ctype->mbdigits_act++] = (seq->ucs4 == last_wch
1562 ? seq : NULL);
1563 ctype->wcdigits[ctype->wcdigits_act++] = last_wch;
1564 }
1565 else if (handle_digits == 2)
1566 {
1567 /* We must store the digit values. */
1568 if (ctype->outdigits_act >= 10)
1569 {
1570 lr_error (ldfile, _("\
1571%s: field `%s' does not contain exactly ten entries"),
1572 "LC_CTYPE", "outdigit");
1573 return;
1574 }
1575
1576 ctype->mboutdigits[ctype->outdigits_act] = (seq->ucs4 == last_wch
1577 ? seq : NULL);
1578 ctype->wcoutdigits[ctype->outdigits_act] = last_wch;
1579 ++ctype->outdigits_act;
1580 }
1581 }
1582}
1583
1584
1585/* Ellipsis as in `/xea/x12.../xea/x34'. */
1586static void
1587charclass_charcode_ellipsis (struct linereader *ldfile,
1588 struct locale_ctype_t *ctype,
1589 const struct charmap_t *charmap,
1590 struct repertoire_t *repertoire,
1591 struct token *now, char *last_charcode,
1592 uint32_t last_charcode_len,
1593 unsigned long int class256_bit,
1594 unsigned long int class_bit, int ignore_content,
1595 int handle_digits)
1596{
1597 /* First check whether the to-value is larger. */
1598 if (now->val.charcode.nbytes != last_charcode_len)
1599 {
1600 lr_error (ldfile, _("\
1601start and end character sequence of range must have the same length"));
1602 return;
1603 }
1604
1605 if (memcmp (last_charcode, now->val.charcode.bytes, last_charcode_len) > 0)
1606 {
1607 lr_error (ldfile, _("\
1608to-value character sequence is smaller than from-value sequence"));
1609 return;
1610 }
1611
1612 if (!ignore_content)
1613 {
1614 do
1615 {
1616 /* Increment the byte sequence value. */
1617 struct charseq *seq;
1618 uint32_t wch;
1619 int i;
1620
1621 for (i = last_charcode_len - 1; i >= 0; --i)
1622 if (++last_charcode[i] != 0)
1623 break;
1624
1625 if (last_charcode_len == 1)
1626 /* Of course we have the charcode value. */
1627 ctype->class256_collection[(size_t) last_charcode[0]]
1628 |= class256_bit;
1629
1630 /* Find the symbolic name. */
1631 seq = charmap_find_symbol (charmap, last_charcode,
1632 last_charcode_len);
1633 if (seq != NULL)
1634 {
1635 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1636 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1637 strlen (seq->name));
1638 wch = seq == NULL ? ILLEGAL_CHAR_VALUE : seq->ucs4;
1639
1640 if (wch != ILLEGAL_CHAR_VALUE && class_bit != 0)
1641 *find_idx (ctype, &ctype->class_collection,
1642 &ctype->class_collection_max,
1643 &ctype->class_collection_act, wch) |= class_bit;
1644 }
1645 else
1646 wch = ILLEGAL_CHAR_VALUE;
1647
1648 if (handle_digits == 1)
1649 {
1650 /* We must store the digit values. */
1651 if (ctype->mbdigits_act == ctype->mbdigits_max)
1652 {
1653 ctype->mbdigits_max *= 2;
1654 ctype->mbdigits = xrealloc (ctype->mbdigits,
1655 (ctype->mbdigits_max
1656 * sizeof (char *)));
1657 ctype->wcdigits_max *= 2;
1658 ctype->wcdigits = xrealloc (ctype->wcdigits,
1659 (ctype->wcdigits_max
1660 * sizeof (uint32_t)));
1661 }
1662
1663 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1664 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1665 seq->nbytes = last_charcode_len;
1666
1667 ctype->mbdigits[ctype->mbdigits_act++] = seq;
1668 ctype->wcdigits[ctype->wcdigits_act++] = wch;
1669 }
1670 else if (handle_digits == 2)
1671 {
1672 struct charseq *seq;
1673 /* We must store the digit values. */
1674 if (ctype->outdigits_act >= 10)
1675 {
1676 lr_error (ldfile, _("\
1677%s: field `%s' does not contain exactly ten entries"),
1678 "LC_CTYPE", "outdigit");
1679 return;
1680 }
1681
1682 seq = xmalloc (sizeof (struct charseq) + last_charcode_len);
1683 memcpy ((char *) (seq + 1), last_charcode, last_charcode_len);
1684 seq->nbytes = last_charcode_len;
1685
1686 ctype->mboutdigits[ctype->outdigits_act] = seq;
1687 ctype->wcoutdigits[ctype->outdigits_act] = wch;
1688 ++ctype->outdigits_act;
1689 }
1690 }
1691 while (memcmp (last_charcode, now->val.charcode.bytes,
1692 last_charcode_len) != 0);
1693 }
1694}
1695
1696
1697static uint32_t *
1698find_translit2 (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
1699 uint32_t wch)
1700{
1701 struct translit_t *trunp = ctype->translit;
1702 struct translit_ignore_t *tirunp = ctype->translit_ignore;
1703
1704 while (trunp != NULL)
1705 {
1706 /* XXX We simplify things here. The transliterations we look
1707 for are only allowed to have one character. */
1708 if (trunp->from[0] == wch && trunp->from[1] == 0)
1709 {
1710 /* Found it. Now look for a transliteration which can be
1711 represented with the character set. */
1712 struct translit_to_t *torunp = trunp->to;
1713
1714 while (torunp != NULL)
1715 {
1716 int i;
1717
1718 for (i = 0; torunp->str[i] != 0; ++i)
1719 {
1720 char utmp[10];
1721
1722 snprintf (utmp, sizeof (utmp), "U%08X", torunp->str[i]);
1723 if (charmap_find_value (charmap, utmp, 9) == NULL)
1724 /* This character cannot be represented. */
1725 break;
1726 }
1727
1728 if (torunp->str[i] == 0)
1729 return torunp->str;
1730
1731 torunp = torunp->next;
1732 }
1733
1734 break;
1735 }
1736
1737 trunp = trunp->next;
1738 }
1739
1740 /* Check for ignored chars. */
1741 while (tirunp != NULL)
1742 {
1743 if (tirunp->from <= wch && tirunp->to >= wch)
1744 {
1745 uint32_t wi;
1746
1747 for (wi = tirunp->from; wi <= wch; wi += tirunp->step)
1748 if (wi == wch)
1749 return no_str;
1750 }
1751 }
1752
1753 /* Nothing found. */
1754 return NULL;
1755}
1756
1757
1758uint32_t *
1759find_translit (struct localedef_t *locale, const struct charmap_t *charmap,
1760 uint32_t wch)
1761{
1762 struct locale_ctype_t *ctype;
1763 uint32_t *result = NULL;
1764
1765 assert (locale != NULL);
1766 ctype = locale->categories[LC_CTYPE].ctype;
1767
1768 if (ctype == NULL)
1769 return NULL;
1770
1771 if (ctype->translit != NULL)
1772 result = find_translit2 (ctype, charmap, wch);
1773
1774 if (result == NULL)
1775 {
1776 struct translit_include_t *irunp = ctype->translit_include;
1777
1778 while (irunp != NULL && result == NULL)
1779 {
1780 result = find_translit (find_locale (CTYPE_LOCALE,
1781 irunp->copy_locale,
1782 irunp->copy_repertoire,
1783 charmap),
1784 charmap, wch);
1785 irunp = irunp->next;
1786 }
1787 }
1788
1789 return result;
1790}
1791
1792
1793/* Read one transliteration entry. */
1794static uint32_t *
1795read_widestring (struct linereader *ldfile, struct token *now,
1796 const struct charmap_t *charmap,
1797 struct repertoire_t *repertoire)
1798{
1799 uint32_t *wstr;
1800
1801 if (now->tok == tok_default_missing)
1802 /* The special name "" will denote this case. */
1803 wstr = no_str;
1804 else if (now->tok == tok_bsymbol)
1805 {
1806 /* Get the value from the repertoire. */
1807 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1808 wstr[0] = repertoire_find_value (repertoire, now->val.str.startmb,
1809 now->val.str.lenmb);
1810 if (wstr[0] == ILLEGAL_CHAR_VALUE)
1811 {
1812 /* We cannot proceed, we don't know the UCS4 value. */
1813 free (wstr);
1814 return NULL;
1815 }
1816
1817 wstr[1] = 0;
1818 }
1819 else if (now->tok == tok_ucs4)
1820 {
1821 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1822 wstr[0] = now->val.ucs4;
1823 wstr[1] = 0;
1824 }
1825 else if (now->tok == tok_charcode)
1826 {
1827 /* Argh, we have to convert to the symbol name first and then to the
1828 UCS4 value. */
1829 struct charseq *seq = charmap_find_symbol (charmap,
1830 now->val.str.startmb,
1831 now->val.str.lenmb);
1832 if (seq == NULL)
1833 /* Cannot find the UCS4 value. */
1834 return NULL;
1835
1836 if (seq->ucs4 == UNINITIALIZED_CHAR_VALUE)
1837 seq->ucs4 = repertoire_find_value (repertoire, seq->name,
1838 strlen (seq->name));
1839 if (seq->ucs4 == ILLEGAL_CHAR_VALUE)
1840 /* We cannot proceed, we don't know the UCS4 value. */
1841 return NULL;
1842
1843 wstr = (uint32_t *) xmalloc (2 * sizeof (uint32_t));
1844 wstr[0] = seq->ucs4;
1845 wstr[1] = 0;
1846 }
1847 else if (now->tok == tok_string)
1848 {
1849 wstr = now->val.str.startwc;
1850 if (wstr == NULL || wstr[0] == 0)
1851 return NULL;
1852 }
1853 else
1854 {
1855 if (now->tok != tok_eol && now->tok != tok_eof)
1856 lr_ignore_rest (ldfile, 0);
1857 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
1858 return (uint32_t *) -1l;
1859 }
1860
1861 return wstr;
1862}
1863
1864
1865static void
1866read_translit_entry (struct linereader *ldfile, struct locale_ctype_t *ctype,
1867 struct token *now, const struct charmap_t *charmap,
1868 struct repertoire_t *repertoire)
1869{
1870 uint32_t *from_wstr = read_widestring (ldfile, now, charmap, repertoire);
1871 struct translit_t *result;
1872 struct translit_to_t **top;
1873 struct obstack *ob = &ctype->mempool;
1874 int first;
1875 int ignore;
1876
1877 if (from_wstr == NULL)
1878 /* There is no valid from string. */
1879 return;
1880
1881 result = (struct translit_t *) obstack_alloc (ob,
1882 sizeof (struct translit_t));
1883 result->from = from_wstr;
1884 result->fname = ldfile->fname;
1885 result->lineno = ldfile->lineno;
1886 result->next = NULL;
1887 result->to = NULL;
1888 top = &result->to;
1889 first = 1;
1890 ignore = 0;
1891
1892 while (1)
1893 {
1894 uint32_t *to_wstr;
1895
1896 /* Next we have one or more transliterations. They are
1897 separated by semicolons. */
1898 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
1899
1900 if (!first && (now->tok == tok_semicolon || now->tok == tok_eol))
1901 {
1902 /* One string read. */
1903 const uint32_t zero = 0;
1904
1905 if (!ignore)
1906 {
1907 obstack_grow (ob, &zero, 4);
1908 to_wstr = obstack_finish (ob);
1909
1910 *top = obstack_alloc (ob, sizeof (struct translit_to_t));
1911 (*top)->str = to_wstr;
1912 (*top)->next = NULL;
1913 }
1914
1915 if (now->tok == tok_eol)
1916 {
1917 result->next = ctype->translit;
1918 ctype->translit = result;
1919 return;
1920 }
1921
1922 if (!ignore)
1923 top = &(*top)->next;
1924 ignore = 0;
1925 }
1926 else
1927 {
1928 to_wstr = read_widestring (ldfile, now, charmap, repertoire);
1929 if (to_wstr == (uint32_t *) -1l)
1930 {
1931 /* An error occurred. */
1932 obstack_free (ob, result);
1933 return;
1934 }
1935
1936 if (to_wstr == NULL)
1937 ignore = 1;
1938 else
1939 /* This value is usable. */
1940 obstack_grow (ob, to_wstr, wcslen ((wchar_t *) to_wstr) * 4);
1941
1942 first = 0;
1943 }
1944 }
1945}
1946
1947
1948static void
1949read_translit_ignore_entry (struct linereader *ldfile,
1950 struct locale_ctype_t *ctype,
1951 const struct charmap_t *charmap,
1952 struct repertoire_t *repertoire)
1953{
1954 /* We expect a semicolon-separated list of characters we ignore. We are
1955 only interested in the wide character definitions. These must be
1956 single characters, possibly defining a range when an ellipsis is used. */
1957 while (1)
1958 {
1959 struct token *now = lr_token (ldfile, charmap, NULL, repertoire,
1960 verbose);
1961 struct translit_ignore_t *newp;
1962 uint32_t from;
1963
1964 if (now->tok == tok_eol || now->tok == tok_eof)
1965 {
1966 lr_error (ldfile,
1967 _("premature end of `translit_ignore' definition"));
1968 return;
1969 }
1970
1971 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
1972 {
1973 lr_error (ldfile, _("syntax error"));
1974 lr_ignore_rest (ldfile, 0);
1975 return;
1976 }
1977
1978 if (now->tok == tok_ucs4)
1979 from = now->val.ucs4;
1980 else
1981 /* Try to get the value. */
1982 from = repertoire_find_value (repertoire, now->val.str.startmb,
1983 now->val.str.lenmb);
1984
1985 if (from == ILLEGAL_CHAR_VALUE)
1986 {
1987 lr_error (ldfile, "invalid character name");
1988 newp = NULL;
1989 }
1990 else
1991 {
1992 newp = (struct translit_ignore_t *)
1993 obstack_alloc (&ctype->mempool, sizeof (struct translit_ignore_t));
1994 newp->from = from;
1995 newp->to = from;
1996 newp->step = 1;
1997
1998 newp->next = ctype->translit_ignore;
1999 ctype->translit_ignore = newp;
2000 }
2001
2002 /* Now we expect either a semicolon, an ellipsis, or the end of the
2003 line. */
2004 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2005
2006 if (now->tok == tok_ellipsis2 || now->tok == tok_ellipsis2_2)
2007 {
2008 /* XXX Should we bother implementing `....'? `...' certainly
2009 will not be implemented. */
2010 uint32_t to;
2011 int step = now->tok == tok_ellipsis2_2 ? 2 : 1;
2012
2013 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2014
2015 if (now->tok == tok_eol || now->tok == tok_eof)
2016 {
2017 lr_error (ldfile,
2018 _("premature end of `translit_ignore' definition"));
2019 return;
2020 }
2021
2022 if (now->tok != tok_bsymbol && now->tok != tok_ucs4)
2023 {
2024 lr_error (ldfile, _("syntax error"));
2025 lr_ignore_rest (ldfile, 0);
2026 return;
2027 }
2028
2029 if (now->tok == tok_ucs4)
2030 to = now->val.ucs4;
2031 else
2032 /* Try to get the value. */
2033 to = repertoire_find_value (repertoire, now->val.str.startmb,
2034 now->val.str.lenmb);
2035
2036 if (to == ILLEGAL_CHAR_VALUE)
2037 lr_error (ldfile, "invalid character name");
2038 else
2039 {
2040 /* Make sure the `to'-value is larger. */
2041 if (to >= from)
2042 {
2043 newp->to = to;
2044 newp->step = step;
2045 }
2046 else
2047 lr_error (ldfile, _("\
2048to-value <U%0*X> of range is smaller than from-value <U%0*X>"),
2049 (to | from) < 65536 ? 4 : 8, to,
2050 (to | from) < 65536 ? 4 : 8, from);
2051 }
2052
2053 /* And the next token. */
2054 now = lr_token (ldfile, charmap, NULL, repertoire, verbose);
2055 }
2056
2057 if (now->tok == tok_eol || now->tok == tok_eof)
2058 /* We are done. */
2059 return;
2060
2061 if (now->tok == tok_semicolon)
2062 /* Next round. */
2063 continue;
2064
2065 /* If we come here something is wrong. */
2066 lr_error (ldfile, _("syntax error"));
2067 lr_ignore_rest (ldfile, 0);
2068 return;
2069 }
2070}
2071
2072
2073/* The parser for the LC_CTYPE section of the locale definition. */
2074void
2075ctype_read (struct linereader *ldfile, struct localedef_t *result,
2076 const struct charmap_t *charmap, const char *repertoire_name,
2077 int ignore_content)
2078{
2079 struct repertoire_t *repertoire = NULL;
2080 struct locale_ctype_t *ctype;
2081 struct token *now;
2082 enum token_t nowtok;
2083 size_t cnt;
2084 uint32_t last_wch = 0;
2085 enum token_t last_token;
2086 enum token_t ellipsis_token;
2087 int step;
2088 char last_charcode[16];
2089 size_t last_charcode_len = 0;
2090 const char *last_str = NULL;
2091 int mapidx;
2092 struct localedef_t *copy_locale = NULL;
2093
2094 /* Get the repertoire we have to use. */
2095 if (repertoire_name != NULL)
2096 repertoire = repertoire_read (repertoire_name);
2097
2098 /* The rest of the line containing `LC_CTYPE' must be free. */
2099 lr_ignore_rest (ldfile, 1);
2100
2101
2102 do
2103 {
2104 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2105 nowtok = now->tok;
2106 }
2107 while (nowtok == tok_eol);
2108
2109 /* If we see `copy' now we are almost done. */
2110 if (nowtok == tok_copy)
2111 {
2112 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2113 if (now->tok != tok_string)
2114 {
2115 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2116
2117 skip_category:
2118 do
2119 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2120 while (now->tok != tok_eof && now->tok != tok_end);
2121
2122 if (now->tok != tok_eof
2123 || (now = lr_token (ldfile, charmap, NULL, NULL, verbose),
2124 now->tok == tok_eof))
2125 lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2126 else if (now->tok != tok_lc_ctype)
2127 {
2128 lr_error (ldfile, _("\
2129%1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2130 lr_ignore_rest (ldfile, 0);
2131 }
2132 else
2133 lr_ignore_rest (ldfile, 1);
2134
2135 return;
2136 }
2137
2138 if (! ignore_content)
2139 {
2140 /* Get the locale definition. */
2141 copy_locale = load_locale (LC_CTYPE, now->val.str.startmb,
2142 repertoire_name, charmap, NULL);
2143 if ((copy_locale->avail & CTYPE_LOCALE) == 0)
2144 {
2145 /* Not yet loaded. So do it now. */
2146 if (locfile_read (copy_locale, charmap) != 0)
2147 goto skip_category;
2148 }
2149
2150 if (copy_locale->categories[LC_CTYPE].ctype == NULL)
2151 return;
2152 }
2153
2154 lr_ignore_rest (ldfile, 1);
2155
2156 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2157 nowtok = now->tok;
2158 }
2159
2160 /* Prepare the data structures. */
2161 ctype_startup (ldfile, result, charmap, copy_locale, ignore_content);
2162 ctype = result->categories[LC_CTYPE].ctype;
2163
2164 /* Remember the repertoire we use. */
2165 if (!ignore_content)
2166 ctype->repertoire = repertoire;
2167
2168 while (1)
2169 {
2170 unsigned long int class_bit = 0;
2171 unsigned long int class256_bit = 0;
2172 int handle_digits = 0;
2173
2174 /* Of course we don't proceed beyond the end of file. */
2175 if (nowtok == tok_eof)
2176 break;
2177
2178 /* Ingore empty lines. */
2179 if (nowtok == tok_eol)
2180 {
2181 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2182 nowtok = now->tok;
2183 continue;
2184 }
2185
2186 switch (nowtok)
2187 {
2188 case tok_charclass:
2189 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2190 while (now->tok == tok_ident || now->tok == tok_string)
2191 {
2192 ctype_class_new (ldfile, ctype, now->val.str.startmb);
2193 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2194 if (now->tok != tok_semicolon)
2195 break;
2196 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2197 }
2198 if (now->tok != tok_eol)
2199 SYNTAX_ERROR (_("\
2200%s: syntax error in definition of new character class"), "LC_CTYPE");
2201 break;
2202
2203 case tok_charconv:
2204 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2205 while (now->tok == tok_ident || now->tok == tok_string)
2206 {
2207 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2208 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2209 if (now->tok != tok_semicolon)
2210 break;
2211 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2212 }
2213 if (now->tok != tok_eol)
2214 SYNTAX_ERROR (_("\
2215%s: syntax error in definition of new character map"), "LC_CTYPE");
2216 break;
2217
2218 case tok_class:
2219 /* Ignore the rest of the line if we don't need the input of
2220 this line. */
2221 if (ignore_content)
2222 {
2223 lr_ignore_rest (ldfile, 0);
2224 break;
2225 }
2226
2227 /* We simply forget the `class' keyword and use the following
2228 operand to determine the bit. */
2229 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2230 if (now->tok == tok_ident || now->tok == tok_string)
2231 {
2232 /* Must can be one of the predefined class names. */
2233 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2234 if (strcmp (ctype->classnames[cnt], now->val.str.startmb) == 0)
2235 break;
2236 if (cnt >= ctype->nr_charclass)
2237 {
2238 /* OK, it's a new class. */
2239 ctype_class_new (ldfile, ctype, now->val.str.startmb);
2240
2241 class_bit = _ISwbit (ctype->nr_charclass - 1);
2242 }
2243 else
2244 {
2245 class_bit = _ISwbit (cnt);
2246
2247 free (now->val.str.startmb);
2248 }
2249 }
2250 else if (now->tok == tok_digit)
2251 goto handle_tok_digit;
2252 else if (now->tok < tok_upper || now->tok > tok_blank)
2253 goto err_label;
2254 else
2255 {
2256 class_bit = BITw (now->tok);
2257 class256_bit = BIT (now->tok);
2258 }
2259
2260 /* The next character must be a semicolon. */
2261 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2262 if (now->tok != tok_semicolon)
2263 goto err_label;
2264 goto read_charclass;
2265
2266 case tok_upper:
2267 case tok_lower:
2268 case tok_alpha:
2269 case tok_alnum:
2270 case tok_space:
2271 case tok_cntrl:
2272 case tok_punct:
2273 case tok_graph:
2274 case tok_print:
2275 case tok_xdigit:
2276 case tok_blank:
2277 /* Ignore the rest of the line if we don't need the input of
2278 this line. */
2279 if (ignore_content)
2280 {
2281 lr_ignore_rest (ldfile, 0);
2282 break;
2283 }
2284
2285 class_bit = BITw (now->tok);
2286 class256_bit = BIT (now->tok);
2287 handle_digits = 0;
2288 read_charclass:
2289 ctype->class_done |= class_bit;
2290 last_token = tok_none;
2291 ellipsis_token = tok_none;
2292 step = 1;
2293 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2294 while (now->tok != tok_eol && now->tok != tok_eof)
2295 {
2296 uint32_t wch;
2297 struct charseq *seq;
2298
2299 if (ellipsis_token == tok_none)
2300 {
2301 if (get_character (now, charmap, repertoire, &seq, &wch))
2302 goto err_label;
2303
2304 if (!ignore_content && seq != NULL && seq->nbytes == 1)
2305 /* Yep, we can store information about this byte
2306 sequence. */
2307 ctype->class256_collection[seq->bytes[0]] |= class256_bit;
2308
2309 if (!ignore_content && wch != ILLEGAL_CHAR_VALUE
2310 && class_bit != 0)
2311 /* We have the UCS4 position. */
2312 *find_idx (ctype, &ctype->class_collection,
2313 &ctype->class_collection_max,
2314 &ctype->class_collection_act, wch) |= class_bit;
2315
2316 last_token = now->tok;
2317 /* Terminate the string. */
2318 if (last_token == tok_bsymbol)
2319 {
2320 now->val.str.startmb[now->val.str.lenmb] = '\0';
2321 last_str = now->val.str.startmb;
2322 }
2323 else
2324 last_str = NULL;
2325 last_wch = wch;
2326 memcpy (last_charcode, now->val.charcode.bytes, 16);
2327 last_charcode_len = now->val.charcode.nbytes;
2328
2329 if (!ignore_content && handle_digits == 1)
2330 {
2331 /* We must store the digit values. */
2332 if (ctype->mbdigits_act == ctype->mbdigits_max)
2333 {
2334 ctype->mbdigits_max += 10;
2335 ctype->mbdigits = xrealloc (ctype->mbdigits,
2336 (ctype->mbdigits_max
2337 * sizeof (char *)));
2338 ctype->wcdigits_max += 10;
2339 ctype->wcdigits = xrealloc (ctype->wcdigits,
2340 (ctype->wcdigits_max
2341 * sizeof (uint32_t)));
2342 }
2343
2344 ctype->mbdigits[ctype->mbdigits_act++] = seq;
2345 ctype->wcdigits[ctype->wcdigits_act++] = wch;
2346 }
2347 else if (!ignore_content && handle_digits == 2)
2348 {
2349 /* We must store the digit values. */
2350 if (ctype->outdigits_act >= 10)
2351 {
2352 lr_error (ldfile, _("\
2353%s: field `%s' does not contain exactly ten entries"),
2354 "LC_CTYPE", "outdigit");
2355 lr_ignore_rest (ldfile, 0);
2356 break;
2357 }
2358
2359 ctype->mboutdigits[ctype->outdigits_act] = seq;
2360 ctype->wcoutdigits[ctype->outdigits_act] = wch;
2361 ++ctype->outdigits_act;
2362 }
2363 }
2364 else
2365 {
2366 /* Now it gets complicated. We have to resolve the
2367 ellipsis problem. First we must distinguish between
2368 the different kind of ellipsis and this must match the
2369 tokens we have seen. */
2370 assert (last_token != tok_none);
2371
2372 if (last_token != now->tok)
2373 {
2374 lr_error (ldfile, _("\
2375ellipsis range must be marked by two operands of same type"));
2376 lr_ignore_rest (ldfile, 0);
2377 break;
2378 }
2379
2380 if (last_token == tok_bsymbol)
2381 {
2382 if (ellipsis_token == tok_ellipsis3)
2383 lr_error (ldfile, _("with symbolic name range values \
2384the absolute ellipsis `...' must not be used"));
2385
2386 charclass_symbolic_ellipsis (ldfile, ctype, charmap,
2387 repertoire, now, last_str,
2388 class256_bit, class_bit,
2389 (ellipsis_token
2390 == tok_ellipsis4
2391 ? 10 : 16),
2392 ignore_content,
2393 handle_digits, step);
2394 }
2395 else if (last_token == tok_ucs4)
2396 {
2397 if (ellipsis_token != tok_ellipsis2)
2398 lr_error (ldfile, _("\
2399with UCS range values one must use the hexadecimal symbolic ellipsis `..'"));
2400
2401 charclass_ucs4_ellipsis (ldfile, ctype, charmap,
2402 repertoire, now, last_wch,
2403 class256_bit, class_bit,
2404 ignore_content, handle_digits,
2405 step);
2406 }
2407 else
2408 {
2409 assert (last_token == tok_charcode);
2410
2411 if (ellipsis_token != tok_ellipsis3)
2412 lr_error (ldfile, _("\
2413with character code range values one must use the absolute ellipsis `...'"));
2414
2415 charclass_charcode_ellipsis (ldfile, ctype, charmap,
2416 repertoire, now,
2417 last_charcode,
2418 last_charcode_len,
2419 class256_bit, class_bit,
2420 ignore_content,
2421 handle_digits);
2422 }
2423
2424 /* Now we have used the last value. */
2425 last_token = tok_none;
2426 }
2427
2428 /* Next we expect a semicolon or the end of the line. */
2429 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2430 if (now->tok == tok_eol || now->tok == tok_eof)
2431 break;
2432
2433 if (last_token != tok_none
2434 && now->tok >= tok_ellipsis2 && now->tok <= tok_ellipsis4_2)
2435 {
2436 if (now->tok == tok_ellipsis2_2)
2437 {
2438 now->tok = tok_ellipsis2;
2439 step = 2;
2440 }
2441 else if (now->tok == tok_ellipsis4_2)
2442 {
2443 now->tok = tok_ellipsis4;
2444 step = 2;
2445 }
2446
2447 ellipsis_token = now->tok;
2448
2449 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2450 continue;
2451 }
2452
2453 if (now->tok != tok_semicolon)
2454 goto err_label;
2455
2456 /* And get the next character. */
2457 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2458
2459 ellipsis_token = tok_none;
2460 step = 1;
2461 }
2462 break;
2463
2464 case tok_digit:
2465 /* Ignore the rest of the line if we don't need the input of
2466 this line. */
2467 if (ignore_content)
2468 {
2469 lr_ignore_rest (ldfile, 0);
2470 break;
2471 }
2472
2473 handle_tok_digit:
2474 class_bit = _ISwdigit;
2475 class256_bit = _ISdigit;
2476 handle_digits = 1;
2477 goto read_charclass;
2478
2479 case tok_outdigit:
2480 /* Ignore the rest of the line if we don't need the input of
2481 this line. */
2482 if (ignore_content)
2483 {
2484 lr_ignore_rest (ldfile, 0);
2485 break;
2486 }
2487
2488 if (ctype->outdigits_act != 0)
2489 lr_error (ldfile, _("\
2490%s: field `%s' declared more than once"),
2491 "LC_CTYPE", "outdigit");
2492 class_bit = 0;
2493 class256_bit = 0;
2494 handle_digits = 2;
2495 goto read_charclass;
2496
2497 case tok_toupper:
2498 /* Ignore the rest of the line if we don't need the input of
2499 this line. */
2500 if (ignore_content)
2501 {
2502 lr_ignore_rest (ldfile, 0);
2503 break;
2504 }
2505
2506 mapidx = 0;
2507 goto read_mapping;
2508
2509 case tok_tolower:
2510 /* Ignore the rest of the line if we don't need the input of
2511 this line. */
2512 if (ignore_content)
2513 {
2514 lr_ignore_rest (ldfile, 0);
2515 break;
2516 }
2517
2518 mapidx = 1;
2519 goto read_mapping;
2520
2521 case tok_map:
2522 /* Ignore the rest of the line if we don't need the input of
2523 this line. */
2524 if (ignore_content)
2525 {
2526 lr_ignore_rest (ldfile, 0);
2527 break;
2528 }
2529
2530 /* We simply forget the `map' keyword and use the following
2531 operand to determine the mapping. */
2532 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2533 if (now->tok == tok_ident || now->tok == tok_string)
2534 {
2535 size_t cnt;
2536
2537 for (cnt = 2; cnt < ctype->map_collection_nr; ++cnt)
2538 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2539 break;
2540
2541 if (cnt < ctype->map_collection_nr)
2542 free (now->val.str.startmb);
2543 else
2544 /* OK, it's a new map. */
2545 ctype_map_new (ldfile, ctype, now->val.str.startmb, charmap);
2546
2547 mapidx = cnt;
2548 }
2549 else if (now->tok < tok_toupper || now->tok > tok_tolower)
2550 goto err_label;
2551 else
2552 mapidx = now->tok - tok_toupper;
2553
2554 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2555 /* This better should be a semicolon. */
2556 if (now->tok != tok_semicolon)
2557 goto err_label;
2558
2559 read_mapping:
2560 /* Test whether this mapping was already defined. */
2561 if (ctype->tomap_done[mapidx])
2562 {
2563 lr_error (ldfile, _("duplicated definition for mapping `%s'"),
2564 ctype->mapnames[mapidx]);
2565 lr_ignore_rest (ldfile, 0);
2566 break;
2567 }
2568 ctype->tomap_done[mapidx] = 1;
2569
2570 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2571 while (now->tok != tok_eol && now->tok != tok_eof)
2572 {
2573 struct charseq *from_seq;
2574 uint32_t from_wch;
2575 struct charseq *to_seq;
2576 uint32_t to_wch;
2577
2578 /* Every pair starts with an opening brace. */
2579 if (now->tok != tok_open_brace)
2580 goto err_label;
2581
2582 /* Next comes the from-value. */
2583 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2584 if (get_character (now, charmap, repertoire, &from_seq,
2585 &from_wch) != 0)
2586 goto err_label;
2587
2588 /* The next is a comma. */
2589 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2590 if (now->tok != tok_comma)
2591 goto err_label;
2592
2593 /* And the other value. */
2594 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2595 if (get_character (now, charmap, repertoire, &to_seq,
2596 &to_wch) != 0)
2597 goto err_label;
2598
2599 /* And the last thing is the closing brace. */
2600 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2601 if (now->tok != tok_close_brace)
2602 goto err_label;
2603
2604 if (!ignore_content)
2605 {
2606 /* Check whether the mapping converts from an ASCII value
2607 to a non-ASCII value. */
2608 if (from_seq != NULL && from_seq->nbytes == 1
2609 && isascii (from_seq->bytes[0])
2610 && to_seq != NULL && (to_seq->nbytes != 1
2611 || !isascii (to_seq->bytes[0])))
2612 ctype->to_nonascii = 1;
2613
2614 if (mapidx < 2 && from_seq != NULL && to_seq != NULL
2615 && from_seq->nbytes == 1 && to_seq->nbytes == 1)
2616 /* We can use this value. */
2617 ctype->map256_collection[mapidx][from_seq->bytes[0]]
2618 = to_seq->bytes[0];
2619
2620 if (from_wch != ILLEGAL_CHAR_VALUE
2621 && to_wch != ILLEGAL_CHAR_VALUE)
2622 /* Both correct values. */
2623 *find_idx (ctype, &ctype->map_collection[mapidx],
2624 &ctype->map_collection_max[mapidx],
2625 &ctype->map_collection_act[mapidx],
2626 from_wch) = to_wch;
2627 }
2628
2629 /* Now comes a semicolon or the end of the line/file. */
2630 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2631 if (now->tok == tok_semicolon)
2632 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2633 }
2634 break;
2635
2636 case tok_translit_start:
2637 /* Ignore the entire translit section with its peculiar syntax
2638 if we don't need the input. */
2639 if (ignore_content)
2640 {
2641 do
2642 {
2643 lr_ignore_rest (ldfile, 0);
2644 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2645 }
2646 while (now->tok != tok_translit_end && now->tok != tok_eof);
2647
2648 if (now->tok == tok_eof)
2649 lr_error (ldfile, _(\
2650"%s: `translit_start' section does not end with `translit_end'"),
2651 "LC_CTYPE");
2652
2653 break;
2654 }
2655
2656 /* The rest of the line better should be empty. */
2657 lr_ignore_rest (ldfile, 1);
2658
2659 /* We count here the number of allocated entries in the `translit'
2660 array. */
2661 cnt = 0;
2662
2663 ldfile->translate_strings = 1;
2664 ldfile->return_widestr = 1;
2665
2666 /* We proceed until we see the `translit_end' token. */
2667 while (now = lr_token (ldfile, charmap, NULL, repertoire, verbose),
2668 now->tok != tok_translit_end && now->tok != tok_eof)
2669 {
2670 if (now->tok == tok_eol)
2671 /* Ignore empty lines. */
2672 continue;
2673
2674 if (now->tok == tok_include)
2675 {
2676 /* We have to include locale. */
2677 const char *locale_name;
2678 const char *repertoire_name;
2679 struct translit_include_t *include_stmt, **include_ptr;
2680
2681 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2682 /* This should be a string or an identifier. In any
2683 case something to name a locale. */
2684 if (now->tok != tok_string && now->tok != tok_ident)
2685 {
2686 translit_syntax:
2687 lr_error (ldfile, _("%s: syntax error"), "LC_CTYPE");
2688 lr_ignore_rest (ldfile, 0);
2689 continue;
2690 }
2691 locale_name = now->val.str.startmb;
2692
2693 /* Next should be a semicolon. */
2694 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2695 if (now->tok != tok_semicolon)
2696 goto translit_syntax;
2697
2698 /* Now the repertoire name. */
2699 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2700 if ((now->tok != tok_string && now->tok != tok_ident)
2701 || now->val.str.startmb == NULL)
2702 goto translit_syntax;
2703 repertoire_name = now->val.str.startmb;
2704 if (repertoire_name[0] == '\0')
2705 /* Ignore the empty string. */
2706 repertoire_name = NULL;
2707
2708 /* Save the include statement for later processing. */
2709 include_stmt = (struct translit_include_t *)
2710 xmalloc (sizeof (struct translit_include_t));
2711 include_stmt->copy_locale = locale_name;
2712 include_stmt->copy_repertoire = repertoire_name;
2713 include_stmt->next = NULL;
2714
2715 include_ptr = &ctype->translit_include;
2716 while (*include_ptr != NULL)
2717 include_ptr = &(*include_ptr)->next;
2718 *include_ptr = include_stmt;
2719
2720 /* The rest of the line must be empty. */
2721 lr_ignore_rest (ldfile, 1);
2722
2723 /* Make sure the locale is read. */
2724 add_to_readlist (LC_CTYPE, locale_name, repertoire_name,
2725 1, NULL);
2726 continue;
2727 }
2728 else if (now->tok == tok_default_missing)
2729 {
2730 uint32_t *wstr;
2731
2732 while (1)
2733 {
2734 /* We expect a single character or string as the
2735 argument. */
2736 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2737 wstr = read_widestring (ldfile, now, charmap,
2738 repertoire);
2739
2740 if (wstr != NULL)
2741 {
2742 if (ctype->default_missing != NULL)
2743 {
2744 lr_error (ldfile, _("\
2745%s: duplicate `default_missing' definition"), "LC_CTYPE");
2746 WITH_CUR_LOCALE (error_at_line (0, 0,
2747 ctype->default_missing_file,
2748 ctype->default_missing_lineno,
2749 _("\
2750previous definition was here")));
2751 }
2752 else
2753 {
2754 ctype->default_missing = wstr;
2755 ctype->default_missing_file = ldfile->fname;
2756 ctype->default_missing_lineno = ldfile->lineno;
2757 }
2758 /* We can have more entries, ignore them. */
2759 lr_ignore_rest (ldfile, 0);
2760 break;
2761 }
2762 else if (wstr == (uint32_t *) -1l)
2763 /* This was an syntax error. */
2764 break;
2765
2766 /* Maybe there is another replacement we can use. */
2767 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2768 if (now->tok == tok_eol || now->tok == tok_eof)
2769 {
2770 /* Nothing found. We tell the user. */
2771 lr_error (ldfile, _("\
2772%s: no representable `default_missing' definition found"), "LC_CTYPE");
2773 break;
2774 }
2775 if (now->tok != tok_semicolon)
2776 goto translit_syntax;
2777 }
2778
2779 continue;
2780 }
2781 else if (now->tok == tok_translit_ignore)
2782 {
2783 read_translit_ignore_entry (ldfile, ctype, charmap,
2784 repertoire);
2785 continue;
2786 }
2787
2788 read_translit_entry (ldfile, ctype, now, charmap, repertoire);
2789 }
2790 ldfile->return_widestr = 0;
2791
2792 if (now->tok == tok_eof)
2793 lr_error (ldfile, _(\
2794"%s: `translit_start' section does not end with `translit_end'"),
2795 "LC_CTYPE");
2796
2797 break;
2798
2799 case tok_ident:
2800 /* Ignore the rest of the line if we don't need the input of
2801 this line. */
2802 if (ignore_content)
2803 {
2804 lr_ignore_rest (ldfile, 0);
2805 break;
2806 }
2807
2808 /* This could mean one of several things. First test whether
2809 it's a character class name. */
2810 for (cnt = 0; cnt < ctype->nr_charclass; ++cnt)
2811 if (strcmp (now->val.str.startmb, ctype->classnames[cnt]) == 0)
2812 break;
2813 if (cnt < ctype->nr_charclass)
2814 {
2815 class_bit = _ISwbit (cnt);
2816 class256_bit = cnt <= 11 ? _ISbit (cnt) : 0;
2817 free (now->val.str.startmb);
2818 goto read_charclass;
2819 }
2820 for (cnt = 0; cnt < ctype->map_collection_nr; ++cnt)
2821 if (strcmp (now->val.str.startmb, ctype->mapnames[cnt]) == 0)
2822 break;
2823 if (cnt < ctype->map_collection_nr)
2824 {
2825 mapidx = cnt;
2826 free (now->val.str.startmb);
2827 goto read_mapping;
2828 }
2829 break;
2830
2831 case tok_end:
2832 /* Next we assume `LC_CTYPE'. */
2833 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2834 if (now->tok == tok_eof)
2835 break;
2836 if (now->tok == tok_eol)
2837 lr_error (ldfile, _("%s: incomplete `END' line"),
2838 "LC_CTYPE");
2839 else if (now->tok != tok_lc_ctype)
2840 lr_error (ldfile, _("\
2841%1$s: definition does not end with `END %1$s'"), "LC_CTYPE");
2842 lr_ignore_rest (ldfile, now->tok == tok_lc_ctype);
2843 return;
2844
2845 default:
2846 err_label:
2847 if (now->tok != tok_eof)
2848 SYNTAX_ERROR (_("%s: syntax error"), "LC_CTYPE");
2849 }
2850
2851 /* Prepare for the next round. */
2852 now = lr_token (ldfile, charmap, NULL, NULL, verbose);
2853 nowtok = now->tok;
2854 }
2855
2856 /* When we come here we reached the end of the file. */
2857 lr_error (ldfile, _("%s: premature end of file"), "LC_CTYPE");
2858}
2859
2860
2861/* Subroutine of set_class_defaults, below. */
2862static void
2863set_one_default (struct locale_ctype_t *ctype,
2864 const struct charmap_t *charmap,
2865 int bitpos, int from, int to)
2866{
2867 char tmp[2];
2868 int ch;
2869 int bit = _ISbit (bitpos);
2870 int bitw = _ISwbit (bitpos);
2871 /* Define string. */
2872 strcpy (tmp, "?");
2873
2874 for (ch = from; ch <= to; ++ch)
2875 {
2876 struct charseq *seq;
2877 tmp[0] = ch;
2878
2879 seq = charmap_find_value (charmap, tmp, 1);
2880 if (seq == NULL)
2881 {
2882 char buf[10];
2883 sprintf (buf, "U%08X", ch);
2884 seq = charmap_find_value (charmap, buf, 9);
2885 }
2886 if (seq == NULL)
2887 {
2888 if (!be_quiet)
2889 WITH_CUR_LOCALE (error (0, 0, _("\
2890%s: character `%s' not defined while needed as default value"),
2891 "LC_CTYPE", tmp));
2892 }
2893 else if (seq->nbytes != 1)
2894 WITH_CUR_LOCALE (error (0, 0, _("\
2895%s: character `%s' in charmap not representable with one byte"),
2896 "LC_CTYPE", tmp));
2897 else
2898 ctype->class256_collection[seq->bytes[0]] |= bit;
2899
2900 /* No need to search here, the ASCII value is also the Unicode
2901 value. */
2902 ELEM (ctype, class_collection, , ch) |= bitw;
2903 }
2904}
2905
2906static void
2907set_class_defaults (struct locale_ctype_t *ctype,
2908 const struct charmap_t *charmap,
2909 struct repertoire_t *repertoire)
2910{
2911#define set_default(bitpos, from, to) \
2912 set_one_default (ctype, charmap, bitpos, from, to)
2913
2914 /* These function defines the default values for the classes and conversions
2915 according to POSIX.2 2.5.2.1.
2916 It may seem that the order of these if-blocks is arbitrary but it is NOT.
2917 Don't move them unless you know what you do! */
2918
2919 /* Set default values if keyword was not present. */
2920 if ((ctype->class_done & BITw (tok_upper)) == 0)
2921 /* "If this keyword [lower] is not specified, the lowercase letters
2922 `A' through `Z', ..., shall automatically belong to this class,
2923 with implementation defined character values." [P1003.2, 2.5.2.1] */
2924 set_default (BITPOS (tok_upper), 'A', 'Z');
2925
2926 if ((ctype->class_done & BITw (tok_lower)) == 0)
2927 /* "If this keyword [lower] is not specified, the lowercase letters
2928 `a' through `z', ..., shall automatically belong to this class,
2929 with implementation defined character values." [P1003.2, 2.5.2.1] */
2930 set_default (BITPOS (tok_lower), 'a', 'z');
2931
2932 if ((ctype->class_done & BITw (tok_alpha)) == 0)
2933 {
2934 /* Table 2-6 in P1003.2 says that characters in class `upper' or
2935 class `lower' *must* be in class `alpha'. */
2936 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower);
2937 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower);
2938
2939 for (size_t cnt = 0; cnt < 256; ++cnt)
2940 if ((ctype->class256_collection[cnt] & mask) != 0)
2941 ctype->class256_collection[cnt] |= BIT (tok_alpha);
2942
2943 for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2944 if ((ctype->class_collection[cnt] & maskw) != 0)
2945 ctype->class_collection[cnt] |= BITw (tok_alpha);
2946 }
2947
2948 if ((ctype->class_done & BITw (tok_digit)) == 0)
2949 /* "If this keyword [digit] is not specified, the digits `0' through
2950 `9', ..., shall automatically belong to this class, with
2951 implementation-defined character values." [P1003.2, 2.5.2.1] */
2952 set_default (BITPOS (tok_digit), '0', '9');
2953
2954 /* "Only characters specified for the `alpha' and `digit' keyword
2955 shall be specified. Characters specified for the keyword `alpha'
2956 and `digit' are automatically included in this class. */
2957 {
2958 unsigned long int mask = BIT (tok_alpha) | BIT (tok_digit);
2959 unsigned long int maskw = BITw (tok_alpha) | BITw (tok_digit);
2960
2961 for (size_t cnt = 0; cnt < 256; ++cnt)
2962 if ((ctype->class256_collection[cnt] & mask) != 0)
2963 ctype->class256_collection[cnt] |= BIT (tok_alnum);
2964
2965 for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt)
2966 if ((ctype->class_collection[cnt] & maskw) != 0)
2967 ctype->class_collection[cnt] |= BITw (tok_alnum);
2968 }
2969
2970 if ((ctype->class_done & BITw (tok_space)) == 0)
2971 /* "If this keyword [space] is not specified, the characters <space>,
2972 <form-feed>, <newline>, <carriage-return>, <tab>, and
2973 <vertical-tab>, ..., shall automatically belong to this class,
2974 with implementation-defined character values." [P1003.2, 2.5.2.1] */
2975 {
2976 struct charseq *seq;
2977
2978 seq = charmap_find_value (charmap, "space", 5);
2979 if (seq == NULL)
2980 seq = charmap_find_value (charmap, "SP", 2);
2981 if (seq == NULL)
2982 seq = charmap_find_value (charmap, "U00000020", 9);
2983 if (seq == NULL)
2984 {
2985 if (!be_quiet)
2986 WITH_CUR_LOCALE (error (0, 0, _("\
2987%s: character `%s' not defined while needed as default value"),
2988 "LC_CTYPE", "<space>"));
2989 }
2990 else if (seq->nbytes != 1)
2991 WITH_CUR_LOCALE (error (0, 0, _("\
2992%s: character `%s' in charmap not representable with one byte"),
2993 "LC_CTYPE", "<space>"));
2994 else
2995 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
2996
2997 /* No need to search. */
2998 ELEM (ctype, class_collection, , L' ') |= BITw (tok_space);
2999
3000 seq = charmap_find_value (charmap, "form-feed", 9);
3001 if (seq == NULL)
3002 seq = charmap_find_value (charmap, "U0000000C", 9);
3003 if (seq == NULL)
3004 {
3005 if (!be_quiet)
3006 WITH_CUR_LOCALE (error (0, 0, _("\
3007%s: character `%s' not defined while needed as default value"),
3008 "LC_CTYPE", "<form-feed>"));
3009 }
3010 else if (seq->nbytes != 1)
3011 WITH_CUR_LOCALE (error (0, 0, _("\
3012%s: character `%s' in charmap not representable with one byte"),
3013 "LC_CTYPE", "<form-feed>"));
3014 else
3015 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3016
3017 /* No need to search. */
3018 ELEM (ctype, class_collection, , L'\f') |= BITw (tok_space);
3019
3020
3021 seq = charmap_find_value (charmap, "newline", 7);
3022 if (seq == NULL)
3023 seq = charmap_find_value (charmap, "U0000000A", 9);
3024 if (seq == NULL)
3025 {
3026 if (!be_quiet)
3027 WITH_CUR_LOCALE (error (0, 0, _("\
3028%s: character `%s' not defined while needed as default value"),
3029 "LC_CTYPE", "<newline>"));
3030 }
3031 else if (seq->nbytes != 1)
3032 WITH_CUR_LOCALE (error (0, 0, _("\
3033%s: character `%s' in charmap not representable with one byte"),
3034 "LC_CTYPE", "<newline>"));
3035 else
3036 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3037
3038 /* No need to search. */
3039 ELEM (ctype, class_collection, , L'\n') |= BITw (tok_space);
3040
3041
3042 seq = charmap_find_value (charmap, "carriage-return", 15);
3043 if (seq == NULL)
3044 seq = charmap_find_value (charmap, "U0000000D", 9);
3045 if (seq == NULL)
3046 {
3047 if (!be_quiet)
3048 WITH_CUR_LOCALE (error (0, 0, _("\
3049%s: character `%s' not defined while needed as default value"),
3050 "LC_CTYPE", "<carriage-return>"));
3051 }
3052 else if (seq->nbytes != 1)
3053 WITH_CUR_LOCALE (error (0, 0, _("\
3054%s: character `%s' in charmap not representable with one byte"),
3055 "LC_CTYPE", "<carriage-return>"));
3056 else
3057 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3058
3059 /* No need to search. */
3060 ELEM (ctype, class_collection, , L'\r') |= BITw (tok_space);
3061
3062
3063 seq = charmap_find_value (charmap, "tab", 3);
3064 if (seq == NULL)
3065 seq = charmap_find_value (charmap, "U00000009", 9);
3066 if (seq == NULL)
3067 {
3068 if (!be_quiet)
3069 WITH_CUR_LOCALE (error (0, 0, _("\
3070%s: character `%s' not defined while needed as default value"),
3071 "LC_CTYPE", "<tab>"));
3072 }
3073 else if (seq->nbytes != 1)
3074 WITH_CUR_LOCALE (error (0, 0, _("\
3075%s: character `%s' in charmap not representable with one byte"),
3076 "LC_CTYPE", "<tab>"));
3077 else
3078 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3079
3080 /* No need to search. */
3081 ELEM (ctype, class_collection, , L'\t') |= BITw (tok_space);
3082
3083
3084 seq = charmap_find_value (charmap, "vertical-tab", 12);
3085 if (seq == NULL)
3086 seq = charmap_find_value (charmap, "U0000000B", 9);
3087 if (seq == NULL)
3088 {
3089 if (!be_quiet)
3090 WITH_CUR_LOCALE (error (0, 0, _("\
3091%s: character `%s' not defined while needed as default value"),
3092 "LC_CTYPE", "<vertical-tab>"));
3093 }
3094 else if (seq->nbytes != 1)
3095 WITH_CUR_LOCALE (error (0, 0, _("\
3096%s: character `%s' in charmap not representable with one byte"),
3097 "LC_CTYPE", "<vertical-tab>"));
3098 else
3099 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_space);
3100
3101 /* No need to search. */
3102 ELEM (ctype, class_collection, , L'\v') |= BITw (tok_space);
3103 }
3104
3105 if ((ctype->class_done & BITw (tok_xdigit)) == 0)
3106 /* "If this keyword is not specified, the digits `0' to `9', the
3107 uppercase letters `A' through `F', and the lowercase letters `a'
3108 through `f', ..., shell automatically belong to this class, with
3109 implementation defined character values." [P1003.2, 2.5.2.1] */
3110 {
3111 set_default (BITPOS (tok_xdigit), '0', '9');
3112 set_default (BITPOS (tok_xdigit), 'A', 'F');
3113 set_default (BITPOS (tok_xdigit), 'a', 'f');
3114 }
3115
3116 if ((ctype->class_done & BITw (tok_blank)) == 0)
3117 /* "If this keyword [blank] is unspecified, the characters <space> and
3118 <tab> shall belong to this character class." [P1003.2, 2.5.2.1] */
3119 {
3120 struct charseq *seq;
3121
3122 seq = charmap_find_value (charmap, "space", 5);
3123 if (seq == NULL)
3124 seq = charmap_find_value (charmap, "SP", 2);
3125 if (seq == NULL)
3126 seq = charmap_find_value (charmap, "U00000020", 9);
3127 if (seq == NULL)
3128 {
3129 if (!be_quiet)
3130 WITH_CUR_LOCALE (error (0, 0, _("\
3131%s: character `%s' not defined while needed as default value"),
3132 "LC_CTYPE", "<space>"));
3133 }
3134 else if (seq->nbytes != 1)
3135 WITH_CUR_LOCALE (error (0, 0, _("\
3136%s: character `%s' in charmap not representable with one byte"),
3137 "LC_CTYPE", "<space>"));
3138 else
3139 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3140
3141 /* No need to search. */
3142 ELEM (ctype, class_collection, , L' ') |= BITw (tok_blank);
3143
3144
3145 seq = charmap_find_value (charmap, "tab", 3);
3146 if (seq == NULL)
3147 seq = charmap_find_value (charmap, "U00000009", 9);
3148 if (seq == NULL)
3149 {
3150 if (!be_quiet)
3151 WITH_CUR_LOCALE (error (0, 0, _("\
3152%s: character `%s' not defined while needed as default value"),
3153 "LC_CTYPE", "<tab>"));
3154 }
3155 else if (seq->nbytes != 1)
3156 WITH_CUR_LOCALE (error (0, 0, _("\
3157%s: character `%s' in charmap not representable with one byte"),
3158 "LC_CTYPE", "<tab>"));
3159 else
3160 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_blank);
3161
3162 /* No need to search. */
3163 ELEM (ctype, class_collection, , L'\t') |= BITw (tok_blank);
3164 }
3165
3166 if ((ctype->class_done & BITw (tok_graph)) == 0)
3167 /* "If this keyword [graph] is not specified, characters specified for
3168 the keywords `upper', `lower', `alpha', `digit', `xdigit' and `punct',
3169 shall belong to this character class." [P1003.2, 2.5.2.1] */
3170 {
3171 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3172 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3173 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3174 BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3175 BITw (tok_punct);
3176
3177 for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3178 if ((ctype->class_collection[cnt] & maskw) != 0)
3179 ctype->class_collection[cnt] |= BITw (tok_graph);
3180
3181 for (size_t cnt = 0; cnt < 256; ++cnt)
3182 if ((ctype->class256_collection[cnt] & mask) != 0)
3183 ctype->class256_collection[cnt] |= BIT (tok_graph);
3184 }
3185
3186 if ((ctype->class_done & BITw (tok_print)) == 0)
3187 /* "If this keyword [print] is not provided, characters specified for
3188 the keywords `upper', `lower', `alpha', `digit', `xdigit', `punct',
3189 and the <space> character shall belong to this character class."
3190 [P1003.2, 2.5.2.1] */
3191 {
3192 unsigned long int mask = BIT (tok_upper) | BIT (tok_lower) |
3193 BIT (tok_alpha) | BIT (tok_digit) | BIT (tok_xdigit) | BIT (tok_punct);
3194 unsigned long int maskw = BITw (tok_upper) | BITw (tok_lower) |
3195 BITw (tok_alpha) | BITw (tok_digit) | BITw (tok_xdigit) |
3196 BITw (tok_punct);
3197 struct charseq *seq;
3198
3199 for (size_t cnt = 0; cnt < ctype->class_collection_act; ++cnt)
3200 if ((ctype->class_collection[cnt] & maskw) != 0)
3201 ctype->class_collection[cnt] |= BITw (tok_print);
3202
3203 for (size_t cnt = 0; cnt < 256; ++cnt)
3204 if ((ctype->class256_collection[cnt] & mask) != 0)
3205 ctype->class256_collection[cnt] |= BIT (tok_print);
3206
3207
3208 seq = charmap_find_value (charmap, "space", 5);
3209 if (seq == NULL)
3210 seq = charmap_find_value (charmap, "SP", 2);
3211 if (seq == NULL)
3212 seq = charmap_find_value (charmap, "U00000020", 9);
3213 if (seq == NULL)
3214 {
3215 if (!be_quiet)
3216 WITH_CUR_LOCALE (error (0, 0, _("\
3217%s: character `%s' not defined while needed as default value"),
3218 "LC_CTYPE", "<space>"));
3219 }
3220 else if (seq->nbytes != 1)
3221 WITH_CUR_LOCALE (error (0, 0, _("\
3222%s: character `%s' in charmap not representable with one byte"),
3223 "LC_CTYPE", "<space>"));
3224 else
3225 ctype->class256_collection[seq->bytes[0]] |= BIT (tok_print);
3226
3227 /* No need to search. */
3228 ELEM (ctype, class_collection, , L' ') |= BITw (tok_print);
3229 }
3230
3231 if (ctype->tomap_done[0] == 0)
3232 /* "If this keyword [toupper] is not specified, the lowercase letters
3233 `a' through `z', and their corresponding uppercase letters `A' to
3234 `Z', ..., shall automatically be included, with implementation-
3235 defined character values." [P1003.2, 2.5.2.1] */
3236 {
3237 char tmp[4];
3238 int ch;
3239
3240 strcpy (tmp, "<?>");
3241
3242 for (ch = 'a'; ch <= 'z'; ++ch)
3243 {
3244 struct charseq *seq_from, *seq_to;
3245
3246 tmp[1] = (char) ch;
3247
3248 seq_from = charmap_find_value (charmap, &tmp[1], 1);
3249 if (seq_from == NULL)
3250 {
3251 char buf[10];
3252 sprintf (buf, "U%08X", ch);
3253 seq_from = charmap_find_value (charmap, buf, 9);
3254 }
3255 if (seq_from == NULL)
3256 {
3257 if (!be_quiet)
3258 WITH_CUR_LOCALE (error (0, 0, _("\
3259%s: character `%s' not defined while needed as default value"),
3260 "LC_CTYPE", tmp));
3261 }
3262 else if (seq_from->nbytes != 1)
3263 {
3264 if (!be_quiet)
3265 WITH_CUR_LOCALE (error (0, 0, _("\
3266%s: character `%s' needed as default value not representable with one byte"),
3267 "LC_CTYPE", tmp));
3268 }
3269 else
3270 {
3271 /* This conversion is implementation defined. */
3272 tmp[1] = (char) (ch + ('A' - 'a'));
3273 seq_to = charmap_find_value (charmap, &tmp[1], 1);
3274 if (seq_to == NULL)
3275 {
3276 char buf[10];
3277 sprintf (buf, "U%08X", ch + ('A' - 'a'));
3278 seq_to = charmap_find_value (charmap, buf, 9);
3279 }
3280 if (seq_to == NULL)
3281 {
3282 if (!be_quiet)
3283 WITH_CUR_LOCALE (error (0, 0, _("\
3284%s: character `%s' not defined while needed as default value"),
3285 "LC_CTYPE", tmp));
3286 }
3287 else if (seq_to->nbytes != 1)
3288 {
3289 if (!be_quiet)
3290 WITH_CUR_LOCALE (error (0, 0, _("\
3291%s: character `%s' needed as default value not representable with one byte"),
3292 "LC_CTYPE", tmp));
3293 }
3294 else
3295 /* The index [0] is determined by the order of the
3296 `ctype_map_newP' calls in `ctype_startup'. */
3297 ctype->map256_collection[0][seq_from->bytes[0]]
3298 = seq_to->bytes[0];
3299 }
3300
3301 /* No need to search. */
3302 ELEM (ctype, map_collection, [0], ch) = ch + ('A' - 'a');
3303 }
3304 }
3305
3306 if (ctype->tomap_done[1] == 0)
3307 /* "If this keyword [tolower] is not specified, the mapping shall be
3308 the reverse mapping of the one specified to `toupper'." [P1003.2] */
3309 {
3310 for (size_t cnt = 0; cnt < ctype->map_collection_act[0]; ++cnt)
3311 if (ctype->map_collection[0][cnt] != 0)
3312 ELEM (ctype, map_collection, [1],
3313 ctype->map_collection[0][cnt])
3314 = ctype->charnames[cnt];
3315
3316 for (size_t cnt = 0; cnt < 256; ++cnt)
3317 if (ctype->map256_collection[0][cnt] != 0)
3318 ctype->map256_collection[1][ctype->map256_collection[0][cnt]] = cnt;
3319 }
3320
3321 if (ctype->outdigits_act != 10)
3322 {
3323 if (ctype->outdigits_act != 0)
3324 WITH_CUR_LOCALE (error (0, 0, _("\
3325%s: field `%s' does not contain exactly ten entries"),
3326 "LC_CTYPE", "outdigit"));
3327
3328 for (size_t cnt = ctype->outdigits_act; cnt < 10; ++cnt)
3329 {
3330 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3331 (char *) digits + cnt,
3332 1);
3333
3334 if (ctype->mboutdigits[cnt] == NULL)
3335 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3336 longnames[cnt],
3337 strlen (longnames[cnt]));
3338
3339 if (ctype->mboutdigits[cnt] == NULL)
3340 ctype->mboutdigits[cnt] = charmap_find_symbol (charmap,
3341 uninames[cnt], 9);
3342
3343 if (ctype->mboutdigits[cnt] == NULL)
3344 {
3345 /* Provide a replacement. */
3346 WITH_CUR_LOCALE (error (0, 0, _("\
3347no output digits defined and none of the standard names in the charmap")));
3348
3349 ctype->mboutdigits[cnt] = obstack_alloc (&((struct charmap_t *) charmap)->mem_pool,
3350 sizeof (struct charseq)
3351 + 1);
3352
3353 /* This is better than nothing. */
3354 ctype->mboutdigits[cnt]->bytes[0] = digits[cnt];
3355 ctype->mboutdigits[cnt]->nbytes = 1;
3356 }
3357
3358 ctype->wcoutdigits[cnt] = L'0' + cnt;
3359 }
3360
3361 ctype->outdigits_act = 10;
3362 }
3363
3364#undef set_default
3365}
3366
3367
3368/* Initialize. Assumes t->p and t->q have already been set. */
3369static inline void
3370wctype_table_init (struct wctype_table *t)
3371{
3372 t->level1 = NULL;
3373 t->level1_alloc = t->level1_size = 0;
3374 t->level2 = NULL;
3375 t->level2_alloc = t->level2_size = 0;
3376 t->level3 = NULL;
3377 t->level3_alloc = t->level3_size = 0;
3378}
3379
3380/* Retrieve an entry. */
3381static inline int
3382wctype_table_get (struct wctype_table *t, uint32_t wc)
3383{
3384 uint32_t index1 = wc >> (t->q + t->p + 5);
3385 if (index1 < t->level1_size)
3386 {
3387 uint32_t lookup1 = t->level1[index1];
3388 if (lookup1 != EMPTY)
3389 {
3390 uint32_t index2 = ((wc >> (t->p + 5)) & ((1 << t->q) - 1))
3391 + (lookup1 << t->q);
3392 uint32_t lookup2 = t->level2[index2];
3393 if (lookup2 != EMPTY)
3394 {
3395 uint32_t index3 = ((wc >> 5) & ((1 << t->p) - 1))
3396 + (lookup2 << t->p);
3397 uint32_t lookup3 = t->level3[index3];
3398 uint32_t index4 = wc & 0x1f;
3399
3400 return (lookup3 >> index4) & 1;
3401 }
3402 }
3403 }
3404 return 0;
3405}
3406
3407/* Add one entry. */
3408static void
3409wctype_table_add (struct wctype_table *t, uint32_t wc)
3410{
3411 uint32_t index1 = wc >> (t->q + t->p + 5);
3412 uint32_t index2 = (wc >> (t->p + 5)) & ((1 << t->q) - 1);
3413 uint32_t index3 = (wc >> 5) & ((1 << t->p) - 1);
3414 uint32_t index4 = wc & 0x1f;
3415 size_t i, i1, i2;
3416
3417 if (index1 >= t->level1_size)
3418 {
3419 if (index1 >= t->level1_alloc)
3420 {
3421 size_t alloc = 2 * t->level1_alloc;
3422 if (alloc <= index1)
3423 alloc = index1 + 1;
3424 t->level1 = (uint32_t *) xrealloc ((char *) t->level1,
3425 alloc * sizeof (uint32_t));
3426 t->level1_alloc = alloc;
3427 }
3428 while (index1 >= t->level1_size)
3429 t->level1[t->level1_size++] = EMPTY;
3430 }
3431
3432 if (t->level1[index1] == EMPTY)
3433 {
3434 if (t->level2_size == t->level2_alloc)
3435 {
3436 size_t alloc = 2 * t->level2_alloc + 1;
3437 t->level2 = (uint32_t *) xrealloc ((char *) t->level2,
3438 (alloc << t->q) * sizeof (uint32_t));
3439 t->level2_alloc = alloc;
3440 }
3441 i1 = t->level2_size << t->q;
3442 i2 = (t->level2_size + 1) << t->q;
3443 for (i = i1; i < i2; i++)
3444 t->level2[i] = EMPTY;
3445 t->level1[index1] = t->level2_size++;
3446 }
3447
3448 index2 += t->level1[index1] << t->q;
3449
3450 if (t->level2[index2] == EMPTY)
3451 {
3452 if (t->level3_size == t->level3_alloc)
3453 {
3454 size_t alloc = 2 * t->level3_alloc + 1;
3455 t->level3 = (uint32_t *) xrealloc ((char *) t->level3,
3456 (alloc << t->p) * sizeof (uint32_t));
3457 t->level3_alloc = alloc;
3458 }
3459 i1 = t->level3_size << t->p;
3460 i2 = (t->level3_size + 1) << t->p;
3461 for (i = i1; i < i2; i++)
3462 t->level3[i] = 0;
3463 t->level2[index2] = t->level3_size++;
3464 }
3465
3466 index3 += t->level2[index2] << t->p;
3467
3468 t->level3[index3] |= (uint32_t)1 << index4;
3469}
3470
3471/* Finalize and shrink. */
3472static void
3473add_locale_wctype_table (struct locale_file *file, struct wctype_table *t)
3474{
3475 size_t i, j, k;
3476 uint32_t reorder3[t->level3_size];
3477 uint32_t reorder2[t->level2_size];
3478 uint32_t level2_offset, level3_offset;
3479
3480 /* Uniquify level3 blocks. */
3481 k = 0;
3482 for (j = 0; j < t->level3_size; j++)
3483 {
3484 for (i = 0; i < k; i++)
3485 if (memcmp (&t->level3[i << t->p], &t->level3[j << t->p],
3486 (1 << t->p) * sizeof (uint32_t)) == 0)
3487 break;
3488 /* Relocate block j to block i. */
3489 reorder3[j] = i;
3490 if (i == k)
3491 {
3492 if (i != j)
3493 memcpy (&t->level3[i << t->p], &t->level3[j << t->p],
3494 (1 << t->p) * sizeof (uint32_t));
3495 k++;
3496 }
3497 }
3498 t->level3_size = k;
3499
3500 for (i = 0; i < (t->level2_size << t->q); i++)
3501 if (t->level2[i] != EMPTY)
3502 t->level2[i] = reorder3[t->level2[i]];
3503
3504 /* Uniquify level2 blocks. */
3505 k = 0;
3506 for (j = 0; j < t->level2_size; j++)
3507 {
3508 for (i = 0; i < k; i++)
3509 if (memcmp (&t->level2[i << t->q], &t->level2[j << t->q],
3510 (1 << t->q) * sizeof (uint32_t)) == 0)
3511 break;
3512 /* Relocate block j to block i. */
3513 reorder2[j] = i;
3514 if (i == k)
3515 {
3516 if (i != j)
3517 memcpy (&t->level2[i << t->q], &t->level2[j << t->q],
3518 (1 << t->q) * sizeof (uint32_t));
3519 k++;
3520 }
3521 }
3522 t->level2_size = k;
3523
3524 for (i = 0; i < t->level1_size; i++)
3525 if (t->level1[i] != EMPTY)
3526 t->level1[i] = reorder2[t->level1[i]];
3527
3528 t->result_size =
3529 5 * sizeof (uint32_t)
3530 + t->level1_size * sizeof (uint32_t)
3531 + (t->level2_size << t->q) * sizeof (uint32_t)
3532 + (t->level3_size << t->p) * sizeof (uint32_t);
3533
3534 level2_offset =
3535 5 * sizeof (uint32_t)
3536 + t->level1_size * sizeof (uint32_t);
3537 level3_offset =
3538 5 * sizeof (uint32_t)
3539 + t->level1_size * sizeof (uint32_t)
3540 + (t->level2_size << t->q) * sizeof (uint32_t);
3541
3542 start_locale_structure (file);
3543 add_locale_uint32 (file, t->q + t->p + 5);
3544 add_locale_uint32 (file, t->level1_size);
3545 add_locale_uint32 (file, t->p + 5);
3546 add_locale_uint32 (file, (1 << t->q) - 1);
3547 add_locale_uint32 (file, (1 << t->p) - 1);
3548
3549 for (i = 0; i < t->level1_size; i++)
3550 add_locale_uint32
3551 (file,
3552 t->level1[i] == EMPTY
3553 ? 0
3554 : (t->level1[i] << t->q) * sizeof (uint32_t) + level2_offset);
3555
3556 for (i = 0; i < (t->level2_size << t->q); i++)
3557 add_locale_uint32
3558 (file,
3559 t->level2[i] == EMPTY
3560 ? 0
3561 : (t->level2[i] << t->p) * sizeof (uint32_t) + level3_offset);
3562
3563 add_locale_uint32_array (file, t->level3, t->level3_size << t->p);
3564 end_locale_structure (file);
3565
3566 if (t->level1_alloc > 0)
3567 free (t->level1);
3568 if (t->level2_alloc > 0)
3569 free (t->level2);
3570 if (t->level3_alloc > 0)
3571 free (t->level3);
3572}
3573
3574/* Flattens the included transliterations into a translit list.
3575 Inserts them in the list at `cursor', and returns the new cursor. */
3576static struct translit_t **
3577translit_flatten (struct locale_ctype_t *ctype,
3578 const struct charmap_t *charmap,
3579 struct translit_t **cursor)
3580{
3581 while (ctype->translit_include != NULL)
3582 {
3583 const char *copy_locale = ctype->translit_include->copy_locale;
3584 const char *copy_repertoire = ctype->translit_include->copy_repertoire;
3585 struct localedef_t *other;
3586
3587 /* Unchain the include statement. During the depth-first traversal
3588 we don't want to visit any locale more than once. */
3589 ctype->translit_include = ctype->translit_include->next;
3590
3591 other = find_locale (LC_CTYPE, copy_locale, copy_repertoire, charmap);
3592
3593 if (other == NULL || other->categories[LC_CTYPE].ctype == NULL)
3594 {
3595 WITH_CUR_LOCALE (error (0, 0, _("\
3596%s: transliteration data from locale `%s' not available"),
3597 "LC_CTYPE", copy_locale));
3598 }
3599 else
3600 {
3601 struct locale_ctype_t *other_ctype =
3602 other->categories[LC_CTYPE].ctype;
3603
3604 cursor = translit_flatten (other_ctype, charmap, cursor);
3605 assert (other_ctype->translit_include == NULL);
3606
3607 if (other_ctype->translit != NULL)
3608 {
3609 /* Insert the other_ctype->translit list at *cursor. */
3610 struct translit_t *endp = other_ctype->translit;
3611 while (endp->next != NULL)
3612 endp = endp->next;
3613
3614 endp->next = *cursor;
3615 *cursor = other_ctype->translit;
3616
3617 /* Avoid any risk of circular lists. */
3618 other_ctype->translit = NULL;
3619
3620 cursor = &endp->next;
3621 }
3622
3623 if (ctype->default_missing == NULL)
3624 ctype->default_missing = other_ctype->default_missing;
3625 }
3626 }
3627
3628 return cursor;
3629}
3630
3631static void
3632allocate_arrays (struct locale_ctype_t *ctype, const struct charmap_t *charmap,
3633 struct repertoire_t *repertoire)
3634{
3635 size_t idx, nr;
3636 const void *key;
3637 size_t len;
3638 void *vdata;
3639 void *curs;
3640
3641 /* You wonder about this amount of memory? This is only because some
3642 users do not manage to address the array with unsigned values or
3643 data types with range >= 256. '\200' would result in the array
3644 index -128. To help these poor people we duplicate the entries for
3645 128 up to 255 below the entry for \0. */
3646 ctype->ctype_b = (char_class_t *) xcalloc (256 + 128, sizeof (char_class_t));
3647 ctype->ctype32_b = (char_class32_t *) xcalloc (256, sizeof (char_class32_t));
3648 ctype->class_b = (uint32_t **)
3649 xmalloc (ctype->nr_charclass * sizeof (uint32_t *));
3650 ctype->class_3level = (struct wctype_table *)
3651 xmalloc (ctype->nr_charclass * sizeof (struct wctype_table));
3652
3653 /* This is the array accessed using the multibyte string elements. */
3654 for (idx = 0; idx < 256; ++idx)
3655 ctype->ctype_b[128 + idx] = ctype->class256_collection[idx];
3656
3657 /* Mirror first 127 entries. We must take care that entry -1 is not
3658 mirrored because EOF == -1. */
3659 for (idx = 0; idx < 127; ++idx)
3660 ctype->ctype_b[idx] = ctype->ctype_b[256 + idx];
3661
3662 /* The 32 bit array contains all characters < 0x100. */
3663 for (idx = 0; idx < ctype->class_collection_act; ++idx)
3664 if (ctype->charnames[idx] < 0x100)
3665 ctype->ctype32_b[ctype->charnames[idx]] = ctype->class_collection[idx];
3666
3667 for (nr = 0; nr < ctype->nr_charclass; nr++)
3668 {
3669 ctype->class_b[nr] = (uint32_t *) xcalloc (256 / 32, sizeof (uint32_t));
3670
3671 /* We only set CLASS_B for the bits in the ISO C classes, not
3672 the user defined classes. The number should not change but
3673 who knows. */
3674#define LAST_ISO_C_BIT 11
3675 if (nr <= LAST_ISO_C_BIT)
3676 for (idx = 0; idx < 256; ++idx)
3677 if (ctype->class256_collection[idx] & _ISbit (nr))
3678 ctype->class_b[nr][idx >> 5] |= (uint32_t) 1 << (idx & 0x1f);
3679 }
3680
3681 for (nr = 0; nr < ctype->nr_charclass; nr++)
3682 {
3683 struct wctype_table *t;
3684
3685 t = &ctype->class_3level[nr];
3686 t->p = 4; /* or: 5 */
3687 t->q = 7; /* or: 6 */
3688 wctype_table_init (t);
3689
3690 for (idx = 0; idx < ctype->class_collection_act; ++idx)
3691 if (ctype->class_collection[idx] & _ISwbit (nr))
3692 wctype_table_add (t, ctype->charnames[idx]);
3693
3694 if (verbose)
3695 WITH_CUR_LOCALE (fprintf (stderr, _("\
3696%s: table for class \"%s\": %lu bytes\n"),
3697 "LC_CTYPE", ctype->classnames[nr],
3698 (unsigned long int) t->result_size));
3699 }
3700
3701 /* Room for table of mappings. */
3702 ctype->map_b = (uint32_t **) xmalloc (2 * sizeof (uint32_t *));
3703 ctype->map32_b = (uint32_t **) xmalloc (ctype->map_collection_nr
3704 * sizeof (uint32_t *));
3705 ctype->map_3level = (struct wctrans_table *)
3706 xmalloc (ctype->map_collection_nr * sizeof (struct wctrans_table));
3707
3708 /* Fill in all mappings. */
3709 for (idx = 0; idx < 2; ++idx)
3710 {
3711 unsigned int idx2;
3712
3713 /* Allocate table. */
3714 ctype->map_b[idx] = (uint32_t *)
3715 xmalloc ((256 + 128) * sizeof (uint32_t));
3716
3717 /* Copy values from collection. */
3718 for (idx2 = 0; idx2 < 256; ++idx2)
3719 ctype->map_b[idx][128 + idx2] = ctype->map256_collection[idx][idx2];
3720
3721 /* Mirror first 127 entries. We must take care not to map entry
3722 -1 because EOF == -1. */
3723 for (idx2 = 0; idx2 < 127; ++idx2)
3724 ctype->map_b[idx][idx2] = ctype->map_b[idx][256 + idx2];
3725
3726 /* EOF must map to EOF. */
3727 ctype->map_b[idx][127] = EOF;
3728 }
3729
3730 for (idx = 0; idx < ctype->map_collection_nr; ++idx)
3731 {
3732 unsigned int idx2;
3733
3734 /* Allocate table. */
3735 ctype->map32_b[idx] = (uint32_t *) xmalloc (256 * sizeof (uint32_t));
3736
3737 /* Copy values from collection. Default is identity mapping. */
3738 for (idx2 = 0; idx2 < 256; ++idx2)
3739 ctype->map32_b[idx][idx2] =
3740 (ctype->map_collection[idx][idx2] != 0
3741 ? ctype->map_collection[idx][idx2]
3742 : idx2);
3743 }
3744
3745 for (nr = 0; nr < ctype->map_collection_nr; nr++)
3746 {
3747 struct wctrans_table *t;
3748
3749 t = &ctype->map_3level[nr];
3750 t->p = 7;
3751 t->q = 9;
3752 wctrans_table_init (t);
3753
3754 for (idx = 0; idx < ctype->map_collection_act[nr]; ++idx)
3755 if (ctype->map_collection[nr][idx] != 0)
3756 wctrans_table_add (t, ctype->charnames[idx],
3757 ctype->map_collection[nr][idx]);
3758
3759 if (verbose)
3760 WITH_CUR_LOCALE (fprintf (stderr, _("\
3761%s: table for map \"%s\": %lu bytes\n"),
3762 "LC_CTYPE", ctype->mapnames[nr],
3763 (unsigned long int) t->result_size));
3764 }
3765
3766 /* Extra array for class and map names. */
3767 ctype->class_name_ptr = (uint32_t *) xmalloc (ctype->nr_charclass
3768 * sizeof (uint32_t));
3769 ctype->map_name_ptr = (uint32_t *) xmalloc (ctype->map_collection_nr
3770 * sizeof (uint32_t));
3771
3772 ctype->class_offset = _NL_ITEM_INDEX (_NL_CTYPE_EXTRA_MAP_1);
3773 ctype->map_offset = ctype->class_offset + ctype->nr_charclass;
3774
3775 /* Array for width information. Because the expected widths are very
3776 small (never larger than 2) we use only one single byte. This
3777 saves space.
3778 We put only printable characters in the table. wcwidth is specified
3779 to return -1 for non-printable characters. Doing the check here
3780 saves a run-time check.
3781 But we put L'\0' in the table. This again saves a run-time check. */
3782 {
3783 struct wcwidth_table *t;
3784
3785 t = &ctype->width;
3786 t->p = 7;
3787 t->q = 9;
3788 wcwidth_table_init (t);
3789
3790 /* First set all the printable characters of the character set to
3791 the default width. */
3792 curs = NULL;
3793 while (iterate_table (&charmap->char_table, &curs, &key, &len, &vdata) == 0)
3794 {
3795 struct charseq *data = (struct charseq *) vdata;
3796
3797 if (data->ucs4 == UNINITIALIZED_CHAR_VALUE)
3798 data->ucs4 = repertoire_find_value (ctype->repertoire,
3799 data->name, len);
3800
3801 if (data->ucs4 != ILLEGAL_CHAR_VALUE)
3802 {
3803 uint32_t *class_bits =
3804 find_idx (ctype, &ctype->class_collection, NULL,
3805 &ctype->class_collection_act, data->ucs4);
3806
3807 if (class_bits != NULL && (*class_bits & BITw (tok_print)))
3808 wcwidth_table_add (t, data->ucs4, charmap->width_default);
3809 }
3810 }
3811
3812 /* Now add the explicitly specified widths. */
3813 if (charmap->width_rules != NULL)
3814 for (size_t cnt = 0; cnt < charmap->nwidth_rules; ++cnt)
3815 {
3816 unsigned char bytes[charmap->mb_cur_max];
3817 int nbytes = charmap->width_rules[cnt].from->nbytes;
3818
3819 /* We have the range of character for which the width is
3820 specified described using byte sequences of the multibyte
3821 charset. We have to convert this to UCS4 now. And we
3822 cannot simply convert the beginning and the end of the
3823 sequence, we have to iterate over the byte sequence and
3824 convert it for every single character. */
3825 memcpy (bytes, charmap->width_rules[cnt].from->bytes, nbytes);
3826
3827 while (nbytes < charmap->width_rules[cnt].to->nbytes
3828 || memcmp (bytes, charmap->width_rules[cnt].to->bytes,
3829 nbytes) <= 0)
3830 {
3831 /* Find the UCS value for `bytes'. */
3832 int inner;
3833 uint32_t wch;
3834 struct charseq *seq =
3835 charmap_find_symbol (charmap, (char *) bytes, nbytes);
3836
3837 if (seq == NULL)
3838 wch = ILLEGAL_CHAR_VALUE;
3839 else if (seq->ucs4 != UNINITIALIZED_CHAR_VALUE)
3840 wch = seq->ucs4;
3841 else
3842 wch = repertoire_find_value (ctype->repertoire, seq->name,
3843 strlen (seq->name));
3844
3845 if (wch != ILLEGAL_CHAR_VALUE)
3846 {
3847 /* Store the value. */
3848 uint32_t *class_bits =
3849 find_idx (ctype, &ctype->class_collection, NULL,
3850 &ctype->class_collection_act, wch);
3851
3852 if (class_bits != NULL && (*class_bits & BITw (tok_print)))
3853 wcwidth_table_add (t, wch,
3854 charmap->width_rules[cnt].width);
3855 }
3856
3857 /* "Increment" the bytes sequence. */
3858 inner = nbytes - 1;
3859 while (inner >= 0 && bytes[inner] == 0xff)
3860 --inner;
3861
3862 if (inner < 0)
3863 {
3864 /* We have to extend the byte sequence. */
3865 if (nbytes >= charmap->width_rules[cnt].to->nbytes)
3866 break;
3867
3868 bytes[0] = 1;
3869 memset (&bytes[1], 0, nbytes);
3870 ++nbytes;
3871 }
3872 else
3873 {
3874 ++bytes[inner];
3875 while (++inner < nbytes)
3876 bytes[inner] = 0;
3877 }
3878 }
3879 }
3880
3881 /* Set the width of L'\0' to 0. */
3882 wcwidth_table_add (t, 0, 0);
3883
3884 if (verbose)
3885 WITH_CUR_LOCALE (fprintf (stderr, _("%s: table for width: %lu bytes\n"),
3886 "LC_CTYPE", (unsigned long int) t->result_size));
3887 }
3888
3889 /* Set MB_CUR_MAX. */
3890 ctype->mb_cur_max = charmap->mb_cur_max;
3891
3892 /* Now determine the table for the transliteration information.
3893
3894 XXX It is not yet clear to me whether it is worth implementing a
3895 complicated algorithm which uses a hash table to locate the entries.
3896 For now I'll use a simple array which can be searching using binary
3897 search. */
3898 if (ctype->translit_include != NULL)
3899 /* Traverse the locales mentioned in the `include' statements in a
3900 depth-first way and fold in their transliteration information. */
3901 translit_flatten (ctype, charmap, &ctype->translit);
3902
3903 if (ctype->translit != NULL)
3904 {
3905 /* First count how many entries we have. This is the upper limit
3906 since some entries from the included files might be overwritten. */
3907 size_t number = 0;
3908 struct translit_t *runp = ctype->translit;
3909 struct translit_t **sorted;
3910 size_t from_len, to_len;
3911
3912 while (runp != NULL)
3913 {
3914 ++number;
3915 runp = runp->next;
3916 }
3917
3918 /* Next we allocate an array large enough and fill in the values. */
3919 sorted = (struct translit_t **) alloca (number
3920 * sizeof (struct translit_t **));
3921 runp = ctype->translit;
3922 number = 0;
3923 do
3924 {
3925 /* Search for the place where to insert this string.
3926 XXX Better use a real sorting algorithm later. */
3927 size_t idx = 0;
3928 int replace = 0;
3929
3930 while (idx < number)
3931 {
3932 int res = wcscmp ((const wchar_t *) sorted[idx]->from,
3933 (const wchar_t *) runp->from);
3934 if (res == 0)
3935 {
3936 replace = 1;
3937 break;
3938 }
3939 if (res > 0)
3940 break;
3941 ++idx;
3942 }
3943
3944 if (replace)
3945 sorted[idx] = runp;
3946 else
3947 {
3948 memmove (&sorted[idx + 1], &sorted[idx],
3949 (number - idx) * sizeof (struct translit_t *));
3950 sorted[idx] = runp;
3951 ++number;
3952 }
3953
3954 runp = runp->next;
3955 }
3956 while (runp != NULL);
3957
3958 /* The next step is putting all the possible transliteration
3959 strings in one memory block so that we can write it out.
3960 We need several different blocks:
3961 - index to the from-string array
3962 - from-string array
3963 - index to the to-string array
3964 - to-string array.
3965 */
3966 from_len = to_len = 0;
3967 for (size_t cnt = 0; cnt < number; ++cnt)
3968 {
3969 struct translit_to_t *srunp;
3970 from_len += wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
3971 srunp = sorted[cnt]->to;
3972 while (srunp != NULL)
3973 {
3974 to_len += wcslen ((const wchar_t *) srunp->str) + 1;
3975 srunp = srunp->next;
3976 }
3977 /* Plus one for the extra NUL character marking the end of
3978 the list for the current entry. */
3979 ++to_len;
3980 }
3981
3982 /* We can allocate the arrays for the results. */
3983 ctype->translit_from_idx = xmalloc (number * sizeof (uint32_t));
3984 ctype->translit_from_tbl = xmalloc (from_len * sizeof (uint32_t));
3985 ctype->translit_to_idx = xmalloc (number * sizeof (uint32_t));
3986 ctype->translit_to_tbl = xmalloc (to_len * sizeof (uint32_t));
3987
3988 from_len = 0;
3989 to_len = 0;
3990 for (size_t cnt = 0; cnt < number; ++cnt)
3991 {
3992 size_t len;
3993 struct translit_to_t *srunp;
3994
3995 ctype->translit_from_idx[cnt] = from_len;
3996 ctype->translit_to_idx[cnt] = to_len;
3997
3998 len = wcslen ((const wchar_t *) sorted[cnt]->from) + 1;
3999 wmemcpy ((wchar_t *) &ctype->translit_from_tbl[from_len],
4000 (const wchar_t *) sorted[cnt]->from, len);
4001 from_len += len;
4002
4003 ctype->translit_to_idx[cnt] = to_len;
4004 srunp = sorted[cnt]->to;
4005 while (srunp != NULL)
4006 {
4007 len = wcslen ((const wchar_t *) srunp->str) + 1;
4008 wmemcpy ((wchar_t *) &ctype->translit_to_tbl[to_len],
4009 (const wchar_t *) srunp->str, len);
4010 to_len += len;
4011 srunp = srunp->next;
4012 }
4013 ctype->translit_to_tbl[to_len++] = L'\0';
4014 }
4015
4016 /* Store the information about the length. */
4017 ctype->translit_idx_size = number;
4018 ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
4019 ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
4020 }
4021 else
4022 {
4023 ctype->translit_from_idx = no_str;
4024 ctype->translit_from_tbl = no_str;
4025 ctype->translit_to_tbl = no_str;
4026 ctype->translit_idx_size = 0;
4027 ctype->translit_from_tbl_size = 0;
4028 ctype->translit_to_tbl_size = 0;
4029 }
4030}
4031