1 | /* Handle configuration data. |
2 | Copyright (C) 1997-2017 Free Software Foundation, Inc. |
3 | This file is part of the GNU C Library. |
4 | Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997. |
5 | |
6 | The GNU C Library is free software; you can redistribute it and/or |
7 | modify it under the terms of the GNU Lesser General Public |
8 | License as published by the Free Software Foundation; either |
9 | version 2.1 of the License, or (at your option) any later version. |
10 | |
11 | The GNU C Library is distributed in the hope that it will be useful, |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | Lesser General Public License for more details. |
15 | |
16 | You should have received a copy of the GNU Lesser General Public |
17 | License along with the GNU C Library; if not, see |
18 | <http://www.gnu.org/licenses/>. */ |
19 | |
20 | #include <assert.h> |
21 | #include <ctype.h> |
22 | #include <errno.h> |
23 | #include <limits.h> |
24 | #include <locale.h> |
25 | #include <search.h> |
26 | #include <stddef.h> |
27 | #include <stdio.h> |
28 | #include <stdio_ext.h> |
29 | #include <stdlib.h> |
30 | #include <string.h> |
31 | #include <unistd.h> |
32 | #include <sys/param.h> |
33 | |
34 | #include <libc-lock.h> |
35 | #include <gconv_int.h> |
36 | |
37 | |
/* This is the default path where we look for module lists.  It is used
   on its own when no user-defined path is set and is appended to the
   user-defined path otherwise (see __gconv_get_path).  */
static const char default_gconv_path[] = GCONV_PATH;

/* The path elements, as determined by the __gconv_get_path function.
   All path elements end in a slash.  The list is terminated by an
   element whose name is NULL.  */
struct path_elem *__gconv_path_elem;
/* Maximum length of a single path element in __gconv_path_elem.  */
size_t __gconv_max_path_elem_len;

/* We use the following struct if we couldn't allocate memory.  Its NULL
   name makes it an empty list, and free_mem must never pass it to
   free.  */
static const struct path_elem empty_path_elem = { NULL, 0 };

/* Name of the file containing the module information in the directories
   along the path.  */
static const char gconv_conf_filename[] = "gconv-modules";

/* Filename extension for the modules.  The default applies only when
   MODULE_EXT has not been defined already.  */
#ifndef MODULE_EXT
# define MODULE_EXT ".so"
#endif
static const char gconv_module_ext[] = MODULE_EXT;
59 | |
/* We have a few builtin transformations.  The table is produced by
   including gconv_builtin.h with BUILTIN_TRANSFORMATION expanding to one
   initializer per entry and BUILTIN_ALIAS expanding to nothing.  Of the
   macro arguments only the two names, the cost and the module name are
   recorded here; cost_lo is INT_MAX for every entry.  */
static struct gconv_module builtin_modules[] =
{
#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
                               MinF, MaxF, MinT, MaxT) \
  {                                                                           \
    .from_string = From,                                                      \
    .to_string = To,                                                          \
    .cost_hi = Cost,                                                          \
    .cost_lo = INT_MAX,                                                       \
    .module_name = Name                                                       \
  },
#define BUILTIN_ALIAS(From, To)

#include "gconv_builtin.h"

#undef BUILTIN_TRANSFORMATION
#undef BUILTIN_ALIAS
};
79 | |
/* Aliases for the builtin conversions.  Here BUILTIN_TRANSFORMATION
   expands to nothing and every BUILTIN_ALIAS contributes its two names,
   each followed by a NUL byte, to one string literal.  __gconv_read_conf
   walks the pairs and stops at the first empty `from' name.  */
static const char builtin_aliases[] =
{
#define BUILTIN_TRANSFORMATION(From, To, Cost, Name, Fct, BtowcFct, \
                               MinF, MaxF, MinT, MaxT)
#define BUILTIN_ALIAS(From, To) From "\0" To "\0"

#include "gconv_builtin.h"

#undef BUILTIN_TRANSFORMATION
#undef BUILTIN_ALIAS
};
91 | |
#include <libio/libioP.h>
/* Within this file, calls to __getdelim go straight to _IO_getdelim.  */
#define __getdelim(line, len, c, fp) _IO_getdelim (line, len, c, fp)


/* Value of the GCONV_PATH environment variable.  __gconv_get_path treats
   NULL as "no user-defined path"; this file only reads the variable.  */
const char *__gconv_path_envvar;
98 | |
99 | |
100 | /* Test whether there is already a matching module known. */ |
101 | static int |
102 | internal_function |
103 | detect_conflict (const char *alias) |
104 | { |
105 | struct gconv_module *node = __gconv_modules_db; |
106 | |
107 | while (node != NULL) |
108 | { |
109 | int cmpres = strcmp (alias, node->from_string); |
110 | |
111 | if (cmpres == 0) |
112 | /* We have a conflict. */ |
113 | return 1; |
114 | else if (cmpres < 0) |
115 | node = node->left; |
116 | else |
117 | node = node->right; |
118 | } |
119 | |
120 | return node != NULL; |
121 | } |
122 | |
123 | |
124 | /* The actual code to add aliases. */ |
125 | static void |
126 | add_alias2 (const char *from, const char *to, const char *wp, void *modules) |
127 | { |
128 | /* Test whether this alias conflicts with any available module. */ |
129 | if (detect_conflict (from)) |
130 | /* It does conflict, don't add the alias. */ |
131 | return; |
132 | |
133 | struct gconv_alias *new_alias = (struct gconv_alias *) |
134 | malloc (sizeof (struct gconv_alias) + (wp - from)); |
135 | if (new_alias != NULL) |
136 | { |
137 | void **inserted; |
138 | |
139 | new_alias->fromname = memcpy ((char *) new_alias |
140 | + sizeof (struct gconv_alias), |
141 | from, wp - from); |
142 | new_alias->toname = new_alias->fromname + (to - from); |
143 | |
144 | inserted = (void **) __tsearch (new_alias, &__gconv_alias_db, |
145 | __gconv_alias_compare); |
146 | if (inserted == NULL || *inserted != new_alias) |
147 | /* Something went wrong, free this entry. */ |
148 | free (new_alias); |
149 | } |
150 | } |
151 | |
152 | |
153 | /* Add new alias. */ |
154 | static void |
155 | add_alias (char *rp, void *modules) |
156 | { |
157 | /* We now expect two more string. The strings are normalized |
158 | (converted to UPPER case) and strored in the alias database. */ |
159 | char *from, *to, *wp; |
160 | |
161 | while (__isspace_l (*rp, _nl_C_locobj_ptr)) |
162 | ++rp; |
163 | from = wp = rp; |
164 | while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr)) |
165 | *wp++ = __toupper_l (*rp++, _nl_C_locobj_ptr); |
166 | if (*rp == '\0') |
167 | /* There is no `to' string on the line. Ignore it. */ |
168 | return; |
169 | *wp++ = '\0'; |
170 | to = ++rp; |
171 | while (__isspace_l (*rp, _nl_C_locobj_ptr)) |
172 | ++rp; |
173 | while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr)) |
174 | *wp++ = __toupper_l (*rp++, _nl_C_locobj_ptr); |
175 | if (to == wp) |
176 | /* No `to' string, ignore the line. */ |
177 | return; |
178 | *wp++ = '\0'; |
179 | |
180 | add_alias2 (from, to, wp, modules); |
181 | } |
182 | |
183 | |
184 | /* Insert a data structure for a new module in the search tree. */ |
185 | static void |
186 | internal_function |
187 | insert_module (struct gconv_module *newp, int tobefreed) |
188 | { |
189 | struct gconv_module **rootp = &__gconv_modules_db; |
190 | |
191 | while (*rootp != NULL) |
192 | { |
193 | struct gconv_module *root = *rootp; |
194 | int cmpres; |
195 | |
196 | cmpres = strcmp (newp->from_string, root->from_string); |
197 | if (cmpres == 0) |
198 | { |
199 | /* Both strings are identical. Insert the string at the |
200 | end of the `same' list if it is not already there. */ |
201 | while (strcmp (newp->from_string, root->from_string) != 0 |
202 | || strcmp (newp->to_string, root->to_string) != 0) |
203 | { |
204 | rootp = &root->same; |
205 | root = *rootp; |
206 | if (root == NULL) |
207 | break; |
208 | } |
209 | |
210 | if (root != NULL) |
211 | { |
212 | /* This is a no new conversion. But maybe the cost is |
213 | better. */ |
214 | if (newp->cost_hi < root->cost_hi |
215 | || (newp->cost_hi == root->cost_hi |
216 | && newp->cost_lo < root->cost_lo)) |
217 | { |
218 | newp->left = root->left; |
219 | newp->right = root->right; |
220 | newp->same = root->same; |
221 | *rootp = newp; |
222 | |
223 | free (root); |
224 | } |
225 | else if (tobefreed) |
226 | free (newp); |
227 | return; |
228 | } |
229 | |
230 | break; |
231 | } |
232 | else if (cmpres < 0) |
233 | rootp = &root->left; |
234 | else |
235 | rootp = &root->right; |
236 | } |
237 | |
238 | /* Plug in the new node here. */ |
239 | *rootp = newp; |
240 | } |
241 | |
242 | |
243 | /* Add new module. */ |
244 | static void |
245 | internal_function |
246 | add_module (char *rp, const char *directory, size_t dir_len, void **modules, |
247 | size_t *nmodules, int modcounter) |
248 | { |
249 | /* We expect now |
250 | 1. `from' name |
251 | 2. `to' name |
252 | 3. filename of the module |
253 | 4. an optional cost value |
254 | */ |
255 | struct gconv_alias fake_alias; |
256 | struct gconv_module *new_module; |
257 | char *from, *to, *module, *wp; |
258 | int need_ext; |
259 | int cost_hi; |
260 | |
261 | while (__isspace_l (*rp, _nl_C_locobj_ptr)) |
262 | ++rp; |
263 | from = rp; |
264 | while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr)) |
265 | { |
266 | *rp = __toupper_l (*rp, _nl_C_locobj_ptr); |
267 | ++rp; |
268 | } |
269 | if (*rp == '\0') |
270 | return; |
271 | *rp++ = '\0'; |
272 | to = wp = rp; |
273 | while (__isspace_l (*rp, _nl_C_locobj_ptr)) |
274 | ++rp; |
275 | while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr)) |
276 | *wp++ = __toupper_l (*rp++, _nl_C_locobj_ptr); |
277 | if (*rp == '\0') |
278 | return; |
279 | *wp++ = '\0'; |
280 | do |
281 | ++rp; |
282 | while (__isspace_l (*rp, _nl_C_locobj_ptr)); |
283 | module = wp; |
284 | while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr)) |
285 | *wp++ = *rp++; |
286 | if (*rp == '\0') |
287 | { |
288 | /* There is no cost, use one by default. */ |
289 | *wp++ = '\0'; |
290 | cost_hi = 1; |
291 | } |
292 | else |
293 | { |
294 | /* There might be a cost value. */ |
295 | char *endp; |
296 | |
297 | *wp++ = '\0'; |
298 | cost_hi = strtol (rp, &endp, 10); |
299 | if (rp == endp || cost_hi < 1) |
300 | /* No useful information. */ |
301 | cost_hi = 1; |
302 | } |
303 | |
304 | if (module[0] == '\0') |
305 | /* No module name given. */ |
306 | return; |
307 | if (module[0] == '/') |
308 | dir_len = 0; |
309 | |
310 | /* See whether we must add the ending. */ |
311 | need_ext = 0; |
312 | if (wp - module < (ptrdiff_t) sizeof (gconv_module_ext) |
313 | || memcmp (wp - sizeof (gconv_module_ext), gconv_module_ext, |
314 | sizeof (gconv_module_ext)) != 0) |
315 | /* We must add the module extension. */ |
316 | need_ext = sizeof (gconv_module_ext) - 1; |
317 | |
318 | /* See whether we have already an alias with this name defined. */ |
319 | fake_alias.fromname = strndupa (from, to - from); |
320 | |
321 | if (__tfind (&fake_alias, &__gconv_alias_db, __gconv_alias_compare) != NULL) |
322 | /* This module duplicates an alias. */ |
323 | return; |
324 | |
325 | new_module = (struct gconv_module *) calloc (1, |
326 | sizeof (struct gconv_module) |
327 | + (wp - from) |
328 | + dir_len + need_ext); |
329 | if (new_module != NULL) |
330 | { |
331 | char *tmp; |
332 | |
333 | new_module->from_string = tmp = (char *) (new_module + 1); |
334 | tmp = __mempcpy (tmp, from, to - from); |
335 | |
336 | new_module->to_string = tmp; |
337 | tmp = __mempcpy (tmp, to, module - to); |
338 | |
339 | new_module->cost_hi = cost_hi; |
340 | new_module->cost_lo = modcounter; |
341 | |
342 | new_module->module_name = tmp; |
343 | |
344 | if (dir_len != 0) |
345 | tmp = __mempcpy (tmp, directory, dir_len); |
346 | |
347 | tmp = __mempcpy (tmp, module, wp - module); |
348 | |
349 | if (need_ext) |
350 | memcpy (tmp - 1, gconv_module_ext, sizeof (gconv_module_ext)); |
351 | |
352 | /* Now insert the new module data structure in our search tree. */ |
353 | insert_module (new_module, 1); |
354 | } |
355 | } |
356 | |
357 | |
358 | /* Read the next configuration file. */ |
359 | static void |
360 | internal_function |
361 | read_conf_file (const char *filename, const char *directory, size_t dir_len, |
362 | void **modules, size_t *nmodules) |
363 | { |
364 | /* Note the file is opened with cancellation in the I/O functions |
365 | disabled. */ |
366 | FILE *fp = fopen (filename, "rce" ); |
367 | char *line = NULL; |
368 | size_t line_len = 0; |
369 | static int modcounter; |
370 | |
371 | /* Don't complain if a file is not present or readable, simply silently |
372 | ignore it. */ |
373 | if (fp == NULL) |
374 | return; |
375 | |
376 | /* No threads reading from this stream. */ |
377 | __fsetlocking (fp, FSETLOCKING_BYCALLER); |
378 | |
379 | /* Process the known entries of the file. Comments start with `#' and |
380 | end with the end of the line. Empty lines are ignored. */ |
381 | while (!feof_unlocked (fp)) |
382 | { |
383 | char *rp, *endp, *word; |
384 | ssize_t n = __getdelim (&line, &line_len, '\n', fp); |
385 | if (n < 0) |
386 | /* An error occurred. */ |
387 | break; |
388 | |
389 | rp = line; |
390 | /* Terminate the line (excluding comments or newline) by an NUL byte |
391 | to simplify the following code. */ |
392 | endp = strchr (rp, '#'); |
393 | if (endp != NULL) |
394 | *endp = '\0'; |
395 | else |
396 | if (rp[n - 1] == '\n') |
397 | rp[n - 1] = '\0'; |
398 | |
399 | while (__isspace_l (*rp, _nl_C_locobj_ptr)) |
400 | ++rp; |
401 | |
402 | /* If this is an empty line go on with the next one. */ |
403 | if (rp == endp) |
404 | continue; |
405 | |
406 | word = rp; |
407 | while (*rp != '\0' && !__isspace_l (*rp, _nl_C_locobj_ptr)) |
408 | ++rp; |
409 | |
410 | if (rp - word == sizeof ("alias" ) - 1 |
411 | && memcmp (word, "alias" , sizeof ("alias" ) - 1) == 0) |
412 | add_alias (rp, *modules); |
413 | else if (rp - word == sizeof ("module" ) - 1 |
414 | && memcmp (word, "module" , sizeof ("module" ) - 1) == 0) |
415 | add_module (rp, directory, dir_len, modules, nmodules, modcounter++); |
416 | /* else */ |
417 | /* Otherwise ignore the line. */ |
418 | } |
419 | |
420 | free (line); |
421 | |
422 | fclose (fp); |
423 | } |
424 | |
425 | |
426 | /* Determine the directories we are looking for data in. */ |
427 | void |
428 | internal_function |
429 | __gconv_get_path (void) |
430 | { |
431 | struct path_elem *result; |
432 | __libc_lock_define_initialized (static, lock); |
433 | |
434 | __libc_lock_lock (lock); |
435 | |
436 | /* Make sure there wasn't a second thread doing it already. */ |
437 | result = (struct path_elem *) __gconv_path_elem; |
438 | if (result == NULL) |
439 | { |
440 | /* Determine the complete path first. */ |
441 | char *gconv_path; |
442 | size_t gconv_path_len; |
443 | char *elem; |
444 | char *oldp; |
445 | char *cp; |
446 | int nelems; |
447 | char *cwd; |
448 | size_t cwdlen; |
449 | |
450 | if (__gconv_path_envvar == NULL) |
451 | { |
452 | /* No user-defined path. Make a modifiable copy of the |
453 | default path. */ |
454 | gconv_path = strdupa (default_gconv_path); |
455 | gconv_path_len = sizeof (default_gconv_path); |
456 | cwd = NULL; |
457 | cwdlen = 0; |
458 | } |
459 | else |
460 | { |
461 | /* Append the default path to the user-defined path. */ |
462 | size_t user_len = strlen (__gconv_path_envvar); |
463 | |
464 | gconv_path_len = user_len + 1 + sizeof (default_gconv_path); |
465 | gconv_path = alloca (gconv_path_len); |
466 | __mempcpy (__mempcpy (__mempcpy (gconv_path, __gconv_path_envvar, |
467 | user_len), |
468 | ":" , 1), |
469 | default_gconv_path, sizeof (default_gconv_path)); |
470 | cwd = __getcwd (NULL, 0); |
471 | cwdlen = __glibc_unlikely (cwd == NULL) ? 0 : strlen (cwd); |
472 | } |
473 | assert (default_gconv_path[0] == '/'); |
474 | |
475 | /* In a first pass we calculate the number of elements. */ |
476 | oldp = NULL; |
477 | cp = strchr (gconv_path, ':'); |
478 | nelems = 1; |
479 | while (cp != NULL) |
480 | { |
481 | if (cp != oldp + 1) |
482 | ++nelems; |
483 | oldp = cp; |
484 | cp = strchr (cp + 1, ':'); |
485 | } |
486 | |
487 | /* Allocate the memory for the result. */ |
488 | result = (struct path_elem *) malloc ((nelems + 1) |
489 | * sizeof (struct path_elem) |
490 | + gconv_path_len + nelems |
491 | + (nelems - 1) * (cwdlen + 1)); |
492 | if (result != NULL) |
493 | { |
494 | char *strspace = (char *) &result[nelems + 1]; |
495 | int n = 0; |
496 | |
497 | /* Separate the individual parts. */ |
498 | __gconv_max_path_elem_len = 0; |
499 | elem = __strtok_r (gconv_path, ":" , &gconv_path); |
500 | assert (elem != NULL); |
501 | do |
502 | { |
503 | result[n].name = strspace; |
504 | if (elem[0] != '/') |
505 | { |
506 | assert (cwd != NULL); |
507 | strspace = __mempcpy (strspace, cwd, cwdlen); |
508 | *strspace++ = '/'; |
509 | } |
510 | strspace = __stpcpy (strspace, elem); |
511 | if (strspace[-1] != '/') |
512 | *strspace++ = '/'; |
513 | |
514 | result[n].len = strspace - result[n].name; |
515 | if (result[n].len > __gconv_max_path_elem_len) |
516 | __gconv_max_path_elem_len = result[n].len; |
517 | |
518 | *strspace++ = '\0'; |
519 | ++n; |
520 | } |
521 | while ((elem = __strtok_r (NULL, ":" , &gconv_path)) != NULL); |
522 | |
523 | result[n].name = NULL; |
524 | result[n].len = 0; |
525 | } |
526 | |
527 | __gconv_path_elem = result ?: (struct path_elem *) &empty_path_elem; |
528 | |
529 | free (cwd); |
530 | } |
531 | |
532 | __libc_lock_unlock (lock); |
533 | } |
534 | |
535 | |
536 | /* Read all configuration files found in the user-specified and the default |
537 | path. */ |
538 | void |
539 | attribute_hidden |
540 | __gconv_read_conf (void) |
541 | { |
542 | void *modules = NULL; |
543 | size_t nmodules = 0; |
544 | int save_errno = errno; |
545 | size_t cnt; |
546 | |
547 | /* First see whether we should use the cache. */ |
548 | if (__gconv_load_cache () == 0) |
549 | { |
550 | /* Yes, we are done. */ |
551 | __set_errno (save_errno); |
552 | return; |
553 | } |
554 | |
555 | #ifndef STATIC_GCONV |
556 | /* Find out where we have to look. */ |
557 | if (__gconv_path_elem == NULL) |
558 | __gconv_get_path (); |
559 | |
560 | for (cnt = 0; __gconv_path_elem[cnt].name != NULL; ++cnt) |
561 | { |
562 | const char *elem = __gconv_path_elem[cnt].name; |
563 | size_t elem_len = __gconv_path_elem[cnt].len; |
564 | char *filename; |
565 | |
566 | /* No slash needs to be inserted between elem and gconv_conf_filename; |
567 | elem already ends in a slash. */ |
568 | filename = alloca (elem_len + sizeof (gconv_conf_filename)); |
569 | __mempcpy (__mempcpy (filename, elem, elem_len), |
570 | gconv_conf_filename, sizeof (gconv_conf_filename)); |
571 | |
572 | /* Read the next configuration file. */ |
573 | read_conf_file (filename, elem, elem_len, &modules, &nmodules); |
574 | } |
575 | #endif |
576 | |
577 | /* Add the internal modules. */ |
578 | for (cnt = 0; cnt < sizeof (builtin_modules) / sizeof (builtin_modules[0]); |
579 | ++cnt) |
580 | { |
581 | struct gconv_alias fake_alias; |
582 | |
583 | fake_alias.fromname = (char *) builtin_modules[cnt].from_string; |
584 | |
585 | if (__tfind (&fake_alias, &__gconv_alias_db, __gconv_alias_compare) |
586 | != NULL) |
587 | /* It'll conflict so don't add it. */ |
588 | continue; |
589 | |
590 | insert_module (&builtin_modules[cnt], 0); |
591 | } |
592 | |
593 | /* Add aliases for builtin conversions. */ |
594 | const char *cp = builtin_aliases; |
595 | do |
596 | { |
597 | const char *from = cp; |
598 | const char *to = __rawmemchr (from, '\0') + 1; |
599 | cp = __rawmemchr (to, '\0') + 1; |
600 | |
601 | add_alias2 (from, to, cp, modules); |
602 | } |
603 | while (*cp != '\0'); |
604 | |
605 | /* Restore the error number. */ |
606 | __set_errno (save_errno); |
607 | } |
608 | |
609 | |
610 | |
611 | /* Free all resources if necessary. */ |
612 | libc_freeres_fn (free_mem) |
613 | { |
614 | if (__gconv_path_elem != NULL && __gconv_path_elem != &empty_path_elem) |
615 | free ((void *) __gconv_path_elem); |
616 | } |
617 | |