1 | /* toutf8.c --- Convert strings from system locale into UTF-8. |
2 | * Copyright (C) 2002, 2003, 2004, 2005 Simon Josefsson |
3 | * |
4 | * This file is part of GNU Libidn. |
5 | * |
6 | * GNU Libidn is free software; you can redistribute it and/or |
7 | * modify it under the terms of the GNU Lesser General Public |
8 | * License as published by the Free Software Foundation; either |
9 | * version 2.1 of the License, or (at your option) any later version. |
10 | * |
11 | * GNU Libidn is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | * Lesser General Public License for more details. |
15 | * |
16 | * You should have received a copy of the GNU Lesser General Public |
17 | * License along with GNU Libidn; if not, see <http://www.gnu.org/licenses/>. |
18 | */ |
19 | |
20 | #if HAVE_CONFIG_H |
21 | # include "config.h" |
22 | #endif |
23 | |
24 | /* Get prototypes. */ |
25 | #include "stringprep.h" |
26 | |
27 | /* Get fprintf. */ |
28 | #include <stdio.h> |
29 | |
30 | /* Get getenv. */ |
31 | #include <stdlib.h> |
32 | |
33 | /* Get strlen. */ |
34 | #include <string.h> |
35 | |
36 | /* Get iconv_string. */ |
37 | #include "iconvme.h" |
38 | |
39 | #ifdef _LIBC |
40 | # define HAVE_ICONV 1 |
41 | # define LOCALE_WORKS 1 |
42 | #endif |
43 | |
44 | #if LOCALE_WORKS |
45 | # include <langinfo.h> |
46 | # include <locale.h> |
47 | #endif |
48 | |
#ifdef _LIBC
/* When built with _LIBC defined, map the call straight onto nl_langinfo.  */
# define stringprep_locale_charset() nl_langinfo (CODESET)
#else
/**
 * stringprep_locale_charset - return charset used in current locale
 *
 * Find out the current locale charset.  The function respects the
 * CHARSET environment variable, but typically uses
 * nl_langinfo(CODESET) when it is supported.  It falls back on "ASCII"
 * if CHARSET isn't set (or is empty) and nl_langinfo isn't supported
 * or doesn't return anything.
 *
 * Note that this function returns the application's locale's preferred
 * charset (or the thread's locale's preferred charset, if your system
 * supports thread-specific locales).  It does not return what the
 * system may be using.  Thus, if you receive data from external
 * sources you cannot in general use this function to guess what
 * charset it is encoded in.  Use stringprep_convert from the external
 * representation into the charset returned by this function, to have
 * data in the locale encoding.
 *
 * The returned pointer comes from getenv, from nl_langinfo, or is a
 * string literal, so the caller must not modify or free it.
 *
 * Return value: Return the character set used by the current locale.
 *   It will never return NULL, but use "ASCII" as a fallback.
 **/
const char *
stringprep_locale_charset (void)
{
  const char *charset = getenv ("CHARSET");     /* flawfinder: ignore */

  /* A non-empty CHARSET environment variable takes precedence.  */
  if (charset && *charset)
    return charset;

  /* Test the macro's value, exactly like the guard around the
     <langinfo.h> include, so that LOCALE_WORKS defined as 0 does not
     compile a call to an undeclared nl_langinfo.  */
# if LOCALE_WORKS
  charset = nl_langinfo (CODESET);

  if (charset && *charset)
    return charset;
# endif

  return "ASCII";
}
#endif
90 | |
/**
 * stringprep_convert - encode string using new character set
 * @str: input zero-terminated string.
 * @to_codeset: name of destination character set.
 * @from_codeset: name of origin character set, as used by @str.
 *
 * Transcode @str from @from_codeset into @to_codeset.  When the
 * library is built with iconv support (HAVE_ICONV) the work is handed
 * to iconv_string().  Otherwise no conversion is possible: a warning
 * is printed on stderr and an unmodified copy of @str is returned.
 *
 * Return value: Returns newly allocated zero-terminated string which
 *   is @str transcoded into to_codeset.  In the build without iconv,
 *   NULL is returned if the copy cannot be allocated.
 **/
char *
stringprep_convert (const char *str,
                    const char *to_codeset, const char *from_codeset)
{
#if HAVE_ICONV
  return iconv_string (str, from_codeset, to_codeset);
#else
  size_t size;
  char *copy;

  fprintf (stderr, "libidn: warning: libiconv not installed, "
           "cannot convert data to UTF-8\n");

  /* Room for every byte of the input plus the terminating zero.  */
  size = strlen (str) + 1;
  copy = malloc (size);
  if (copy == NULL)
    return NULL;

  memcpy (copy, str, size);
  return copy;
#endif
}
119 | |
/**
 * stringprep_locale_to_utf8 - convert locale encoded string to UTF-8
 * @str: input zero terminated string.
 *
 * Transcode @str from the character set reported by
 * stringprep_locale_charset() into UTF-8, delegating the actual work
 * to stringprep_convert().
 *
 * Return value: Returns newly allocated zero-terminated string which
 *   is @str transcoded into UTF-8.
 **/
char *
stringprep_locale_to_utf8 (const char *str)
{
  const char *locale_codeset = stringprep_locale_charset ();

  return stringprep_convert (str, "UTF-8", locale_codeset);
}
135 | |
/**
 * stringprep_utf8_to_locale - encode UTF-8 string to locale encoding
 * @str: input zero terminated string.
 *
 * Transcode @str from UTF-8 into the character set reported by
 * stringprep_locale_charset(), delegating the actual work to
 * stringprep_convert().
 *
 * Return value: Returns newly allocated zero-terminated string which
 *   is @str transcoded into the locale's character set.
 **/
char *
stringprep_utf8_to_locale (const char *str)
{
  const char *locale_codeset = stringprep_locale_charset ();

  return stringprep_convert (str, locale_codeset, "UTF-8");
}
151 | |