1/* Copyright (C) 1999-2016 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3
4 The GNU C Library is free software; you can redistribute it and/or
5 modify it under the terms of the GNU Lesser General Public
6 License as published by the Free Software Foundation; either
7 version 2.1 of the License, or (at your option) any later version.
8
9 The GNU C Library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Lesser General Public License for more details.
13
14 You should have received a copy of the GNU Lesser General Public
15 License along with the GNU C Library; if not, see
16 <http://www.gnu.org/licenses/>.
17
18 As a special exception, if you link the code in this file with
19 files compiled with a GNU compiler to produce an executable,
20 that does not cause the resulting executable to be covered by
21 the GNU Lesser General Public License. This exception does not
22 however invalidate any other reasons why the executable file
23 might be covered by the GNU Lesser General Public License.
24 This exception applies to code released by its copyright holders
25 in files containing the exception. */
26
27#include <libioP.h>
28#ifdef _LIBC
29# include <dlfcn.h>
30# include <wchar.h>
31#endif
32#include <assert.h>
33#include <stdlib.h>
34#include <string.h>
35
36#ifdef _LIBC
37# include <langinfo.h>
38# include <locale/localeinfo.h>
39# include <wcsmbs/wcsmbsload.h>
40# include <iconv/gconv_int.h>
41# include <shlib-compat.h>
42# include <sysdep.h>
43#endif
44
45
46/* Prototypes of libio's codecvt functions. */
47static enum __codecvt_result do_out (struct _IO_codecvt *codecvt,
48 __mbstate_t *statep,
49 const wchar_t *from_start,
50 const wchar_t *from_end,
51 const wchar_t **from_stop, char *to_start,
52 char *to_end, char **to_stop);
53static enum __codecvt_result do_unshift (struct _IO_codecvt *codecvt,
54 __mbstate_t *statep, char *to_start,
55 char *to_end, char **to_stop);
56static enum __codecvt_result do_in (struct _IO_codecvt *codecvt,
57 __mbstate_t *statep,
58 const char *from_start,
59 const char *from_end,
60 const char **from_stop, wchar_t *to_start,
61 wchar_t *to_end, wchar_t **to_stop);
62static int do_encoding (struct _IO_codecvt *codecvt);
63static int do_length (struct _IO_codecvt *codecvt, __mbstate_t *statep,
64 const char *from_start,
65 const char *from_end, _IO_size_t max);
66static int do_max_length (struct _IO_codecvt *codecvt);
67static int do_always_noconv (struct _IO_codecvt *codecvt);
68
69
70/* The functions used in `codecvt' for libio are always the same. */
71const struct _IO_codecvt __libio_codecvt =
72{
73 .__codecvt_destr = NULL, /* Destructor, never used. */
74 .__codecvt_do_out = do_out,
75 .__codecvt_do_unshift = do_unshift,
76 .__codecvt_do_in = do_in,
77 .__codecvt_do_encoding = do_encoding,
78 .__codecvt_do_always_noconv = do_always_noconv,
79 .__codecvt_do_length = do_length,
80 .__codecvt_do_max_length = do_max_length
81};
82
83
84/* Return orientation of stream. If mode is nonzero try to change
85 the orientation first. */
86#undef _IO_fwide
87int
88_IO_fwide (_IO_FILE *fp, int mode)
89{
90 /* Normalize the value. */
91 mode = mode < 0 ? -1 : (mode == 0 ? 0 : 1);
92
93#if defined SHARED && defined _LIBC \
94 && SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_1)
95 if (__builtin_expect (&_IO_stdin_used == NULL, 0)
96 && (fp == _IO_stdin || fp == _IO_stdout || fp == _IO_stderr))
97 /* This is for a stream in the glibc 2.0 format. */
98 return -1;
99#endif
100
101 /* The orientation already has been determined. */
102 if (fp->_mode != 0
103 /* Or the caller simply wants to know about the current orientation. */
104 || mode == 0)
105 return fp->_mode;
106
107 /* Set the orientation appropriately. */
108 if (mode > 0)
109 {
110 struct _IO_codecvt *cc = fp->_codecvt = &fp->_wide_data->_codecvt;
111
112 fp->_wide_data->_IO_read_ptr = fp->_wide_data->_IO_read_end;
113 fp->_wide_data->_IO_write_ptr = fp->_wide_data->_IO_write_base;
114
115 /* Get the character conversion functions based on the currently
116 selected locale for LC_CTYPE. */
117#ifdef _LIBC
118 {
119 /* Clear the state. We start all over again. */
120 memset (&fp->_wide_data->_IO_state, '\0', sizeof (__mbstate_t));
121 memset (&fp->_wide_data->_IO_last_state, '\0', sizeof (__mbstate_t));
122
123 struct gconv_fcts fcts;
124 __wcsmbs_clone_conv (&fcts);
125 assert (fcts.towc_nsteps == 1);
126 assert (fcts.tomb_nsteps == 1);
127
128 /* The functions are always the same. */
129 *cc = __libio_codecvt;
130
131 cc->__cd_in.__cd.__nsteps = fcts.towc_nsteps;
132 cc->__cd_in.__cd.__steps = fcts.towc;
133
134 cc->__cd_in.__cd.__data[0].__invocation_counter = 0;
135 cc->__cd_in.__cd.__data[0].__internal_use = 1;
136 cc->__cd_in.__cd.__data[0].__flags = __GCONV_IS_LAST;
137 cc->__cd_in.__cd.__data[0].__statep = &fp->_wide_data->_IO_state;
138
139 cc->__cd_out.__cd.__nsteps = fcts.tomb_nsteps;
140 cc->__cd_out.__cd.__steps = fcts.tomb;
141
142 cc->__cd_out.__cd.__data[0].__invocation_counter = 0;
143 cc->__cd_out.__cd.__data[0].__internal_use = 1;
144 cc->__cd_out.__cd.__data[0].__flags
145 = __GCONV_IS_LAST | __GCONV_TRANSLIT;
146 cc->__cd_out.__cd.__data[0].__statep = &fp->_wide_data->_IO_state;
147 }
148#else
149# ifdef _GLIBCPP_USE_WCHAR_T
150 {
151 /* Determine internal and external character sets.
152
153 XXX For now we make our life easy: we assume a fixed internal
154 encoding (as most sane systems have; hi HP/UX!). If somebody
155 cares about systems which changing internal charsets they
156 should come up with a solution for the determination of the
157 currently used internal character set. */
158 const char *internal_ccs = _G_INTERNAL_CCS;
159 const char *external_ccs = NULL;
160
161# ifdef HAVE_NL_LANGINFO
162 external_ccs = nl_langinfo (CODESET);
163# endif
164 if (external_ccs == NULL)
165 external_ccs = "ISO-8859-1";
166
167 cc->__cd_in = iconv_open (internal_ccs, external_ccs);
168 if (cc->__cd_in != (iconv_t) -1)
169 cc->__cd_out = iconv_open (external_ccs, internal_ccs);
170
171 if (cc->__cd_in == (iconv_t) -1 || cc->__cd_out == (iconv_t) -1)
172 {
173 if (cc->__cd_in != (iconv_t) -1)
174 iconv_close (cc->__cd_in);
175 /* XXX */
176 abort ();
177 }
178 }
179# else
180# error "somehow determine this from LC_CTYPE"
181# endif
182#endif
183
184 /* From now on use the wide character callback functions. */
185 _IO_JUMPS_FILE_plus (fp) = fp->_wide_data->_wide_vtable;
186 }
187
188 /* Set the mode now. */
189 fp->_mode = mode;
190
191 return mode;
192}
193
194
195static enum __codecvt_result
196do_out (struct _IO_codecvt *codecvt, __mbstate_t *statep,
197 const wchar_t *from_start, const wchar_t *from_end,
198 const wchar_t **from_stop, char *to_start, char *to_end,
199 char **to_stop)
200{
201 enum __codecvt_result result;
202
203#ifdef _LIBC
204 struct __gconv_step *gs = codecvt->__cd_out.__cd.__steps;
205 int status;
206 size_t dummy;
207 const unsigned char *from_start_copy = (unsigned char *) from_start;
208
209 codecvt->__cd_out.__cd.__data[0].__outbuf = (unsigned char *) to_start;
210 codecvt->__cd_out.__cd.__data[0].__outbufend = (unsigned char *) to_end;
211 codecvt->__cd_out.__cd.__data[0].__statep = statep;
212
213 __gconv_fct fct = gs->__fct;
214#ifdef PTR_DEMANGLE
215 if (gs->__shlib_handle != NULL)
216 PTR_DEMANGLE (fct);
217#endif
218
219 status = DL_CALL_FCT (fct,
220 (gs, codecvt->__cd_out.__cd.__data, &from_start_copy,
221 (const unsigned char *) from_end, NULL,
222 &dummy, 0, 0));
223
224 *from_stop = (wchar_t *) from_start_copy;
225 *to_stop = (char *) codecvt->__cd_out.__cd.__data[0].__outbuf;
226
227 switch (status)
228 {
229 case __GCONV_OK:
230 case __GCONV_EMPTY_INPUT:
231 result = __codecvt_ok;
232 break;
233
234 case __GCONV_FULL_OUTPUT:
235 case __GCONV_INCOMPLETE_INPUT:
236 result = __codecvt_partial;
237 break;
238
239 default:
240 result = __codecvt_error;
241 break;
242 }
243#else
244# ifdef _GLIBCPP_USE_WCHAR_T
245 size_t res;
246 const char *from_start_copy = (const char *) from_start;
247 size_t from_len = from_end - from_start;
248 char *to_start_copy = to_start;
249 size_t to_len = to_end - to_start;
250 res = iconv (codecvt->__cd_out, &from_start_copy, &from_len,
251 &to_start_copy, &to_len);
252
253 if (res == 0 || from_len == 0)
254 result = __codecvt_ok;
255 else if (to_len < codecvt->__codecvt_do_max_length (codecvt))
256 result = __codecvt_partial;
257 else
258 result = __codecvt_error;
259
260# else
261 /* Decide what to do. */
262 result = __codecvt_error;
263# endif
264#endif
265
266 return result;
267}
268
269
270static enum __codecvt_result
271do_unshift (struct _IO_codecvt *codecvt, __mbstate_t *statep,
272 char *to_start, char *to_end, char **to_stop)
273{
274 enum __codecvt_result result;
275
276#ifdef _LIBC
277 struct __gconv_step *gs = codecvt->__cd_out.__cd.__steps;
278 int status;
279 size_t dummy;
280
281 codecvt->__cd_out.__cd.__data[0].__outbuf = (unsigned char *) to_start;
282 codecvt->__cd_out.__cd.__data[0].__outbufend = (unsigned char *) to_end;
283 codecvt->__cd_out.__cd.__data[0].__statep = statep;
284
285 __gconv_fct fct = gs->__fct;
286#ifdef PTR_DEMANGLE
287 if (gs->__shlib_handle != NULL)
288 PTR_DEMANGLE (fct);
289#endif
290
291 status = DL_CALL_FCT (fct,
292 (gs, codecvt->__cd_out.__cd.__data, NULL, NULL,
293 NULL, &dummy, 1, 0));
294
295 *to_stop = (char *) codecvt->__cd_out.__cd.__data[0].__outbuf;
296
297 switch (status)
298 {
299 case __GCONV_OK:
300 case __GCONV_EMPTY_INPUT:
301 result = __codecvt_ok;
302 break;
303
304 case __GCONV_FULL_OUTPUT:
305 case __GCONV_INCOMPLETE_INPUT:
306 result = __codecvt_partial;
307 break;
308
309 default:
310 result = __codecvt_error;
311 break;
312 }
313#else
314# ifdef _GLIBCPP_USE_WCHAR_T
315 size_t res;
316 char *to_start_copy = (char *) to_start;
317 size_t to_len = to_end - to_start;
318
319 res = iconv (codecvt->__cd_out, NULL, NULL, &to_start_copy, &to_len);
320
321 if (res == 0)
322 result = __codecvt_ok;
323 else if (to_len < codecvt->__codecvt_do_max_length (codecvt))
324 result = __codecvt_partial;
325 else
326 result = __codecvt_error;
327# else
328 /* Decide what to do. */
329 result = __codecvt_error;
330# endif
331#endif
332
333 return result;
334}
335
336
337static enum __codecvt_result
338do_in (struct _IO_codecvt *codecvt, __mbstate_t *statep,
339 const char *from_start, const char *from_end, const char **from_stop,
340 wchar_t *to_start, wchar_t *to_end, wchar_t **to_stop)
341{
342 enum __codecvt_result result;
343
344#ifdef _LIBC
345 struct __gconv_step *gs = codecvt->__cd_in.__cd.__steps;
346 int status;
347 size_t dummy;
348 const unsigned char *from_start_copy = (unsigned char *) from_start;
349
350 codecvt->__cd_in.__cd.__data[0].__outbuf = (unsigned char *) to_start;
351 codecvt->__cd_in.__cd.__data[0].__outbufend = (unsigned char *) to_end;
352 codecvt->__cd_in.__cd.__data[0].__statep = statep;
353
354 __gconv_fct fct = gs->__fct;
355#ifdef PTR_DEMANGLE
356 if (gs->__shlib_handle != NULL)
357 PTR_DEMANGLE (fct);
358#endif
359
360 status = DL_CALL_FCT (fct,
361 (gs, codecvt->__cd_in.__cd.__data, &from_start_copy,
362 (const unsigned char *) from_end, NULL,
363 &dummy, 0, 0));
364
365 *from_stop = (const char *) from_start_copy;
366 *to_stop = (wchar_t *) codecvt->__cd_in.__cd.__data[0].__outbuf;
367
368 switch (status)
369 {
370 case __GCONV_OK:
371 case __GCONV_EMPTY_INPUT:
372 result = __codecvt_ok;
373 break;
374
375 case __GCONV_FULL_OUTPUT:
376 case __GCONV_INCOMPLETE_INPUT:
377 result = __codecvt_partial;
378 break;
379
380 default:
381 result = __codecvt_error;
382 break;
383 }
384#else
385# ifdef _GLIBCPP_USE_WCHAR_T
386 size_t res;
387 const char *from_start_copy = (const char *) from_start;
388 size_t from_len = from_end - from_start;
389 char *to_start_copy = (char *) from_start;
390 size_t to_len = to_end - to_start;
391
392 res = iconv (codecvt->__cd_in, &from_start_copy, &from_len,
393 &to_start_copy, &to_len);
394
395 if (res == 0)
396 result = __codecvt_ok;
397 else if (to_len == 0)
398 result = __codecvt_partial;
399 else if (from_len < codecvt->__codecvt_do_max_length (codecvt))
400 result = __codecvt_partial;
401 else
402 result = __codecvt_error;
403# else
404 /* Decide what to do. */
405 result = __codecvt_error;
406# endif
407#endif
408
409 return result;
410}
411
412
/* Classify the external encoding described by CODECVT.

   In the _LIBC build the result is -1 when the first input conversion
   step is marked stateful, 0 when the minimum and maximum number of
   input bytes per character differ, and otherwise that constant number
   of bytes.  Without _LIBC the answer is always -1.  */
static int
do_encoding (struct _IO_codecvt *codecvt)
{
#ifdef _LIBC
  struct __gconv_step *step = codecvt->__cd_in.__cd.__steps;
  int width;

  if (step[0].__stateful)
    /* Stateful encoding.  */
    width = -1;
  else if (step[0].__min_needed_from == step[0].__max_needed_from)
    /* Every character needs the same number of input bytes.  */
    width = step[0].__min_needed_from;
  else
    /* The number of bytes per character is not constant.  */
    width = 0;

  return width;
#else
  /* Nothing is known about the encoding: assume the worst case.  */
  return -1;
#endif
}
433
434
/* Tell whether conversion can always be skipped.  CODECVT is not
   consulted; the answer is unconditionally 0.  */
static int
do_always_noconv (struct _IO_codecvt *codecvt)
{
  (void) codecvt;

  return 0;
}
440
441
442static int
443do_length (struct _IO_codecvt *codecvt, __mbstate_t *statep,
444 const char *from_start, const char *from_end, _IO_size_t max)
445{
446 int result;
447#ifdef _LIBC
448 const unsigned char *cp = (const unsigned char *) from_start;
449 wchar_t to_buf[max];
450 struct __gconv_step *gs = codecvt->__cd_in.__cd.__steps;
451 size_t dummy;
452
453 codecvt->__cd_in.__cd.__data[0].__outbuf = (unsigned char *) to_buf;
454 codecvt->__cd_in.__cd.__data[0].__outbufend = (unsigned char *) &to_buf[max];
455 codecvt->__cd_in.__cd.__data[0].__statep = statep;
456
457 __gconv_fct fct = gs->__fct;
458#ifdef PTR_DEMANGLE
459 if (gs->__shlib_handle != NULL)
460 PTR_DEMANGLE (fct);
461#endif
462
463 DL_CALL_FCT (fct,
464 (gs, codecvt->__cd_in.__cd.__data, &cp,
465 (const unsigned char *) from_end, NULL,
466 &dummy, 0, 0));
467
468 result = cp - (const unsigned char *) from_start;
469#else
470# ifdef _GLIBCPP_USE_WCHAR_T
471 const char *from_start_copy = (const char *) from_start;
472 size_t from_len = from_end - from_start;
473 wchar_t to_buf[max];
474 size_t res;
475 char *to_start = (char *) to_buf;
476
477 res = iconv (codecvt->__cd_in, &from_start_copy, &from_len,
478 &to_start, &max);
479
480 result = from_start_copy - (char *) from_start;
481# else
482 /* Decide what to do. */
483 result = 0;
484# endif
485#endif
486
487 return result;
488}
489
490
/* Return the largest number of input bytes a single character may need.
   In the _LIBC build this is the __max_needed_from value of the first
   input conversion step; otherwise it is MB_CUR_MAX.  */
static int
do_max_length (struct _IO_codecvt *codecvt)
{
  int longest;

#ifdef _LIBC
  longest = codecvt->__cd_in.__cd.__steps[0].__max_needed_from;
#else
  longest = MB_CUR_MAX;
#endif

  return longest;
}
500