1/* Conversion module for ISO-2022-JP and ISO-2022-JP-2.
2 Copyright (C) 1998-2018 Free Software Foundation, Inc.
3 This file is part of the GNU C Library.
4 Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
5
6 The GNU C Library is free software; you can redistribute it and/or
7 modify it under the terms of the GNU Lesser General Public
8 License as published by the Free Software Foundation; either
9 version 2.1 of the License, or (at your option) any later version.
10
11 The GNU C Library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public
17 License along with the GNU C Library; if not, see
18 <http://www.gnu.org/licenses/>. */
19
20#include <assert.h>
21#include <dlfcn.h>
22#include <gconv.h>
23#include <stdint.h>
24#include <stdlib.h>
25#include <string.h>
26#include "jis0201.h"
27#include "jis0208.h"
28#include "jis0212.h"
29#include "gb2312.h"
30#include "ksc5601.h"
31
/* One entry of the range table used for the UCS4 -> ISO 8859-7 lookup in
   the encoding loop below.  START and END bound a run of UCS4 code points
   (the lookup skips entries while the character is greater than END and
   accepts it when it is not below START).  IDX is the offset that is added
   to (character - 0xa0) to index iso88597_from_ucs4.
   NOTE(review): the table itself (from_idx) is presumably provided by
   iso8859-7jp.h, included right after this definition -- confirm there
   before touching the field order or types.  */
struct gap
{
  uint16_t start;
  uint16_t end;
  int32_t idx;
};
38
39#include "iso8859-7jp.h"
40
/* This makes obvious what everybody knows: 0x1b is the Esc character.  */
#define ESC 0x1b

/* We provide our own initialization and destructor function (gconv_init
   and gconv_end below).  */
#define DEFINE_INIT 0
#define DEFINE_FINI 0

/* Definitions used in the body of the `gconv' function.

   FROM_LOOP and TO_LOOP name the two loop functions that are instantiated
   further down in this file.  The *_NEEDED_* values are byte counts per
   converted character:

   - decoding reads between 1 byte (a plain character) and 4 bytes (the
     longest escape sequence handled, e.g. `Esc $ ( C') and stores 4 bytes
     (one UCS4 value) per character produced;

   - encoding reads one 4-byte UCS4 value and writes between 1 byte and
     6 bytes (a 4-byte designation followed by a 2-byte character, or a
     3-byte G2 designation followed by a 3-byte `Esc N' sequence).  */
#define FROM_LOOP from_iso2022jp_loop
#define TO_LOOP to_iso2022jp_loop
#define ONE_DIRECTION 0
#define FROM_LOOP_MIN_NEEDED_FROM 1
#define FROM_LOOP_MAX_NEEDED_FROM 4
#define FROM_LOOP_MIN_NEEDED_TO 4
#define FROM_LOOP_MAX_NEEDED_TO 4
#define TO_LOOP_MIN_NEEDED_FROM 4
#define TO_LOOP_MAX_NEEDED_FROM 4
#define TO_LOOP_MIN_NEEDED_TO 1
#define TO_LOOP_MAX_NEEDED_TO 6
/* True when the current step decodes ISO-2022-JP; relies on the `dir'
   local declared by PREPARE_LOOP.  */
#define FROM_DIRECTION (dir == from_iso2022jp)
/* Locals made available to the loops and to the state macros below: the
   direction and variant recorded by gconv_init, a scratch copy of the
   state word for SAVE_RESET_STATE, and a pointer to the state word itself.
   NOTE(review): the expansion site is in the included skeleton code, which
   is not visible here; `step' and `data' are expected to be in scope
   there.  */
#define PREPARE_LOOP \
  enum direction dir = ((struct iso2022jp_data *) step->__data)->dir; \
  enum variant var = ((struct iso2022jp_data *) step->__data)->var; \
  int save_set; \
  int *setp = &data->__statep->__count;
/* Extra arguments handed to both loop functions; must stay in step with
   the EXTRA_LOOP_DECLS definitions further down.  */
#define EXTRA_LOOP_ARGS , var, setp
67
68
/* Which way a conversion step runs.  The zero value doubles as the
   "no match" marker used by gconv_init.  */
enum direction
{
  illegal_dir = 0,
  to_iso2022jp = 1,
  from_iso2022jp = 2
};
76
/* The two encodings served by this module: ISO-2022-JP and its superset
   ISO-2022-JP-2.  The zero value means "not determined".  */
enum variant
{
  illegal_var = 0,
  iso2022jp = 1,
  iso2022jp2 = 2
};
84
85
/* Per-step private data.  gconv_init allocates one instance and stores it
   in step->__data, PREPARE_LOOP reads both fields back, and gconv_end
   frees it.  */
struct iso2022jp_data
{
  /* Whether this step decodes or encodes ISO-2022-JP.  */
  enum direction dir;
  /* Whether plain ISO-2022-JP or ISO-2022-JP-2 is in effect.  */
  enum variant var;
};
91
92
/* The COUNT element of the conversion state records, in bits 3..5, which
   character set is currently selected.  The field is read by masking with
   CURRENT_SEL_MASK; the possible values are:  */
enum
{
  ASCII_set          = 0x00,
  JISX0208_1978_set  = 0x08,
  JISX0208_1983_set  = 0x10,
  JISX0201_Roman_set = 0x18,
  JISX0201_Kana_set  = 0x20,
  GB2312_set         = 0x28,
  KSC5601_set        = 0x30,
  JISX0212_set       = 0x38,
  CURRENT_SEL_MASK   = 0x38
};
107
/* Bits 6..7 of the same state word hold the designation of the G2 set.
   The field is read by masking with CURRENT_ASSIGN_MASK; the possible
   values are:  */
enum
{
  UNSPECIFIED_set     = 0x00,
  ISO88591_set        = 0x40,
  ISO88597_set        = 0x80,
  CURRENT_ASSIGN_MASK = 0xc0
};
117
/* Bits 8..10 of the state word, used only while converting from Unicode
   to ISO-2022-JP-2, track how far a language tag has been parsed.  The
   values below TAG_language are the stable results (their value shifted
   right by eight indexes conversion_lists); the values from TAG_language
   upwards are temporary states that exist only while a tag is being
   read.  */
enum
{
  /* Stable states.  */
  TAG_none         = 0x000,
  TAG_language_ja  = 0x100,
  TAG_language_ko  = 0x200,
  TAG_language_zh  = 0x300,
  /* Temporary parsing states.  */
  TAG_language     = 0x400,
  TAG_language_j   = 0x500,
  TAG_language_k   = 0x600,
  TAG_language_z   = 0x700,
  CURRENT_TAG_MASK = 0x700
};
133
134
135extern int gconv_init (struct __gconv_step *step);
136int
137gconv_init (struct __gconv_step *step)
138{
139 /* Determine which direction. */
140 struct iso2022jp_data *new_data;
141 enum direction dir = illegal_dir;
142 enum variant var = illegal_var;
143 int result;
144
145 if (__strcasecmp (step->__from_name, "ISO-2022-JP//") == 0)
146 {
147 dir = from_iso2022jp;
148 var = iso2022jp;
149 }
150 else if (__strcasecmp (step->__to_name, "ISO-2022-JP//") == 0)
151 {
152 dir = to_iso2022jp;
153 var = iso2022jp;
154 }
155 else if (__strcasecmp (step->__from_name, "ISO-2022-JP-2//") == 0)
156 {
157 dir = from_iso2022jp;
158 var = iso2022jp2;
159 }
160 else if (__strcasecmp (step->__to_name, "ISO-2022-JP-2//") == 0)
161 {
162 dir = to_iso2022jp;
163 var = iso2022jp2;
164 }
165
166 result = __GCONV_NOCONV;
167 if (__builtin_expect (dir, from_iso2022jp) != illegal_dir)
168 {
169 new_data
170 = (struct iso2022jp_data *) malloc (sizeof (struct iso2022jp_data));
171
172 result = __GCONV_NOMEM;
173 if (new_data != NULL)
174 {
175 new_data->dir = dir;
176 new_data->var = var;
177 step->__data = new_data;
178
179 if (dir == from_iso2022jp)
180 {
181 step->__min_needed_from = FROM_LOOP_MIN_NEEDED_FROM;
182 step->__max_needed_from = FROM_LOOP_MAX_NEEDED_FROM;
183 step->__min_needed_to = FROM_LOOP_MIN_NEEDED_TO;
184 step->__max_needed_to = FROM_LOOP_MAX_NEEDED_TO;
185 }
186 else
187 {
188 step->__min_needed_from = TO_LOOP_MIN_NEEDED_FROM;
189 step->__max_needed_from = TO_LOOP_MAX_NEEDED_FROM;
190 step->__min_needed_to = TO_LOOP_MIN_NEEDED_TO;
191 step->__max_needed_to = TO_LOOP_MAX_NEEDED_TO;
192 }
193
194 /* Yes, this is a stateful encoding. */
195 step->__stateful = 1;
196
197 result = __GCONV_OK;
198 }
199 }
200
201 return result;
202}
203
204
205extern void gconv_end (struct __gconv_step *data);
206void
207gconv_end (struct __gconv_step *data)
208{
209 free (data->__data);
210}
211
212
/* Since this is a stateful encoding we have to provide code which resets
   the output state to the initial state.  This has to be done during the
   flushing.

   The macro does nothing when no bit above the low three bits of the
   state word is set.  Otherwise it clears everything except those low
   three bits; when encoding with a non-ASCII set selected it first writes
   `Esc ( B' to the output, or sets STATUS to __GCONV_FULL_OUTPUT (leaving
   the state alone) if fewer than three bytes of room remain.

   NOTE(review): `data', `dir', `outbuf', `outend' and `status' come from
   the expansion site in the included skeleton code, which is not visible
   here.  The low three bits of __count are deliberately preserved;
   presumably the skeleton uses them for its own bookkeeping -- confirm.  */
#define EMIT_SHIFT_TO_INIT \
  /* Avoid warning about unused variable 'var'.  */ \
  (void) var; \
  \
  if ((data->__statep->__count & ~7) != ASCII_set) \
    { \
      if (dir == from_iso2022jp \
          || (data->__statep->__count & CURRENT_SEL_MASK) == ASCII_set) \
        { \
          /* It's easy, we don't have to emit anything, we just reset the \
             state for the input.  Note that this also clears the G2 \
             designation.  */ \
          data->__statep->__count &= 7; \
          data->__statep->__count |= ASCII_set; \
        } \
      else \
        { \
          /* We are not in the initial state.  To switch back we have \
             to emit the sequence `Esc ( B'.  */ \
          if (__glibc_unlikely (outbuf + 3 > outend)) \
            /* We don't have enough room in the output buffer.  */ \
            status = __GCONV_FULL_OUTPUT; \
          else \
            { \
              /* Write out the shift sequence.  */ \
              *outbuf++ = ESC; \
              *outbuf++ = '('; \
              *outbuf++ = 'B'; \
              /* Note that this also clears the G2 designation.  */ \
              data->__statep->__count &= 7; \
              data->__statep->__count |= ASCII_set; \
            } \
        } \
    }
250
251
/* Since we might have to reset the input pointer we must be able to save
   and restore the state.  With a nonzero SAVE the state word is copied
   into `save_set'; with a zero SAVE the saved copy is written back.
   `save_set' and `setp' are the locals declared by PREPARE_LOOP.

   The expansion is an unbraced if/else without a trailing semicolon, so
   the use site must supply the semicolon and must not place it where a
   dangling `else' could bind to it.  */
#define SAVE_RESET_STATE(Save) \
  if (Save) \
    save_set = *setp; \
  else \
    *setp = save_set
259
260
/* First define the conversion function from ISO-2022-JP to UCS4.

   BODY handles one input unit per execution:

   - If the byte is Esc, at least three bytes must be available (four for
     `Esc $ (' in ISO-2022-JP-2); otherwise the loop stops with
     __GCONV_INCOMPLETE_INPUT.  Recognized designations update `set'
     (Esc ( B, Esc ( J, Esc $ @, Esc $ B, and for ISO-2022-JP-2 also
     Esc ( I, Esc $ A, Esc $ ( C, Esc $ ( D) or `set2' (Esc . A, Esc . F,
     ISO-2022-JP-2 only), consume the sequence and `continue' without
     producing output.

   - An Esc that did not match falls through.  In ISO-2022-JP-2 `Esc N'
     is a single shift that decodes the following byte through the G2 set
     recorded in `set2'.  Any other Esc is copied through as a plain byte
     because it is below 0x21.

   - Bytes with the high bit set are rejected.  Everything else is decoded
     according to `set', and the resulting UCS4 value is stored with put32
     (four output bytes).

   NOTE(review): `inptr', `inend', `outptr', `result', put32 and the
   STANDARD_FROM_LOOP_ERR_HANDLER macro, as well as the loop that `break'
   and `continue' refer to, come from the included loop template and are
   not visible here.  The exact skip/abort behavior of the error handler
   must be checked there.  */
#define MIN_NEEDED_INPUT FROM_LOOP_MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT FROM_LOOP_MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT FROM_LOOP_MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT FROM_LOOP_MAX_NEEDED_TO
#define LOOPFCT FROM_LOOP
#define BODY \
  { \
    uint32_t ch = *inptr; \
    \
    /* Recognize escape sequences.  */ \
    if (__builtin_expect (ch, 0) == ESC) \
      { \
        /* We now must be prepared to read two to three more \
           characters.  If we have a match in the first character but \
           then the input buffer ends we terminate with an error since \
           we must not risk missing an escape sequence just because it \
           is not entirely in the current input buffer.  */ \
        if (__builtin_expect (inptr + 2 >= inend, 0) \
            || (var == iso2022jp2 && inptr[1] == '$' && inptr[2] == '(' \
                && __builtin_expect (inptr + 3 >= inend, 0))) \
          { \
            /* Not enough input available.  */ \
            result = __GCONV_INCOMPLETE_INPUT; \
            break; \
          } \
        \
        if (inptr[1] == '(') \
          { \
            if (inptr[2] == 'B') \
              { \
                /* ASCII selected.  */ \
                set = ASCII_set; \
                inptr += 3; \
                continue; \
              } \
            else if (inptr[2] == 'J') \
              { \
                /* JIS X 0201 Roman selected.  */ \
                set = JISX0201_Roman_set; \
                inptr += 3; \
                continue; \
              } \
            else if (var == iso2022jp2 && inptr[2] == 'I') \
              { \
                /* JIS X 0201 Katakana selected.  */ \
                set = JISX0201_Kana_set; \
                inptr += 3; \
                continue; \
              } \
          } \
        else if (inptr[1] == '$') \
          { \
            if (inptr[2] == '@') \
              { \
                /* JIS X 0208-1978 selected.  */ \
                set = JISX0208_1978_set; \
                inptr += 3; \
                continue; \
              } \
            else if (inptr[2] == 'B') \
              { \
                /* JIS X 0208-1983 selected.  */ \
                set = JISX0208_1983_set; \
                inptr += 3; \
                continue; \
              } \
            else if (var == iso2022jp2) \
              { \
                if (inptr[2] == 'A') \
                  { \
                    /* GB 2312-1980 selected.  */ \
                    set = GB2312_set; \
                    inptr += 3; \
                    continue; \
                  } \
                else if (inptr[2] == '(') \
                  { \
                    /* The availability check above guarantees that \
                       inptr[3] may be read here.  */ \
                    if (inptr[3] == 'C') \
                      { \
                        /* KSC 5601-1987 selected.  */ \
                        set = KSC5601_set; \
                        inptr += 4; \
                        continue; \
                      } \
                    else if (inptr[3] == 'D') \
                      { \
                        /* JIS X 0212-1990 selected.  */ \
                        set = JISX0212_set; \
                        inptr += 4; \
                        continue; \
                      } \
                  } \
              } \
          } \
        else if (var == iso2022jp2 && inptr[1] == '.') \
          { \
            if (inptr[2] == 'A') \
              { \
                /* ISO 8859-1-GR selected.  */ \
                set2 = ISO88591_set; \
                inptr += 3; \
                continue; \
              } \
            else if (inptr[2] == 'F') \
              { \
                /* ISO 8859-7-GR selected.  */ \
                set2 = ISO88597_set; \
                inptr += 3; \
                continue; \
              } \
          } \
      } \
    \
    /* Single shift: `Esc N' followed by one byte taken from the G2 set. \
       The third byte is known to be available because of the check made \
       when the Esc was first seen.  */ \
    if (ch == ESC && var == iso2022jp2 && inptr[1] == 'N') \
      { \
        if (set2 == ISO88591_set) \
          { \
            ch = inptr[2] | 0x80; \
            inptr += 3; \
          } \
        else if (__builtin_expect (set2, ISO88597_set) == ISO88597_set) \
          { \
            /* We use the table from the ISO 8859-7 module.  */ \
            if (inptr[2] < 0x20 || inptr[2] >= 0x80) \
              STANDARD_FROM_LOOP_ERR_HANDLER (1); \
            ch = iso88597_to_ucs4[inptr[2] - 0x20]; \
            if (ch == 0) \
              STANDARD_FROM_LOOP_ERR_HANDLER (3); \
            inptr += 3; \
          } \
        else \
          { \
            /* No G2 set has been designated.  */ \
            STANDARD_FROM_LOOP_ERR_HANDLER (1); \
          } \
      } \
    else if (ch >= 0x80) \
      { \
        STANDARD_FROM_LOOP_ERR_HANDLER (1); \
      } \
    else if (set == ASCII_set || (ch < 0x21 || ch == 0x7f)) \
      /* Almost done, just advance the input pointer.  Bytes below 0x21 \
         and 0x7f are passed through unchanged whatever set is \
         selected.  */ \
      ++inptr; \
    else if (set == JISX0201_Roman_set) \
      { \
        /* Use the JIS X 0201 table.  */ \
        ch = jisx0201_to_ucs4 (ch); \
        if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR)) \
          STANDARD_FROM_LOOP_ERR_HANDLER (1); \
        ++inptr; \
      } \
    else if (set == JISX0201_Kana_set) \
      { \
        /* Use the JIS X 0201 table; the byte is looked up with 0x80 \
           added.  */ \
        ch = jisx0201_to_ucs4 (ch + 0x80); \
        if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR)) \
          STANDARD_FROM_LOOP_ERR_HANDLER (1); \
        ++inptr; \
      } \
    else \
      { \
        /* The remaining sets go through helpers that receive the address \
           of inptr.  A result of 0 is treated below as "input ends in \
           the middle of a character".  */ \
        if (set == JISX0208_1978_set || set == JISX0208_1983_set) \
          /* XXX I don't have the tables for these two old variants of \
             JIS X 0208.  Therefore I'm using the tables for JIS X \
             0208-1990.  If somebody has problems with this please \
             provide the appropriate tables.  */ \
          ch = jisx0208_to_ucs4 (&inptr, inend - inptr, 0); \
        else if (set == JISX0212_set) \
          /* Use the JIS X 0212 table.  */ \
          ch = jisx0212_to_ucs4 (&inptr, inend - inptr, 0); \
        else if (set == GB2312_set) \
          /* Use the GB 2312 table.  */ \
          ch = gb2312_to_ucs4 (&inptr, inend - inptr, 0); \
        else \
          { \
            assert (set == KSC5601_set); \
            \
            /* Use the KSC 5601 table.  */ \
            ch = ksc5601_to_ucs4 (&inptr, inend - inptr, 0); \
          } \
        \
        if (__glibc_unlikely (ch == 0)) \
          { \
            result = __GCONV_INCOMPLETE_INPUT; \
            break; \
          } \
        else if (__glibc_unlikely (ch == __UNKNOWN_10646_CHAR)) \
          { \
            STANDARD_FROM_LOOP_ERR_HANDLER (1); \
          } \
      } \
    \
    put32 (outptr, ch); \
    outptr += 4; \
  }
#define LOOP_NEED_FLAGS
#define EXTRA_LOOP_DECLS , enum variant var, int *setp
/* The loop works on local copies of the selected set and the G2
   designation.  UPDATE_PARAMS stores exactly `set | set2' back, so no
   other bit of the state word survives it.  */
#define INIT_PARAMS int set = *setp & CURRENT_SEL_MASK; \
                    int set2 = *setp & CURRENT_ASSIGN_MASK
#define UPDATE_PARAMS *setp = set | set2
#include <iconv/loop.c>
462
463
464/* Next, define the other direction. */
465
/* The groups of character sets the encoder can try.  `none' is zero so
   that it terminates a conversion list.  */
enum conversion
{
  none = 0,
  european = 1,
  japanese = 2,
  chinese = 3,
  korean = 4,
  other = 5
};

/* A conversion list packs up to five `enum conversion' values into one
   integer, three bits per entry, first entry in the lowest bits.  */
typedef unsigned int cvlist_t;
#define CVLIST(cv1, cv2, cv3, cv4, cv5) \
  ((cv1) + ((cv2) << 3) + ((cv3) << 6) + ((cv4) << 9) + ((cv5) << 12))
/* Head of the list: the lowest three bits.  */
#define CVLIST_FIRST(cvl) ((cvl) & 7)
/* Tail of the list: drop the head entry.  */
#define CVLIST_REST(cvl) ((cvl) >> 3)

/* Order in which the groups are tried, indexed by the stable language tag
   state shifted right by eight bits.  */
static const cvlist_t conversion_lists[4] =
  {
    /* TAG_none */
    CVLIST (japanese, european, chinese, korean, other),
    /* TAG_language_ja */
    CVLIST (japanese, european, chinese, korean, other),
    /* TAG_language_ko */
    CVLIST (korean, european, japanese, chinese, other),
    /* TAG_language_zh */
    CVLIST (chinese, european, japanese, korean, other)
  };
481
/* The conversion function from UCS4 to ISO-2022-JP / ISO-2022-JP-2.

   BODY handles one UCS4 character (four input bytes, read with get32):

   1. In ISO-2022-JP-2 the Unicode tag characters U+E0000..U+E007F are
      consumed without output and only drive the language tag state.  Any
      other character ends an unfinished tag.

   2. The character is tried in the currently selected set, provided the
      language tag does not ask for a different preference.

   3. If that fails and no language tag is active, the currently
      designated G2 set is tried through an `Esc N' single shift.

   4. If that fails too, characters up to 0x7f are written after
      switching to ASCII with `Esc ( B'.  All others are searched for in
      the other character sets in the order given by conversion_lists
      (plain ISO-2022-JP tries the Japanese group only).

   `written' holds either a byte count or __UNKNOWN_10646_CHAR meaning
   "not representable here".  A result of 0 from the two-byte helpers in
   step 2 is treated as "output buffer full".

   When the buffer fills up after a designation sequence has already been
   written, the sequence stays in the output and `set' / `set2' keep the
   new value.

   NOTE(review): `inptr', `outptr', `outend', `result', get32 and the
   STANDARD_TO_LOOP_ERR_HANDLER macro, as well as the loop that the
   outermost `break' and `continue' refer to, come from the included loop
   template and are not visible here.  */
#define MIN_NEEDED_INPUT TO_LOOP_MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT TO_LOOP_MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT TO_LOOP_MIN_NEEDED_TO
#define MAX_NEEDED_OUTPUT TO_LOOP_MAX_NEEDED_TO
#define LOOPFCT TO_LOOP
#define BODY \
  { \
    uint32_t ch; \
    size_t written; \
    \
    ch = get32 (inptr); \
    \
    if (var == iso2022jp2) \
      { \
        /* Handle Unicode tag characters (range U+E0000..U+E007F).  */ \
        if (__glibc_unlikely ((ch >> 7) == (0xe0000 >> 7))) \
          { \
            /* Reduce to the 7-bit payload and fold upper case to lower \
               case before matching.  */ \
            ch &= 0x7f; \
            if (ch >= 'A' && ch <= 'Z') \
              ch += 'a' - 'A'; \
            if (ch == 0x01) \
              tag = TAG_language; \
            else if (ch == 'j' && tag == TAG_language) \
              tag = TAG_language_j; \
            else if (ch == 'a' && tag == TAG_language_j) \
              tag = TAG_language_ja; \
            else if (ch == 'k' && tag == TAG_language) \
              tag = TAG_language_k; \
            else if (ch == 'o' && tag == TAG_language_k) \
              tag = TAG_language_ko; \
            else if (ch == 'z' && tag == TAG_language) \
              tag = TAG_language_z; \
            else if (ch == 'h' && tag == TAG_language_z) \
              tag = TAG_language_zh; \
            else if (ch == 0x7f) \
              tag = TAG_none; \
            else \
              { \
                /* Other tag characters reset the tag parsing state (if the \
                   current state is a temporary state) or are ignored (if \
                   the current state is a stable one).  */ \
                if (tag >= TAG_language) \
                  tag = TAG_none; \
              } \
            \
            inptr += 4; \
            continue; \
          } \
        \
        /* Non-tag characters reset the tag parsing state, if the current \
           state is a temporary state.  */ \
        if (__glibc_unlikely (tag >= TAG_language)) \
          tag = TAG_none; \
      } \
    \
    /* First see whether we can write the character using the currently \
       selected character set.  But ignore the selected character set if \
       the current language tag shows different preferences.  */ \
    if (set == ASCII_set) \
      { \
        /* Please note that the NUL byte is *not* matched if we are not \
           currently using the ASCII charset.  This is because we must \
           switch to the initial state whenever a NUL byte is written.  */ \
        if (ch <= 0x7f) \
          { \
            *outptr++ = ch; \
            written = 1; \
            \
            /* At the beginning of a line, G2 designation is cleared.  */ \
            if (var == iso2022jp2 && ch == 0x0a) \
              set2 = UNSPECIFIED_set; \
          } \
        else \
          written = __UNKNOWN_10646_CHAR; \
      } \
    /* ISO-2022-JP recommends to encode the newline character always in \
       ASCII since this allows a context-free interpretation of the \
       characters at the beginning of the next line.  Otherwise it would \
       have to be known whether the last line ended using ASCII or \
       JIS X 0201.  */ \
    else if (set == JISX0201_Roman_set \
             && (__builtin_expect (tag == TAG_none, 1) \
                 || tag == TAG_language_ja)) \
      { \
        unsigned char buf[1]; \
        written = ucs4_to_jisx0201 (ch, buf); \
        if (written != __UNKNOWN_10646_CHAR) \
          { \
            /* Only bytes above the space character and below 0x80 are \
               written in this set.  */ \
            if (buf[0] > 0x20 && buf[0] < 0x80) \
              { \
                *outptr++ = buf[0]; \
                written = 1; \
              } \
            else \
              written = __UNKNOWN_10646_CHAR; \
          } \
      } \
    else if (set == JISX0201_Kana_set \
             && (__builtin_expect (tag == TAG_none, 1) \
                 || tag == TAG_language_ja)) \
      { \
        unsigned char buf[1]; \
        written = ucs4_to_jisx0201 (ch, buf); \
        if (written != __UNKNOWN_10646_CHAR) \
          { \
            /* Only bytes in 0xa1..0xdf are accepted; they are written \
               with 0x80 subtracted.  */ \
            if (buf[0] > 0xa0 && buf[0] < 0xe0) \
              { \
                *outptr++ = buf[0] - 0x80; \
                written = 1; \
              } \
            else \
              written = __UNKNOWN_10646_CHAR; \
          } \
      } \
    else \
      { \
        if ((set == JISX0208_1978_set || set == JISX0208_1983_set) \
            && (__builtin_expect (tag == TAG_none, 1) \
                || tag == TAG_language_ja)) \
          written = ucs4_to_jisx0208 (ch, outptr, outend - outptr); \
        else if (set == JISX0212_set \
                 && (__builtin_expect (tag == TAG_none, 1) \
                     || tag == TAG_language_ja)) \
          written = ucs4_to_jisx0212 (ch, outptr, outend - outptr); \
        else if (set == GB2312_set \
                 && (__builtin_expect (tag == TAG_none, 1) \
                     || tag == TAG_language_zh)) \
          written = ucs4_to_gb2312 (ch, outptr, outend - outptr); \
        else if (set == KSC5601_set \
                 && (__builtin_expect (tag == TAG_none, 1) \
                     || tag == TAG_language_ko)) \
          written = ucs4_to_ksc5601 (ch, outptr, outend - outptr); \
        else \
          written = __UNKNOWN_10646_CHAR; \
        \
        /* A result of zero means the output buffer has no room.  */ \
        if (__glibc_unlikely (written == 0)) \
          { \
            result = __GCONV_FULL_OUTPUT; \
            break; \
          } \
        else if (written != __UNKNOWN_10646_CHAR) \
          outptr += written; \
      } \
    \
    /* Second attempt: use the G2 set that is already designated, through \
       an `Esc N' single shift.  Only done when no language tag is in \
       effect.  */ \
    if (written == __UNKNOWN_10646_CHAR \
        && __builtin_expect (tag == TAG_none, 1)) \
      { \
        if (set2 == ISO88591_set) \
          { \
            if (ch >= 0x80 && ch <= 0xff) \
              { \
                if (__glibc_unlikely (outptr + 3 > outend)) \
                  { \
                    result = __GCONV_FULL_OUTPUT; \
                    break; \
                  } \
                \
                *outptr++ = ESC; \
                *outptr++ = 'N'; \
                *outptr++ = ch & 0x7f; \
                written = 3; \
              } \
          } \
        else if (set2 == ISO88597_set) \
          { \
            if (__glibc_likely (ch < 0xffff)) \
              { \
                const struct gap *rp = from_idx; \
                \
                /* Find the first range whose end is not below ch.  */ \
                while (ch > rp->end) \
                  ++rp; \
                if (ch >= rp->start) \
                  { \
                    unsigned char res = \
                      iso88597_from_ucs4[ch - 0xa0 + rp->idx]; \
                    if (res != '\0') \
                      { \
                        if (__glibc_unlikely (outptr + 3 > outend)) \
                          { \
                            result = __GCONV_FULL_OUTPUT; \
                            break; \
                          } \
                        \
                        *outptr++ = ESC; \
                        *outptr++ = 'N'; \
                        *outptr++ = res & 0x7f; \
                        written = 3; \
                      } \
                  } \
              } \
          } \
      } \
    \
    if (written == __UNKNOWN_10646_CHAR) \
      { \
        /* The attempts to use the currently selected character set \
           failed, either because the language tag changed, or because \
           the character requires a different character set, or because \
           the character is unknown. \
           The CJK character sets partially overlap when seen as subsets \
           of ISO 10646; therefore there is no single correct result. \
           We use a preference order which depends on the language tag.  */ \
        \
        if (ch <= 0x7f) \
          { \
            /* We must encode using ASCII.  First write out the \
               escape sequence.  */ \
            if (__glibc_unlikely (outptr + 3 > outend)) \
              { \
                result = __GCONV_FULL_OUTPUT; \
                break; \
              } \
            \
            *outptr++ = ESC; \
            *outptr++ = '('; \
            *outptr++ = 'B'; \
            set = ASCII_set; \
            \
            if (__glibc_unlikely (outptr + 1 > outend)) \
              { \
                result = __GCONV_FULL_OUTPUT; \
                break; \
              } \
            *outptr++ = ch; \
            \
            /* At the beginning of a line, G2 designation is cleared.  */ \
            if (var == iso2022jp2 && ch == 0x0a) \
              set2 = UNSPECIFIED_set; \
          } \
        else \
          { \
            /* Now it becomes difficult.  We must search the other \
               character sets one by one.  Use an ordered conversion \
               list that depends on the current language tag.  */ \
            cvlist_t conversion_list; \
            unsigned char buf[2]; \
            int res = __GCONV_ILLEGAL_INPUT; \
            \
            if (var == iso2022jp2) \
              conversion_list = conversion_lists[tag >> 8]; \
            else \
              conversion_list = CVLIST (japanese, 0, 0, 0, 0); \
            \
            /* Inside the switch every `break' leaves the switch only. \
               `res' tells the loop condition whether to go on with the \
               next group (still __GCONV_ILLEGAL_INPUT) or to stop.  */ \
            do \
              switch (CVLIST_FIRST (conversion_list)) \
                { \
                case european: \
                  \
                  /* Try ISO 8859-1 upper half.  */ \
                  if (ch >= 0x80 && ch <= 0xff) \
                    { \
                      if (set2 != ISO88591_set) \
                        { \
                          if (__builtin_expect (outptr + 3 > outend, 0)) \
                            { \
                              res = __GCONV_FULL_OUTPUT; \
                              break; \
                            } \
                          *outptr++ = ESC; \
                          *outptr++ = '.'; \
                          *outptr++ = 'A'; \
                          set2 = ISO88591_set; \
                        } \
                      \
                      if (__glibc_unlikely (outptr + 3 > outend)) \
                        { \
                          res = __GCONV_FULL_OUTPUT; \
                          break; \
                        } \
                      *outptr++ = ESC; \
                      *outptr++ = 'N'; \
                      *outptr++ = ch - 0x80; \
                      res = __GCONV_OK; \
                      break; \
                    } \
                  \
                  /* Try ISO 8859-7 upper half.  */ \
                  if (__glibc_likely (ch < 0xffff)) \
                    { \
                      const struct gap *rp = from_idx; \
                      \
                      while (ch > rp->end) \
                        ++rp; \
                      if (ch >= rp->start) \
                        { \
                          unsigned char ch2 = \
                            iso88597_from_ucs4[ch - 0xa0 + rp->idx]; \
                          if (ch2 != '\0') \
                            { \
                              if (set2 != ISO88597_set) \
                                { \
                                  if (__builtin_expect (outptr + 3 > outend, \
                                                        0)) \
                                    { \
                                      res = __GCONV_FULL_OUTPUT; \
                                      break; \
                                    } \
                                  *outptr++ = ESC; \
                                  *outptr++ = '.'; \
                                  *outptr++ = 'F'; \
                                  set2 = ISO88597_set; \
                                } \
                              \
                              if (__builtin_expect (outptr + 3 > outend, 0)) \
                                { \
                                  res = __GCONV_FULL_OUTPUT; \
                                  break; \
                                } \
                              *outptr++ = ESC; \
                              *outptr++ = 'N'; \
                              *outptr++ = ch2 - 0x80; \
                              res = __GCONV_OK; \
                              break; \
                            } \
                        } \
                    } \
                  \
                  break; \
                  \
                case japanese: \
                  \
                  /* Try JIS X 0201 Roman.  */ \
                  written = ucs4_to_jisx0201 (ch, buf); \
                  if (written != __UNKNOWN_10646_CHAR \
                      && buf[0] > 0x20 && buf[0] < 0x80) \
                    { \
                      if (set != JISX0201_Roman_set) \
                        { \
                          if (__builtin_expect (outptr + 3 > outend, 0)) \
                            { \
                              res = __GCONV_FULL_OUTPUT; \
                              break; \
                            } \
                          *outptr++ = ESC; \
                          *outptr++ = '('; \
                          *outptr++ = 'J'; \
                          set = JISX0201_Roman_set; \
                        } \
                      \
                      if (__glibc_unlikely (outptr + 1 > outend)) \
                        { \
                          res = __GCONV_FULL_OUTPUT; \
                          break; \
                        } \
                      *outptr++ = buf[0]; \
                      res = __GCONV_OK; \
                      break; \
                    } \
                  \
                  /* Try JIS X 0208.  */ \
                  written = ucs4_to_jisx0208 (ch, buf, 2); \
                  if (written != __UNKNOWN_10646_CHAR) \
                    { \
                      if (set != JISX0208_1983_set) \
                        { \
                          if (__builtin_expect (outptr + 3 > outend, 0)) \
                            { \
                              res = __GCONV_FULL_OUTPUT; \
                              break; \
                            } \
                          *outptr++ = ESC; \
                          *outptr++ = '$'; \
                          *outptr++ = 'B'; \
                          set = JISX0208_1983_set; \
                        } \
                      \
                      if (__glibc_unlikely (outptr + 2 > outend)) \
                        { \
                          res = __GCONV_FULL_OUTPUT; \
                          break; \
                        } \
                      *outptr++ = buf[0]; \
                      *outptr++ = buf[1]; \
                      res = __GCONV_OK; \
                      break; \
                    } \
                  \
                  if (__glibc_unlikely (var == iso2022jp)) \
                    /* Don't use the other Japanese character sets.  */ \
                    break; \
                  \
                  /* Try JIS X 0212.  */ \
                  written = ucs4_to_jisx0212 (ch, buf, 2); \
                  if (written != __UNKNOWN_10646_CHAR) \
                    { \
                      if (set != JISX0212_set) \
                        { \
                          if (__builtin_expect (outptr + 4 > outend, 0)) \
                            { \
                              res = __GCONV_FULL_OUTPUT; \
                              break; \
                            } \
                          *outptr++ = ESC; \
                          *outptr++ = '$'; \
                          *outptr++ = '('; \
                          *outptr++ = 'D'; \
                          set = JISX0212_set; \
                        } \
                      \
                      if (__glibc_unlikely (outptr + 2 > outend)) \
                        { \
                          res = __GCONV_FULL_OUTPUT; \
                          break; \
                        } \
                      *outptr++ = buf[0]; \
                      *outptr++ = buf[1]; \
                      res = __GCONV_OK; \
                      break; \
                    } \
                  \
                  break; \
                  \
                case chinese: \
                  assert (var == iso2022jp2); \
                  \
                  /* Try GB 2312.  */ \
                  written = ucs4_to_gb2312 (ch, buf, 2); \
                  if (written != __UNKNOWN_10646_CHAR) \
                    { \
                      if (set != GB2312_set) \
                        { \
                          if (__builtin_expect (outptr + 3 > outend, 0)) \
                            { \
                              res = __GCONV_FULL_OUTPUT; \
                              break; \
                            } \
                          *outptr++ = ESC; \
                          *outptr++ = '$'; \
                          *outptr++ = 'A'; \
                          set = GB2312_set; \
                        } \
                      \
                      if (__glibc_unlikely (outptr + 2 > outend)) \
                        { \
                          res = __GCONV_FULL_OUTPUT; \
                          break; \
                        } \
                      *outptr++ = buf[0]; \
                      *outptr++ = buf[1]; \
                      res = __GCONV_OK; \
                      break; \
                    } \
                  \
                  break; \
                  \
                case korean: \
                  assert (var == iso2022jp2); \
                  \
                  /* Try KSC 5601.  */ \
                  written = ucs4_to_ksc5601 (ch, buf, 2); \
                  if (written != __UNKNOWN_10646_CHAR) \
                    { \
                      if (set != KSC5601_set) \
                        { \
                          if (__builtin_expect (outptr + 4 > outend, 0)) \
                            { \
                              res = __GCONV_FULL_OUTPUT; \
                              break; \
                            } \
                          *outptr++ = ESC; \
                          *outptr++ = '$'; \
                          *outptr++ = '('; \
                          *outptr++ = 'C'; \
                          set = KSC5601_set; \
                        } \
                      \
                      if (__glibc_unlikely (outptr + 2 > outend)) \
                        { \
                          res = __GCONV_FULL_OUTPUT; \
                          break; \
                        } \
                      *outptr++ = buf[0]; \
                      *outptr++ = buf[1]; \
                      res = __GCONV_OK; \
                      break; \
                    } \
                  \
                  break; \
                  \
                case other: \
                  assert (var == iso2022jp2); \
                  \
                  /* Try JIS X 0201 Kana.  This is not officially part \
                     of ISO-2022-JP-2, according to RFC 1554.  Therefore \
                     we try this only after all other attempts.  */ \
                  written = ucs4_to_jisx0201 (ch, buf); \
                  if (written != __UNKNOWN_10646_CHAR && buf[0] >= 0x80) \
                    { \
                      if (set != JISX0201_Kana_set) \
                        { \
                          if (__builtin_expect (outptr + 3 > outend, 0)) \
                            { \
                              res = __GCONV_FULL_OUTPUT; \
                              break; \
                            } \
                          *outptr++ = ESC; \
                          *outptr++ = '('; \
                          *outptr++ = 'I'; \
                          set = JISX0201_Kana_set; \
                        } \
                      \
                      if (__glibc_unlikely (outptr + 1 > outend)) \
                        { \
                          res = __GCONV_FULL_OUTPUT; \
                          break; \
                        } \
                      *outptr++ = buf[0] - 0x80; \
                      res = __GCONV_OK; \
                      break; \
                    } \
                  \
                  break; \
                  \
                default: \
                  abort (); \
                } \
            while (res == __GCONV_ILLEGAL_INPUT \
                   && (conversion_list = CVLIST_REST (conversion_list)) != 0);\
            \
            if (res == __GCONV_FULL_OUTPUT) \
              { \
                result = res; \
                break; \
              } \
            \
            /* No group could represent the character.  */ \
            if (res == __GCONV_ILLEGAL_INPUT) \
              { \
                STANDARD_TO_LOOP_ERR_HANDLER (4); \
              } \
          } \
      } \
    \
    /* Now that we wrote the output increment the input pointer.  */ \
    inptr += 4; \
  }
#define LOOP_NEED_FLAGS
#define EXTRA_LOOP_DECLS , enum variant var, int *setp
/* The loop works on local copies of the three state fields.
   REINIT_PARAMS reloads them from the state word, and UPDATE_PARAMS
   stores exactly `set | set2 | tag' back.  */
#define INIT_PARAMS int set = *setp & CURRENT_SEL_MASK; \
                    int set2 = *setp & CURRENT_ASSIGN_MASK; \
                    int tag = *setp & CURRENT_TAG_MASK;
#define REINIT_PARAMS do \
                        { \
                          set = *setp & CURRENT_SEL_MASK; \
                          set2 = *setp & CURRENT_ASSIGN_MASK; \
                          tag = *setp & CURRENT_TAG_MASK; \
                        } \
                      while (0)
#define UPDATE_PARAMS *setp = set | set2 | tag
#include <iconv/loop.c>
1031
1032
1033/* Now define the toplevel functions. */
1034#include <iconv/skeleton.c>
1035