/* Mapping tables for JOHAB handling.
   Copyright (C) 1998-2016 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Jungshik Shin <jshin@pantheon.yale.edu>
   and Ulrich Drepper <drepper@cygnus.com>, 1998.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
20
#include <dlfcn.h>
#include <stdint.h>
#include <ksc5601.h>
24
/* Tables mapping the bit pattern of a JOHAB Hangul code to Hangul Jamo
   indices.  5 bits each are used to encode
   leading consonants (19 + 1 filler), medial vowels (21 + 1 filler)
   and trailing consonants (27 + 1 filler).

   KS C 5601-1992 Annex 3 Table 2
   0 : Filler, -1 : invalid, >= 1 : valid

   `init' is indexed by the leading-consonant field, i.e. bits 10-14 of
   the 16-bit code.  All 32 slots are spelled out so that no slot is left
   to the implicit zero initialiser, which would read as "filler".  */
static const int init[32] =
{
  -1,  0,  1,  2,  3,  4,  5,  6,
   7,  8,  9, 10, 11, 12, 13, 14,
  15, 16, 17, 18, 19, -1, -1, -1,
  -1, -1, -1, -1, -1, -1, -1, -1
};
/* Medial-vowel field (bits 5-9 of the 16-bit JOHAB code) to vowel index:
   -1 is invalid, 0 is the filler, 1..21 are the vowels.  The first two
   codes of every group of eight are invalid.  */
static const int mid[32] =
{
  -1, -1,  0,  1,  2,  3,  4,  5,
  -1, -1,  6,  7,  8,  9, 10, 11,
  -1, -1, 12, 13, 14, 15, 16, 17,
  -1, -1, 18, 19, 20, 21, -1, -1
};
/* Trailing-consonant field (bits 0-4 of the 16-bit JOHAB code) to final
   index: -1 is invalid, 0 is the filler, 1..27 are the consonants.
   Code 0x12 in the middle of the range is invalid.  */
static const int final[32] =
{
  -1,  0,  1,  2,  3,  4,  5,  6,
   7,  8,  9, 10, 11, 12, 13, 14,
  15, 16, -1, 17, 18, 19, 20, 21,
  22, 23, 24, 25, 26, 27, -1, -1
};
51
/*
   Hangul Jamo in Johab to Unicode 2.0 : Unicode 2.0
   defines 51 Hangul Compatibility Jamos in the block [0x3131,0x3163]
   (consonants in [0x3131,0x314e], vowels in [0x314f,0x3163]).

   It's to be considered later which Jamo block to use, Compatibility
   block [0x3131,0x314e] or Hangul Conjoining Jamo block, [0x1100,0x11ff]

   `init_to_ucs' maps a leading-consonant index (0-based, i.e. the value
   from the `init' table minus one) to its Compatibility Jamo.  */
static const uint32_t init_to_ucs[19] =
{
  0x3131, 0x3132, 0x3134, 0x3137, 0x3138, 0x3139, 0x3141, 0x3142,
  0x3143, 0x3145, 0x3146, 0x3147, 0x3148, 0x3149, 0x314a, 0x314b,
  0x314c, 0x314d, 0x314e
};
66
/* Maps a trailing-consonant index (value from the `final' table minus
   one) that appears on its own, without leading consonant or vowel, to a
   Hangul Compatibility Jamo.  A 0 entry means there is no mapping; the
   decoding loop treats a resulting 0 as an illegal character.  The decoder
   only indexes entries 0..26; the declared size of 31 is kept unchanged.  */
static const uint32_t final_to_ucs[31] =
{
  0,      0,      0x3133, 0,      0x3135, 0x3136, 0,      0,
  0x313a, 0x313b, 0x313c, 0x313d, 0x313e, 0x313f,
  0x3140, 0,      0,      0x3144, 0,      0,      0,      0,
  0,      0,      0,      0,      0,      0,      0,      0,      0
};
74
/* The following three arrays are used to convert
   precomposed Hangul syllables in [0xac00,0xd7a3]
   to Jamo bit patterns for Johab encoding

   cf. : KS C 5601-1992, Annex3 Table 2

   Arrays are used to speed up things although it's possible
   to get the same result arithmetically.

   `init_to_bit' gives, for each of the 19 leading consonants, the high
   bit plus the leading-consonant field of the JOHAB code:
   0x8000 | ((index + 2) << 10).  */
static const int init_to_bit[19] =
{
  0x8800, 0x8c00, 0x9000, 0x9400, 0x9800, 0x9c00,
  0xa000, 0xa400, 0xa800, 0xac00, 0xb000, 0xb400,
  0xb800, 0xbc00, 0xc000, 0xc400, 0xc800, 0xcc00,
  0xd000
};
92
/* Medial-vowel field (already shifted into bits 5-9) of the JOHAB code
   for each of the 21 vowels of a precomposed Hangul syllable.  The gaps
   between the rows correspond to the invalid and filler slots of the
   `mid' decoding table.  */
static const int mid_to_bit[21] =
{
          0x0060, 0x0080, 0x00a0, 0x00c0, 0x00e0,
  0x0140, 0x0160, 0x0180, 0x01a0, 0x01c0, 0x01e0,
  0x0240, 0x0260, 0x0280, 0x02a0, 0x02c0, 0x02e0,
  0x0340, 0x0360, 0x0380, 0x03a0
};
100
/* Trailing-consonant field (bits 0-4) of the JOHAB code for each of the
   28 final positions of a precomposed Hangul syllable; entry 0 (no final)
   is the filler code 0x01.  Code 0x12 is not used, matching the invalid
   slot in the `final' decoding table.  */
static const int final_to_bit[28] =
{
  0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a,
  0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 0x10, 0x11,
  0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d
};
106
/* The conversion table from
   UCS4 Hangul Compatibility Jamo in [0x3131,0x3163]
   to Johab

   cf. 1. KS C 5601-1992 Annex 3 Table 2
       2. Unicode 2.0 manual

   The table is indexed by (UCS4 value - 0x3131).  The first 30 entries
   cover the consonants [0x3131,0x314e], the remaining 21 the vowels
   [0x314f,0x3163].  Each entry is a complete two-byte JOHAB code in which
   the fields that are not present hold their filler codes.  */
static const uint16_t jamo_from_ucs_table[51] =
{
  /* Consonants, 0x3131 .. 0x314e.  */
  0x8841, 0x8c41,
  0x8444,
  0x9041,
  0x8446, 0x8447,
  0x9441, 0x9841, 0x9c41,
  0x844a, 0x844b, 0x844c, 0x844d, 0x844e, 0x844f, 0x8450,
  0xa041, 0xa441, 0xa841,
  0x8454,
  0xac41, 0xb041, 0xb441, 0xb841, 0xbc41,
  0xc041, 0xc441, 0xc841, 0xcc41, 0xd041,
  /* Vowels, 0x314f .. 0x3163.  */
  0x8461, 0x8481, 0x84a1, 0x84c1, 0x84e1,
  0x8541, 0x8561, 0x8581, 0x85a1, 0x85c1, 0x85e1,
  0x8641, 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1,
  0x8741, 0x8761, 0x8781, 0x87a1
};
132
133
134static uint32_t
135johab_sym_hanja_to_ucs (uint_fast32_t idx, uint_fast32_t c1, uint_fast32_t c2)
136{
137 if (idx <= 0xdefe)
138 return (uint32_t) __ksc5601_sym_to_ucs[(c1 - 0xd9) * 188 + c2
139 - (c2 > 0x90 ? 0x43 : 0x31)];
140 else
141 return (uint32_t) __ksc5601_hanja_to_ucs[(c1 - 0xe0) * 188 + c2
142 - (c2 > 0x90 ? 0x43 : 0x31)];
143}
/* Definitions used in the body of the `gconv' function.  These macros
   parameterise <iconv/skeleton.c>, which is included at the end of this
   file; their exact interpretation is defined there.  */
#define CHARSET_NAME		"JOHAB//"
#define FROM_LOOP		from_johab
#define TO_LOOP			to_johab
#define DEFINE_INIT		1
#define DEFINE_FINI		1
/* A JOHAB character occupies one or two bytes.  */
#define MIN_NEEDED_FROM		1
#define MAX_NEEDED_FROM		2
/* The other side is UCS4: always four bytes per character.  */
#define MIN_NEEDED_TO		4
/* Both conversion directions are provided below.  */
#define ONE_DIRECTION		0
154
155
/* First define the conversion function from JOHAB to UCS4.  The input
   side is JOHAB (the FROM sizes), the output side UCS4 (the TO size).  */
#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
#define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
#define LOOPFCT			FROM_LOOP
/* Loop body for JOHAB -> UCS4.  Decodes one character starting at INPTR
   and stores it as a 4-byte value at OUTPTR.

   Bytes up to 0x7f are passed through, except that 0x5c becomes U+20A9.
   Everything else is a two-byte code: lead bytes up to 0xd3 are Hangul
   and are split into three 5-bit Jamo fields; the remaining lead bytes
   are symbols and Hanja, looked up via johab_sym_hanja_to_ucs.

   NOTE(review): the STANDARD_FROM_LOOP_ERR_HANDLER macro comes from the
   included loop/skeleton code and is not visible here; the code below
   relies on it leaving the current iteration (the statements following
   each use must not run for an invalid input).  */
#define BODY \
  { \
    uint32_t ch = *inptr; \
    \
    if (ch <= 0x7f) \
      { \
        /* Plain ISO646-KR.  */ \
        if (ch == 0x5c) \
          ch = 0x20a9; /* half-width Korean Currency WON sign */ \
        ++inptr; \
      } \
    /* Johab : 1. Hangul \
       1st byte : 0x84-0xd3 \
       2nd byte : 0x41-0x7e, 0x81-0xfe \
       2. Hanja & Symbol : \
       1st byte : 0xd8-0xde, 0xe0-0xf9 \
       2nd byte : 0x31-0x7e, 0x91-0xfe \
       0xd831-0xd87e and 0xd891-0xd8fe are user-defined area \
       (lead byte 0xd8 is rejected by the test below).  */ \
    else \
      { \
        if (__builtin_expect (ch > 0xf9, 0) \
            || __builtin_expect (ch == 0xdf, 0) \
            || (__builtin_expect (ch > 0x7e, 0) && ch < 0x84) \
            || (__builtin_expect (ch > 0xd3, 0) && ch < 0xd9)) \
          { \
            /* These are illegal.  */ \
            STANDARD_FROM_LOOP_ERR_HANDLER (1); \
          } \
        else \
          { \
            /* Two-byte character.  First test whether the next \
               character is also available.  */ \
            uint32_t ch2; \
            uint_fast32_t idx; \
            \
            if (__glibc_unlikely (inptr + 1 >= inend)) \
              { \
                /* The second character is not available.  Store the \
                   intermediate result.  */ \
                result = __GCONV_INCOMPLETE_INPUT; \
                break; \
              } \
            \
            ch2 = inptr[1]; \
            idx = ch * 256 + ch2; \
            if (__glibc_likely (ch <= 0xd3)) \
              { \
                /* Hangul: split the code into its three 5-bit fields.  */ \
                int_fast32_t i, m, f; \
                \
                i = init[(idx & 0x7c00) >> 10]; \
                m = mid[(idx & 0x03e0) >> 5]; \
                f = final[idx & 0x001f]; \
                \
                if (__builtin_expect (i == -1, 0) \
                    || __builtin_expect (m == -1, 0) \
                    || __builtin_expect (f == -1, 0)) \
                  { \
                    /* This is illegal.  */ \
                    STANDARD_FROM_LOOP_ERR_HANDLER (1); \
                  } \
                else if (i > 0 && m > 0) \
                  /* Leading consonant and vowel present: a precomposed \
                     syllable, with or without a final.  */ \
                  ch = ((i - 1) * 21 + (m - 1)) * 28 + f + 0xac00; \
                else if (i > 0 && m == 0 && f == 0) \
                  /* Lone leading consonant.  */ \
                  ch = init_to_ucs[i - 1]; \
                else if (i == 0 && m > 0 && f == 0) \
                  /* Lone vowel.  */ \
                  ch = 0x314e + m; /* 0x314f + m - 1 */ \
                else if (__builtin_expect ((i | m) == 0, 1) \
                         && __builtin_expect (f > 0, 1)) \
                  /* Lone final; may yield 0, which is rejected below.  */ \
                  ch = final_to_ucs[f - 1]; /* round trip?? */ \
                else \
                  { \
                    /* This is illegal.  */ \
                    STANDARD_FROM_LOOP_ERR_HANDLER (1); \
                  } \
              } \
            else \
              { \
                /* Symbol or Hanja.  Reject trail bytes outside \
                   0x31-0x7e / 0x91-0xfe and the excluded parts of the \
                   rows 0xd9, 0xda and 0xde.  */ \
                if (__builtin_expect (ch2 < 0x31, 0) \
                    || (__builtin_expect (ch2 > 0x7e, 0) && ch2 < 0x91) \
                    || __builtin_expect (ch2 == 0xff, 0) \
                    || (__builtin_expect (ch == 0xd9, 0) && ch2 > 0xe8) \
                    || (__builtin_expect (ch == 0xda, 0) \
                        && ch2 > 0xa0 && ch2 < 0xd4) \
                    || (__builtin_expect (ch == 0xde, 0) && ch2 > 0xf1)) \
                  { \
                    /* This is illegal.  */ \
                    STANDARD_FROM_LOOP_ERR_HANDLER (1); \
                  } \
                else \
                  { \
                    ch = johab_sym_hanja_to_ucs (idx, ch, ch2); \
                  } \
              } \
          } \
        \
        if (__glibc_unlikely (ch == 0)) \
          { \
            /* This is an illegal character.  */ \
            STANDARD_FROM_LOOP_ERR_HANDLER (2); \
          } \
        \
        inptr += 2; \
      } \
    \
    put32 (outptr, ch); \
    outptr += 4; \
  }
278#define LOOP_NEED_FLAGS
279#define ONEBYTE_BODY \
280 { \
281 if (c <= 0x7f) \
282 return (c == 0x5c ? 0x20a9 : c); \
283 else \
284 return WEOF; \
285 }
286#include <iconv/loop.c>
287
288
/* Next, define the other direction.  Here the input side is UCS4 (the TO
   size) and the output side JOHAB (the FROM sizes).  */
#define MIN_NEEDED_INPUT	MIN_NEEDED_TO
#define MIN_NEEDED_OUTPUT	MIN_NEEDED_FROM
#define MAX_NEEDED_OUTPUT	MAX_NEEDED_FROM
#define LOOPFCT			TO_LOOP
/* Loop body for UCS4 -> JOHAB.  Reads one 4-byte character from INPTR and
   writes one or two bytes at OUTPTR.

   Values up to 0x7f other than 0x5c are copied, U+20A9 becomes 0x5c,
   precomposed Hangul syllables and Compatibility Jamo are converted with
   the tables above, and Hanja and symbols go through the KS C 5601
   helpers, whose two-byte result is then re-packed into JOHAB form.

   NOTE(review): STANDARD_TO_LOOP_ERR_HANDLER and UNICODE_TAG_HANDLER come
   from the included loop/skeleton code and are not visible here; the code
   relies on the error handler leaving the current iteration.  The
   re-packing arithmetic assumes the KS C 5601 helpers store two bytes
   that each start at 0x21 - TODO confirm against ksc5601.h.  */
#define BODY \
  { \
    uint32_t ch = get32 (inptr); \
    \
    if (ch <= 0x7f && ch != 0x5c) \
      *outptr++ = ch; \
    else \
      { \
        if (ch >= 0xac00 && ch <= 0xd7a3) \
          { \
            /* Precomposed Hangul syllable.  */ \
            if (__glibc_unlikely (outptr + 2 > outend)) \
              { \
                result = __GCONV_FULL_OUTPUT; \
                break; \
              } \
            \
            ch -= 0xac00; \
            \
            ch = (init_to_bit[ch / 588]	  /* 21 * 28 = 588 */ \
                  + mid_to_bit[(ch / 28) % 21]/* (ch % (21 * 28)) / 28 */ \
                  + final_to_bit[ch % 28]);  /* (ch % (21 * 28)) % 28 */ \
            \
            *outptr++ = ch / 256; \
            *outptr++ = ch % 256; \
          } \
        /* KS C 5601-1992 Annex 3 regards 0xA4DA(Hangul Filler : U3164) \
           as symbol */ \
        else if (ch >= 0x3131 && ch <= 0x3163) \
          { \
            /* Hangul Compatibility Jamo.  Check for room first, as the \
               Hangul branch above does.  */ \
            if (__glibc_unlikely (outptr + 2 > outend)) \
              { \
                result = __GCONV_FULL_OUTPUT; \
                break; \
              } \
            \
            ch = jamo_from_ucs_table[ch - 0x3131]; \
            \
            *outptr++ = ch / 256; \
            *outptr++ = ch % 256; \
          } \
        else if ((ch >= 0x4e00 && ch <= 0x9fa5) \
                 || (ch >= 0xf900 && ch <= 0xfa0b)) \
          { \
            /* Hanja.  */ \
            size_t written; \
            uint32_t temp; \
            \
            written = ucs4_to_ksc5601_hanja (ch, outptr, outend - outptr); \
            if (__glibc_unlikely (written == 0)) \
              { \
                result = __GCONV_FULL_OUTPUT; \
                break; \
              } \
            if (__glibc_unlikely (written == __UNKNOWN_10646_CHAR)) \
              { \
                STANDARD_TO_LOOP_ERR_HANDLER (4); \
              } \
            \
            /* Turn the two bytes into a linear index (94 columns per \
               source row, rows counted from 0x4a) ...  */ \
            outptr[0] -= 0x4a; \
            outptr[1] -= 0x21; \
            \
            temp = outptr[0] * 94 + outptr[1]; \
            \
            /* ... and redistribute it over JOHAB rows of 188 columns \
               starting at lead byte 0xe0.  Columns 0-77 become trail \
               bytes 0x31-0x7e, columns 78-187 become 0x91-0xfe.  */ \
            outptr[0] = 0xe0 + temp / 188; \
            outptr[1] = temp % 188; \
            outptr[1] += outptr[1] >= 78 ? 0x43 : 0x31; \
            \
            outptr += 2; \
          } \
        else if (ch == 0x20a9) \
          *outptr++ = 0x5c; \
        else \
          { \
            /* Everything else: try the symbol table.  */ \
            size_t written; \
            uint32_t temp; \
            \
            written = ucs4_to_ksc5601_sym (ch, outptr, outend - outptr); \
            if (__glibc_unlikely (written == 0)) \
              { \
                result = __GCONV_FULL_OUTPUT; \
                break; \
              } \
            /* Row 0x22 beyond column 0x68 is rejected; this mirrors the \
               decoder, which refuses 0xd9 followed by a byte > 0xe8.  */ \
            if (__builtin_expect (written == __UNKNOWN_10646_CHAR, 0) \
                || (outptr[0] == 0x22 && outptr[1] > 0x68)) \
              { \
                UNICODE_TAG_HANDLER (ch, 4); \
                STANDARD_TO_LOOP_ERR_HANDLER (4); \
              } \
            \
            /* Two source rows share one JOHAB lead byte: TEMP / 2 is the \
               lead byte and an odd TEMP selects the upper half of the \
               row (shifted by 0x5e = 94 columns).  */ \
            temp = (outptr[0] < 0x4a ? outptr[0] + 0x191 : outptr[0] + 0x176);\
            outptr[1] += (temp % 2 ? 0x5e : 0); \
            /* Map the column onto the trail-byte ranges 0x31-0x7e and \
               0x91-0xfe.  */ \
            outptr[1] += (outptr[1] < 0x6f ? 0x10 : 0x22); \
            outptr[0] = temp / 2; \
            \
            outptr += 2; \
          } \
      } \
    \
    inptr += 4; \
  }
406#define LOOP_NEED_FLAGS
407#include <iconv/loop.c>
408
409
410/* Now define the toplevel functions. */
411#include <iconv/skeleton.c>
412