ibm1364.c source code [glibc_src_2.23/iconvdata/ibm1364.c]

/* Conversion from and to IBM1364.
   Copyright (C) 2005-2016 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Contributed by Masahide Washizawa <washi@jp.ibm.com>, 2005.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
19
20	#include <dlfcn.h>
21	#include <stdint.h>
22	#include <wchar.h>
23	#include <byteswap.h>
24
#ifndef CHARSET_NAME
/* This is really the IBM1364 converter, not another module sharing
   the code.  Everything below is only a default: when CHARSET_NAME is
   already defined at this point, none of these names are (re)defined
   here and whoever defined CHARSET_NAME is expected to supply them.  */

/* Header providing the conversion tables named below.  */
# define DATA_HEADER	"ibm1364.h"
# define CHARSET_NAME	"IBM1364//"

/* Names given to the two conversion loops generated further down.  */
# define FROM_LOOP	from_ibm1364
# define TO_LOOP	to_ibm1364

/* Tables used for the charset -> UCS4 direction.  */
# define SB_TO_UCS4	__ibm1364sb_to_ucs4
# define DB_TO_UCS4_IDX	__ibm1364db_to_ucs4_idx
# define DB_TO_UCS4	__ibm1364db_to_ucs4

/* Tables used for the UCS4 -> charset direction.  */
# define UCS4_TO_SB_IDX	__ucs4_to_ibm1364sb_idx
# define UCS4_TO_SB	__ucs4_to_ibm1364sb
# define UCS4_TO_DB_IDX	__ucs4_to_ibm1364db_idx
# define UCS4_TO_DB	__ucs4_to_ibm1364db

/* UCS4 values greater than or equal to this limit are not looked up
   in the tables by the UCS4 -> charset loop.  */
# define UCS_LIMIT	0xffff
#endif
41
42
43	#include DATA_HEADER
44
/* The shift sequences for this charset (it does not use ESC).  */
#define SI		0x0F  /* Shift In, host code to turn DBCS off.  */
#define SO		0x0E  /* Shift Out, host code to turn DBCS on.  */
48
/* Definitions used in the body of the `gconv' function.  */

/* A character in the IBM-13XX encoding occupies one or two bytes.  */
#define MIN_NEEDED_FROM	1
#define MAX_NEEDED_FROM	2

/* A UCS4 character occupies four bytes.  With HAS_COMBINED a single
   double-byte input character can expand to two UCS4 characters, hence
   eight bytes.  */
#define MIN_NEEDED_TO	4
#ifdef HAS_COMBINED
# define MAX_NEEDED_TO	8
#else
# define MAX_NEEDED_TO	4
#endif
#define ONE_DIRECTION	0

/* Declarations for the generated conversion function: `curcsp' points
   at the state word that holds the current code set, `save_curcs' is
   the scratch copy used by SAVE_RESET_STATE.  */
#define PREPARE_LOOP \
  int save_curcs; \
  int *curcsp = &data->__statep->__count;

/* Extra argument handed to both conversion loops (see EXTRA_LOOP_DECLS).  */
#define EXTRA_LOOP_ARGS		, curcsp
63
/* Definitions of initialization and destructor function.  */
#define DEFINE_INIT	1
#define DEFINE_FINI	1
67
68
/* Since this is a stateful encoding we have to provide code which resets
   the output state to the initial state.  This has to be done during the
   flushing.

   The code set is stored in `data->__statep->__count' with the low three
   bits masked out; those three bits are always preserved here.  Nothing
   is done when the state already is `sb'.  In the from direction the
   state is simply reset.  In the to direction an `SI' byte is written to
   `outbuf' first; if there is no room, `status' is set to
   __GCONV_FULL_OUTPUT and the state is left untouched.  */
#define EMIT_SHIFT_TO_INIT \
  if ((data->__statep->__count & ~7) != sb) \
    { \
      if (FROM_DIRECTION) \
	data->__statep->__count &= 7; \
      else \
	{ \
	  /* We are not in the initial state.  To switch back we have \
	     to emit `SI'.  */ \
	  if (__glibc_unlikely (outbuf >= outend)) \
	    /* We don't have enough room in the output buffer.  */ \
	    status = __GCONV_FULL_OUTPUT; \
	  else \
	    { \
	      /* Write out the shift sequence.  */ \
	      *outbuf++ = SI; \
	      data->__statep->__count &= 7; \
	    } \
	} \
    }
92
93
/* Since we might have to reset input pointer we must be able to save
   and restore the state.

   With a non-zero SAVE the current state word is copied into
   `save_curcs'; with a zero SAVE the state word is restored from it.
   The expansion is a single if/else statement without the trailing
   semicolon, so it must be used as `SAVE_RESET_STATE (x);'.  */
#define SAVE_RESET_STATE(Save) \
  if (Save) \
    save_curcs = *curcsp; \
  else \
    *curcsp = save_curcs
101
102
/* Current codeset type.  The values are compared against the state word
   with its low three bits masked out (`& ~7'), so neither value may use
   those bits.  */
enum
{
  sb = 0,	/* Single-byte mode; this is the initial state.  */
  db = 64	/* Double-byte mode, entered with SO and left with SI.  */
};
109
110
/* Subroutine to write out converted UCS4 from IBM-13XX.

   Expects `ch' (the double-byte input code), `res' (its table entry),
   `outptr' and `outend' in scope.  The expansion is a plain brace block
   on purpose: the `break' in the combined variant has to leave the
   conversion loop that surrounds the use of the macro, so it must not
   be wrapped in a `do { } while (0)'.  */
#ifdef HAS_COMBINED
# define SUB_COMBINED_UCS_FROM_IBM13XX \
  { \
    if (res != UCS_LIMIT || ch < __TO_UCS4_COMBINED_MIN \
	|| ch > __TO_UCS4_COMBINED_MAX) \
      { \
	/* Ordinary character: one UCS4 value.  */ \
	put32 (outptr, res); \
	outptr += 4; \
      } \
    else \
      { \
	/* This is a combined character.  Make sure we have room.  */ \
	if (__glibc_unlikely (outptr + 8 > outend)) \
	  { \
	    result = __GCONV_FULL_OUTPUT; \
	    break; \
	  } \
	\
	const struct divide *cmbp \
	  = &DB_TO_UCS4_COMB[ch - __TO_UCS4_COMBINED_MIN]; \
	assert (cmbp->res1 != 0 && cmbp->res2 != 0); \
	\
	/* Emit both UCS4 values the input character decomposes into.  */ \
	put32 (outptr, cmbp->res1); \
	outptr += 4; \
	put32 (outptr, cmbp->res2); \
	outptr += 4; \
      } \
  }
#else
# define SUB_COMBINED_UCS_FROM_IBM13XX \
  { \
    put32 (outptr, res); \
    outptr += 4; \
  }
#endif /* HAS_COMBINED */
147
148
149	/ First, define the conversion function from IBM-13XX to UCS4. /
150	#define MIN_NEEDED_INPUT MIN_NEEDED_FROM
151	#define MAX_NEEDED_INPUT MAX_NEEDED_FROM
152	#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
153	#define LOOPFCT FROM_LOOP
154	#define BODY \
155	{ \
156	uint32_t ch = *inptr; \
157	\
158	if (__builtin_expect (ch, 0) == SO) \
159	{ \
160	/* Shift OUT, change to DBCS converter. */ \
161	if (curcs == db) \
162	{ \
163	result = __GCONV_ILLEGAL_INPUT; \
164	break; \
165	} \
166	curcs = db; \
167	++inptr; \
168	continue; \
169	} \
170	if (__builtin_expect (ch, 0) == SI) \
171	{ \
172	/* Shift IN, change to SBCS converter. */ \
173	if (curcs == sb) \
174	{ \
175	result = __GCONV_ILLEGAL_INPUT; \
176	break; \
177	} \
178	curcs = sb; \
179	++inptr; \
180	continue; \
181	} \
182	\
183	if (curcs == sb) \
184	{ \
185	/* Use the IBM13XX table for single byte. */ \
186	uint32_t res = SB_TO_UCS4[ch]; \
187	if (__builtin_expect (res, L'\1') == L'\0' && ch != '\0') \
188	{ \
189	/* This is an illegal character. */ \
190	if (! ignore_errors_p ()) \
191	{ \
192	result = __GCONV_ILLEGAL_INPUT; \
193	break; \
194	} \
195	++*irreversible; \
196	} \
197	else \
198	{ \
199	put32 (outptr, res); \
200	outptr += 4; \
201	} \
202	++inptr; \
203	} \
204	else \
205	{ \
206	assert (curcs == db); \
207	\
208	if (__glibc_unlikely (inptr + 1 >= inend)) \
209	{ \
210	/* The second character is not available. Store the \
211	intermediate result. */ \
212	result = __GCONV_INCOMPLETE_INPUT; \
213	break; \
214	} \
215	\
216	ch = (ch * 0x100) + inptr[1]; \
217	\
218	/* Use the IBM1364 table for double byte. */ \
219	const struct gap *rp2 = DB_TO_UCS4_IDX; \
220	while (ch > rp2->end) \
221	++rp2; \
222	\
223	uint32_t res; \
224	if (__builtin_expect (rp2->start == 0xffff, 0) \
225	\|\| __builtin_expect (ch < rp2->start, 0) \
226	\|\| (res = DB_TO_UCS4[ch + rp2->idx], \
227	__builtin_expect (res, L'\1') == L'\0' && ch != '\0')) \
228	{ \
229	/* This is an illegal character. */ \
230	if (! ignore_errors_p ()) \
231	{ \
232	result = __GCONV_ILLEGAL_INPUT; \
233	break; \
234	} \
235	++*irreversible; \
236	} \
237	else \
238	{ \
239	SUB_COMBINED_UCS_FROM_IBM13XX; \
240	} \
241	inptr += 2; \
242	} \
243	}
244	#define LOOP_NEED_FLAGS
245	#define EXTRA_LOOP_DECLS , int *curcsp
246	#define INIT_PARAMS int curcs = *curcsp & ~7
247	#define UPDATE_PARAMS *curcsp = curcs
248	#include <iconv/loop.c>
249
250
/* Subroutine to convert two UCS4 codes to IBM-13XX.

   Expects `ch' (the UCS4 value at `inptr'), `inptr', `inend', `outptr',
   `outend', `curcs' and `result' in scope and must be expanded inside
   the conversion loop: on success it consumes both UCS4 values and
   `continue's, on a full output buffer it `break's.  If CH does not
   start a known pair, control simply falls through to the code that
   follows the macro.

   The second UCS4 value is only examined when all four of its bytes are
   available (at least eight bytes left in the input).  Checking merely
   for "more than four bytes left" would let get32 read up to three
   bytes past `inend' on input whose length is not a multiple of four.  */
#ifdef HAS_COMBINED
# define SUB_COMBINED_UCS_TO_IBM13XX \
  { \
    /* Find the first entry whose first component is not below CH.  */ \
    const struct combine *cmbp = UCS4_COMB_TO_DB; \
    while (cmbp->res1 < ch) \
      ++cmbp; \
    /* XXX if last char is beginning of combining store in state */ \
    if (cmbp->res1 == ch && inend - inptr >= 8) \
      { \
	/* See if input is part of a combined character.  */ \
	uint32_t ch_next = get32 (inptr + 4); \
	while (cmbp->res2 != ch_next) \
	  { \
	    ++cmbp; \
	    if (cmbp->res1 != ch) \
	      goto not_combined; \
	  } \
	\
	/* It is a combined character.  First make sure we are in \
	   double byte mode.  */ \
	if (curcs == sb) \
	  { \
	    /* We know there is room for at least one byte.  */ \
	    *outptr++ = SO; \
	    curcs = db; \
	  } \
	\
	if (__glibc_unlikely (outptr + 2 > outend)) \
	  { \
	    result = __GCONV_FULL_OUTPUT; \
	    break; \
	  } \
	*outptr++ = cmbp->ch[0]; \
	*outptr++ = cmbp->ch[1]; \
	inptr += 8; \
	continue; \
	\
      not_combined:; \
      } \
  }
#else
# define SUB_COMBINED_UCS_TO_IBM13XX
#endif /* HAS_COMBINED */
295
296
297	/ Next, define the other direction. /
298	#define MIN_NEEDED_INPUT MIN_NEEDED_TO
299	#define MIN_NEEDED_OUTPUT MIN_NEEDED_FROM
300	#define MAX_NEEDED_OUTPUT MAX_NEEDED_FROM
301	#define LOOPFCT TO_LOOP
302	#define BODY \
303	{ \
304	uint32_t ch = get32 (inptr); \
305	\
306	if (__glibc_unlikely (ch >= UCS_LIMIT)) \
307	{ \
308	UNICODE_TAG_HANDLER (ch, 4); \
309	\
310	if (! ignore_errors_p ()) \
311	{ \
312	result = __GCONV_ILLEGAL_INPUT; \
313	break; \
314	} \
315	++*irreversible; \
316	inptr += 4; \
317	continue; \
318	} \
319	\
320	SUB_COMBINED_UCS_TO_IBM13XX; \
321	\
322	const struct gap *rp1 = UCS4_TO_SB_IDX; \
323	while (ch > rp1->end) \
324	++rp1; \
325	\
326	/* Use the UCS4 table for single byte. */ \
327	const char *cp; \
328	if (__builtin_expect (ch < rp1->start, 0) \
329	\|\| (cp = UCS4_TO_SB[ch + rp1->idx], \
330	__builtin_expect (cp[0], L'\1') == L'\0' && ch != '\0')) \
331	{ \
332	/* Use the UCS4 table for double byte. */ \
333	const struct gap *rp2 = UCS4_TO_DB_IDX; \
334	while (ch > rp2->end) \
335	++rp2; \
336	\
337	if (__builtin_expect (ch < rp2->start, 0) \
338	\|\| (cp = UCS4_TO_DB[ch + rp2->idx], \
339	__builtin_expect (cp[0], L'\1') == L'\0' && ch != '\0')) \
340	{ \
341	/* This is an illegal character. */ \
342	if (! ignore_errors_p ()) \
343	{ \
344	result = __GCONV_ILLEGAL_INPUT; \
345	break; \
346	} \
347	++*irreversible; \
348	} \
349	else \
350	{ \
351	if (curcs == sb) \
352	{ \
353	/* We know there is room for at least one byte. */ \
354	*outptr++ = SO; \
355	curcs = db; \
356	} \
357	\
358	if (__glibc_unlikely (outptr + 2 > outend)) \
359	{ \
360	result = __GCONV_FULL_OUTPUT; \
361	break; \
362	} \
363	*outptr++ = cp[0]; \
364	*outptr++ = cp[1]; \
365	} \
366	} \
367	else \
368	{ \
369	if (__glibc_unlikely (curcs == db)) \
370	{ \
371	/* We know there is room for at least one byte. */ \
372	*outptr++ = SI; \
373	curcs = sb; \
374	\
375	if (__glibc_unlikely (outptr >= outend)) \
376	{ \
377	result = __GCONV_FULL_OUTPUT; \
378	break; \
379	} \
380	} \
381	\
382	*outptr++ = cp[0]; \
383	} \
384	\
385	/* Now that we wrote the output increment the input pointer. */ \
386	inptr += 4; \
387	}
388	#define LOOP_NEED_FLAGS
389	#define EXTRA_LOOP_DECLS , int *curcsp
390	#define INIT_PARAMS int curcs = *curcsp & ~7
391	#define REINIT_PARAMS curcs = *curcsp & ~7
392	#define UPDATE_PARAMS *curcsp = curcs
393	#include <iconv/loop.c>
394
395	/ Now define the toplevel functions. /
396	#include <iconv/skeleton.c>
397

Browse the source code of glibc_src_2.23/iconvdata/ibm1364.c