/* strcoll_l.c source code [glibc_src_2.24/string/strcoll_l.c] */

/* Copyright (C) 1995-2016 Free Software Foundation, Inc.
   This file is part of the GNU C Library.
   Written by Ulrich Drepper <drepper@gnu.org>, 1995.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <http://www.gnu.org/licenses/>.  */
18
19
20	#include <assert.h>
21	#include <langinfo.h>
22	#include <locale.h>
23	#include <stddef.h>
24	#include <stdint.h>
25	#include <string.h>
26	#include <sys/param.h>
27
28	#ifndef STRING_TYPE
29	# define STRING_TYPE char
30	# define USTRING_TYPE unsigned char
31	# define STRCOLL __strcoll_l
32	# define STRCMP strcmp
33	# define WEIGHT_H "../locale/weight.h"
34	# define SUFFIX MB
35	# define L(arg) arg
36	#endif
37
38	#define CONCAT(a,b) CONCAT1(a,b)
39	#define CONCAT1(a,b) a##b
40
41	#include "../locale/localeinfo.h"
42	#include WEIGHT_H
43
44	/ Track status while looking for sequences in a string. /
45	typedef struct
46	{
47	int len; / Length of the current sequence. /
48	size_t val; / Position of the sequence relative to the*
49	previous non-ignored sequence. /*
50	size_t idxmax; / Maximum index in sequences. /
51	size_t idxcnt; / Current count of indices. /
52	size_t backw; / Current Backward sequence index. /
53	size_t backw_stop; / Index where the backward sequences stop. /
54	const USTRING_TYPE us; /* The string. /
55	unsigned char rule; / Saved rule for the first sequence. /
56	int32_t idx; / Index to weight of the current sequence. /
57	int32_t save_idx; / Save looked up index of a forward*
58	sequence after the last backward
59	sequence. /*
60	const USTRING_TYPE back_us; /* Beginning of the backward sequence. /
61	} coll_seq;
62
63	/ Get next sequence. Traverse the string as required. /
64	static __always_inline void
65	get_next_seq (coll_seq seq, int* nrules, const unsigned char *rulesets,
66	const USTRING_TYPE weights, const* int32_t *table,
67	const USTRING_TYPE extra, const* int32_t *indirect,
68	int pass)
69	{
70	size_t val = seq->val = `0`;
71	int len = seq->len;
72	size_t backw_stop = seq->backw_stop;
73	size_t backw = seq->backw;
74	size_t idxcnt = seq->idxcnt;
75	size_t idxmax = seq->idxmax;
76	int32_t idx = seq->idx;
77	const USTRING_TYPE *us = seq->us;
78
79	while (len == `0`)
80	{
81	++val;
82	if (backw_stop != ~`0ul`)
83	{
84	/ There is something pushed. /
85	if (backw == backw_stop)
86	{
87	/ The last pushed character was handled. Continue*
88	with forward characters. /*
89	if (idxcnt < idxmax)
90	{
91	idx = seq->save_idx;
92	backw_stop = ~`0ul`;
93	}
94	else
95	{
96	/ Nothing anymore. The backward sequence ended with*
97	the last sequence in the string. Note that len is
98	still zero. /*
99	idx = `0`;
100	break;
101	}
102	}
103	else
104	{
105	/ XXX Traverse BACKW sequences from the beginning of*
106	BACKW_STOP to get the next sequence. Is ther a quicker way
107	to do this? /*
108	size_t i = backw_stop;
109	us = seq->back_us;
110	while (i < backw)
111	{
112	int32_t tmp = findidx (table, indirect, extra, &us, -`1`);
113	idx = tmp & `0xffffff`;
114	i++;
115	}
116	--backw;
117	us = seq->us;
118	}
119	}
120	else
121	{
122	backw_stop = idxmax;
123	int32_t prev_idx = idx;
124
125	while (*us != L(`'\0'`))
126	{
127	int32_t tmp = findidx (table, indirect, extra, &us, -`1`);
128	unsigned char rule = tmp >> `24`;
129	prev_idx = idx;
130	idx = tmp & `0xffffff`;
131	idxcnt = idxmax++;
132
133	/ Save the rule for the first sequence. /
134	if (__glibc_unlikely (idxcnt == `0`))
135	seq->rule = rule;
136
137	if ((rulesets[rule * nrules + pass]
138	& sort_backward) == `0`)
139	/ No more backward characters to push. /
140	break;
141	++idxcnt;
142	}
143
144	if (backw_stop >= idxcnt)
145	{
146	/ No sequence at all or just one. /
147	if (idxcnt == idxmax \|\| backw_stop > idxcnt)
148	/ Note that len is still zero. /
149	break;
150
151	backw_stop = ~`0ul`;
152	}
153	else
154	{
155	/ We pushed backward sequences. If the stream ended with the*
156	backward sequence, then we process the last sequence we
157	found. Otherwise we process the sequence before the last
158	one since the last one was a forward sequence. /*
159	seq->back_us = seq->us;
160	seq->us = us;
161	backw = idxcnt;
162	if (idxmax > idxcnt)
163	{
164	backw--;
165	seq->save_idx = idx;
166	idx = prev_idx;
167	}
168	if (backw > backw_stop)
169	backw--;
170	}
171	}
172
173	len = weights[idx++];
174	/ Skip over indices of previous levels. /
175	for (int i = `0`; i < pass; i++)
176	{
177	idx += len;
178	len = weights[idx];
179	idx++;
180	}
181	}
182
183	/ Update the structure. /
184	seq->val = val;
185	seq->len = len;
186	seq->backw_stop = backw_stop;
187	seq->backw = backw;
188	seq->idxcnt = idxcnt;
189	seq->idxmax = idxmax;
190	seq->us = us;
191	seq->idx = idx;
192	}
193
194	/ Compare two sequences. /
195	static __always_inline int
196	do_compare (coll_seq seq1, coll_seq seq2, int position,
197	const USTRING_TYPE *weights)
198	{
199	int seq1len = seq1->len;
200	int seq2len = seq2->len;
201	size_t val1 = seq1->val;
202	size_t val2 = seq2->val;
203	int idx1 = seq1->idx;
204	int idx2 = seq2->idx;
205	int result = `0`;
206
207	/ Test for position if necessary. /
208	if (position && val1 != val2)
209	{
210	result = val1 > val2 ? `1` : -`1`;
211	goto out;
212	}
213
214	/ Compare the two sequences. /
215	do
216	{
217	if (weights[idx1] != weights[idx2])
218	{
219	/ The sequences differ. /
220	result = weights[idx1] - weights[idx2];
221	goto out;
222	}
223
224	/ Increment the offsets. /
225	++idx1;
226	++idx2;
227
228	--seq1len;
229	--seq2len;
230	}
231	while (seq1len > `0` && seq2len > `0`);
232
233	if (position && seq1len != seq2len)
234	result = seq1len - seq2len;
235
236	out:
237	seq1->len = seq1len;
238	seq2->len = seq2len;
239	seq1->idx = idx1;
240	seq2->idx = idx2;
241	return result;
242	}
243
244	int
245	STRCOLL (const STRING_TYPE s1, const* STRING_TYPE *s2, __locale_t l)
246	{
247	struct __locale_data *current = l->__locales[LC_COLLATE];
248	uint_fast32_t nrules = current->values[_NL_ITEM_INDEX (_NL_COLLATE_NRULES)].word;
249	/ We don't assign the following values right away since it might be*
250	unnecessary in case there are no rules. /*
251	const unsigned char *rulesets;
252	const int32_t *table;
253	const USTRING_TYPE *weights;
254	const USTRING_TYPE *extra;
255	const int32_t *indirect;
256
257	if (nrules == `0`)
258	return STRCMP (s1, s2);
259
260	/ Catch empty strings. /
261	if (__glibc_unlikely (s1 == `'\0'`) \|\| __glibc_unlikely (s2 == `'\0'`))
262	return (s1 != `'\0'`) - (s2 != `'\0'`);
263
264	rulesets = (const unsigned char *)
265	current->values[_NL_ITEM_INDEX (_NL_COLLATE_RULESETS)].string;
266	table = (const int32_t *)
267	current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_TABLE,SUFFIX))].string;
268	weights = (const USTRING_TYPE *)
269	current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_WEIGHT,SUFFIX))].string;
270	extra = (const USTRING_TYPE *)
271	current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_EXTRA,SUFFIX))].string;
272	indirect = (const int32_t *)
273	current->values[_NL_ITEM_INDEX (CONCAT(_NL_COLLATE_INDIRECT,SUFFIX))].string;
274
275	assert (((uintptr_t) table) % __alignof__ (table[`0`]) == `0`);
276	assert (((uintptr_t) weights) % __alignof__ (weights[`0`]) == `0`);
277	assert (((uintptr_t) extra) % __alignof__ (extra[`0`]) == `0`);
278	assert (((uintptr_t) indirect) % __alignof__ (indirect[`0`]) == `0`);
279
280	int result = `0`, rule = `0`;
281
282	coll_seq seq1, seq2;
283	seq1.len = `0`;
284	seq1.idxmax = `0`;
285	seq1.rule = `0`;
286	seq2.len = `0`;
287	seq2.idxmax = `0`;
288
289	for (int pass = `0`; pass < nrules; ++pass)
290	{
291	seq1.idxcnt = `0`;
292	seq1.idx = `0`;
293	seq2.idx = `0`;
294	seq1.backw_stop = ~`0ul`;
295	seq1.backw = ~`0ul`;
296	seq2.idxcnt = `0`;
297	seq2.backw_stop = ~`0ul`;
298	seq2.backw = ~`0ul`;
299
300	/ We need the elements of the strings as unsigned values since they*
301	are used as indices. /*
302	seq1.us = (const USTRING_TYPE *) s1;
303	seq2.us = (const USTRING_TYPE *) s2;
304
305	/ We assume that if a rule has defined `position' in one section*
306	this is true for all of them. Please note that the localedef programs
307	makes sure that `position' is not used at the first level. /*
308
309	int position = rulesets[rule * nrules + pass] & sort_position;
310
311	while (`1`)
312	{
313	get_next_seq (&seq1, nrules, rulesets, weights, table,
314	extra, indirect, pass);
315	get_next_seq (&seq2, nrules, rulesets, weights, table,
316	extra, indirect, pass);
317	/ See whether any or both strings are empty. /
318	if (seq1.len == `0` \|\| seq2.len == `0`)
319	{
320	if (seq1.len == seq2.len)
321	{
322	/ Both strings ended and are equal at this level. Do a*
323	byte-level comparison to ensure that we don't waste time
324	going through multiple passes for totally equal strings
325	before proceeding to subsequent passes. /*
326	if (pass == `0` && STRCMP (s1, s2) == `0`)
327	return result;
328	else
329	break;
330	}
331
332	/ This means one string is shorter than the other. Find out*
333	which one and return an appropriate value. /*
334	return seq1.len == `0` ? -`1` : `1`;
335	}
336
337	result = do_compare (&seq1, &seq2, position, weights);
338	if (result != `0`)
339	return result;
340	}
341
342	rule = seq1.rule;
343	}
344
345	return result;
346	}
347	libc_hidden_def (STRCOLL)
348
349	#ifndef WIDE_CHAR_VERSION
350	weak_alias (__strcoll_l, strcoll_l)
351	#endif
352

/* Browse the source code of glibc_src_2.24/string/strcoll_l.c */