1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
|
/*
* Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002.
*
* Sccsid @(#)_collelem.c 1.4 (gritter) 10/18/03
*/
/* UNIX(R) Regular Expresssion Library
*
* Note: Code is released under the GNU LGPL
*
* Copyright (C) 2001 Caldera International, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to:
* Free Software Foundation, Inc.
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/* #include "synonyms.h" */
#include "colldata.h"
#include <stddef.h>
#define CCE(p) ((const CollElem *)(p))
#define CCM(p) ((const CollMult *)(p))
LIBUXRE_STATIC const CollElem *
libuxre_collelem(struct lc_collate *col, CollElem *spare, wchar_t wc)
{
const char *tbl;
size_t hi, lo, cur;
const CollMult *cmp;
const CollElem *cep;
long diff;
int sz;
/*
* ELEM_ENCODED is returned when the collation is entirely
* based on the encoded value of the character.
*/
if (col == 0 || col->flags & CHF_ENCODED
|| (tbl = (const char *)col->maintbl) == 0)
{
return ELEM_ENCODED;
}
if ((wuchar_type)wc <= UCHAR_MAX)
{
indexed:;
cep = CCE(&tbl[(wuchar_type)wc * col->elemsize]);
if (cep->weight[0] == WGHT_SPECIAL)
return ELEM_BADCHAR;
return cep;
}
if (col->flags & CHF_INDEXED)
{
if ((wuchar_type)wc >= col->nmain)
return ELEM_BADCHAR;
goto indexed;
}
/*
* Binary search for a match. Could speed up the search if
* some interpolation was used, but keep it simple for now.
* Note that this is actually a table of CollMult's.
*
* To save space in the file, sequences of similar elements
* are sometimes compressed into a single CollMult that
* describes many entries. This is denoted by a subnbeg
* with the SUBN_SPECIAL bit set. The rest of the bits give
* the range covered by this entry.
*/
sz = col->elemsize + (sizeof(CollMult) - sizeof(CollElem));
tbl += (1 + UCHAR_MAX) * col->elemsize;
lo = 0;
hi = col->nmain - UCHAR_MAX;
while (lo < hi)
{
if ((cur = (hi + lo) >> 1) < lo) /* hi+lo overflowed */
cur |= ~(~(size_t)0 >> 1); /* lost high order bit */
cmp = CCM(&tbl[cur * sz]);
if ((diff = wc - cmp->ch) < 0)
hi = cur;
else if (cmp->elem.subnbeg & SUBN_SPECIAL)
{
if (diff > (long)(cmp->elem.subnbeg & ~SUBN_SPECIAL))
lo = cur + 1;
else /* create an entry from the sequence in spare */
{
spare->multbeg = cmp->elem.multbeg;
spare->subnbeg = 0;
spare->weight[0] = cmp->elem.weight[0] + diff;
for (lo = 1; lo < col->nweight; lo++)
{
wuchar_type w;
if ((w = cmp->elem.weight[lo])
== WGHT_SPECIAL)
{
w = spare->weight[0];
}
spare->weight[lo] = w;
}
return spare;
}
}
else if (diff == 0)
return &cmp->elem;
else
lo = cur + 1;
}
return ELEM_BADCHAR;
}
|