1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
|
/*
* Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002.
*
* Sccsid @(#)colldata.h 1.5 (gritter) 5/1/04
*/
/* UNIX(R) Regular Expresssion Library
*
* Note: Code is released under the GNU LGPL
*
* Copyright (C) 2001 Caldera International, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to:
* Free Software Foundation, Inc.
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef LIBUXRE_COLLDATA_H
#define LIBUXRE_COLLDATA_H
typedef struct
{
long coll_offst; /* offset to xnd table */
long sub_cnt; /* length of subnd table */
long sub_offst; /* offset to subnd table */
long str_offst; /* offset to strings for subnd table */
long flags; /* nonzero if reg.exp. used */
} hd;
typedef struct
{
unsigned char ch; /* character or number of followers */
unsigned char pwt; /* primary weight */
unsigned char swt; /* secondary weight */
unsigned char ns; /* index of follower state list */
} xnd;
typedef struct
{
char *exp; /* expression to be replaced */
long explen; /* length of expression */
char *repl; /* replacement string */
} subnd;
/*----------------------------------*/
#include <wcharm.h>
#include <limits.h>
/* #include <stdlock.h> */
/*
* Structure of a collation file:
* 1. CollHead (maintbl is 0 if CHF_ENCODED)
* if !CHF_ENCODED then
* 2. CollElem[bytes] (256 for 8 bit bytes)
* 3. if CHF_INDEXED then
* CollElem[wides] (nmain-256 for 8 bit bytes)
* else
* CollMult[wides]
* 4. CollMult[*] (none if multtbl is 0)
* 5. wuchar_type[*] (none if repltbl is 0)
* 6. CollSubn[*] (none if subntbl is 0)
* 7. strings (first is pathname for .so if CHF_DYNAMIC)
*
* The actual location of parts 2 through 7 is not important.
*
* The main table is in encoded value order.
*
* All indeces/offsets must be nonzero to be effective; zero is reserved
* to indicate no-such-entry. This implies either that an unused initial
* entry is placed in each of (4) through (7), or that the "start offset"
* given by the header is artificially pushed back by an entry size.
*
* Note that if CHF_ENCODED is not set, then nweight must be positive.
*
* If an element can begin a multiple character element, it contains a
* nonzero multbeg which is the initial index into (4) for its list;
* the list is terminated by a CollMult with a ch of zero.
*
* If there are elements with the same primary weight (weight[1]), then
* for each such element, it must have a CollMult list. The CollMult
* that terminates the list (ch==0) notes the lowest and highest basic
* weights for those elements with that same primary weight value
* respectively in weight[0] and weight[1]. If there are some basic
* weights between these values that do not have the same primary
* weight--are not in the equivalence class--then the terminator also
* has a SUBN_SPECIAL mark. Note that this list terminator should be
* shared when the elements are not multiple character collating
* elements because they wouldn't otherwise have a CollMult list.
*
* WGHT_IGNORE is used to denote ignored collating elements for a
* particular collation ordering pass. All main table entries other
* than for '\0' will have a non-WGHT_IGNORE weight[0]. However, it is
* possible for a CollMult entries from (4) to have a WGHT_IGNORE
* weight[0]: If, for example, "xyz" is a multiple character collating
* element, but "xy" is not, then the CollMult for "y" will have a
* WGHT_IGNORE weight[0]. Also, WGHT_IGNORE is used to terminate each
* list of replacement weights.
*
* Within (3), it is possible to describe a sequence of unremarkable
* collating elements with a single CollMult entry. If the SUBN_SPECIAL
* bit is set, the rest of subnbeg represents the number of collating
* elements covered by this entry. The weight[0] values are determined
* by adding the difference between the encoded value and the entry's ch
* value to the entry's weight[0]. This value is then substituted for
* any weight[n], n>0 that has only the WGHT_SPECIAL bit set. libuxre_collelem()
* hides any match to such an entry by filling in a "spare" CollElem.
*
* If there are substitution strings, then for each character that begins
* a string, it has a nonzero subnbeg which is similarly the initial
* index into (6). The indeces in (6) refer to offsets within (7).
*/
#define TOPBIT(t) (((t)1) << (sizeof(t) * CHAR_BIT - 1))
#define CHF_ENCODED 0x1 /* collation by encoded values only */
#define CHF_INDEXED 0x2 /* main table indexed by encoded values */
#define CHF_MULTICH 0x4 /* a multiple char. coll. elem. exists */
#define CHF_DYNAMIC 0x8 /* shared object has collation functions */
#define CWF_BACKWARD 0x1 /* reversed ordering for this weight */
#define CWF_POSITION 0x2 /* weight takes position into account */
#define CLVERS 1 /* most recent version */
#define WGHT_IGNORE 0 /* ignore this collating element */
#define WGHT_SPECIAL TOPBIT(wuchar_type)
#define SUBN_SPECIAL TOPBIT(unsigned short)
#ifndef COLL_WEIGHTS_MAX
#define COLL_WEIGHTS_MAX 1
#endif
typedef struct
{
unsigned long maintbl; /* start of main table */
unsigned long multtbl; /* start of multi-char table */
unsigned long repltbl; /* start of replacement weights */
unsigned long subntbl; /* start of substitutions */
unsigned long strstbl; /* start of sub. strings */
unsigned long nmain; /* # entries in main table */
unsigned short flags; /* CHF_* bits */
unsigned short version; /* handle future changes */
unsigned char elemsize; /* # bytes/element (w/padding) */
unsigned char nweight; /* # weights/element */
unsigned char order[COLL_WEIGHTS_MAX]; /* CWF_* bits/weight */
} CollHead;
typedef struct
{
unsigned short multbeg; /* start of multi-chars */
unsigned short subnbeg; /* start of substitutions */
wuchar_type weight[COLL_WEIGHTS_MAX];
} CollElem;
typedef struct
{
wchar_t ch; /* "this" character (of sequence) */
CollElem elem; /* its full information */
} CollMult;
typedef struct
{
unsigned short strbeg; /* start of match string */
unsigned short length; /* length of match string */
unsigned short repbeg; /* start of replacement */
} CollSubn;
struct lc_collate
{
const unsigned char *strstbl;
const wuchar_type *repltbl;
const CollElem *maintbl;
const CollMult *multtbl;
const CollSubn *subntbl;
#ifdef DSHLIB
void *handle;
void (*done)(struct lc_collate *);
int (*strc)(struct lc_collate *, const char *, const char *);
int (*wcsc)(struct lc_collate *, const wchar_t *, const wchar_t *);
size_t (*strx)(struct lc_collate *, char *, const char *, size_t);
size_t (*wcsx)(struct lc_collate *, wchar_t *, const wchar_t *, size_t);
#endif
const char *mapobj;
size_t mapsize;
unsigned long nmain;
short nuse;
unsigned short flags;
unsigned char elemsize;
unsigned char nweight;
unsigned char order[COLL_WEIGHTS_MAX];
};
#define ELEM_BADCHAR ((CollElem *)0)
#define ELEM_ENCODED ((CollElem *)-1)
/*
LIBUXRE_STATIC int libuxre_old_collate(struct lc_collate *);
LIBUXRE_STATIC int libuxre_strqcoll(struct lc_collate *, const char *,
const char *);
LIBUXRE_STATIC int libuxre_wcsqcoll(struct lc_collate *, const wchar_t *,
const wchar_t *);
*/
extern struct lc_collate *libuxre_lc_collate(struct lc_collate *);
LIBUXRE_STATIC const CollElem *libuxre_collelem(struct lc_collate *,
CollElem *, wchar_t);
LIBUXRE_STATIC const CollElem *libuxre_collmult(struct lc_collate *,
const CollElem *, wchar_t);
/*
LIBUXRE_STATIC const CollElem *libuxre_collmbs(struct lc_collate *,
CollElem *, const unsigned char **);
LIBUXRE_STATIC const CollElem *libuxre_collwcs(struct lc_collate *,
CollElem *, const wchar_t **);
*/
#endif /* !LIBUXRE_COLLDATA_H */
|