1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
|
/*
* Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002.
*
* Sccsid @(#)re.h 1.15 (gritter) 2/6/05
*/
/* UNIX(R) Regular Expresssion Library
*
* Note: Code is released under the GNU LGPL
*
* Copyright (C) 2001 Caldera International, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to:
* Free Software Foundation, Inc.
* 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#ifndef LIBUXRE_RE_H
#define LIBUXRE_RE_H
/*
* Maps safe external tag to internal one
*/
#define re_coll_ lc_collate /* <regex.h> */
/* #define __fnm_collate lc_collate */ /* <fnmatch.h> */
#include <limits.h>
#include "regex.h"
/* #include <fnmatch.h> */
#include <colldata.h>
#define NBSHT (sizeof(unsigned short) * CHAR_BIT)
#define NBYTE (((1 << CHAR_BIT) + NBSHT - 1) / NBSHT)
#define NTYPE 4
#define NWIDE 32
#define NQUIV 4
typedef struct
{
struct lc_collate *col; /* only member set by caller */
wctype_t *extype;
wuchar_type *exquiv;
wchar_t *exwide;
wctype_t type[NTYPE];
wuchar_type quiv[NQUIV];
wchar_t wide[NWIDE];
unsigned short byte[NBYTE];
unsigned short ntype;
unsigned short nquiv;
unsigned short nwide;
unsigned int flags;
} Bracket;
#define BKT_NEGATED 0x001 /* complemented set */
#define BKT_ONECASE 0x002 /* uppercase same as lowercase */
#define BKT_NOTNL 0x004 /* do not match newline when BKT_NEGATED */
#define BKT_BADRANGE 0x008 /* accept [m-a] ranges as [ma] */
#define BKT_SEPRANGE 0x010 /* disallow [a-m-z] style ranges */
#define BKT_NLBAD 0x020 /* newline disallowed */
#define BKT_SLASHBAD 0x040 /* slash disallowed (for pathnames) */
#define BKT_EMPTY 0x080 /* take leading ] is end (empty set) */
#define BKT_ESCAPE 0x100 /* allow \ as quote for next anything */
#define BKT_QUOTE 0x200 /* allow \ as quote for \\, \^, \- or \] */
#define BKT_ESCNL 0x400 /* take \n as the newline character */
#define BKT_ESCSEQ 0x800 /* otherwise, take \ as in C escapes */
#define BKT_ODDRANGE 0x1000 /* oawk oddity: [m-a] means [m] */
#define BKT_NOI18N 0x2000 /* disable [::] [==] [..] */
#define BKT_OLDESC 0x4000 /* enable \b \f \n \r \t only */
/*
* These error returns for libuxre_bktmbcomp() are directly tied to
* the error returns for regcomp() for convenience.
*/
#define BKT_BADPAT (-REG_BADPAT)
#define BKT_ECOLLATE (-REG_ECOLLATE)
#define BKT_ECTYPE (-REG_ECTYPE)
#define BKT_EEQUIV (-REG_EEQUIV)
#define BKT_BADCHAR (-REG_EBKTCHAR)
#define BKT_EBRACK (-REG_EBRACK)
#define BKT_EMPTYSUBBKT (-REG_EMPTYSUBBKT)
#define BKT_ERANGE (-REG_ERANGE)
#define BKT_ESPACE (-REG_ESPACE)
#define BKT_BADESC (-REG_BADESC)
#define BKT_ILLSEQ (-REG_ILLSEQ)
/*
* These must be distinct from the flags in <fnmatch.h>.
*/
#define FNM_COLLATE 0x2000 /* have collation information */
#define FNM_CURRENT 0x4000 /* have full-sized fnm_t structure */
/*
* These must be distinct from the flags in <regex.h>.
*/
#define REG_NFA 0x20000000
#define REG_DFA 0x40000000
#define REG_GOTBKT 0x80000000
#define BRACE_INF USHRT_MAX
#define BRACE_MAX 5100 /* arbitrary number < SHRT_MAX */
#define BRACE_DFAMAX 255 /* max amount for r.e. duplication */
typedef union /* extra info always kept for some tokens/nodes */
{
Bracket *bkt; /* ROP_BKT */
size_t sub; /* ROP_LP (ROP_RP), ROP_REF */
unsigned short num[2]; /* ROP_BRACE: num[0]=low, num[1]=high */
} Info;
typedef struct /* lexical context while parsing */
{
Info info;
const unsigned char *pat;
unsigned char *clist;
struct lc_collate *col;
unsigned long flags;
w_type tok;
size_t maxref;
size_t nleft;
size_t nright;
size_t nclist;
int bktflags;
int err;
int mb_cur_max;
} Lex;
typedef struct t_tree Tree; /* RE parse tree node */
struct t_tree
{
union
{
Tree *ptr; /* unary & binary nodes */
size_t pos; /* position for DFA leaves */
} left;
union
{
Tree *ptr; /* binary nodes */
Info info;
} right;
Tree *parent;
w_type op; /* positive => char. to match */
};
typedef struct re_dfa_ Dfa; /* DFA engine description */
typedef struct re_nfa_ Nfa; /* NFA engine description */
typedef struct
{
const unsigned char *str;
regmatch_t *match;
size_t nmatch;
unsigned long flags;
int mb_cur_max;
} Exec;
/*
* Regular expression operators. Some only used internally.
* All are negative, to distinguish them from the regular
* "match this particular wide character" operation.
*/
#define BINARY_ROP 0x02
#define UNARY_ROP 0x01
#define LEAF_ROP 0x00
#define MAKE_ROP(k, v) (-((v) | ((k) << 4)))
#define KIND_ROP(v) ((-(v)) >> 4)
#define ROP_OR MAKE_ROP(BINARY_ROP, 1)
#define ROP_CAT MAKE_ROP(BINARY_ROP, 2)
#define ROP_STAR MAKE_ROP(UNARY_ROP, 1)
#define ROP_PLUS MAKE_ROP(UNARY_ROP, 2)
#define ROP_QUEST MAKE_ROP(UNARY_ROP, 3)
#define ROP_BRACE MAKE_ROP(UNARY_ROP, 4)
#define ROP_LP MAKE_ROP(UNARY_ROP, 5)
#define ROP_RP MAKE_ROP(UNARY_ROP, 6)
#define ROP_NOP MAKE_ROP(LEAF_ROP, 1) /* temporary */
#define ROP_BOL MAKE_ROP(LEAF_ROP, 2) /* ^ anchor */
#define ROP_EOL MAKE_ROP(LEAF_ROP, 3) /* $ anchor */
#define ROP_ALL MAKE_ROP(LEAF_ROP, 4) /* anything (added) */
#define ROP_ANYCH MAKE_ROP(LEAF_ROP, 5) /* . w/\n */
#define ROP_NOTNL MAKE_ROP(LEAF_ROP, 6) /* . w/out \n */
#define ROP_EMPTY MAKE_ROP(LEAF_ROP, 7) /* empty string */
#define ROP_NONE MAKE_ROP(LEAF_ROP, 8) /* match failure */
#define ROP_BKT MAKE_ROP(LEAF_ROP, 9) /* [...] */
#define ROP_BKTCOPY MAKE_ROP(LEAF_ROP, 10) /* [...] (duplicated) */
#define ROP_LT MAKE_ROP(LEAF_ROP, 11) /* \< word begin */
#define ROP_GT MAKE_ROP(LEAF_ROP, 12) /* \> word end */
#define ROP_REF MAKE_ROP(LEAF_ROP, 13) /* \digit */
#define ROP_END MAKE_ROP(LEAF_ROP, 14) /* final (added) */
/*
* Return values:
* libuxre_bktmbcomp()
* <0 error (see BKT_* above); >0 #bytes scanned
* libuxre_bktmbexec()
* <0 doesn't match; >=0 matches, #extra bytes scanned
*/
LIBUXRE_STATIC void libuxre_bktfree(Bracket *);
LIBUXRE_STATIC int libuxre_bktmbcomp(Bracket *, const unsigned char *,
int, int);
LIBUXRE_STATIC int libuxre_bktmbexec(Bracket *, wchar_t,
const unsigned char *, int);
LIBUXRE_STATIC void libuxre_regdeltree(Tree *, int);
LIBUXRE_STATIC Tree *libuxre_reg1tree(w_type, Tree *);
LIBUXRE_STATIC Tree *libuxre_reg2tree(w_type, Tree *, Tree *);
LIBUXRE_STATIC Tree *libuxre_regparse(Lex *, const unsigned char *, int);
extern void libuxre_regdeldfa(Dfa *);
LIBUXRE_STATIC int libuxre_regdfacomp(regex_t *, Tree *, Lex *);
LIBUXRE_STATIC int libuxre_regdfaexec(Dfa *, Exec *);
extern void libuxre_regdelnfa(Nfa *);
LIBUXRE_STATIC int libuxre_regnfacomp(regex_t *, Tree *, Lex *);
LIBUXRE_STATIC int libuxre_regnfaexec(Nfa *, Exec *);
#endif /* !LIBUXRE_RE_H */
|