summaryrefslogtreecommitdiff
path: root/libuxre/bracket.c
diff options
context:
space:
mode:
authorThomas Ulmer <thomasmulmer02@gmail.com>2026-02-23 16:54:28 -0800
committerThomas Ulmer <thomasmulmer02@gmail.com>2026-02-23 16:54:28 -0800
commit15bd7946cc838a3151c357e4b0bc1ab85eecda62 (patch)
tree56977cb9bfc4349f46e2c608503a298df30ca957 /libuxre/bracket.c
add musl and vi
Diffstat (limited to 'libuxre/bracket.c')
-rw-r--r--libuxre/bracket.c829
1 files changed, 829 insertions, 0 deletions
diff --git a/libuxre/bracket.c b/libuxre/bracket.c
new file mode 100644
index 0000000..bc31b23
--- /dev/null
+++ b/libuxre/bracket.c
@@ -0,0 +1,829 @@
+/*
+ * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002.
+ *
+ * Sccsid @(#)bracket.c 1.14 (gritter) 10/18/03
+ */
+/* UNIX(R) Regular Expression Library
+ *
+ * Note: Code is released under the GNU LGPL
+ *
+ * Copyright (C) 2001 Caldera International, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/* #include "synonyms.h" */
+#include <ctype.h>
+#include <stdlib.h>
+#include <string.h>
+#include "re.h"
+
+/*
+* Build and match the [...] part of REs.
+*
+* In general, each compiled bracket construct holds a set of mapped
+* wide character values and a set of character classifications.
+* The mapping applied (when the current LC_COLLATE is not CHF_ENCODED)
+* is the "basic" weight (cep->weight[0]); otherwise the actual wide
+* character is used.
+*
+* To support simplified range handling, this code assumes that a w_type,
+* a signed integer type, can hold all valid basic weight values (as well
+* as all wide character values for CHF_ENCODED locales) and that these
+* are all positive. Negative values indicate error conditions (BKT_*);
+* zero (which must be the same as WGHT_IGNORE) indicates success, but
+* that the item installed is not a range endpoint.
+*/
+
+/*
+ * Append one mapped wide value to the bracket's list of wide values.
+ *
+ * The first NWIDE entries are kept in the inline array bp->wide[];
+ * later ones go to the heap array bp->exwide, which is grown each
+ * time the total count reaches a multiple of NWIDE.
+ *
+ * Returns 0 on success or BKT_ESPACE if the overflow array cannot be
+ * grown.  On failure *bp is not modified, so whatever bp->exwide
+ * already held is still reachable and can be freed by
+ * libuxre_bktfree().
+ */
+static int
+addwide(Bracket *bp, wchar_t ord)
+{
+	unsigned int nw = bp->nwide;
+
+	if (nw < NWIDE)
+	{
+		bp->wide[nw] = ord;
+	}
+	else
+	{
+		if (nw % NWIDE == 0)
+		{
+			/*
+			 * Grow through a temporary: storing realloc()'s
+			 * result directly in bp->exwide would lose the
+			 * only reference to the old block on failure.
+			 */
+			wchar_t *grown = realloc(bp->exwide,
+				nw * sizeof(wchar_t));
+
+			if (grown == 0)
+				return BKT_ESPACE;
+			bp->exwide = grown;
+		}
+		/* exwide[] is indexed from the first overflow entry */
+		bp->exwide[nw - NWIDE] = ord;
+	}
+	bp->nwide++;
+	return 0;
+}
+
+/*
+ * Single-byte members of a set are kept as a bitmap in bp->byte[].
+ * PLIND(n) selects the array element that holds value n and PLBIT(n)
+ * is the mask for n's bit inside that element.
+ *
+ * NOTE(review): the first variant packs 16 values per element, which
+ * presumably means bp->byte[] is an array of unsigned short -- confirm
+ * against the Bracket declaration in re.h.  USHRT_MAX and CHAR_BIT
+ * come from <limits.h>, which this file does not include directly.
+ */
+#if USHRT_MAX == 65535 /* have 16 bits */
+#define PLIND(n) ((n) >> 4)
+#define PLBIT(n) (1 << ((n) & 0xf))
+#else
+#define PLIND(n) ((n) / CHAR_BIT)
+#define PLBIT(n) (1 << ((n) % CHAR_BIT))
+#endif
+
+/*
+ * Marker stored between the two end points of a range in the wide
+ * value list.  addrange() only hands values above UCHAR_MAX to
+ * addwide(), so the single-byte value '-' cannot be mistaken for a
+ * real list entry.
+ */
+#define RANGE ((wchar_t)'-') /* separates wide chars in ranges */
+
+/*
+ * Add the mapped value "ord" to the set and, when "prev" is a usable
+ * low end point (positive and different from "ord"), every value in
+ * between as well.  "prev" itself is assumed to be in the set already.
+ *
+ * Values up to UCHAR_MAX are recorded as bits in bp->byte[].  Larger
+ * values go through addwide(); a span of more than two wide values is
+ * stored as <first> RANGE <last>.
+ *
+ * Returns 0 on success, BKT_ERANGE for a descending range that is
+ * permitted by neither BKT_ODDRANGE nor BKT_BADRANGE, or whatever
+ * error addwide() reports.
+ */
+static int
+addrange(Bracket *bp, wchar_t ord, w_type prev)
+{
+	int status;
+
+	if (prev > 0 && prev != ord)
+	{
+		if (prev <= ord)
+		{
+			/*
+			 * Ascending range.  Set the bitmap bits for the
+			 * part that lies in the single-byte region,
+			 * starting just above the already placed "prev".
+			 */
+			for (++prev; prev <= UCHAR_MAX; ++prev)
+			{
+				bp->byte[PLIND(prev)] |= PLBIT(prev);
+				if (prev == ord)
+					return 0;
+			}
+			/*
+			 * The rest lies in the wide region: store the
+			 * first value, then the RANGE marker if there
+			 * is anything strictly between the two ends,
+			 * then the last value.
+			 */
+			status = addwide(bp, prev);
+			if (status != 0)
+				return status;
+			++prev;
+			if (prev > ord)
+				return 0;
+			if (prev < ord)
+			{
+				status = addwide(bp, RANGE);
+				if (status != 0)
+					return status;
+			}
+			return addwide(bp, ord);
+		}
+		/* Descending range. */
+		if (bp->flags & BKT_ODDRANGE)
+			return 0;	/* keep only "prev" */
+		if ((bp->flags & BKT_BADRANGE) == 0)
+			return BKT_ERANGE;
+		/* BKT_BADRANGE: carry on and install "ord" by itself */
+	}
+	if (ord <= UCHAR_MAX)
+	{
+		bp->byte[PLIND(ord)] |= PLBIT(ord);
+		return 0;
+	}
+	if (prev == ord)
+		return 0;	/* wide value is already present */
+	return addwide(bp, ord);
+}
+
+/*
+ * Install the single character "wc" into the set, completing a range
+ * from "prev" when addrange() finds one.
+ *
+ * The character is first looked up with libuxre_collelem(); unless
+ * the result is ELEM_ENCODED it is replaced by the element's
+ * weight[0].  "mb_cur_max" is accepted but not used here.
+ *
+ * Returns the value that was installed, BKT_BADCHAR when the lookup
+ * yields ELEM_BADCHAR, or the error reported by addrange().
+ */
+static w_type
+place(Bracket *bp, wchar_t wc, w_type prev, int mb_cur_max)
+{
+	CollElem scratch;
+	const CollElem *elem;
+	int status;
+
+	elem = libuxre_collelem(bp->col, &scratch, wc);
+	if (elem != ELEM_ENCODED)
+	{
+		if (elem == ELEM_BADCHAR)
+			return BKT_BADCHAR;
+		wc = elem->weight[0];
+	}
+	status = addrange(bp, wc, prev);
+	if (status != 0)
+		return status;
+	return wc;
+}
+
+#ifndef CHARCLASS_NAME_MAX
+# define CHARCLASS_NAME_MAX 127
+#endif
+
+/*
+ * Install a character class ([:name:]) whose name is the "n" bytes
+ * starting at "s" (not NUL terminated).
+ *
+ * The name is resolved with wctype() and the resulting descriptor is
+ * appended to bp->type[], or to the heap array bp->extype once the
+ * first NTYPE slots are used; that array is grown each time the count
+ * reaches a multiple of NTYPE.
+ *
+ * Returns 0 on success (a class can never be a range end point),
+ * BKT_ECTYPE for a name that is too long or unknown, or BKT_ESPACE
+ * when out of memory.  On failure *bp is not modified.
+ */
+static w_type
+chcls(Bracket *bp, const unsigned char *s, int n)
+{
+	char clsstr[CHARCLASS_NAME_MAX + 1];
+	unsigned int nt;
+	wctype_t wct;
+
+	/* a negative length would turn into a huge memcpy() size */
+	if (n < 0 || n > CHARCLASS_NAME_MAX)
+		return BKT_ECTYPE;
+	(void)memcpy(clsstr, s, n);
+	clsstr[n] = '\0';
+	if ((wct = wctype(clsstr)) == 0)
+		return BKT_ECTYPE;
+	if ((nt = bp->ntype) < NTYPE)
+	{
+		bp->type[nt] = wct;
+	}
+	else
+	{
+		if (nt % NTYPE == 0)
+		{
+			/*
+			 * Grow through a temporary: storing realloc()'s
+			 * result directly in bp->extype would lose the
+			 * only reference to the old block on failure.
+			 */
+			wctype_t *grown = realloc(bp->extype,
+				nt * sizeof(wctype_t));
+
+			if (grown == 0)
+				return BKT_ESPACE;
+			bp->extype = grown;
+		}
+		bp->extype[nt - NTYPE] = wct;
+	}
+	bp->ntype++;
+	return 0; /* cannot be end point of a range */
+}
+
+ /*
+ * The purpose of mcce() and its Mcce structure is to locate
+ * the next full collation element from "wc" and "s". It is
+ * called both at compile and execute time. These two differ
+ * primarily in that at compile time there is an exact number
+ * of bytes to be consumed, while at execute time the longest
+ * valid collation element is to be found.
+ *
+ * When BKT_ONECASE is set, MCCEs become particularly messy.
+ * There is no guarantee that all possible combinations of
+ * upper/lower case are defined as MCCEs. Thus, this code
+ * tries both lower- and uppercase (in that order) for each
+ * character that might be part of an MCCE.
+ */
+
+/*
+ * Scan state shared by all recursion levels of mcce().
+ * The caller fills in max, aft, bp, col, cep and ch before the first
+ * call; mcce() reports its result through aft, cep and wc.
+ */
+typedef struct
+{
+ const unsigned char *max; /* caller's limit on the scan; 0 means no limit */
+ const unsigned char *aft; /* just past the longest element matched so far */
+ Bracket *bp; /* bracket being built or matched; only its flags are read */
+ struct lc_collate *col; /* collation data passed to the lookups; readonly */
+ const CollElem *cep; /* entry for the longest match; ELEM_BADCHAR if none */
+ wchar_t ch; /* already decoded first character; '\0' once used or absent */
+ w_type wc; /* character (as tried) that completed the match at "aft" */
+} Mcce;
+
+/*
+ * Find the longest collating element that starts at the current
+ * position and record it in *mcp (fields aft, cep and wc).
+ *
+ * mcp scan state, see Mcce
+ * cep element matched by the characters consumed so far, or
+ * ELEM_BADCHAR when this is the first character
+ * s next unread byte of the input
+ * mb_cur_max, compile_time
+ * not examined here; only handed on to the recursive call
+ *
+ * Returns 0 when the scan ended normally -- whether anything matched
+ * is seen from mcp->cep -- or BKT_ILLSEQ when libuxre_mb2wc() reports
+ * a negative result.  A nonzero result from a recursive call is
+ * returned unchanged.
+ *
+ * NOTE(review): mcp->cep can be set from the result of
+ * libuxre_collelem(..., &spare, ...).  If that helper can return
+ * &spare itself, the pointer refers to a local of this frame and is
+ * stale once mcce() returns -- confirm against libuxre_collelem().
+ */
+static int
+mcce(Mcce *mcp, const CollElem *cep, const unsigned char *s, int mb_cur_max,
+ int compile_time)
+{
+ const CollElem *nxt;
+ CollElem spare;
+ w_type ch, wc;
+ int i;
+
+ /*
+ * Get next character: a pending one handed in through mcp->ch
+ * is used first (and cleared); otherwise one is read from "s".
+ */
+ if ((wc = mcp->ch) != '\0')
+ {
+ mcp->ch = '\0';
+ }
+ else if (ISONEBYTE(wc = *s++))
+ {
+ if (wc == '\0')
+ return 0;
+ }
+ else if ((i = libuxre_mb2wc(&wc, s)) > 0)
+ {
+ s += i;
+ /* the character must not extend past the caller's limit */
+ if (mcp->max != 0 && s > mcp->max)
+ return 0;
+ }
+ else if (i < 0)
+ return BKT_ILLSEQ;
+ /*
+ * Try out this character as part of an MCCE.
+ * If BKT_ONECASE is set, this code tries both the lower- and
+ * uppercase version, continuing if it matches so far.
+ */
+ ch = wc;
+ if (mcp->bp->flags & BKT_ONECASE)
+ {
+ /*
+ * "wc" becomes the lowercase form, tried first.  "ch" is
+ * the alternative: the original character if lowering
+ * changed it, else its uppercase form.
+ */
+ if ((wc = to_lower(wc)) == ch)
+ ch = to_upper(wc);
+ }
+ for (;;) /* at most twice */
+ {
+ if (cep == ELEM_BADCHAR) /* first character */
+ {
+ /*
+ * Nothing longer than this one character is possible
+ * when it is ELEM_ENCODED, when the collation has no
+ * multiple character elements, or when the caller's
+ * limit has been reached: record it and stop.
+ */
+ if ((nxt = libuxre_collelem(mcp->col, &spare, wc))
+ == ELEM_ENCODED
+ || (mcp->col->flags & CHF_MULTICH) == 0
+ || s == mcp->max)
+ {
+ mcp->aft = s;
+ mcp->cep = nxt;
+ mcp->wc = wc;
+ break;
+ }
+ }
+ else
+ {
+ /* extend the element matched so far by this character */
+ nxt = libuxre_collmult(mcp->col, cep, wc);
+ }
+ if (nxt != ELEM_BADCHAR)
+ {
+ /*
+ * Okay so far. Record this collating element
+ * if it's really one (not WGHT_IGNORE) and
+ * we've reached a new high point or it's the
+ * first match.
+ *
+ * If there's a possibility for more, call mcce()
+ * recursively for the subsequent characters.
+ */
+ if (nxt->weight[0] != WGHT_IGNORE
+ && (mcp->aft < s || mcp->cep == ELEM_BADCHAR))
+ {
+ mcp->aft = s;
+ mcp->cep = nxt;
+ mcp->wc = wc;
+ }
+ if (nxt->multbeg != 0
+ && (mcp->max == 0 || s < mcp->max))
+ {
+ if ((i = mcce(mcp, nxt, s, mb_cur_max,
+ compile_time)) != 0)
+ return i;
+ }
+ }
+ /* second pass (if any) uses the other case variant */
+ if (wc == ch)
+ break;
+ wc = ch;
+ }
+ return 0;
+}
+
+/*
+ * Install an equivalence class ([=...=]) named by the "n" bytes at
+ * "s", completing a range from "prev" where possible.
+ *
+ * The name has to be exactly one collating element as found by
+ * mcce().  When the collation has more than one weight level, the
+ * class is installed as the range of weight[0] values recorded in
+ * the element's multiple character list terminator.  If that
+ * terminator is marked SUBN_SPECIAL (members scattered among other
+ * weight[0] values), the element's weight[1] is stored in
+ * bp->quiv[] / bp->exquiv instead.
+ *
+ * Returns a positive value the caller may use as the low end of a
+ * following range, or a negative BKT_* code: BKT_EEQUIV when the name
+ * is not a single collating element, BKT_ESPACE when out of memory,
+ * or an error passed on from mcce() or addrange().
+ */
+static w_type
+eqcls(Bracket *bp, const unsigned char *s, int n, w_type prev, int mb_cur_max)
+{
+	w_type last;
+	Mcce mcbuf;
+	int err;
+
+	mcbuf.max = &s[n];
+	mcbuf.aft = &s[0];
+	mcbuf.bp = bp;
+	mcbuf.col = bp->col;
+	mcbuf.cep = ELEM_BADCHAR;
+	mcbuf.ch = '\0';
+	if ((err = mcce(&mcbuf, ELEM_BADCHAR, s, mb_cur_max, 1)) != 0)
+		return err;
+	/* the element found must use up the whole name */
+	if (mcbuf.cep == ELEM_BADCHAR || mcbuf.aft != mcbuf.max)
+		return BKT_EEQUIV;
+	last = mcbuf.wc;
+	if (mcbuf.cep != ELEM_ENCODED && mcbuf.col->nweight > 1)
+	{
+		const CollElem *cep;
+
+		/*
+		 * The first and last weight[0] values for equivalence
+		 * classes are stuffed into the terminator for the
+		 * multiple character lists.  If these values are
+		 * scattered (elements that are not part of this
+		 * equivalence class have weight[0] values between the
+		 * two end points), then SUBN_SPECIAL is placed in
+		 * this terminator.  Note that weight[1] of the
+		 * terminator must be other than WGHT_IGNORE, too.
+		 */
+		last = mcbuf.cep->weight[0];
+		if ((cep = libuxre_collmult(bp->col, mcbuf.cep, 0))
+			!= ELEM_BADCHAR
+			&& cep->weight[1] != WGHT_IGNORE)
+		{
+			last = cep->weight[1];
+			if (cep->subnbeg == SUBN_SPECIAL)
+			{
+				unsigned int nq;
+
+				/*
+				 * Permit ranges up to the first and
+				 * after the last.
+				 */
+				if (prev > 0 && prev != cep->weight[0]
+					&& (prev = addrange(bp,
+						cep->weight[0], prev)) != 0)
+				{
+					return prev;
+				}
+				/*
+				 * Record the equivalence class by storing
+				 * the primary weight.
+				 */
+				if ((nq = bp->nquiv) < NQUIV)
+				{
+					bp->quiv[nq] = mcbuf.cep->weight[1];
+				}
+				else
+				{
+					if (nq % NQUIV == 0)
+					{
+						/*
+						 * Grow through a temporary so
+						 * the old block is not lost
+						 * when realloc() fails.
+						 */
+						wuchar_type *grown =
+							realloc(bp->exquiv,
+							nq * sizeof(wuchar_type));
+
+						/*
+						 * Errors must be negative
+						 * BKT_* values; a positive
+						 * code would be taken for a
+						 * range end point.
+						 */
+						if (grown == 0)
+							return BKT_ESPACE;
+						bp->exquiv = grown;
+					}
+					bp->exquiv[nq - NQUIV] =
+						mcbuf.cep->weight[1];
+				}
+				bp->nquiv++;
+				return last;
+			}
+			mcbuf.cep = cep;
+		}
+		mcbuf.wc = mcbuf.cep->weight[0];
+	}
+	/*
+	 * Determine range, if any, to install.
+	 *
+	 * If there's a pending low (prev > 0), then try to use it.
+	 *
+	 * Otherwise, try to use mcbuf.wc as the low end of the range.
+	 * Since addrange() assumes that the low point has already been
+	 * placed, we try to fool it by using a prev of one less than
+	 * mcbuf.wc.  But, if that value would not look like a valid
+	 * low point of a range, we have to explicitly place mcbuf.wc.
+	 */
+	if (prev <= 0 && (prev = mcbuf.wc - 1) <= 0)
+	{
+		if ((prev = addrange(bp, mcbuf.wc, 0)) != 0)
+			return prev;
+	}
+	if ((mcbuf.wc = addrange(bp, last, prev)) != 0)
+		return mcbuf.wc;
+	return last;
+}
+
+/*
+ * Install a collating symbol ([.name.]) named by the "n" bytes at
+ * "s", completing a range from "prev" when addrange() finds one.
+ *
+ * The name has to be exactly one collating element as found by
+ * mcce().  Unless that element is ELEM_ENCODED, its weight[0] is the
+ * value installed.
+ *
+ * Returns the installed value, BKT_ECOLLATE when the name is not a
+ * single collating element, or an error from mcce() or addrange().
+ */
+static w_type
+clsym(Bracket *bp, const unsigned char *s, int n, w_type prev, int mb_cur_max)
+{
+	Mcce scan;
+	int status;
+
+	scan.bp = bp;
+	scan.col = bp->col;
+	scan.ch = '\0';
+	scan.cep = ELEM_BADCHAR;
+	scan.aft = &s[0];
+	scan.max = &s[n];
+
+	status = mcce(&scan, ELEM_BADCHAR, s, mb_cur_max, 1);
+	if (status != 0)
+		return status;
+
+	/* an element must have been found and must span the whole name */
+	if (scan.cep == ELEM_BADCHAR || scan.aft != scan.max)
+		return BKT_ECOLLATE;
+
+	if (scan.cep != ELEM_ENCODED)
+		scan.wc = scan.cep->weight[0];
+
+	status = addrange(bp, scan.wc, prev);
+	if (status != 0)
+		return status;
+	return scan.wc;
+}
+
+/*
+ * Scans the rest of a bracket construction within a regular
+ * expression and fills in a description for it.
+ * The leading [ and the optional set complement indicator
+ * were handled already by the caller.
+ *
+ * bp description to fill in; bp->col must be set by the
+ * caller and is kept, everything else is reset
+ * pat0 first byte after the leading [ (and complement sign)
+ * flags BKT_* options; they select which escapes are
+ * recognized, which characters are forbidden, and how
+ * odd ranges are treated
+ * mb_cur_max passed on to the helpers
+ *
+ * Returns:
+ * <0 error (a BKT_* value); anything allocated for *bp
+ * has been released
+ * >0 success; equals how many bytes were scanned.
+ */
+LIBUXRE_STATIC int
+libuxre_bktmbcomp(Bracket *bp, const unsigned char *pat0,
+	int flags, int mb_cur_max)
+{
+	static const Bracket zero = {0};
+	const unsigned char *pat = pat0;
+	struct lc_collate *savecol;
+	w_type n, wc, prev = 0;
+
+	/*
+	 * Set represented set to empty.  Easiest to copy an empty
+	 * version over the caller's, (re)setting col and flags.
+	 */
+	savecol = bp->col;
+	*bp = zero;
+	bp->col = savecol;
+	bp->flags = flags
+		& (BKT_NEGATED | BKT_ONECASE | BKT_NOTNL | BKT_BADRANGE |
+		BKT_ODDRANGE);
+	/*
+	 * Handle optional "empty" brackets; typically only used
+	 * in combination with BKT_QUOTE or BKT_ESCAPE.
+	 */
+	if ((wc = *pat) == ']' && (flags & BKT_EMPTY) != 0)
+		return 1;
+	/*
+	 * Populate *bp.
+	 *
+	 * Each pass handles one item starting at "wc" and leaves in "n"
+	 * what the helper returned: negative for an error, zero when
+	 * the item cannot start a range, else the value that may serve
+	 * as the low end point.  "prev" carries that value into the
+	 * next pass.
+	 */
+	for (;; prev = n)
+	{
+		switch (wc)
+		{
+		case '\0':
+		ebrack:;
+			n = BKT_EBRACK;
+			goto err;
+		case '\n':
+			if (flags & BKT_NLBAD)
+				goto ebrack;
+			goto regular;
+		case '/':
+			if (flags & BKT_SLASHBAD)
+				goto ebrack;
+			goto regular;
+		case '\\':
+			if ((flags & (BKT_ESCAPE | BKT_QUOTE
+				| BKT_ESCNL | BKT_ESCSEQ)) == 0)
+			{
+				goto regular;
+			}
+			switch (wc = *++pat)
+			{
+			default:
+			noesc:;
+				/*
+				 * Not a recognized escape.  With BKT_ESCAPE
+				 * the escaped character stands for itself;
+				 * otherwise back up and take the backslash
+				 * as an ordinary character.
+				 */
+				if ((flags & BKT_ESCAPE) == 0)
+				{
+					wc = '\\';
+					pat--;
+				}
+				break;
+			case '\\':
+			case ']':
+			case '-':
+			case '^':
+				if ((flags & BKT_QUOTE) == 0)
+					goto noesc;
+				break;
+			case 'a':
+				if ((flags & BKT_ESCSEQ) == 0 ||
+					(flags & BKT_OLDESC))
+					goto noesc;
+				wc = '\a';
+				break;
+			case 'b':
+				if ((flags & BKT_ESCSEQ) == 0)
+					goto noesc;
+				wc = '\b';
+				break;
+			case 'f':
+				if ((flags & BKT_ESCSEQ) == 0)
+					goto noesc;
+				wc = '\f';
+				break;
+			case 'n':
+				if ((flags & (BKT_ESCSEQ | BKT_ESCNL)) == 0)
+					goto noesc;
+				wc = '\n';
+				break;
+			case 'r':
+				if ((flags & BKT_ESCSEQ) == 0)
+					goto noesc;
+				wc = '\r';
+				break;
+			case 't':
+				if ((flags & BKT_ESCSEQ) == 0)
+					goto noesc;
+				wc = '\t';
+				break;
+			case 'v':
+				if ((flags & BKT_ESCSEQ) == 0 ||
+					(flags & BKT_OLDESC))
+					goto noesc;
+				wc = '\v';
+				break;
+			case 'x':
+				if ((flags & BKT_ESCSEQ) == 0 ||
+					(flags & BKT_OLDESC))
+					goto noesc;
+				if (!isxdigit(wc = *++pat))
+				{
+					/*
+					 * No hex digit follows: back up to
+					 * the 'x' and treat it like any
+					 * other unrecognized escape.  "wc"
+					 * must name the 'x' again, not the
+					 * character that was peeked at.
+					 */
+					wc = 'x';
+					pat--;
+					goto noesc;
+				}
+				/*
+				 * Take as many hex digits as possible,
+				 * ignoring overflows.
+				 * Any positive result is okay.
+				 */
+				n = 0;
+				do
+				{
+					/* letters map to 10..15 */
+					if (isdigit(wc))
+						wc -= '0';
+					else if (isupper(wc))
+						wc -= 'A' - 10;
+					else
+						wc -= 'a' - 10;
+					n <<= 4;
+					n |= wc;
+				} while (isxdigit(wc = *++pat));
+				pat--;
+				if ((wc = n) <= 0)
+				{
+					n = BKT_BADESC;
+					goto err;
+				}
+				break;
+			case '0':
+			case '1':
+			case '2':
+			case '3':
+			case '4':
+			case '5':
+			case '6':
+			case '7':
+			case '8':
+			case '9':
+				if ((flags & BKT_ESCSEQ) == 0 ||
+					(flags & BKT_OLDESC))
+					goto noesc;
+				/*
+				 * For compatibility (w/awk),
+				 * permit "octal" 8 and 9.
+				 * At most three digits are taken.
+				 */
+				n = wc - '0';
+				if ((wc = *++pat) >= '0' && wc <= '9')
+				{
+					n <<= 3;
+					n += wc - '0';
+					if ((wc = *++pat) >= '0' && wc <= '9')
+					{
+						n <<= 3;
+						n += wc - '0';
+					}
+				}
+				pat--;
+				if ((wc = n) <= 0)
+				{
+					n = BKT_BADESC;
+					goto err;
+				}
+				break;
+			}
+			goto regular;
+		case '[':
+			if (((wc = *++pat) == ':' || wc == '=' || wc == '.') &&
+				(flags & BKT_NOI18N) == 0)
+			{
+				/*
+				 * [: [= or [. -- count the bytes of the name
+				 * up to the matching closer (the same sign
+				 * followed by ']').
+				 */
+				n = 0;
+				while (*++pat != wc || pat[1] != ']')
+				{
+					if (*pat == '\0')
+					{
+					badpat:;
+						n = BKT_BADPAT;
+						goto err;
+					}
+					else if (*pat == '/')
+					{
+						if (flags & BKT_SLASHBAD)
+							goto badpat;
+					}
+					else if (*pat == '\n')
+					{
+						if (flags & BKT_NLBAD)
+							goto badpat;
+					}
+					n++;
+				}
+				if (n == 0)
+				{
+					n = BKT_EMPTYSUBBKT;
+					goto err;
+				}
+				if (wc == ':')
+					n = chcls(bp, &pat[-n], n);
+				else if (wc == '=')
+					n = eqcls(bp, &pat[-n], n, prev,
+						mb_cur_max);
+				else /* wc == '.' */
+					n = clsym(bp, &pat[-n], n, prev,
+						mb_cur_max);
+				pat++;	/* now at the closing ']' of the item */
+				break;
+			}
+			/* a plain '[' */
+			wc = '[';
+			pat--;
+			goto regular;
+		default:
+			if (!ISONEBYTE(wc) &&
+				(n = libuxre_mb2wc(&wc, pat + 1)) > 0)
+				pat += n;
+		regular:;
+			n = place(bp, wc, prev, mb_cur_max);
+			break;
+		}
+		/*
+		 * NOTE(review): every negative result from the helpers is
+		 * reported as BKT_ILLSEQ here, which hides the specific
+		 * code (BKT_ECTYPE, BKT_ERANGE, BKT_ESPACE, ...).  Kept
+		 * as is because callers may rely on it -- confirm whether
+		 * this is intended.
+		 */
+		if (n < 0) {
+			n = BKT_ILLSEQ;
+			goto err;
+		}
+		if ((wc = *++pat) == ']')
+			break;
+		if (wc == '-' && n != 0)
+		{
+			if (prev == 0 || (flags & BKT_SEPRANGE) == 0)
+			{
+				if ((wc = *++pat) != ']')
+					continue; /* valid range */
+				/* "-]": the '-' is an ordinary member */
+				wc = '-';
+				pat--;
+			}
+		}
+		n = 0; /* no range this time */
+	}
+	return pat - pat0 + 1;
+err:;
+	libuxre_bktfree(bp);
+	return n;
+}
+
+/*
+ * Release the heap arrays that hold the overflow entries of *bp.
+ *
+ * The pointers are cleared afterwards, so calling this again on the
+ * same Bracket (libuxre_bktmbcomp() already calls it on its error
+ * path) is harmless.  The Bracket itself is not freed.
+ */
+LIBUXRE_STATIC void
+libuxre_bktfree(Bracket *bp)
+{
+	/* free() accepts a null pointer, so no guards are needed */
+	free(bp->extype);
+	bp->extype = 0;
+	free(bp->exquiv);
+	bp->exquiv = 0;
+	free(bp->exwide);
+	bp->exwide = 0;
+}
+
+/*
+ * Match one input position against a compiled bracket construct.
+ *
+ * bp compiled description from libuxre_bktmbcomp()
+ * wc the character at the current position, already
+ * decoded by the caller
+ * str the input bytes that follow that character
+ * mb_cur_max 1 selects btowc() conversion before the character
+ * class test; also passed on to mcce()
+ *
+ * Returns -1 when the position does not match (taking BKT_NEGATED
+ * into account).  Otherwise returns how many bytes of "str" belong
+ * to the match: 0 when only "wc" was used, more when a multiple
+ * character collating element was taken.
+ */
+LIBUXRE_STATIC int
+libuxre_bktmbexec(Bracket *bp, wchar_t wc,
+	const unsigned char *str, int mb_cur_max)
+{
+	unsigned int i;
+	wchar_t lc, uc;
+	Mcce mcbuf;
+
+	mcbuf.aft = str; /* in case of match in character classes */
+	mcbuf.ch = wc;
+	/*
+	 * First: check the single wc against any character classes.
+	 * Since multiple character collating elements are not part
+	 * of this world, they don't apply here.
+	 */
+	if ((i = bp->ntype) != 0)
+	{
+		wctype_t *wctp = &bp->type[0];
+
+		if (bp->flags & BKT_ONECASE)
+		{
+			/*
+			 * Test both case variants: "wc" is the lowercase
+			 * form and mcbuf.ch the other one.
+			 */
+			if ((wc = to_lower(wc)) == mcbuf.ch)
+				mcbuf.ch = to_upper(wc);
+		}
+		for (;;)
+		{
+			if (iswctype(mb_cur_max==1?btowc(wc):wc, *wctp))
+				goto match;
+			if (wc != mcbuf.ch &&
+				iswctype(mb_cur_max==1?btowc(mcbuf.ch):mcbuf.ch,
+					*wctp))
+				goto match;
+			if (--i == 0)
+				break;
+			/* entries continue in the overflow array */
+			if (++wctp == &bp->type[NTYPE])
+				wctp = &bp->extype[0];
+		}
+	}
+	/*
+	 * The main match is determined by the weight[0] value
+	 * of the character (or characters, if the input can be
+	 * taken as a multiple character collating element).
+	 */
+	mcbuf.max = 0;
+	mcbuf.bp = bp;
+	mcbuf.col = bp->col;
+	mcbuf.cep = ELEM_BADCHAR;
+	mcce(&mcbuf, ELEM_BADCHAR, str, mb_cur_max, 0);
+	if (mcbuf.cep == ELEM_BADCHAR)
+		return -1; /* never matches */
+	if (mcbuf.cep != ELEM_ENCODED)
+		mcbuf.wc = mcbuf.cep->weight[0];
+	/*
+	 * POSIX.2 demands that both a character and its case counterpart
+	 * can match if REG_ICASE is set.  This means that [B-z] matches
+	 * 'A', 'a', and '['.
+	 */
+	if (bp->flags & BKT_ONECASE)
+	{
+		lc = to_lower(mcbuf.wc);
+		uc = to_upper(mcbuf.wc);
+	}
+	else
+		lc = uc = mcbuf.wc;
+	/*
+	 * See if it's in the set.  Note that the list of true wide
+	 * character values has explicit ranges.
+	 */
+	if (mcbuf.wc <= UCHAR_MAX)
+	{
+		if (bp->byte[PLIND(lc)] & PLBIT(lc))
+			goto match;
+		if (lc != uc && (bp->byte[PLIND(uc)] & PLBIT(uc)))
+			goto match;
+	}
+	else if ((i = bp->nwide) != 0)
+	{
+		wchar_t *wcp = &bp->wide[0];
+		long lcmp, ucmp;
+
+		for (;;)
+		{
+			if ((lcmp = lc - *wcp) == 0)
+				goto match;
+			ucmp = uc - *wcp;
+			if (lc != uc && ucmp == 0)
+				goto match;
+			if (--i == 0)
+				break;
+			if (++wcp == &bp->wide[NWIDE])
+				wcp = &bp->exwide[0];
+			if (*wcp == RANGE)
+			{
+				/*
+				 * <low> RANGE <high>: "low" was just compared
+				 * (lcmp/ucmp); step to "high".  The high end
+				 * is part of the range and is skipped below
+				 * without a separate equality test, so both
+				 * comparisons must include it.
+				 */
+				if (++wcp == &bp->wide[NWIDE])
+					wcp = &bp->exwide[0];
+				if (lcmp > 0 && lc <= *wcp)
+					goto match;
+				if (lc != uc && ucmp > 0 && uc <= *wcp)
+					goto match;
+				/* account for the marker and the high end */
+				if ((i -= 2) == 0)
+					break;
+				if (++wcp == &bp->wide[NWIDE])
+					wcp = &bp->exwide[0];
+			}
+		}
+	}
+	/*
+	 * The last chance for a match is if an equivalence class
+	 * was specified for which the primary weights are scattered
+	 * through the weight[0]s.
+	 */
+	if ((i = bp->nquiv) != 0 && mcbuf.cep != ELEM_ENCODED)
+	{
+		wuchar_type *wucp = &bp->quiv[0];
+
+		mcbuf.wc = mcbuf.cep->weight[1];
+		for (;;)
+		{
+			if (mcbuf.wc == *wucp)
+				goto match;
+			if (--i == 0)
+				break;
+			if (++wucp == &bp->quiv[NQUIV])
+				wucp = &bp->exquiv[0];
+		}
+	}
+	/*
+	 * Only here when no match against the set was found.
+	 * One final special case w/r/t newline.
+	 */
+	if (bp->flags & BKT_NEGATED)
+	{
+		if (wc != '\n' || (bp->flags & BKT_NOTNL) == 0)
+			return mcbuf.aft - str;
+	}
+	return -1;
+match:;
+	/*
+	 * Only here when a match against the described set is found.
+	 */
+	if (bp->flags & BKT_NEGATED)
+		return -1;
+	return mcbuf.aft - str;
+}