summaryrefslogtreecommitdiff
path: root/libuxre/regdfa.c
diff options
context:
space:
mode:
authorThomas Ulmer <thomasmulmer02@gmail.com>2026-02-23 16:54:28 -0800
committerThomas Ulmer <thomasmulmer02@gmail.com>2026-02-23 16:54:28 -0800
commit15bd7946cc838a3151c357e4b0bc1ab85eecda62 (patch)
tree56977cb9bfc4349f46e2c608503a298df30ca957 /libuxre/regdfa.c
add musl and vi
Diffstat (limited to 'libuxre/regdfa.c')
-rw-r--r--libuxre/regdfa.c877
1 files changed, 877 insertions, 0 deletions
diff --git a/libuxre/regdfa.c b/libuxre/regdfa.c
new file mode 100644
index 0000000..8142e8d
--- /dev/null
+++ b/libuxre/regdfa.c
@@ -0,0 +1,877 @@
+/*
+ * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002.
+ *
+ * Sccsid @(#)regdfa.c 1.9 (gritter) 9/22/03
+ */
+/* UNIX(R) Regular Expresssion Library
+ *
+ * Note: Code is released under the GNU LGPL
+ *
+ * Copyright (C) 2001 Caldera International, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/* #include "synonyms.h" */
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include "regdfa.h"
+
+/*
+* Deterministic Finite Automata.
+*/
+
+ /*
+ * Postorder traversal that returns a copy of the subtree,
+ * except that ROP_BKT becomes ROP_BKTCOPY (since they
+ * share the same pointed to Bracket object).
+ */
+static Tree *
+copy(regex_t *ep, Tree *tp)
+{
+ Tree *np;
+
+ if ((np = malloc(sizeof(Tree))) == 0)
+ return 0;
+ switch (np->op = tp->op) /* almost always correct */
+ {
+ case ROP_BKT:
+ np->op = ROP_BKTCOPY;
+ /*FALLTHROUGH*/
+ case ROP_BKTCOPY:
+ np->right.info.bkt = tp->right.info.bkt;
+ /*FALLTHROUGH*/
+ default:
+ np->left.pos = ep->re_dfa->nposn++;
+ /*FALLTHROUGH*/
+ case ROP_EMPTY:
+ return np;
+ case ROP_CAT:
+ case ROP_OR:
+ if ((np->right.ptr = copy(ep, tp->right.ptr)) == 0)
+ {
+ free(np);
+ return 0;
+ }
+ np->right.ptr->parent = np;
+ /*FALLTHROUGH*/
+ case ROP_STAR:
+ case ROP_PLUS:
+ case ROP_QUEST:
+ case ROP_LP:
+ if ((np->left.ptr = copy(ep, tp->left.ptr)) == 0)
+ break;
+ np->left.ptr->parent = np;
+ return np;
+ }
+ libuxre_regdeltree(np, 1);
+ return 0;
+}
+
+ /*
+ * Postorder traversal.
+ * Assign unique ascending integer values to the leaves.
+ * Since the right child is traversed before the left,
+ * the position for ROP_END is guaranteed to be zero.
+ * The parse tree is rewritten in two cases:
+ * - Each ROP_BRACE is replaced by an equivalent--sometimes
+ * large--subtree using only ROP_CAT, ROP_QUEST, and
+ * ROP_PLUS.
+ * - If REG_ICASE, replace each simple character that has
+ * an uppercase equivalent with a ROP_OR subtree over the
+ * two versions.
+ * Since these rewrites occur bottom up, they have already
+ * been applied before any subtrees passed to copy().
+ */
+static Tree *
+findposn(regex_t *ep, Tree *tp, int mb_cur_max)
+{
+ unsigned int lo, hi;
+ Tree *ptr, *par;
+ w_type wc;
+
+ switch (tp->op)
+ {
+ default:
+ if (ep->re_flags & REG_ICASE
+ && (wc = to_upper(tp->op)) != tp->op)
+ {
+ if ((ptr = libuxre_reg1tree(tp->op, 0)) == 0)
+ return 0;
+ ptr->parent = tp;
+ ptr->left.pos = ep->re_dfa->nposn++;
+ tp->op = ROP_OR;
+ tp->left.ptr = ptr;
+ ptr = libuxre_reg1tree(wc, 0);
+ if ((tp->right.ptr = ptr) == 0)
+ return 0;
+ ptr->parent = tp;
+ ptr->left.pos = ep->re_dfa->nposn++;
+ return tp;
+ }
+ /*FALLTHROUGH*/
+ case ROP_BOL:
+ case ROP_EOL:
+ case ROP_ALL:
+ case ROP_ANYCH:
+ case ROP_NOTNL:
+ case ROP_NONE:
+ case ROP_BKT:
+ case ROP_BKTCOPY:
+ case ROP_END:
+ tp->left.pos = ep->re_dfa->nposn++;
+ return tp;
+ case ROP_EMPTY:
+ return tp;
+ case ROP_OR:
+ case ROP_CAT:
+ if ((tp->right.ptr = findposn(ep, tp->right.ptr,
+ mb_cur_max)) == 0)
+ return 0;
+ /*FALLTHROUGH*/
+ case ROP_STAR:
+ case ROP_PLUS:
+ case ROP_QUEST:
+ case ROP_LP:
+ if ((tp->left.ptr = findposn(ep, tp->left.ptr,
+ mb_cur_max)) == 0)
+ return 0;
+ return tp;
+ case ROP_BRACE:
+ if ((tp->left.ptr = findposn(ep, tp->left.ptr,
+ mb_cur_max)) == 0)
+ return 0;
+ break;
+ }
+ /*
+ * ROP_BRACE as is cannot be handled in a DFA. This code
+ * duplicates the ROP_BRACE subtree as a left-towering
+ * series of ROP_CAT nodes, the first "lo" of which are
+ * direct copies of the original subtree. The tail of
+ * the series are either some number of ROP_QUESTs over
+ * copies of the original subtree, or a single ROP_PLUS
+ * over a copy (when "hi" is infinity).
+ *
+ * All interesting cases {lo,hi}:
+ * {0,0} -> ROP_EMPTY, parsing, temporary
+ * {0,1} -> ROP_QUEST, parsing
+ * {0,2} -> CAT(QUEST(left), QUEST(copy))
+ * {0,n} -> CAT({0,n-1}, QUEST(copy))
+ * {0,} -> ROP_STAR, parsing
+ *
+ * {1,1} -> ROP_NOP, parsing, temporary
+ * {1,2} -> CAT(left, QUEST(copy))
+ * {1,n} -> CAT({1,n-1}, QUEST(copy))
+ * {1,} -> ROP_PLUS, parsing
+ *
+ * {2,2} -> CAT(left, copy)
+ * {2,n} -> CAT({2,n-1}, QUEST(copy))
+ * {2,} -> CAT(left, PLUS(copy))
+ *
+ * {3,3} -> CAT({2,2}, copy)
+ * {3,n} -> CAT({3,n-1}, QUEST(copy))
+ * {3,} -> CAT({2,2}, PLUS(copy))
+ *
+ * {n,} -> CAT({n-1,n-1}, PLUS(copy))
+ *
+ * In all cases, the ROP_BRACE node is turned into the
+ * left-most ROP_CAT, and a copy of its original subtree
+ * is connected as the right child. Note that the bottom-
+ * up nature of this duplication guarantees that copy()
+ * never sees a ROP_BRACE node.
+ */
+ par = tp->parent;
+ lo = tp->right.info.num[0];
+ hi = tp->right.info.num[1];
+ if ((ptr = copy(ep, tp->left.ptr)) == 0)
+ return 0;
+ ptr->parent = tp;
+ tp->op = ROP_CAT;
+ tp->right.ptr = ptr;
+ if (lo == 0)
+ {
+ if ((tp->left.ptr = libuxre_reg1tree(ROP_QUEST, tp->left.ptr))
+ == 0)
+ return 0;
+ tp->left.ptr->parent = tp;
+ }
+ else
+ {
+ if (hi == BRACE_INF || (hi -= lo) == 0)
+ lo--; /* lo > 1; no extra needed */
+ while (--lo != 0)
+ {
+ if ((tp = libuxre_reg2tree(ROP_CAT, tp, copy(ep, ptr)))
+ == 0)
+ return 0;
+ }
+ }
+ if (hi == BRACE_INF)
+ {
+ if ((tp->right.ptr = libuxre_reg1tree(ROP_PLUS, tp->right.ptr))
+ == 0)
+ return 0;
+ tp->right.ptr->parent = tp;
+ }
+ else if (hi != 0)
+ {
+ if ((tp->right.ptr = libuxre_reg1tree(ROP_QUEST, tp->right.ptr))
+ == 0)
+ return 0;
+ ptr = tp->right.ptr;
+ ptr->parent = tp;
+ while (--hi != 0)
+ {
+ if ((tp = libuxre_reg2tree(ROP_CAT, tp, copy(ep, ptr)))
+ == 0)
+ return 0;
+ }
+ }
+ tp->parent = par;
+ return tp;
+}
+
+ /*
+ * Postorder traversal, but not always entire subtree.
+ * For each leaf reachable by the empty string, add it
+ * to the set. Return 0 if the subtree can match empty.
+ */
+static int
+first(Dfa *dp, Tree *tp)
+{
+ switch (tp->op)
+ {
+ case ROP_BOL:
+ if (dp->flags & REG_NOTBOL)
+ return 0;
+ break;
+ case ROP_EOL:
+ if (dp->flags & REG_NOTEOL)
+ return 0;
+ break;
+ case ROP_EMPTY:
+ return 0;
+ case ROP_OR:
+ return first(dp, tp->left.ptr) & first(dp, tp->right.ptr);
+ case ROP_CAT:
+ if (first(dp, tp->left.ptr) != 0)
+ return 1;
+ return first(dp, tp->right.ptr);
+ case ROP_BRACE:
+ if (tp->right.info.num[0] != 0 && first(dp, tp->left.ptr) != 0)
+ return 1;
+ /*FALLTHROUGH*/
+ case ROP_STAR:
+ case ROP_QUEST:
+ first(dp, tp->left.ptr);
+ return 0;
+ case ROP_LP:
+ case ROP_PLUS:
+ return first(dp, tp->left.ptr);
+ }
+ if (dp->posset[tp->left.pos] == 0)
+ {
+ dp->posset[tp->left.pos] = 1;
+ dp->nset++;
+ }
+ return 1;
+}
+
+ /*
+ * Walk from leaf up (most likely not to root).
+ * Determine follow set for the leaf by filling
+ * set[] with the positions reachable.
+ */
+static void
+follow(Dfa *dp, Tree *tp)
+{
+ Tree *pp;
+
+ switch ((pp = tp->parent)->op)
+ {
+ case ROP_CAT:
+ if (pp->left.ptr == tp && first(dp, pp->right.ptr) != 0)
+ break;
+ /*FALLTHROUGH*/
+ case ROP_OR:
+ case ROP_QUEST:
+ case ROP_LP:
+ follow(dp, pp);
+ break;
+ case ROP_STAR:
+ case ROP_PLUS:
+ case ROP_BRACE:
+ first(dp, tp);
+ follow(dp, pp);
+ break;
+ }
+}
+
+ /*
+ * Postorder traversal.
+ * At each leaf, copy it into posn[] and assign its follow set.
+ * Because the left-most subtree is ROP_ALL under ROP_STAR, the
+ * follow set for its leaf (position dp->nposn-1) is the same
+ * as the initial state's signature (prior to any ROP_BOL).
+ */
+static int
+posnfoll(Dfa *dp, Tree *tp)
+{
+ unsigned char *s;
+ size_t i, n;
+ size_t *fp;
+ Posn *p;
+ int ret;
+
+ switch (tp->op)
+ {
+ case ROP_OR:
+ case ROP_CAT:
+ if ((ret = posnfoll(dp, tp->right.ptr)) != 0)
+ return ret;
+ /*FALLTHROUGH*/
+ case ROP_STAR:
+ case ROP_PLUS:
+ case ROP_QUEST:
+ case ROP_LP:
+ if ((ret = posnfoll(dp, tp->left.ptr)) != 0)
+ return ret;
+ return 0;
+ case ROP_END: /* keeps follow() from walking above the root */
+ p = &dp->posn[tp->left.pos];
+ p->op = tp->op;
+ p->seti = 0;
+ p->nset = 0;
+ return 0;
+ case ROP_BKT:
+ case ROP_BKTCOPY:
+ p = &dp->posn[tp->left.pos];
+ p->bkt = tp->right.info.bkt;
+ goto skip;
+ case ROP_BOL:
+ dp->flags |= REG_NOTBOL; /* adjacent ROP_BOLs match empty */
+ break;
+ case ROP_EOL:
+ dp->flags |= REG_NOTEOL; /* adjacent ROP_EOLs match empty */
+ break;
+ }
+ p = &dp->posn[tp->left.pos];
+skip:;
+ p->op = tp->op;
+ memset(dp->posset, 0, dp->nposn);
+ dp->nset = 0;
+ follow(dp, tp);
+ dp->flags &= ~(REG_NOTBOL | REG_NOTEOL);
+ fp = dp->posfoll;
+ if ((p->nset = dp->nset) > dp->avail) /* need more */
+ {
+ if ((n = p->nset << 1) < dp->nposn)
+ n = dp->nposn;
+ dp->avail += n;
+ if ((fp = realloc(dp->posfoll,
+ sizeof(size_t) * (dp->avail + dp->used))) == 0)
+ {
+ return REG_ESPACE;
+ }
+ dp->posfoll = fp;
+ }
+ p->seti = dp->used;
+ if ((i = dp->nset) != 0)
+ {
+ dp->used += i;
+ dp->avail -= i;
+ fp += p->seti;
+ s = dp->posset;
+ n = 0;
+ do
+ {
+ if (*s++ != 0)
+ {
+ *fp++ = n;
+ if (--i == 0)
+ break;
+ }
+ } while (++n != dp->nposn);
+ }
+ return 0;
+}
+
+static int
+addstate(Dfa *dp) /* install state if unique; return its index */
+{
+ size_t *sp, *fp;
+ size_t t, n, i;
+ int flushed;
+
+ /*
+ * Compare dp->nset/dp->cursig[] against remembered states.
+ */
+ t = dp->top;
+ do
+ {
+ if (dp->nsig[--t] != dp->nset)
+ continue;
+ if ((n = dp->nset) != 0)
+ {
+ fp = &dp->sigfoll[dp->sigi[t]];
+ sp = &dp->cursig[0];
+ loop:;
+ if (*fp++ != *sp++)
+ continue; /* to the do-while */
+ if (--n != 0)
+ goto loop;
+ }
+ return t + 1;
+ } while (t != 0);
+ /*
+ * Not in currently cached states; add it.
+ */
+ flushed = 0;
+ if ((t = dp->top) >= CACHESZ) /* need to flush the cache */
+ {
+ flushed = 1;
+ n = dp->anybol;
+ n = dp->sigi[n] + dp->nsig[n]; /* past invariant states */
+ dp->avail += dp->used - n;
+ dp->used = n;
+ dp->top = n = dp->nfix;
+ memset((void *)&dp->trans, 0, sizeof(dp->trans));
+ memset((void *)&dp->acc[n], 0, CACHESZ - n);
+ t = n;
+ }
+ dp->top++;
+ fp = dp->sigfoll;
+ if ((n = dp->nset) > dp->avail) /* grow strip */
+ {
+ i = dp->avail + n << 1;
+ if ((fp = realloc(fp, sizeof(size_t) * (i + dp->used))) == 0)
+ return 0;
+ dp->avail = i;
+ dp->sigfoll = fp;
+ }
+ dp->acc[t] = 0;
+ if ((dp->nsig[t] = n) != 0)
+ {
+ sp = dp->cursig;
+ if (sp[0] == 0)
+ dp->acc[t] = 1;
+ dp->sigi[t] = i = dp->used;
+ dp->used += n;
+ dp->avail -= n;
+ fp += i;
+ do
+ *fp++ = *sp++;
+ while (--n != 0);
+ }
+ t++;
+ if (flushed)
+ return -t;
+ return t;
+}
+
+void
+libuxre_regdeldfa(Dfa *dp)
+{
+ Posn *pp;
+ size_t np;
+
+ if (dp->posfoll != 0)
+ free(dp->posfoll);
+ if (dp->sigfoll != 0)
+ free(dp->sigfoll);
+ if (dp->cursig != 0)
+ free(dp->cursig);
+ if ((pp = dp->posn) != 0)
+ {
+ /*
+ * Need to walk the positions list to free any
+ * space used for ROP_BKTs.
+ */
+ np = dp->nposn;
+ do
+ {
+ if (pp->op == ROP_BKT)
+ {
+ libuxre_bktfree(pp->bkt);
+ free(pp->bkt);
+ }
+ } while (++pp, --np != 0);
+ free(dp->posn);
+ }
+ free(dp);
+}
+
+int
+regtrans(Dfa *dp, int st, w_type wc, int mb_cur_max)
+{
+ const unsigned char *s;
+ size_t *fp, *sp;
+ size_t i, n;
+ Posn *pp;
+ int nst;
+
+ if ((n = dp->nsig[st]) == 0) /* dead state */
+ return st + 1; /* stay here */
+ memset(dp->posset, 0, dp->nposn);
+ dp->nset = 0;
+ fp = &dp->sigfoll[dp->sigi[st]];
+ do
+ {
+ pp = &dp->posn[*fp];
+ switch (pp->op)
+ {
+ case ROP_EOL:
+ if (wc == '\0' && (dp->flags & REG_NOTEOL) == 0)
+ break;
+ /*FALLTHROUGH*/
+ case ROP_BOL:
+ default:
+ if (pp->op == wc)
+ break;
+ /*FALLTHROUGH*/
+ case ROP_END:
+ case ROP_NONE:
+ continue;
+ case ROP_NOTNL:
+ if (wc == '\n')
+ continue;
+ /*FALLTHROUGH*/
+ case ROP_ANYCH:
+ if (wc <= '\0')
+ continue;
+ break;
+ case ROP_ALL:
+ if (wc == '\0')
+ continue;
+ break;
+ case ROP_BKT:
+ case ROP_BKTCOPY:
+ /*
+ * Note that multiple character bracket matches
+ * are precluded from DFAs. (See regparse.c and
+ * regcomp.c.) Thus, the continuation string
+ * argument is not used in libuxre_bktmbexec().
+ */
+ if (wc > '\0' &&
+ libuxre_bktmbexec(pp->bkt, wc, 0, mb_cur_max) == 0)
+ break;
+ continue;
+ }
+ /*
+ * Current character matches this position.
+ * For each position in its follow list,
+ * add that position to the new state's signature.
+ */
+ i = pp->nset;
+ sp = &dp->posfoll[pp->seti];
+ do
+ {
+ if (dp->posset[*sp] == 0)
+ {
+ dp->posset[*sp] = 1;
+ dp->nset++;
+ }
+ } while (++sp, --i != 0);
+ } while (++fp, --n != 0);
+ /*
+ * Move the signature (if any) into cursig[] and install it.
+ */
+ if ((i = dp->nset) != 0)
+ {
+ fp = dp->cursig;
+ s = dp->posset;
+ for (n = 0;; n++)
+ {
+ if (*s++ != 0)
+ {
+ *fp++ = n;
+ if (--i == 0)
+ break;
+ }
+ }
+ }
+ if ((nst = addstate(dp)) < 0) /* flushed cache */
+ nst = -nst;
+ else if (nst > 0 && (wc & ~(long)(NCHAR - 1)) == 0)
+ dp->trans[st][wc] = nst;
+ return nst;
+}
+
+LIBUXRE_STATIC int
+libuxre_regdfacomp(regex_t *ep, Tree *tp, Lex *lxp)
+{
+ Tree *lp;
+ Dfa *dp;
+ Posn *p;
+ int st;
+
+ /*
+ * It's convenient to insert an STAR(ALL) subtree to the
+ * immediate left of the current tree. This makes the
+ * "any match" libuxre_regdfaexec() not a special case,
+ * and the initial state signature will fall out when
+ * building the follow sets for all the leaves.
+ */
+ if ((lp = libuxre_reg1tree(ROP_ALL, 0)) == 0
+ || (lp = libuxre_reg1tree(ROP_STAR, lp)) == 0
+ || (tp->left.ptr = lp
+ = libuxre_reg2tree(ROP_CAT, lp, tp->left.ptr)) == 0)
+ {
+ return REG_ESPACE;
+ }
+ lp->parent = tp;
+ if ((dp = calloc(1, sizeof(Dfa))) == 0)
+ return REG_ESPACE;
+ ep->re_dfa = dp;
+ /*
+ * Just in case null pointers aren't just all bits zero...
+ */
+ dp->posfoll = 0;
+ dp->sigfoll = 0;
+ dp->cursig = 0;
+ dp->posn = 0;
+ /*
+ * Assign position values to each of the tree's leaves
+ * (the important parts), meanwhile potentially rewriting
+ * the parse tree so that it fits within the restrictions
+ * of our DFA.
+ */
+ if ((tp = findposn(ep, tp, lxp->mb_cur_max)) == 0)
+ goto err;
+ /*
+ * Get space for the array of positions and current set,
+ * now that the number of positions is known.
+ */
+ if ((dp->posn = malloc(sizeof(Posn) * dp->nposn + dp->nposn)) == 0)
+ goto err;
+ dp->posset = (unsigned char *)&dp->posn[dp->nposn];
+ /*
+ * Get follow sets for each position.
+ */
+ if (posnfoll(dp, tp) != 0)
+ goto err;
+ /*
+ * Set up the special invariant states:
+ * - dead state (no valid transitions); index 0.
+ * - initial state for any match [STAR(ALL) follow set]; index 1.
+ * - initial state for any match after ROP_BOL.
+ * - initial state for left-most longest if REG_NOTBOL.
+ * - initial state for left-most longest after ROP_BOL.
+ * The final two are not allocated if leftmost() cannot be called.
+ * The pairs of initial states are the same if there is no
+ * explicit ROP_BOL transition.
+ */
+ dp->avail += dp->used;
+ dp->used = 0;
+ if ((dp->sigfoll = malloc(sizeof(size_t) * dp->avail)) == 0)
+ goto err;
+ p = &dp->posn[dp->nposn - 1]; /* same as first(root) */
+ dp->cursig = &dp->posfoll[p->seti];
+ dp->nset = p->nset;
+ dp->top = 1; /* index 0 is dead state */
+ addstate(dp); /* must be state index 1 (returns 2) */
+ if ((dp->cursig = malloc(sizeof(size_t) * dp->nposn)) == 0)
+ goto err;
+ dp->nfix = 2;
+ if ((st = regtrans(dp, 1, ROP_BOL, lxp->mb_cur_max)) == 0)
+ goto err;
+ if ((dp->anybol = st - 1) == 2) /* new state */
+ dp->nfix = 3;
+ if ((ep->re_flags & REG_NOSUB) == 0) /* leftmost() might be called */
+ {
+ /*
+ * leftmost() initial states are the same as the
+ * "any match" ones without the STAR(ALL) position.
+ */
+ dp->sigi[dp->nfix] = 0;
+ dp->nsig[dp->nfix] = dp->nsig[1] - 1;
+ dp->acc[dp->nfix] = dp->acc[1];
+ dp->leftbol = dp->leftmost = dp->nfix;
+ dp->nfix++;
+ if (dp->anybol != 1) /* distinct state w/BOL */
+ {
+ dp->sigi[dp->nfix] = dp->sigi[2];
+ dp->nsig[dp->nfix] = dp->nsig[2] - 1;
+ dp->acc[dp->nfix] = dp->acc[2];
+ dp->leftbol = dp->nfix;
+ dp->nfix++;
+ }
+ dp->top = dp->nfix;
+ }
+ return 0;
+err:;
+ libuxre_regdeldfa(dp);
+ return REG_ESPACE;
+}
+
+static int
+leftmost(Dfa *dp, Exec *xp)
+{
+ const unsigned char *s, *beg, *end;
+ int i, nst, st, mb_cur_max;
+ w_type wc;
+
+ mb_cur_max = xp->mb_cur_max;
+ beg = s = xp->str;
+ end = 0;
+ st = dp->leftbol;
+ if (xp->flags & REG_NOTBOL)
+ st = dp->leftmost;
+ if (dp->acc[st] && (xp->flags & REG_NONEMPTY) == 0)
+ end = s; /* initial empty match allowed */
+ for (;;)
+ {
+ if ((wc = *s++) == '\n')
+ {
+ if (xp->flags & REG_NEWLINE)
+ wc = ROP_EOL;
+ }
+ else if (!ISONEBYTE(wc) && (i = libuxre_mb2wc(&wc, s)) > 0)
+ s += i;
+ if ((wc & ~(long)(NCHAR - 1)) != 0
+ || (nst = dp->trans[st][wc]) == 0)
+ {
+ if ((nst=regtrans(dp, st, wc, mb_cur_max)) == 0)
+ return REG_ESPACE;
+ if (wc == ROP_EOL) /* REG_NEWLINE only */
+ {
+ if (dp->acc[nst - 1])
+ {
+ if (end == 0 || end < s)
+ end = s;
+ break;
+ }
+ beg = s;
+ st = dp->leftbol;
+ goto newst;
+ }
+ }
+ if ((st = nst - 1) == 0) /* dead state */
+ {
+ if (end != 0)
+ break;
+ if ((wc = *beg++) == '\0')
+ return REG_NOMATCH;
+ else if (!ISONEBYTE(wc) &&
+ (i = libuxre_mb2wc(&wc, beg)) > 0)
+ beg += i;
+ s = beg;
+ st = dp->leftmost;
+ goto newst;
+ }
+ if (wc == '\0')
+ {
+ if (dp->acc[st])
+ {
+ s--; /* don't include \0 */
+ if (end == 0 || end < s)
+ end = s;
+ break;
+ }
+ if (end != 0)
+ break;
+ return REG_NOMATCH;
+ }
+ newst:;
+ if (dp->acc[st])
+ {
+ if (end == 0 || end < s)
+ end = s;
+ }
+ }
+ xp->match[0].rm_so = beg - xp->str;
+ xp->match[0].rm_eo = end - xp->str;
+ return 0;
+}
+
+/*
+* Optimization by simplification: singlebyte locale and REG_NEWLINE not set.
+* Performance gain for grep is 25% so it's worth the hack.
+*/
+static int
+regdfaexec_opt(Dfa *dp, Exec *xp)
+{
+ const unsigned char *s;
+ int nst, st;
+
+ s = xp->str;
+ st = dp->anybol;
+ if (xp->flags & REG_NOTBOL)
+ st = 1;
+ if (dp->acc[st] && (xp->flags & REG_NONEMPTY) == 0)
+ return 0; /* initial empty match allowed */
+ do
+ {
+ if ((nst = dp->trans[st][*s]) == 0)
+ {
+ if ((nst = regtrans(dp, st, *s, 1)) == 0)
+ return REG_ESPACE;
+ }
+ if (dp->acc[st = nst - 1])
+ return 0;
+ } while (*s++ != '\0'); /* st != 0 */
+ return REG_NOMATCH;
+}
+
+LIBUXRE_STATIC int
+libuxre_regdfaexec(Dfa *dp, Exec *xp)
+{
+ const unsigned char *s;
+ int i, nst, st, mb_cur_max;
+ w_type wc;
+
+ dp->flags = xp->flags & REG_NOTEOL; /* for regtrans() */
+ mb_cur_max = xp->mb_cur_max;
+ if (xp->nmatch != 0)
+ return leftmost(dp, xp);
+ if (mb_cur_max == 1 && (xp->flags & REG_NEWLINE) == 0)
+ return regdfaexec_opt(dp, xp);
+ s = xp->str;
+ st = dp->anybol;
+ if (xp->flags & REG_NOTBOL)
+ st = 1;
+ if (dp->acc[st] && (xp->flags & REG_NONEMPTY) == 0)
+ return 0; /* initial empty match allowed */
+ for (;;)
+ {
+ if ((wc = *s++) == '\n')
+ {
+ if (xp->flags & REG_NEWLINE)
+ wc = ROP_EOL;
+ }
+ else if (!ISONEBYTE(wc) && (i = libuxre_mb2wc(&wc, s)) > 0)
+ s += i;
+ if ((wc & ~(long)(NCHAR - 1)) != 0
+ || (nst = dp->trans[st][wc]) == 0)
+ {
+ if ((nst=regtrans(dp, st, wc, mb_cur_max)) == 0)
+ return REG_ESPACE;
+ if (wc == ROP_EOL) /* REG_NEWLINE only */
+ {
+ if (dp->acc[nst - 1])
+ return 0;
+ if (dp->acc[st = dp->anybol])
+ return 0;
+ continue;
+ }
+ }
+ if (dp->acc[st = nst - 1])
+ return 0;
+ if (wc == '\0') /* st == 0 */
+ return REG_NOMATCH;
+ }
+}