summaryrefslogtreecommitdiff
path: root/libuxre/regnfa.c
diff options
context:
space:
mode:
authorThomas Ulmer <thomasmulmer02@gmail.com>2026-02-23 16:54:28 -0800
committerThomas Ulmer <thomasmulmer02@gmail.com>2026-02-23 16:54:28 -0800
commit15bd7946cc838a3151c357e4b0bc1ab85eecda62 (patch)
tree56977cb9bfc4349f46e2c608503a298df30ca957 /libuxre/regnfa.c
add musl and vi
Diffstat (limited to 'libuxre/regnfa.c')
-rw-r--r--libuxre/regnfa.c1070
1 files changed, 1070 insertions, 0 deletions
diff --git a/libuxre/regnfa.c b/libuxre/regnfa.c
new file mode 100644
index 0000000..6953f1f
--- /dev/null
+++ b/libuxre/regnfa.c
@@ -0,0 +1,1070 @@
+/*
+ * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002.
+ *
+ * Sccsid @(#)regnfa.c 1.8 (gritter) 2/6/05
+ */
+/* UNIX(R) Regular Expresssion Library
+ *
+ * Note: Code is released under the GNU LGPL
+ *
+ * Copyright (C) 2001 Caldera International, Inc.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to:
+ * Free Software Foundation, Inc.
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/* #include "synonyms.h" */
+#include <string.h>
+#include <stdlib.h>
+#include "re.h"
+#include <stddef.h>
+#include <ctype.h>
+
+typedef unsigned char Uchar;
+typedef unsigned short Ushort;
+
+/*
+* Nondeterministic Finite Automata.
+*/
+typedef struct t_graph Graph;
+struct t_graph
+{
+ union
+ {
+ Graph *ptr;
+ Info info;
+ } alt;
+ Graph *next;
+ w_type op;
+};
+
+typedef struct t_stack Stack;
+struct t_stack
+{
+ Stack *link; /* simplifies cleanup */
+ Stack *prev; /* covered states */
+ Graph *wasgp; /* node associated with this state */
+ const Uchar *str; /* saved position in the string */
+ Ushort cnt; /* ROP_BRACE: traversal count */
+};
+
+ /*
+ * A Context holds all the information needed for each
+ * potential path through the NFA graph.
+ */
+typedef struct t_ctxt Context;
+struct t_ctxt
+{
+ Context *link; /* simplifies cleanup */
+ Context *next; /* singly linked */
+ Stack *sp; /* nested counts */
+ Graph *gp; /* starting node */
+ Graph *wasgp; /* node associated with this state */
+ const Uchar *str; /* saved position in the string */
+ Ushort cnt; /* ROP_BRACE: traversal count */
+ size_t nset; /* length of rm[] that is currently set */
+ regmatch_t rm[1]; /* enough to cover re_nsub+1 (np->rmlen) */
+};
+
+struct re_nfa_ /*Nfa*/
+{
+ Graph *gp; /* entire NFA */
+ Stack *sp; /* unused Stacks */
+ Stack *allsp; /* linked Stacks (for cleanup) */
+ Context *allcp; /* linked Contexts (for cleanup) */
+ Context *cur; /* Contexts to be continued now */
+ Context *step; /* Contexts waiting for a step of the NFA */
+ Context *avail; /* unused Contexts */
+ Context **ecur; /* ends cur list of Contexts */
+ Context **estp; /* ends step list of Contexts */
+ size_t rmlen; /* length of rm[] in each Context */
+ size_t rmmin; /* minimum length needed */
+ size_t used; /* length used for this libuxre_regnfaexec() */
+ w_type beg; /* nonzero for fixed char initial node NFAs */
+};
+
+#define ROP_MTOR ROP_CAT /* ROP_OR, except might be empty loop */
+
+ /*
+ * Depth first traversal.
+ * Make a singly linked list (in alt.ptr) of the graph's nodes.
+ * Must toss any ROP_BKTs, too, since "alt" is overwritten.
+ */
+static void
+deltolist(Graph *gp, Graph **list)
+{
+ Graph *ptr;
+
+ if ((ptr = gp->next) != 0) /* first time */
+ {
+ gp->next = 0;
+ if (gp->op == ROP_OR || gp->op == ROP_MTOR)
+ deltolist(gp->alt.ptr, list);
+ deltolist(ptr, list);
+ if (gp->op == ROP_BKT)
+ {
+ libuxre_bktfree(gp->alt.info.bkt);
+ free(gp->alt.info.bkt);
+ }
+ }
+ else if (gp->op == ROP_END)
+ gp->op = ROP_NOP;
+ else
+ return;
+ gp->alt.ptr = *list;
+ *list = gp;
+}
+
+ /*
+ * After the list is turned into a linked list,
+ * walk that list freeing the nodes.
+ */
+static void
+delgraph(Graph *gp)
+{
+ Graph *gp2, end;
+
+ gp2 = &end;
+ deltolist(gp, &gp2);
+ while ((gp = gp2) != &end)
+ {
+ gp2 = gp->alt.ptr;
+ free(gp);
+ }
+}
+
+ /*
+ * Depth first traversal.
+ * Look for ROP_NOPs and prune them from the graph.
+ * Chain them all together on *nop's list.
+ */
+static Graph *
+nopskip(Graph *gp, Graph **nop)
+{
+ Graph *ptr;
+
+ if ((ptr = gp->next) != 0) /* might have yet to do this subgraph */
+ {
+ if (gp->op == ROP_NOP)
+ {
+ if (gp->alt.ptr != 0) /* touched */
+ return gp->next; /* already did it */
+ gp->alt.ptr = *nop;
+ *nop = gp;
+ }
+ gp->next = 0; /* this subgraph's pending */
+ if (gp->op == ROP_OR || gp->op == ROP_MTOR)
+ gp->alt.ptr = nopskip(gp->alt.ptr, nop);
+ gp->next = nopskip(ptr, nop);
+ if (gp->op == ROP_NOP)
+ return gp->next;
+ }
+ return gp;
+}
+
+ /*
+ * Postorder traversal of the parse tree.
+ * Build a graph using "Thompson's" algorithm.
+ * The only significant modification is the
+ * ROP_BRACE->ROP_MTOR construction.
+ * Returns 1 => graph might match empty
+ * 0 => graph cannot match empty
+ * -1 => error (in allocation)
+ */
+static int
+mkgraph(Tree *tp, Graph **first, Graph **last)
+{
+ Graph *new = 0, *nop, *lf, *ll, *rf, *rl;
+ int lmt, rmt = 0;
+
+ if (tp->op != ROP_CAT)
+ {
+ if ((new = malloc(sizeof(Graph))) == 0)
+ return 0;
+ new->op = tp->op; /* usually */
+ }
+ switch (tp->op)
+ {
+ case ROP_REF:
+ new->alt.info.sub = tp->right.info.sub;
+ *first = new;
+ *last = new;
+ return 1; /* safe--can't really tell */
+ case ROP_BKT:
+ tp->op = ROP_BKTCOPY; /* now graph owns clean up */
+ /*FALLTHROUGH*/
+ case ROP_BKTCOPY:
+ new->alt.info.bkt = tp->right.info.bkt;
+ /*FALLTHROUGH*/
+ default:
+ *first = new;
+ *last = new;
+ return 0;
+ case ROP_EMPTY:
+ new->op = ROP_NOP;
+ new->alt.ptr = 0; /* untouched */
+ *first = new;
+ *last = new;
+ return 1;
+ case ROP_OR:
+ case ROP_CAT:
+ lf = 0; /* in case of error */
+ if ((rmt = mkgraph(tp->right.ptr, &rf, &rl)) < 0)
+ goto err;
+ /*FALLTHROUGH*/
+ case ROP_STAR:
+ case ROP_PLUS:
+ case ROP_QUEST:
+ case ROP_BRACE:
+ case ROP_LP:
+ if ((lmt = mkgraph(tp->left.ptr, &lf, &ll)) < 0)
+ goto err;
+ break;
+ }
+ /*
+ * Note that ROP_NOP only serves as the node that reconnects
+ * the two choices of an incoming ROP_OR or ROP_QUEST. To
+ * prevent rewalking portions of the graph in nopskip(),
+ * this code marks all ROP_NOP nodes as currently untouched.
+ */
+ switch (tp->op)
+ {
+ case ROP_OR:
+ if ((nop = malloc(sizeof(Graph))) == 0)
+ goto err;
+ nop->op = ROP_NOP;
+ nop->alt.ptr = 0; /* untouched */
+ ll->next = nop;
+ rl->next = nop;
+ new->next = lf;
+ new->alt.ptr = rf;
+ *first = new;
+ *last = nop;
+ return lmt | rmt;
+ case ROP_CAT: /* no "new" */
+ ll->next = rf;
+ *first = lf;
+ *last = rl;
+ return lmt & rmt;
+ case ROP_QUEST:
+ if ((nop = malloc(sizeof(Graph))) == 0)
+ goto err;
+ nop->op = ROP_NOP;
+ nop->alt.ptr = 0; /* untouched */
+ new->op = ROP_OR;
+ new->next = lf;
+ new->alt.ptr = nop;
+ ll->next = nop;
+ *first = new;
+ *last = nop;
+ return 1;
+ case ROP_STAR:
+ *first = new;
+ rmt = 1;
+ star:;
+ new->op = lmt ? ROP_MTOR : ROP_OR;
+ new->alt.ptr = lf;
+ ll->next = new;
+ *last = new;
+ return rmt;
+ case ROP_PLUS:
+ *first = lf;
+ rmt = lmt;
+ goto star;
+ case ROP_BRACE:
+ if ((nop = malloc(sizeof(Graph))) == 0)
+ goto err;
+ nop->op = ROP_MTOR; /* going to save state anyway... */
+ nop->alt.ptr = lf;
+ ll->next = new;
+ new->next = nop;
+ new->alt.info.num[1] = tp->right.info.num[1];
+ if ((new->alt.info.num[0] = tp->right.info.num[0]) == 0)
+ {
+ lmt = 1;
+ *first = new;
+ }
+ else
+ {
+ new->alt.info.num[0]--; /* already done 1 */
+ if (new->alt.info.num[1] != BRACE_INF)
+ new->alt.info.num[1]--; /* likewise */
+ *first = lf;
+ }
+ *last = nop;
+ return lmt;
+ case ROP_LP:
+ if ((nop = malloc(sizeof(Graph))) == 0)
+ goto err;
+ nop->op = ROP_RP;
+ nop->alt.info.sub = tp->right.info.sub;
+ new->alt.info.sub = tp->right.info.sub;
+ new->next = lf;
+ ll->next = nop;
+ *first = new;
+ *last = nop;
+ return lmt;
+ }
+err:;
+ if (KIND_ROP(tp->op) == BINARY_ROP && rf != 0)
+ delgraph(rf);
+ if (lf != 0)
+ delgraph(lf);
+ if (tp->op != ROP_CAT)
+ free(new);
+ return -1;
+}
+
+ /*
+ * Semi-preorder traversal.
+ * Return zero if there's no simple first character
+ * (including the operation ROP_BOL) that must always
+ * be at the start of a matching string.
+ * This code doesn't attempt to get an answer if the
+ * first of the tree many be empty.
+ */
+static w_type
+firstop(Tree *tp)
+{
+ w_type op;
+
+ switch (tp->op)
+ {
+ case ROP_OR:
+ if ((op = firstop(tp->left.ptr)) == 0
+ || op != firstop(tp->right.ptr))
+ {
+ return 0;
+ }
+ return op;
+ case ROP_BRACE:
+ if (tp->right.info.num[0] == 0)
+ return 0;
+ /*FALLTHROUGH*/
+ case ROP_CAT:
+ case ROP_PLUS:
+ case ROP_LP:
+ return firstop(tp->left.ptr);
+ default:
+ if (tp->op < 0)
+ return 0;
+ /*FALLTHROUGH*/
+ case ROP_BOL:
+ return tp->op;
+ }
+}
+
+void
+libuxre_regdelnfa(Nfa *np)
+{
+ Context *cp, *cpn;
+ Stack *sp, *spn;
+
+ if (np->gp != 0)
+ delgraph(np->gp);
+ for (cp = np->allcp; cp != 0; cp = cpn)
+ {
+ cpn = cp->link;
+ free(cp);
+ }
+ for (sp = np->allsp; sp != 0; sp = spn)
+ {
+ spn = sp->link;
+ free(sp);
+ }
+ free(np);
+}
+
+LIBUXRE_STATIC int
+libuxre_regnfacomp(regex_t *ep, Tree *tp, Lex *lxp)
+{
+ Graph *gp, end;
+ Nfa *np;
+
+ if ((np = malloc(sizeof(Nfa))) == 0)
+ goto err;
+ np->gp = 0; /* in case of error */
+ if (mkgraph(tp, &np->gp, &gp) < 0)
+ goto err;
+ gp->next = 0; /* nothing follows ROP_END */
+ np->rmlen = 0;
+ if ((ep->re_flags & REG_NOSUB) == 0)
+ np->rmlen = ep->re_nsub + 1;
+ np->rmmin = 0;
+ if (lxp->maxref != 0 && (np->rmmin = lxp->maxref + 1) > np->rmlen)
+ np->rmlen = np->rmmin;
+ /*
+ * Delete all ROP_NOPs from the graph.
+ * nopskip() disconnects them from the graph and
+ * links them together through their alt.ptr's.
+ */
+ gp = &end;
+ np->gp = nopskip(np->gp, &gp);
+ while (gp != &end)
+ {
+ Graph *gp2 = gp;
+
+ gp = gp->alt.ptr;
+ free(gp2);
+ }
+ np->sp = 0;
+ np->allsp = 0;
+ np->avail = 0;
+ np->allcp = 0;
+ ep->re_nfa = np;
+ np->beg = firstop(tp);
+ return 0;
+err:;
+ if (np != 0)
+ {
+ if (np->gp != 0)
+ delgraph(np->gp);
+ free(np);
+ }
+ return REG_ESPACE;
+}
+
+static Stack *
+newstck(Nfa *np)
+{
+ Stack *sp, **spp;
+ int i;
+
+ if ((sp = np->sp) == 0) /* get more */
+ {
+ spp = &np->sp;
+ i = 4;
+ while ((sp = malloc(sizeof(Stack))) != 0)
+ {
+ sp->link = np->allsp;
+ np->allsp = sp;
+ *spp = sp;
+ spp = &sp->prev;
+ if (--i == 0)
+ break;
+ }
+ *spp = 0;
+ if ((sp = np->sp) == 0) /* first malloc failed */
+ return 0;
+ }
+ np->sp = sp->prev;
+ return sp;
+}
+
+static int
+mkstck(Nfa *np, Context *cp, Graph *gp)
+{
+ Stack *new, *sp;
+
+ if (gp == 0) /* copy existing stack tail */
+ {
+ /*
+ * Hoist up top of stack.
+ */
+ new = cp->sp;
+ cp->wasgp = new->wasgp;
+ cp->str = new->str;
+ cp->cnt = new->cnt;
+ cp->sp = new->prev;
+ if ((sp = new->prev) == 0) /* only one below */
+ {
+ new->prev = np->sp;
+ np->sp = new;
+ cp->sp = 0;
+ return 0;
+ }
+ for (;;) /* copy the rest; reusing the old top */
+ {
+ new->wasgp = sp->wasgp;
+ new->str = sp->str;
+ new->cnt = sp->cnt;
+ if ((new->prev = sp->prev) == 0)
+ break;
+ if ((new->prev = newstck(np)) == 0)
+ return REG_ESPACE;
+ new = new->prev;
+ sp = sp->prev;
+ }
+ return 0;
+ }
+ if (cp->wasgp != 0) /* push current down */
+ {
+ if ((new = newstck(np)) == 0)
+ return REG_ESPACE;
+ new->prev = cp->sp;
+ cp->sp = new;
+ new->wasgp = cp->wasgp;
+ new->str = cp->str;
+ new->cnt = cp->cnt;
+ }
+ cp->wasgp = gp;
+ cp->str = 0;
+ cp->cnt = 0;
+ return 0;
+}
+
+ /*
+ * Allocate a new Context (from np->avail)
+ * and add it to the end of the current list.
+ */
+static int
+newctxt(Nfa *np, Context *cp, Graph *gp)
+{
+ Context *new;
+ size_t n;
+
+ if ((new = np->avail) == 0) /* need more */
+ {
+ Context *ncp, **cpp;
+ int i;
+
+ /*
+ * Can't easily allocate Contexts in one call because
+ * the alignments (given the varying length of rm[])
+ * are potentially nontrivial.
+ */
+ n = offsetof(Context, rm) + np->rmlen * sizeof(regmatch_t);
+ i = 4;
+ cpp = &np->avail;
+ while ((ncp = malloc(n)) != 0)
+ {
+ ncp->link = np->allcp;
+ np->allcp = ncp;
+ *cpp = ncp;
+ cpp = &ncp->next;
+ if (--i == 0)
+ break;
+ }
+ *cpp = 0;
+ if ((new = np->avail) == 0) /* first malloc failed */
+ return REG_ESPACE;
+ }
+ np->avail = new->next;
+ new->next = 0;
+ new->gp = gp;
+ new->sp = 0;
+ new->wasgp = 0;
+ new->nset = 0;
+ if (cp != 0) /* copy existing context information */
+ {
+ if (cp->sp != 0) /* copy tail of stack */
+ {
+ new->sp = cp->sp;
+ if (mkstck(np, new, 0) != 0)
+ return REG_ESPACE;
+ }
+ new->wasgp = cp->wasgp;
+ new->str = cp->str;
+ new->cnt = cp->cnt;
+ /*
+ * Copy any valid subexpression match information
+ * from the existing context.
+ */
+ if (np->used != 0 && (n = cp->nset) != 0)
+ {
+ regmatch_t *rmn = new->rm, *rmo = cp->rm;
+
+ new->nset = n;
+ for (;; ++rmn, ++rmo)
+ {
+ rmn->rm_so = rmo->rm_so;
+ rmn->rm_eo = rmo->rm_eo;
+ if (--n == 0)
+ break;
+ }
+ }
+ }
+ /*
+ * Append it to the end of the current Context list.
+ */
+ *np->ecur = new;
+ np->ecur = &new->next;
+ return 0;
+}
+
+ /*
+ * Compare two byte string sequences for equality.
+ * If REG_ICASE, walk through the strings doing
+ * caseless comparisons of the wide characters.
+ */
+static int
+casecmp(const Uchar *s, Exec *xp, ssize_t i, ssize_t n, int mb_cur_max)
+{
+ const Uchar *p = &xp->str[i];
+ const Uchar *end;
+ w_type wc1, wc2;
+ int k;
+
+ if (strncmp((char *)s, (char *)p, n) == 0) /* try for exact match */
+ return 1;
+ if ((xp->flags & REG_ICASE) == 0)
+ return 0;
+ /*
+ * Walk through each testing for a match, ignoring case,
+ * of the resulting wide characters.
+ * Note that only "s" can run out of characters.
+ */
+ end = &p[n];
+ do
+ {
+ if ((wc1 = *s++) == '\0')
+ return 0;
+ if (!ISONEBYTE(wc1) && (k = libuxre_mb2wc(&wc1, s)) > 0)
+ s += k;
+ if (!ISONEBYTE(wc2 = *p++) && (k = libuxre_mb2wc(&wc2, p)) > 0)
+ p += k;
+ if (wc1 != wc2)
+ {
+ wc1 = to_lower(wc1);
+ wc2 = to_lower(wc2);
+ if (wc1 != wc2)
+ return 0;
+ }
+ } while (p < end);
+ return 1;
+}
+
+LIBUXRE_STATIC int
+libuxre_regnfaexec(Nfa *np, Exec *xp)
+{
+ const Uchar *s, *s1, *s2;
+ Context *cp, *cpn;
+ Graph *gp, *brace;
+ Stack *sp, *spn;
+ ssize_t rmso, len;
+ int i, ret, mb_cur_max;
+ w_type wc;
+ size_t n;
+
+ ret = 0; /* assume it matches */
+ rmso = -1; /* but no match yet */
+ np->cur = 0;
+ np->step = 0;
+ np->ecur = &np->cur;
+ np->estp = &np->step;
+ if ((np->used = xp->nmatch) < np->rmmin)
+ np->used = np->rmmin;
+ s1 = 0; /* one char back */
+ s = xp->str; /* current high water in string */
+ mb_cur_max = xp->mb_cur_max;
+ for (;;)
+ {
+ /*
+ * Get next character from string.
+ * If the engine proper hasn't started and the engine
+ * requires a particular character to start and this
+ * character isn't it, try the next one.
+ */
+ for (;;)
+ {
+ s2 = s1;
+ s1 = s;
+ if (!ISONEBYTE(wc = *s++) &&
+ (i = libuxre_mb2wc(&wc, s)) > 0)
+ s += i;
+ if (np->cur != 0 || np->beg == wc || np->beg == 0)
+ break;
+ if (np->beg == ROP_BOL)
+ {
+ if (s2 == 0 && (xp->flags & REG_NOTBOL) == 0)
+ break;
+ if ((xp->flags & REG_NEWLINE) == 0)
+ goto nomatch;
+ if (s2 != 0 && *s2 == '\n')
+ break;
+ }
+ if (wc == '\0')
+ goto nomatch;
+ }
+ /*
+ * Start the engine by inserting a fresh initial context
+ * if there's no known match as yet. (Once some match
+ * has been found, the end is near.)
+ */
+ if (rmso < 0 && newctxt(np, 0, np->gp) != 0)
+ goto err;
+ /*
+ * Walk the current Contexts list, trying each.
+ * "loop" is when a new Context is to be tried,
+ * "again" is when the same Context continues,
+ * but wc was not yet matched.
+ */
+ cp = np->cur;
+ loop:;
+ gp = cp->gp;
+ again:;
+ switch (gp->op)
+ {
+ case ROP_BRACE: /* gp->next->op == ROP_MTOR */
+ brace = gp;
+ gp = gp->next;
+ goto mtor;
+ case ROP_MTOR:
+ brace = 0;
+ mtor:;
+ if (cp->wasgp != gp) /* first time */
+ {
+ if (mkstck(np, cp, gp) != 0)
+ goto err;
+ }
+ else if (cp->str == s) /* spinning */
+ goto poptonext;
+ cp->str = s;
+ if (brace != 0)
+ {
+ if (cp->cnt >= brace->alt.info.num[1])
+ goto poptonext;
+ if (++cp->cnt <= brace->alt.info.num[0])
+ {
+ gp = gp->alt.ptr;
+ goto again;
+ }
+ if (cp->cnt > BRACE_MAX)
+ cp->cnt = BRACE_MAX;
+ }
+ if (newctxt(np, cp, gp->alt.ptr) != 0)
+ goto err;
+ poptonext:;
+ cp->wasgp = 0;
+ if ((sp = cp->sp) != 0) /* pop stack */
+ {
+ cp->sp = sp->prev;
+ cp->wasgp = sp->wasgp;
+ cp->str = sp->str;
+ cp->cnt = sp->cnt;
+ sp->prev = np->sp;
+ np->sp = sp;
+ }
+ /*FALLTHROUGH*/
+ case ROP_EMPTY:
+ tonext:;
+ gp = gp->next;
+ goto again;
+ case ROP_OR:
+ if (newctxt(np, cp, gp->alt.ptr) != 0)
+ goto err;
+ goto tonext;
+ case ROP_LP:
+ if ((n = gp->alt.info.sub) < np->used)
+ {
+ size_t k;
+
+ cp->rm[n].rm_so = s1 - xp->str;
+ cp->rm[n].rm_eo = -1;
+ /*
+ * Mark any skipped subexpressions as
+ * failing to participate in the match.
+ */
+ if ((k = cp->nset) < n)
+ {
+ regmatch_t *rmp = &cp->rm[k];
+
+ for (;; rmp++)
+ {
+ rmp->rm_so = -1;
+ rmp->rm_eo = -1;
+ if (++k >= n)
+ break;
+ }
+ }
+ cp->nset = n + 1;
+ }
+ goto tonext;
+ case ROP_RP:
+ if ((n = gp->alt.info.sub) < np->used)
+ cp->rm[n].rm_eo = s1 - xp->str;
+ goto tonext;
+ case ROP_BOL:
+ if (s2 == 0)
+ {
+ if (xp->flags & REG_NOTBOL)
+ goto failed;
+ }
+ else if ((xp->flags & REG_NEWLINE) == 0 || *s2 != '\n')
+ goto failed;
+ goto tonext;
+ case ROP_EOL:
+ if (wc == '\0')
+ {
+ if (xp->flags & REG_NOTEOL)
+ goto failed;
+ }
+ else if ((xp->flags & REG_NEWLINE) == 0 || wc != '\n')
+ goto failed;
+ goto tonext;
+ default: /* character match */
+ if (gp->op != wc)
+ {
+ if ((xp->flags & REG_ICASE) == 0
+ || gp->op != to_lower(wc))
+ {
+ goto failed;
+ }
+ }
+ nextwc:;
+ cp->gp = gp->next;
+ tostep:;
+ cpn = cp->next;
+ cp->next = 0;
+ *np->estp = cp;
+ np->estp = &cp->next;
+ if ((cp = cpn) == 0)
+ break;
+ goto loop;
+ case ROP_NOTNL:
+ if (wc == '\n')
+ goto failed;
+ /*FALLTHROUGH*/
+ case ROP_ANYCH:
+ if (wc > '\0')
+ goto nextwc;
+ /*FALLTHROUGH*/
+ case ROP_NONE:
+ failed:;
+ cpn = cp->next;
+ cp->next = np->avail;
+ np->avail = cp;
+ if ((cp = cpn) == 0)
+ break;
+ goto loop;
+ case ROP_LT:
+ if (s2 == 0)
+ {
+ if (xp->flags & REG_NOTBOL)
+ goto failed;
+ }
+ else
+ {
+ w_type pwc;
+
+ if (wc != '_' &&
+ !iswalnum(mb_cur_max == 1 ? btowc(wc) : wc))
+ goto failed;
+ if (!ISONEBYTE(pwc = *s2))
+ libuxre_mb2wc(&pwc, &s2[1]);
+ if (pwc == '_' ||
+ iswalnum(mb_cur_max== 1 ? btowc(pwc) : pwc))
+ goto failed;
+ }
+ goto tonext;
+ case ROP_GT:
+ if (wc == '_' ||
+ iswalnum(mb_cur_max == 1 ? btowc(wc) : wc))
+ goto failed;
+ goto tonext;
+ case ROP_BKT:
+ case ROP_BKTCOPY:
+ if (cp->wasgp == gp) /* rest of MCCE */
+ {
+ checkspin:;
+ if (s1 >= cp->str) /* got it all */
+ goto poptonext;
+ goto tostep;
+ }
+ if ((i = libuxre_bktmbexec(gp->alt.info.bkt, wc, s,
+ mb_cur_max)) < 0)
+ goto failed;
+ if ((n = i) == 0) /* only matched wc */
+ goto nextwc;
+ spin:;
+ if (mkstck(np, cp, gp) != 0)
+ goto err;
+ cp->gp = gp; /* stay here until reach past s+n */
+ cp->str = s + n;
+ goto tostep;
+ case ROP_REF:
+ if (cp->wasgp == gp) /* rest of matched string */
+ goto checkspin;
+ if ((n = gp->alt.info.sub) >= cp->nset)
+ goto failed;
+ if ((len = cp->rm[n].rm_eo) < 0)
+ goto failed;
+ if ((len -= n = cp->rm[n].rm_so) == 0)
+ goto tonext;
+ if (casecmp(s1, xp, n, len, mb_cur_max) == 0)
+ goto failed;
+ if ((n = s - s1) >= len)
+ goto nextwc;
+ n = len - n;
+ goto spin;
+ case ROP_END: /* success! */
+ if (xp->flags & REG_NONEMPTY)
+ {
+ if (s2 == 0)
+ goto failed;
+ }
+ if (xp->nmatch == 0)
+ goto match;
+ /*
+ * Mark any skipped subexpressions as failing to match.
+ */
+ if ((n = cp->nset) < xp->nmatch)
+ {
+ do
+ {
+ cp->rm[n].rm_so = -1;
+ cp->rm[n].rm_eo = -1;
+ } while (++n < xp->nmatch);
+ }
+ /*
+ * Note the left-most match that's longest.
+ */
+ n = cp->rm[0].rm_so;
+ if (rmso < 0 || n < rmso)
+ {
+ rmso = n;
+ record:;
+ memcpy(xp->match, cp->rm,
+ xp->nmatch * sizeof(regmatch_t));
+ goto failed;
+ }
+ if (rmso < n || xp->match[0].rm_eo > cp->rm[0].rm_eo)
+ goto failed;
+ if (xp->match[0].rm_eo < cp->rm[0].rm_eo)
+ goto record;
+#if 0 /* maximize the lengths of earlier LP...RPs */
+ /*
+ * If both are of the same length and start
+ * at the same point, choose the one with
+ * a "longest submatch from left to right"
+ * where an empty string wins over a nonmatch.
+ */
+ for (n = 1; n < xp->nmatch; n++)
+ {
+ ssize_t nlen;
+
+ /*
+ * First, go with the choice that has any
+ * match for subexpr n.
+ */
+ len = xp->match[n].rm_eo;
+ nlen = cp->rm[n].rm_eo;
+ if (nlen < 0)
+ {
+ if (len >= 0)
+ break;
+ }
+ else if (len < 0)
+ goto record;
+ /*
+ * Both have a match; go with the longer.
+ */
+ len -= xp->match[n].rm_so;
+ nlen -= cp->rm[n].rm_so;
+ if (nlen < len)
+ break;
+ if (nlen > len)
+ goto record;
+ }
+#else /* take LP and RP as "fence posts" and maximize earlier gaps */
+ /*
+ * If both are of the same length and start
+ * at the same point, choose the one with
+ * the larger earlier subpatterns, in which
+ * each rm_so and rm_eo serves as a separator.
+ */
+ for (n = 1; n < xp->nmatch; n++)
+ {
+ ssize_t nlen;
+ int use;
+
+ if (xp->flags & REG_AVOIDNULL) {
+ /*
+ * This is to to satisfy POSIX.1-2001
+ * XBD pp. 172-173 ll. 6127-6129, whose
+ * translation is "do not match null
+ * expressions if there is a choice".
+ * See also POSIX.2 interpretation #43
+ * in which the question was raised.
+ *
+ * The first subexpression of "\(x*\)*"
+ * must thus match the string "xxx".
+ */
+ use = cp->rm[n].rm_eo -
+ cp->rm[n].rm_so >=
+ xp->match[n].rm_eo -
+ xp->match[n].rm_so ||
+ xp->match[n].rm_so < 0;
+ } else
+ use = 1;
+ /*
+ * Choose the rightmost ROP_LP as that
+ * maximizes the gap from before.
+ */
+ len = xp->match[n].rm_so;
+ nlen = cp->rm[n].rm_so;
+ if (len < nlen && use)
+ goto record;
+ if (len > nlen)
+ break;
+ /*
+ * The ROP_LPs are at the same point:
+ * Choose the rightmost ROP_RP.
+ */
+ len = xp->match[n].rm_eo;
+ nlen = cp->rm[n].rm_eo;
+ if (len < nlen && use)
+ goto record;
+ if (len > nlen)
+ break;
+ }
+#endif
+ goto failed;
+ }
+ /*
+ * Finished the current Context list. If the input string
+ * has been entirely scanned, we're done. Otherwise, make
+ * the next step list current for the next character.
+ * If the next step list was empty and there's an existing
+ * match, that's the left-most longest.
+ */
+ if (wc == '\0')
+ {
+ if (rmso >= 0)
+ goto match;
+ goto nomatch;
+ }
+ np->ecur = np->estp;
+ if ((np->cur = np->step) == 0)
+ {
+ if (rmso >= 0)
+ goto match;
+ np->ecur = &np->cur; /* was pointing at step */
+ }
+ np->step = 0;
+ np->estp = &np->step;
+ }
+nomatch:;
+ ret = REG_NOMATCH;
+match:;
+ np->avail = 0;
+ for (cp = np->allcp; cp != 0; cp = cpn)
+ {
+ cpn = cp->link;
+ cp->next = np->avail;
+ np->avail = cp;
+ }
+ np->sp = 0;
+ for (sp = np->allsp; sp != 0; sp = spn)
+ {
+ spn = sp->link;
+ sp->prev = np->sp;
+ np->sp = sp;
+ }
+ return ret;
+err:;
+ ret = REG_ESPACE;
+ goto match;
+}