diff options
| author | Thomas Ulmer <thomasmulmer02@gmail.com> | 2026-02-23 16:54:28 -0800 |
|---|---|---|
| committer | Thomas Ulmer <thomasmulmer02@gmail.com> | 2026-02-23 16:54:28 -0800 |
| commit | 15bd7946cc838a3151c357e4b0bc1ab85eecda62 (patch) | |
| tree | 56977cb9bfc4349f46e2c608503a298df30ca957 /libuxre/_collelem.c | |
add musl and vi
Diffstat (limited to 'libuxre/_collelem.c')
| -rw-r--r-- | libuxre/_collelem.c | 119 |
1 files changed, 119 insertions, 0 deletions
diff --git a/libuxre/_collelem.c b/libuxre/_collelem.c new file mode 100644 index 0000000..c5dbb05 --- /dev/null +++ b/libuxre/_collelem.c @@ -0,0 +1,119 @@ +/* + * Changes by Gunnar Ritter, Freiburg i. Br., Germany, November 2002. + * + * Sccsid @(#)_collelem.c 1.4 (gritter) 10/18/03 + */ +/* UNIX(R) Regular Expresssion Library + * + * Note: Code is released under the GNU LGPL + * + * Copyright (C) 2001 Caldera International, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to: + * Free Software Foundation, Inc. + * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +/* #include "synonyms.h" */ +#include "colldata.h" +#include <stddef.h> + +#define CCE(p) ((const CollElem *)(p)) +#define CCM(p) ((const CollMult *)(p)) + +LIBUXRE_STATIC const CollElem * +libuxre_collelem(struct lc_collate *col, CollElem *spare, wchar_t wc) +{ + const char *tbl; + size_t hi, lo, cur; + const CollMult *cmp; + const CollElem *cep; + long diff; + int sz; + + /* + * ELEM_ENCODED is returned when the collation is entirely + * based on the encoded value of the character. + */ + if (col == 0 || col->flags & CHF_ENCODED + || (tbl = (const char *)col->maintbl) == 0) + { + return ELEM_ENCODED; + } + if ((wuchar_type)wc <= UCHAR_MAX) + { + indexed:; + cep = CCE(&tbl[(wuchar_type)wc * col->elemsize]); + if (cep->weight[0] == WGHT_SPECIAL) + return ELEM_BADCHAR; + return cep; + } + if (col->flags & CHF_INDEXED) + { + if ((wuchar_type)wc >= col->nmain) + return ELEM_BADCHAR; + goto indexed; + } + /* + * Binary search for a match. Could speed up the search if + * some interpolation was used, but keep it simple for now. + * Note that this is actually a table of CollMult's. + * + * To save space in the file, sequences of similar elements + * are sometimes compressed into a single CollMult that + * describes many entries. This is denoted by a subnbeg + * with the SUBN_SPECIAL bit set. The rest of the bits give + * the range covered by this entry. + */ + sz = col->elemsize + (sizeof(CollMult) - sizeof(CollElem)); + tbl += (1 + UCHAR_MAX) * col->elemsize; + lo = 0; + hi = col->nmain - UCHAR_MAX; + while (lo < hi) + { + if ((cur = (hi + lo) >> 1) < lo) /* hi+lo overflowed */ + cur |= ~(~(size_t)0 >> 1); /* lost high order bit */ + cmp = CCM(&tbl[cur * sz]); + if ((diff = wc - cmp->ch) < 0) + hi = cur; + else if (cmp->elem.subnbeg & SUBN_SPECIAL) + { + if (diff > (long)(cmp->elem.subnbeg & ~SUBN_SPECIAL)) + lo = cur + 1; + else /* create an entry from the sequence in spare */ + { + spare->multbeg = cmp->elem.multbeg; + spare->subnbeg = 0; + spare->weight[0] = cmp->elem.weight[0] + diff; + for (lo = 1; lo < col->nweight; lo++) + { + wuchar_type w; + + if ((w = cmp->elem.weight[lo]) + == WGHT_SPECIAL) + { + w = spare->weight[0]; + } + spare->weight[lo] = w; + } + return spare; + } + } + else if (diff == 0) + return &cmp->elem; + else + lo = cur + 1; + } + return ELEM_BADCHAR; +} |
