diff --git a/CVS/Entries b/CVS/Entries index eae0471..0b64b68 100644 --- a/CVS/Entries +++ b/CVS/Entries @@ -2,13 +2,13 @@ /buf.c/1.24/Sat Dec 21 21:51:59 2019// D/USD.doc//// D/test//// -/Makefile/1.12/Fri May 22 22:22:54 2020// -/POSIX/1.8/Fri May 22 22:22:48 2020// -/README/1.5/Fri May 22 22:22:54 2020// -/ed.1/1.75/Fri May 22 22:22:48 2020// -/ed.h/1.22/Fri May 22 22:22:48 2020// -/glbl.c/1.20/Fri May 22 22:22:48 2020// -/io.c/1.24/Fri May 22 22:22:48 2020// -/re.c/1.19/Fri May 22 22:22:48 2020// -/sub.c/1.18/Fri May 22 22:22:48 2020// -/undo.c/1.14/Fri May 22 22:22:48 2020// +/Makefile/1.12/Tue Jan 5 23:15:24 2021// +/POSIX/1.8/Tue Jan 5 23:11:26 2021// +/README/1.5/Tue Jan 5 23:15:24 2021// +/ed.1/1.75/Tue Jan 5 23:11:26 2021// +/ed.h/1.22/Tue Jan 5 23:11:26 2021// +/glbl.c/1.20/Tue Jan 5 23:11:26 2021// +/io.c/1.24/Tue Jan 5 23:11:26 2021// +/re.c/1.19/Tue Jan 5 23:11:26 2021// +/sub.c/1.18/Tue Jan 5 23:11:26 2021// +/undo.c/1.14/Tue Jan 5 23:11:26 2021// diff --git a/cclass.h b/cclass.h index 9f144a6..267a1ed 100644 --- a/cclass.h +++ b/cclass.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cclass.h,v 1.5 2003/06/02 20:18:36 millert Exp $ */ +/* $OpenBSD: cclass.h,v 1.7 2020/12/30 08:54:42 tb Exp $ */ /*- * Copyright (c) 1992, 1993, 1994 Henry Spencer. 
@@ -36,33 +36,25 @@ */ /* character-class table */ -static struct cclass { - char *name; - char *chars; - char *multis; +static const struct cclass { + const char *name; + const char *chars; } cclasses[] = { { "alnum", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ -0123456789", ""} , - { "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", - ""} , - { "blank", " \t", ""} , +0123456789" }, + { "alpha", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" }, + { "blank", " \t" }, { "cntrl", "\007\b\t\n\v\f\r\1\2\3\4\5\6\16\17\20\21\22\23\24\ -\25\26\27\30\31\32\33\34\35\36\37\177", ""} , - { "digit", "0123456789", ""} , +\25\26\27\30\31\32\33\34\35\36\37\177" }, + { "digit", "0123456789" }, { "graph", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ -0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", - ""} , - { "lower", "abcdefghijklmnopqrstuvwxyz", - ""} , +0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" }, + { "lower", "abcdefghijklmnopqrstuvwxyz" }, { "print", "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\ -0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ ", - ""} , - { "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~", - ""} , - { "space", "\t\n\v\f\r ", ""} , - { "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ", - ""} , - { "xdigit", "0123456789ABCDEFabcdef", - ""} , - { NULL, 0, "" } +0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ " }, + { "punct", "!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~" }, + { "space", "\t\n\v\f\r " }, + { "upper", "ABCDEFGHIJKLMNOPQRSTUVWXYZ" }, + { "xdigit", "0123456789ABCDEFabcdef" }, + { NULL, 0 } }; diff --git a/cname.h b/cname.h index 9cce44e..667cb54 100644 --- a/cname.h +++ b/cname.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cname.h,v 1.5 2003/06/02 20:18:36 millert Exp $ */ +/* $OpenBSD: cname.h,v 1.6 2020/12/30 08:53:30 tb Exp $ */ /*- * Copyright (c) 1992, 1993, 1994 Henry Spencer. 
@@ -36,8 +36,8 @@ */ /* character-name table */ -static struct cname { - char *name; +static const struct cname { + const char *name; char code; } cnames[] = { { "NUL", '\0' }, diff --git a/engine.c b/engine.c index 9d3ca24..62da73f 100644 --- a/engine.c +++ b/engine.c @@ -1,4 +1,4 @@ -/* $OpenBSD: engine.c,v 1.25 2018/07/11 12:38:46 martijn Exp $ */ +/* $OpenBSD: engine.c,v 1.26 2020/12/28 21:41:55 millert Exp $ */ /*- * Copyright (c) 1992, 1993, 1994 Henry Spencer. @@ -935,7 +935,7 @@ step(struct re_guts *g, OP(s = g->strip[pc+look]) != O_CH; look += OPND(s)) assert(OP(s) == OOR2); - FWD(aft, aft, look); + FWD(aft, aft, look + 1); } break; case OOR2: /* propagate OCH_'s marking */ diff --git a/regcomp.c b/regcomp.c index 3f2d46d..cf6acbb 100644 --- a/regcomp.c +++ b/regcomp.c @@ -1,4 +1,4 @@ -/* $OpenBSD: regcomp.c,v 1.34 2019/02/05 20:57:30 millert Exp $ */ +/* $OpenBSD: regcomp.c,v 1.43 2021/01/03 17:07:57 tb Exp $ */ /*- * Copyright (c) 1992, 1993, 1994 Henry Spencer. * Copyright (c) 1992, 1993, 1994 @@ -53,8 +53,8 @@ * other clumsinesses */ struct parse { - char *next; /* next character in RE */ - char *end; /* end of string (-> NUL normally) */ + const char *next; /* next character in RE */ + const char *end; /* end of string (-> NUL normally) */ int error; /* has an error been seen? 
*/ sop *strip; /* malloced strip */ sopno ssize; /* malloced strip size (allocated) */ @@ -84,18 +84,12 @@ static void ordinary(struct parse *, int); static void backslash(struct parse *, int); static void nonnewline(struct parse *); static void repeat(struct parse *, sopno, int, int); -static int seterr(struct parse *, int); +static void seterr(struct parse *, int); static cset *allocset(struct parse *); static void freeset(struct parse *, cset *); static int freezeset(struct parse *, cset *); static int firstch(struct parse *, cset *); static int nch(struct parse *, cset *); -static void mcadd(struct parse *, cset *, char *); -static void mcinvert(struct parse *, cset *); -static void mccase(struct parse *, cset *); -static int isinsets(struct re_guts *, int); -static int samesets(struct re_guts *, int, int); -static void categorize(struct parse *, struct re_guts *); static sopno dupl(struct parse *, sopno, sopno); static void doemit(struct parse *, sop, size_t); static void doinsert(struct parse *, sop, size_t, sopno); @@ -113,10 +107,10 @@ static char nuls[10]; /* place to point scanner in event of error */ */ #define PEEK() (*p->next) #define PEEK2() (*(p->next+1)) -#define MORE() (p->next < p->end) -#define MORE2() (p->next+1 < p->end) +#define MORE() (p->end - p->next > 0) +#define MORE2() (p->end - p->next > 1) #define SEE(c) (MORE() && PEEK() == (c)) -#define SEETWO(a, b) (MORE() && MORE2() && PEEK() == (a) && PEEK2() == (b)) +#define SEETWO(a, b) (MORE2() && PEEK() == (a) && PEEK2() == (b)) #define EAT(c) ((SEE(c)) ? (NEXT(), 1) : 0) #define EATTWO(a, b) ((SEETWO(a, b)) ? 
(NEXT2(), 1) : 0) #define NEXT() (p->next++) @@ -124,7 +118,7 @@ static char nuls[10]; /* place to point scanner in event of error */ #define NEXTn(n) (p->next += (n)) #define GETNEXT() (*p->next++) #define SETERROR(e) seterr(p, (e)) -#define REQUIRE(co, e) (void) ((co) || SETERROR(e)) +#define REQUIRE(co, e) do { if (!(co)) SETERROR(e); } while (0) #define EMIT(op, sopnd) doemit(p, (sop)(op), (size_t)(sopnd)) #define INSERT(op, pos) doinsert(p, (sop)(op), HERE()-(pos)+1, pos) #define AHEAD(pos) dofwd(p, pos, HERE()-(pos)) @@ -182,7 +176,7 @@ regcomp(regex_t *preg, const char *pattern, int cflags) /* set things up */ p->g = g; - p->next = (char *)pattern; /* convenience; we do not modify it */ + p->next = pattern; p->end = p->next + len; p->error = 0; p->ncsalloc = 0; @@ -201,9 +195,6 @@ regcomp(regex_t *preg, const char *pattern, int cflags) g->must = NULL; g->mlen = 0; g->nsub = 0; - g->ncategories = 1; /* category 0 is "everything else" */ - g->categories = &g->catspace[-(CHAR_MIN)]; - memset(g->catspace, 0, sizeof(g->catspace)); g->backrefs = 0; /* do it */ @@ -219,7 +210,6 @@ regcomp(regex_t *preg, const char *pattern, int cflags) g->laststate = THERE(); /* tidy up loose ends and fill things in */ - categorize(p, g); stripsnug(p, g); findmust(p, g); g->nplus = pluscount(p, g); @@ -623,15 +613,17 @@ p_bracket(struct parse *p) int invert = 0; /* Dept of Truly Sickening Special-Case Kludges */ - if (p->next + 5 < p->end && strncmp(p->next, "[:<:]]", 6) == 0) { - EMIT(OBOW, 0); - NEXTn(6); - return; - } - if (p->next + 5 < p->end && strncmp(p->next, "[:>:]]", 6) == 0) { - EMIT(OEOW, 0); - NEXTn(6); - return; + if (p->end - p->next > 5) { + if (strncmp(p->next, "[:<:]]", 6) == 0) { + EMIT(OBOW, 0); + NEXTn(6); + return; + } + if (strncmp(p->next, "[:>:]]", 6) == 0) { + EMIT(OEOW, 0); + NEXTn(6); + return; + } } if ((cs = allocset(p)) == NULL) { @@ -666,8 +658,6 @@ p_bracket(struct parse *p) if (ci != i) CHadd(cs, ci); } - if (cs->multis != NULL) - mccase(p, cs); } 
if (invert) { int i; @@ -679,12 +669,8 @@ p_bracket(struct parse *p) CHadd(cs, i); if (p->g->cflags&REG_NEWLINE) CHsub(cs, '\n'); - if (cs->multis != NULL) - mcinvert(p, cs); } - assert(cs->multis == NULL); /* xxx */ - if (nch(p, cs) == 1) { /* optimize singleton sets */ ordinary(p, firstch(p, cs)); freeset(p, cs); @@ -761,10 +747,10 @@ p_b_term(struct parse *p, cset *cs) static void p_b_cclass(struct parse *p, cset *cs) { - char *sp = p->next; - struct cclass *cp; + const char *sp = p->next; + const struct cclass *cp; size_t len; - char *u; + const char *u; char c; while (MORE() && isalpha((uch)PEEK())) @@ -782,8 +768,6 @@ p_b_cclass(struct parse *p, cset *cs) u = cp->chars; while ((c = *u++) != '\0') CHadd(cs, c); - for (u = cp->multis; *u != '\0'; u += strlen(u) + 1) - MCadd(p, cs, u); } /* @@ -825,8 +809,8 @@ static char /* value of collating element */ p_b_coll_elem(struct parse *p, int endc) /* name ended by endc,']' */ { - char *sp = p->next; - struct cname *cp; + const char *sp = p->next; + const struct cname *cp; size_t len; while (MORE() && !SEETWO(endc, ']')) @@ -869,8 +853,8 @@ othercase(int ch) static void bothcases(struct parse *p, int ch) { - char *oldnext = p->next; - char *oldend = p->end; + const char *oldnext = p->next; + const char *oldend = p->end; char bracket[3]; ch = (uch)ch; @@ -892,15 +876,10 @@ bothcases(struct parse *p, int ch) static void ordinary(struct parse *p, int ch) { - cat_t *cap = p->g->categories; - if ((p->g->cflags&REG_ICASE) && isalpha((uch)ch) && othercase(ch) != ch) bothcases(p, ch); - else { + else EMIT(OCHAR, (uch)ch); - if (cap[ch] == 0) - cap[ch] = p->g->ncategories++; - } } /* @@ -930,16 +909,12 @@ backslash(struct parse *p, int ch) static void nonnewline(struct parse *p) { - char *oldnext = p->next; - char *oldend = p->end; - char bracket[4]; + const char *oldnext = p->next; + const char *oldend = p->end; + static const char bracket[4] = { '^', '\n', ']', '\0' }; p->next = bracket; p->end = bracket+3; - bracket[0] = '^'; - 
bracket[1] = '\n'; - bracket[2] = ']'; - bracket[3] = '\0'; p_bracket(p); assert(p->next == bracket+3); p->next = oldnext; @@ -1019,14 +994,13 @@ repeat(struct parse *p, /* - seterr - set an error condition */ -static int /* useless but makes type checking happy */ +static void seterr(struct parse *p, int e) { if (p->error == 0) /* keep earliest error condition */ p->error = e; p->next = nuls; /* try to bring things to a halt */ p->end = nuls; - return(0); /* make the return value well-defined */ } /* @@ -1073,8 +1047,6 @@ allocset(struct parse *p) cs->ptr = p->g->setbits + css*((no)/CHAR_BIT); cs->mask = 1 << ((no) % CHAR_BIT); cs->hash = 0; - cs->smultis = 0; - cs->multis = NULL; return(cs); nomem: @@ -1127,7 +1099,7 @@ freezeset(struct parse *p, cset *cs) if (cs2->hash == h && cs2 != cs) { /* maybe */ for (i = 0; i < css; i++) - if (!!CHIN(cs2, i) != !!CHIN(cs, i)) + if (CHIN(cs2, i) != CHIN(cs, i)) break; /* no */ if (i == css) break; /* yes */ @@ -1173,112 +1145,6 @@ nch(struct parse *p, cset *cs) return(n); } -/* - - mcadd - add a collating element to a cset - */ -static void -mcadd( struct parse *p, cset *cs, char *cp) -{ - size_t oldend = cs->smultis; - void *np; - - cs->smultis += strlen(cp) + 1; - np = realloc(cs->multis, cs->smultis); - if (np == NULL) { - free(cs->multis); - cs->multis = NULL; - SETERROR(REG_ESPACE); - return; - } - cs->multis = np; - - strlcpy(cs->multis + oldend - 1, cp, cs->smultis - oldend + 1); -} - -/* - - mcinvert - invert the list of collating elements in a cset - * - * This would have to know the set of possibilities. Implementation - * is deferred. - */ -static void -mcinvert(struct parse *p, cset *cs) -{ - assert(cs->multis == NULL); /* xxx */ -} - -/* - - mccase - add case counterparts of the list of collating elements in a cset - * - * This would have to know the set of possibilities. Implementation - * is deferred. 
- */ -static void -mccase(struct parse *p, cset *cs) -{ - assert(cs->multis == NULL); /* xxx */ -} - -/* - - isinsets - is this character in any sets? - */ -static int /* predicate */ -isinsets(struct re_guts *g, int c) -{ - uch *col; - int i; - int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; - unsigned uc = (uch)c; - - for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) - if (col[uc] != 0) - return(1); - return(0); -} - -/* - - samesets - are these two characters in exactly the same sets? - */ -static int /* predicate */ -samesets(struct re_guts *g, int c1, int c2) -{ - uch *col; - int i; - int ncols = (g->ncsets+(CHAR_BIT-1)) / CHAR_BIT; - unsigned uc1 = (uch)c1; - unsigned uc2 = (uch)c2; - - for (i = 0, col = g->setbits; i < ncols; i++, col += g->csetsize) - if (col[uc1] != col[uc2]) - return(0); - return(1); -} - -/* - - categorize - sort out character categories - */ -static void -categorize(struct parse *p, struct re_guts *g) -{ - cat_t *cats = g->categories; - int c; - int c2; - cat_t cat; - - /* avoid making error situations worse */ - if (p->error != 0) - return; - - for (c = CHAR_MIN; c <= CHAR_MAX; c++) - if (cats[c] == 0 && isinsets(g, c)) { - cat = g->ncategories++; - cats[c] = cat; - for (c2 = c+1; c2 <= CHAR_MAX; c2++) - if (cats[c2] == 0 && samesets(g, c, c2)) - cats[c2] = cat; - } -} - /* - dupl - emit a duplicate of a bunch of sops */ @@ -1491,7 +1357,7 @@ findmust(struct parse *p, struct re_guts *g) *cp++ = (char)OPND(s); } assert(cp == g->must + g->mlen); - *cp++ = '\0'; /* just on general principles */ + *cp = '\0'; /* just on general principles */ } /* diff --git a/regerror.c b/regerror.c index be6fcc8..692b666 100644 --- a/regerror.c +++ b/regerror.c @@ -1,4 +1,4 @@ -/* $OpenBSD: regerror.c,v 1.14 2015/11/01 03:45:29 guenther Exp $ */ +/* $OpenBSD: regerror.c,v 1.15 2020/12/30 08:56:38 tb Exp $ */ /*- * Copyright (c) 1992, 1993, 1994 Henry Spencer. 
* Copyright (c) 1992, 1993, 1994 @@ -44,12 +44,12 @@ #include "utils.h" -static char *regatoi(const regex_t *, char *, int); +static const char *regatoi(const regex_t *, char *, int); -static struct rerr { +static const struct rerr { int code; - char *name; - char *explain; + const char *name; + const char *explain; } rerrs[] = { { REG_NOMATCH, "REG_NOMATCH", "regexec() failed to match" }, { REG_BADPAT, "REG_BADPAT", "invalid regular expression" }, @@ -77,10 +77,10 @@ static struct rerr { size_t regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size) { - struct rerr *r; + const struct rerr *r; size_t len; int target = errcode &~ REG_ITOA; - char *s; + const char *s; char convbuf[50]; if (errcode == REG_ATOI) @@ -102,21 +102,21 @@ regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size) s = r->explain; } - len = strlen(s) + 1; - if (errbuf_size > 0) { - strlcpy(errbuf, s, errbuf_size); - } + if (errbuf_size != 0) + len = strlcpy(errbuf, s, errbuf_size); + else + len = strlen(s); - return(len); + return len + 1; } /* - regatoi - internal routine to implement REG_ATOI */ -static char * +static const char * regatoi(const regex_t *preg, char *localbuf, int localbufsize) { - struct rerr *r; + const struct rerr *r; for (r = rerrs; r->code != 0; r++) if (strcmp(r->name, preg->re_endp) == 0) diff --git a/regex.h b/regex.h index 2da07b7..e24d9ae 100644 --- a/regex.h +++ b/regex.h @@ -41,8 +41,6 @@ #include -#include "config.h" - /* types */ typedef off_t regoff_t; @@ -96,9 +94,11 @@ typedef struct { #define REG_LARGE 01000 /* force large representation */ #define REG_BACKR 02000 /* force use of backref code */ +__BEGIN_DECLS int regcomp(regex_t *, const char *, int); size_t regerror(int, const regex_t *, char *, size_t); int regexec(const regex_t *, const char *, size_t, regmatch_t [], int); void regfree(regex_t *); +__END_DECLS #endif /* !_REGEX_H_ */ diff --git a/regex2.h b/regex2.h index 7484ba2..6fb9dcc 100644 --- a/regex2.h +++ 
b/regex2.h @@ -1,4 +1,4 @@ -/* $OpenBSD: regex2.h,v 1.8 2013/04/17 17:39:29 tedu Exp $ */ +/* $OpenBSD: regex2.h,v 1.12 2021/01/03 17:07:58 tb Exp $ */ /*- * Copyright (c) 1992, 1993, 1994 Henry Spencer. @@ -106,19 +106,27 @@ typedef struct { uch *ptr; /* -> uch [csetsize] */ uch mask; /* bit within array */ uch hash; /* hash code */ - size_t smultis; - char *multis; /* -> char[smulti] ab\0cd\0ef\0\0 */ } cset; -/* note that CHadd and CHsub are unsafe, and CHIN doesn't yield 0/1 */ -#define CHadd(cs, c) ((cs)->ptr[(uch)(c)] |= (cs)->mask, (cs)->hash += (c)) -#define CHsub(cs, c) ((cs)->ptr[(uch)(c)] &= ~(cs)->mask, (cs)->hash -= (c)) -#define CHIN(cs, c) ((cs)->ptr[(uch)(c)] & (cs)->mask) -#define MCadd(p, cs, cp) mcadd(p, cs, cp) /* regcomp() internal fns */ -#define MCsub(p, cs, cp) mcsub(p, cs, cp) -#define MCin(p, cs, cp) mcin(p, cs, cp) -/* stuff for character categories */ -typedef unsigned char cat_t; +static inline void +CHadd(cset *cs, char c) +{ + cs->ptr[(uch)c] |= cs->mask; + cs->hash += c; +} + +static inline void +CHsub(cset *cs, char c) +{ + cs->ptr[(uch)c] &= ~cs->mask; + cs->hash -= c; +} + +static inline int +CHIN(const cset *cs, char c) +{ + return (cs->ptr[(uch)c] & cs->mask) != 0; +} /* * main compiled-expression structure @@ -141,15 +149,11 @@ struct re_guts { # define BAD 04 /* something wrong */ int nbol; /* number of ^ used */ int neol; /* number of $ used */ - int ncategories; /* how many character categories */ - cat_t *categories; /* ->catspace[-CHAR_MIN] */ char *must; /* match must contain this string */ int mlen; /* length of must */ size_t nsub; /* copy of re_nsub */ int backrefs; /* does it use back references? */ sopno nplus; /* how deep does it nest +s? 
*/ - /* catspace must be last */ - cat_t catspace[NC]; /* actually [NC] */ }; /* misc utilities */ diff --git a/utils.h b/utils.h index 9c533a1..2dea710 100644 --- a/utils.h +++ b/utils.h @@ -36,10 +36,6 @@ */ /* utility definitions */ -#ifndef _POSIX2_RE_DUP_MAX -#define _POSIX2_RE_DUP_MAX 255 /* Haiku support */ -#endif - #define DUPMAX _POSIX2_RE_DUP_MAX /* xxx is this right? */ #define INFINITY (DUPMAX + 1) #define NC (CHAR_MAX - CHAR_MIN + 1)