From 650bb9da7cf5b677960c03e0a6a5616d48340845 Mon Sep 17 00:00:00 2001
From: Mark Wooding
Date: Fri, 9 Aug 2019 12:00:37 +0100
Subject: [PATCH] pcre.c, etc.: Support the PCRE2 library.

---
Review notes (this section is ignored by git-am):

  * pcre2_get_error_message() returns the length of the message on
    success and a negative code on failure, so the `assert(!rc)' and
    `assert(!err)' checks fired whenever an error was actually being
    reported.  They now check for a non-negative result.
  * When pcre2_match_data_create_from_pattern() fails, `err' still holds
    the zero left by the preceding assertion; the message is now looked
    up for PCRE2_ERROR_NOMEMORY instead.
  * Removed a stray quote from the `pcre2 matching failed' message.
  * Restored the <pcre2.h> and <pcre.h> header names, and the line
    structure of the patch.  Leading whitespace and blank context lines
    are reconstructed from the hunk headers: apply with a
    whitespace-tolerant option if needed.  The post-image blob id in
    pcre.c's index line predates these fixes.

 Makefile.am  | 10 +++++++++-
 anag.c       |  6 +++---
 anag.h       |  2 +-
 configure.ac | 10 ++++++++--
 pcre.c       | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 5 files changed, 80 insertions(+), 9 deletions(-)

diff --git a/Makefile.am b/Makefile.am
index 9425a68..29e19b3 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -31,9 +31,13 @@ bin_PROGRAMS =
 bin_SCRIPTS =
 
 LDADD =
+if HAVE_PCRE2
+  LDADD += $(PCRE2_LIBS)
+else
 if HAVE_PCRE
   LDADD += $(PCRE_LIBS)
 endif
+endif
 
 ###--------------------------------------------------------------------------
 ### Making substitutions.
@@ -59,8 +63,12 @@ anag_SOURCES = anag.c anag.h util.c
 anag_SOURCES += anagram.c
 anag_SOURCES += longest.c
 anag_SOURCES += mono.c
-if HAVE_PCRE
+if HAVE_PCRE2
   anag_SOURCES += pcre.c
+else
+if HAVE_PCRE
+  anag_SOURCES += pcre.c
+endif
 endif
 if HAVE_REGCOMP
   anag_SOURCES += regexp.c
diff --git a/anag.c b/anag.c
index d27523b..80da3c3 100644
--- a/anag.c
+++ b/anag.c
@@ -72,7 +72,7 @@ The basic tests in the expression are:\n\
 -regexp REGEXP matches with an (extended) regular expression\n\
 "
 #endif
-#ifdef HAVE_PCRE
+#if defined(HAVE_PCRE) || defined(HAVE_PCRE2)
 "\
 -pcre REGEXP matches with a Perl-like regular expression\n\
 "
@@ -138,7 +138,7 @@ static const struct opt opttab[] = {
 #ifdef HAVE_REGCOMP
   { "regexp", 1, 0, O_REGEXP },
 #endif
-#ifdef HAVE_PCRE
+#if defined(HAVE_PCRE) || defined(HAVE_PCRE2)
   { "pcre", 1, 0, O_PCRE },
 #endif
   { "length", 1, 0, O_LENGTH },
@@ -400,7 +400,7 @@ static void p_factor(p_ctx *p, node **nn)
 #ifdef HAVE_REGCOMP
     case O_REGEXP: *nn = regexp(p->a + 1); break;
 #endif
-#ifdef HAVE_PCRE
+#if defined(HAVE_PCRE) || defined(HAVE_PCRE2)
     case O_PCRE: *nn = pcrenode(p->a + 1); break;
 #endif
     case O_MONO: *nn = mono(p->a + 1); break;
diff --git a/anag.h b/anag.h
index 6763d0f..32b5ea5 100644
--- a/anag.h
+++ b/anag.h
@@ -98,7 +98,7 @@ extern node *mono(const char *const */*av*/);
 #ifdef HAVE_REGCOMP
 extern node *regexp(const char *const */*av*/);
 #endif
-#ifdef HAVE_PCRE
+#if defined(HAVE_PCRE) || defined(HAVE_PCRE2)
 extern node *pcrenode(const char *const */*av*/);
 #endif
 extern node *longest(const char *const */*av*/);
diff --git a/configure.ac b/configure.ac
index 272fe9c..79f4950 100644
--- a/configure.ac
+++ b/configure.ac
@@ -43,8 +43,14 @@ AC_CHECK_FUNCS([regcomp])
 AM_CONDITIONAL([HAVE_REGCOMP], [test $ac_cv_func_regcomp = yes])
 mdw_have_pcre2=nil
 mdw_have_pcre=nil
-PKG_CHECK_MODULES([PCRE], [libpcre],
-  [mdw_have_pcre=t AM_CFLAGS="$AM_CFLAGS $PCRE_CFLAGS"], [])
+PKG_CHECK_MODULES([PCRE2], [libpcre2-8],
+  [mdw_have_pcre2=t AM_CFLAGS="$AM_CFLAGS $PCRE2_CFLAGS"],
+  [PKG_CHECK_MODULES([PCRE], [libpcre],
+    [mdw_have_pcre=t AM_CFLAGS="$AM_CFLAGS $PCRE_CFLAGS"], [])])
+AM_CONDITIONAL([HAVE_PCRE2], [test $mdw_have_pcre2 = t])
+case $mdw_have_pcre2 in
+  t) AC_DEFINE([HAVE_PCRE2], [1], [PCRE2 library is available.]) ;;
+esac
 AM_CONDITIONAL([HAVE_PCRE], [test $mdw_have_pcre = t])
 case $mdw_have_pcre in
   t) AC_DEFINE([HAVE_PCRE], [1], [PCRE library is available.]) ;;
diff --git a/pcre.c b/pcre.c
index 5c5994c..b30fc9b 100644
--- a/pcre.c
+++ b/pcre.c
@@ -28,17 +28,30 @@
 
 #include "anag.h"
 
-#include <pcre.h>
+#ifdef HAVE_PCRE2
+# define PCRE2_CODE_UNIT_WIDTH 8
+# include <pcre2.h>
+#endif
+
+#ifdef HAVE_PCRE
+# include <pcre.h>
+#endif
 
 /*----- Data structures ---------------------------------------------------*/
 
 typedef struct node_pcre {
   node n;
   const char *s;
+#ifdef HAVE_PCRE2
+  pcre2_code *rx;
+  pcre2_match_data *m;
+#endif
+#ifdef HAVE_PCRE
   pcre *rx;
   pcre_extra *rx_study;
   int *ovec;
   int ovecsz;
+#endif
 } node_pcre;
 
 /*----- Main code ---------------------------------------------------------*/
@@ -48,12 +61,30 @@ typedef struct node_pcre {
 static int n_pcre(node *nn, const char *p, size_t sz)
 {
   node_pcre *n = (node_pcre *)nn;
+#ifdef HAVE_PCRE2
+  char buf[128];
+  int rc;
+#endif
+#ifdef HAVE_PCRE
   int e;
-
+#endif
+
+#ifdef HAVE_PCRE2
+  rc = pcre2_match(n->rx, (PCRE2_SPTR)p, sz, 0, 0, n->m, 0);
+  if (rc >= 0) return (1);
+  else switch (rc) {
+    case PCRE2_ERROR_NOMATCH: return (0);
+    default:
+      rc = pcre2_get_error_message(rc, (PCRE2_UCHAR *)buf, sizeof(buf));
+      assert(rc >= 0); die("pcre2 matching failed: %s", buf);
+  }
+#endif
+#ifdef HAVE_PCRE
   e = pcre_exec(n->rx, n->rx_study, p, sz, 0, 0, n->ovec, n->ovecsz);
   if (e >= 0) return (1);
   if (e == PCRE_ERROR_NOMATCH) return (0);
   die("unexpected PCRE error code %d", e);
+#endif
 }
 
 /* --- Node creation --- */
@@ -61,12 +92,37 @@ static int n_pcre(node *nn, const char *p, size_t sz)
 node *pcrenode(const char *const *av)
 {
   node_pcre *n = xmalloc(sizeof(*n));
+#ifdef HAVE_PCRE2
+  char buf[128];
+  int err;
+  PCRE2_SIZE eo;
+  uint32_t c;
+#endif
+#ifdef HAVE_PCRE
   const char *e;
   int eo;
   int c;
+#endif
 
   n->n.func = n_pcre;
 
+#ifdef HAVE_PCRE2
+  n->rx = pcre2_compile((PCRE2_SPTR)av[0], strlen(av[0]), PCRE2_CASELESS,
+                        &err, &eo, 0);
+  if (!n->rx) {
+    err = pcre2_get_error_message(err, (PCRE2_UCHAR *)buf, sizeof(buf));
+    assert(err >= 0); die("bad regular expression `%s': %s", av[0], buf);
+  }
+  err = pcre2_pattern_info(n->rx, PCRE2_INFO_BACKREFMAX, &c);
+  assert(!err);
+  n->m = pcre2_match_data_create_from_pattern(n->rx, 0);
+  if (!n->m) {
+    err = pcre2_get_error_message(PCRE2_ERROR_NOMEMORY, (PCRE2_UCHAR *)buf, sizeof(buf));
+    assert(err >= 0); die("failed to allocate match data: %s", buf);
+  }
+  pcre2_jit_compile(n->rx, PCRE2_JIT_COMPLETE);
+#endif
+#ifdef HAVE_PCRE
   n->rx = pcre_compile(av[0], PCRE_CASELESS, &e, &eo, 0);
   if (!n->rx) die("bad regular expression `%s': %s", av[0], e);
   n->rx_study = pcre_study(n->rx, 0, &e);
@@ -74,6 +130,7 @@ node *pcrenode(const char *const *av)
   pcre_fullinfo(n->rx, n->rx_study, PCRE_INFO_CAPTURECOUNT, &c);
   n->ovecsz = 2*c;
   n->ovec = xmalloc(n->ovecsz*sizeof(*n->ovec));
+#endif
   return (&n->n);
 }
 
-- 
2.11.0