切换使用pcre2 并且重命名 colorgrep.sh

(cherry picked from commit e9255d1736d582890f0b9f7643124431110030c5)
This commit is contained in:
zhoupengcheng 2023-11-07 11:24:01 +08:00 committed by openeuler-sync-bot
parent d7e4f7bcd9
commit fae3300054
9 changed files with 821 additions and 13 deletions

View File

@ -0,0 +1,59 @@
From e2aec8c91e9d6ed3fc76f9f145dec8a456ce623a Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Fri, 24 Jun 2022 17:53:34 -0500
Subject: grep: fix regex compilation memory leaks
Problem reported by Jim Meyering in:
https://lists.gnu.org/r/grep-devel/2022-06/msg00012.html
* src/dfasearch.c (regex_compile): Fix memory leaks when SYNTAX_ONLY.
---
src/dfasearch.c | 24 ++++++++++++++++--------
1 file changed, 16 insertions(+), 8 deletions(-)
diff --git a/src/dfasearch.c b/src/dfasearch.c
index 2d0e861..a5e348f 100644
--- a/src/dfasearch.c
+++ b/src/dfasearch.c
@@ -148,24 +148,32 @@ regex_compile (struct dfa_comp *dc, char const *p, ptrdiff_t len,
ptrdiff_t pcount, ptrdiff_t lineno, reg_syntax_t syntax_bits,
bool syntax_only)
{
- struct re_pattern_buffer pat0;
- struct re_pattern_buffer *pat = syntax_only ? &pat0 : &dc->patterns[pcount];
- pat->buffer = NULL;
- pat->allocated = 0;
+ struct re_pattern_buffer pat;
+ pat.buffer = NULL;
+ pat.allocated = 0;
/* Do not use a fastmap with -i, to work around glibc Bug#20381. */
- pat->fastmap = (syntax_only | match_icase) ? NULL : xmalloc (UCHAR_MAX + 1);
+ pat.fastmap = syntax_only | match_icase ? NULL : xmalloc (UCHAR_MAX + 1);
- pat->translate = NULL;
+ pat.translate = NULL;
if (syntax_only)
re_set_syntax (syntax_bits | RE_NO_SUB);
else
re_set_syntax (syntax_bits);
- char const *err = re_compile_pattern (p, len, pat);
+ char const *err = re_compile_pattern (p, len, &pat);
if (!err)
- return true;
+ {
+ if (syntax_only)
+ regfree (&pat);
+ else
+ dc->patterns[pcount] = pat;
+
+ return true;
+ }
+
+ free (pat.fastmap);
/* Emit a filename:lineno: prefix for patterns taken from files. */
size_t pat_lineno;
--
cgit v1.1

View File

@ -0,0 +1,43 @@
From 6f84f3be1cdd3aadacc42007582116d1c2c0a3e4 Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Fri, 12 Nov 2021 21:30:25 -0800
Subject: [PATCH] =?UTF-8?q?grep:=20Don=E2=80=99t=20limit=20jitstack=5Fmax?=
=?UTF-8?q?=20to=20INT=5FMAX?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* src/pcresearch.c (jit_exec): Remove arbitrary INT_MAX limit on JIT
stack size.
Reference: https://git.savannah.gnu.org/cgit/grep.git/commit?id=6f84f3be1cdd3aadacc42007582116d1c2c0a3e4
Conflict:context adaptation
---
src/pcresearch.c | 8 +++++++-
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/src/pcresearch.c b/src/pcresearch.c
index caedf49..68ac1fd 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -60,10 +60,16 @@ jit_exec (struct pcre_comp *pc, char const *subject, PCRE2_SIZE search_bytes,
{
while (true)
{
+ /* STACK_GROWTH_RATE is taken from PCRE's src/pcre2_jit_compile.c.
+ Going over the jitstack_max limit could trigger an int
+ overflow bug within PCRE. */
+ int STACK_GROWTH_RATE = 8192;
+ size_t jitstack_max = SIZE_MAX - (STACK_GROWTH_RATE - 1);
+
int e = pcre2_match (pc->cre, (PCRE2_SPTR)subject, search_bytes,
search_offset, options, pc->data, pc->mcontext);
if (e == PCRE2_ERROR_JIT_STACKLIMIT
- && 0 < pc->jit_stack_size && pc->jit_stack_size <= INT_MAX / 2)
+ && 0 < pc->jit_stack_size && pc->jit_stack_size <= jitstack_max / 2)
{
PCRE2_SIZE old_size = pc->jit_stack_size;
PCRE2_SIZE new_size = pc->jit_stack_size = old_size * 2;
--
2.33.0

View File

@ -0,0 +1,29 @@
From ad6e5cbcf598f55cafe83a11487ea4a6694e433b Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Sun, 14 Nov 2021 10:54:12 -0800
Subject: [PATCH] grep: fix minor -P memory leak
* src/pcresearch.c (Pcompile): Free ccontext when no longer needed.
Reference: https://git.savannah.gnu.org/cgit/grep.git/commit?id=ad6e5cbcf598f55cafe83a11487ea4a6694e433b
Conflict:context adaptation
---
src/pcresearch.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/pcresearch.c b/src/pcresearch.c
index badcd4c..c287d99 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -184,6 +184,8 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
die (EXIT_TROUBLE, 0, "%s", ep);
}
+ pcre2_compile_context_free (ccontext);
+
pc->data = pcre2_match_data_create_from_pattern (pc->cre, NULL);
ec = pcre2_jit_compile (pc->cre, PCRE2_JIT_COMPLETE);
--
2.33.0

View File

@ -0,0 +1,570 @@
From e0d39a9133e1507345d73ac5aff85f037f39aa54 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= <carenas@gmail.com>
Date: Fri, 12 Nov 2021 16:45:04 -0800
Subject: [PATCH] grep: migrate to pcre2
Mostly a bug by bug translation of the original code to the PCRE2 API.
Code still could do with some optimizations but should be good as a
starting point.
The API changes the sign of some types and therefore some ugly casts
were needed, some of the changes are just to make sure all variables
fit into the newer types better.
Includes backward compatibility and could be made to build all the way
to 10.00, but assumes a recent enough version and has been tested with
10.23 (from CentOS 7, the oldest).
Performance seems equivalent, and it also seems functionally complete.
* m4/pcre.m4 (gl_FUNC_PCRE): Check for PCRE2, not the original PCRE.
* src/pcresearch.c (struct pcre_comp, jit_exec)
(Pcompile, Pexecute):
Use PCRE2, not the original PCRE.
* tests/filename-lineno.pl: Adjust to match PCRE2 diagnostics.
Reference: https://git.savannah.gnu.org/cgit/grep.git/commit?id=e0d39a9133e1507345d73ac5aff85f037f39aa54
Conflict:context adaptation
---
doc/grep.in.1 | 8 +-
doc/grep.texi | 2 +-
m4/pcre.m4 | 21 ++--
src/pcresearch.c | 249 +++++++++++++++++++--------------------
tests/filename-lineno.pl | 4 +-
5 files changed, 138 insertions(+), 146 deletions(-)
diff --git a/doc/grep.in.1 b/doc/grep.in.1
index e8854f2..21bb471 100644
--- a/doc/grep.in.1
+++ b/doc/grep.in.1
@@ -767,7 +767,7 @@ In other implementations, basic regular expressions are less powerful.
The following description applies to extended regular expressions;
differences for basic regular expressions are summarized afterwards.
Perl-compatible regular expressions give additional functionality, and are
-documented in B<pcresyntax>(3) and B<pcrepattern>(3), but work only if
+documented in B<pcre2syntax>(3) and B<pcre2pattern>(3), but work only if
PCRE support is enabled.
.PP
The fundamental building blocks are the regular expressions
@@ -1371,9 +1371,9 @@ from the globbing syntax that the shell uses to match file names.
.BR sort (1),
.BR xargs (1),
.BR read (2),
-.BR pcre (3),
-.BR pcresyntax (3),
-.BR pcrepattern (3),
+.BR pcre2 (3),
+.BR pcre2syntax (3),
+.BR pcre2pattern (3),
.BR terminfo (5),
.BR glob (7),
.BR regex (7)
diff --git a/doc/grep.texi b/doc/grep.texi
index 01ac81e..aae8571 100644
--- a/doc/grep.texi
+++ b/doc/grep.texi
@@ -1186,7 +1186,7 @@ In other implementations, basic regular expressions are less powerful.
The following description applies to extended regular expressions;
differences for basic regular expressions are summarized afterwards.
Perl-compatible regular expressions give additional functionality, and
-are documented in the @i{pcresyntax}(3) and @i{pcrepattern}(3) manual
+are documented in the @i{pcre2syntax}(3) and @i{pcre2pattern}(3) manual
pages, but work only if PCRE is available in the system.
@menu
diff --git a/m4/pcre.m4 b/m4/pcre.m4
index 78b7fda..a1c6c82 100644
--- a/m4/pcre.m4
+++ b/m4/pcre.m4
@@ -1,4 +1,4 @@
-# pcre.m4 - check for libpcre support
+# pcre.m4 - check for PCRE library support
# Copyright (C) 2010-2021 Free Software Foundation, Inc.
# This file is free software; the Free Software Foundation
@@ -9,7 +9,7 @@ AC_DEFUN([gl_FUNC_PCRE],
[
AC_ARG_ENABLE([perl-regexp],
AS_HELP_STRING([--disable-perl-regexp],
- [disable perl-regexp (pcre) support]),
+ [disable perl-regexp (pcre2) support]),
[case $enableval in
yes|no) test_pcre=$enableval;;
*) AC_MSG_ERROR([invalid value $enableval for --disable-perl-regexp]);;
@@ -21,24 +21,25 @@ AC_DEFUN([gl_FUNC_PCRE],
use_pcre=no
if test $test_pcre != no; then
- PKG_CHECK_MODULES([PCRE], [libpcre], [], [: ${PCRE_LIBS=-lpcre}])
+ PKG_CHECK_MODULES([PCRE], [libpcre2-8], [], [: ${PCRE_LIBS=-lpcre2-8}])
- AC_CACHE_CHECK([for pcre_compile], [pcre_cv_have_pcre_compile],
+ AC_CACHE_CHECK([for pcre2_compile], [pcre_cv_have_pcre2_compile],
[pcre_saved_CFLAGS=$CFLAGS
pcre_saved_LIBS=$LIBS
CFLAGS="$CFLAGS $PCRE_CFLAGS"
LIBS="$PCRE_LIBS $LIBS"
AC_LINK_IFELSE(
- [AC_LANG_PROGRAM([[#include <pcre.h>
+ [AC_LANG_PROGRAM([[#define PCRE2_CODE_UNIT_WIDTH 8
+ #include <pcre2.h>
]],
- [[pcre *p = pcre_compile (0, 0, 0, 0, 0);
+ [[pcre2_code *p = pcre2_compile (0, 0, 0, 0, 0, 0);
return !p;]])],
- [pcre_cv_have_pcre_compile=yes],
- [pcre_cv_have_pcre_compile=no])
+ [pcre_cv_have_pcre2_compile=yes],
+ [pcre_cv_have_pcre2_compile=no])
CFLAGS=$pcre_saved_CFLAGS
LIBS=$pcre_saved_LIBS])
- if test "$pcre_cv_have_pcre_compile" = yes; then
+ if test "$pcre_cv_have_pcre2_compile" = yes; then
use_pcre=yes
elif test $test_pcre = maybe; then
AC_MSG_WARN([AC_PACKAGE_NAME will be built without pcre support.])
@@ -50,7 +51,7 @@ AC_DEFUN([gl_FUNC_PCRE],
if test $use_pcre = yes; then
AC_DEFINE([HAVE_LIBPCRE], [1],
[Define to 1 if you have the Perl Compatible Regular Expressions
- library (-lpcre).])
+ library (-lpcre2).])
else
PCRE_CFLAGS=
PCRE_LIBS=
diff --git a/src/pcresearch.c b/src/pcresearch.c
index 8070d06..2916d31 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -17,41 +17,32 @@
02110-1301, USA. */
/* Written August 1992 by Mike Haertel. */
+/* Updated for PCRE2 by Carlo Arenas. */
#include <config.h>
#include "search.h"
#include "die.h"
-#include <pcre.h>
+#define PCRE2_CODE_UNIT_WIDTH 8
+#include <pcre2.h>
-/* This must be at least 2; everything after that is for performance
- in pcre_exec. */
-enum { NSUB = 300 };
-
-#ifndef PCRE_EXTRA_MATCH_LIMIT_RECURSION
-# define PCRE_EXTRA_MATCH_LIMIT_RECURSION 0
-#endif
-#ifndef PCRE_STUDY_JIT_COMPILE
-# define PCRE_STUDY_JIT_COMPILE 0
-#endif
-#ifndef PCRE_STUDY_EXTRA_NEEDED
-# define PCRE_STUDY_EXTRA_NEEDED 0
+/* Needed for backward compatibility for PCRE2 < 10.30 */
+#ifndef PCRE2_CONFIG_DEPTHLIMIT
+#define PCRE2_CONFIG_DEPTHLIMIT PCRE2_CONFIG_RECURSIONLIMIT
+#define PCRE2_ERROR_DEPTHLIMIT PCRE2_ERROR_RECURSIONLIMIT
+#define pcre2_set_depth_limit pcre2_set_recursion_limit
#endif
struct pcre_comp
{
- /* Compiled internal form of a Perl regular expression. */
- pcre *cre;
-
- /* Additional information about the pattern. */
- pcre_extra *extra;
-
-#if PCRE_STUDY_JIT_COMPILE
/* The JIT stack and its maximum size. */
- pcre_jit_stack *jit_stack;
- int jit_stack_size;
-#endif
+ pcre2_jit_stack *jit_stack;
+ PCRE2_SIZE jit_stack_size;
+ /* Compiled internal form of a Perl regular expression. */
+ pcre2_code *cre;
+ pcre2_match_context *mcontext;
+ pcre2_match_data *data;
/* Table, indexed by ! (flag & PCRE_NOTBOL), of whether the empty
string matches when that flag is used. */
int empty_match[2];
@@ -60,54 +51,49 @@ struct pcre_comp
/* Match the already-compiled PCRE pattern against the data in SUBJECT,
of size SEARCH_BYTES and starting with offset SEARCH_OFFSET, with
- options OPTIONS, and storing resulting matches into SUB. Return
- the (nonnegative) match location or a (negative) error number. */
+ options OPTIONS.
+ Return the (nonnegative) match count or a (negative) error number. */
static int
-jit_exec (struct pcre_comp *pc, char const *subject, int search_bytes,
- int search_offset, int options, int *sub)
+jit_exec (struct pcre_comp *pc, char const *subject, PCRE2_SIZE search_bytes,
+ PCRE2_SIZE search_offset, int options)
{
while (true)
{
- int e = pcre_exec (pc->cre, pc->extra, subject, search_bytes,
- search_offset, options, sub, NSUB);
-
-#if PCRE_STUDY_JIT_COMPILE
- /* Going over this would trigger an int overflow bug within PCRE. */
- int jitstack_max = INT_MAX - 8 * 1024;
-
- if (e == PCRE_ERROR_JIT_STACKLIMIT
- && 0 < pc->jit_stack_size && pc->jit_stack_size <= jitstack_max / 2)
+ int e = pcre2_match (pc->cre, (PCRE2_SPTR)subject, search_bytes,
+ search_offset, options, pc->data, pc->mcontext);
+ if (e == PCRE2_ERROR_JIT_STACKLIMIT
+ && 0 < pc->jit_stack_size && pc->jit_stack_size <= INT_MAX / 2)
{
- int old_size = pc->jit_stack_size;
- int new_size = pc->jit_stack_size = old_size * 2;
+ PCRE2_SIZE old_size = pc->jit_stack_size;
+ PCRE2_SIZE new_size = pc->jit_stack_size = old_size * 2;
+
if (pc->jit_stack)
- pcre_jit_stack_free (pc->jit_stack);
- pc->jit_stack = pcre_jit_stack_alloc (old_size, new_size);
- if (!pc->jit_stack)
+ pcre2_jit_stack_free (pc->jit_stack);
+ pc->jit_stack = pcre2_jit_stack_create (old_size, new_size, NULL);
+
+ if (!pc->mcontext)
+ pc->mcontext = pcre2_match_context_create (NULL);
+
+ if (!pc->jit_stack || !pc->mcontext)
die (EXIT_TROUBLE, 0,
_("failed to allocate memory for the PCRE JIT stack"));
- pcre_assign_jit_stack (pc->extra, NULL, pc->jit_stack);
+ pcre2_jit_stack_assign (pc->mcontext, NULL, pc->jit_stack);
continue;
}
-#endif
-
-#if PCRE_EXTRA_MATCH_LIMIT_RECURSION
- if (e == PCRE_ERROR_RECURSIONLIMIT
- && (PCRE_STUDY_EXTRA_NEEDED || pc->extra))
+ if (e == PCRE2_ERROR_DEPTHLIMIT)
{
- unsigned long lim
- = (pc->extra->flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION
- ? pc->extra->match_limit_recursion
- : 0);
- if (lim <= ULONG_MAX / 2)
- {
- pc->extra->match_limit_recursion = lim ? 2 * lim : (1 << 24) - 1;
- pc->extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
- continue;
- }
- }
-#endif
+ uint32_t lim;
+ pcre2_config (PCRE2_CONFIG_DEPTHLIMIT, &lim);
+ if (lim >= UINT32_MAX / 2)
+ return e;
+
+ lim <<= 1;
+ if (!pc->mcontext)
+ pc->mcontext = pcre2_match_context_create (NULL);
+ pcre2_set_depth_limit (pc->mcontext, lim);
+ continue;
+ }
return e;
}
}
@@ -118,27 +104,35 @@ jit_exec (struct pcre_comp *pc, char const *subject, int search_bytes,
void *
Pcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
{
- int e;
- char const *ep;
+ PCRE2_SIZE e;
+ int ec;
+ PCRE2_UCHAR8 ep[128]; /* 120 code units is suggested to avoid truncation */
static char const wprefix[] = "(?<!\\w)(?:";
static char const wsuffix[] = ")(?!\\w)";
static char const xprefix[] = "^(?:";
static char const xsuffix[] = ")$";
int fix_len_max = MAX (sizeof wprefix - 1 + sizeof wsuffix - 1,
sizeof xprefix - 1 + sizeof xsuffix - 1);
- char *re = xnmalloc (4, size + (fix_len_max + 4 - 1) / 4);
- int flags = PCRE_DOLLAR_ENDONLY | (match_icase ? PCRE_CASELESS : 0);
+ unsigned char *re = xmalloc (size + fix_len_max + 1);
+ int flags = PCRE2_DOLLAR_ENDONLY | (match_icase ? PCRE2_CASELESS : 0);
char *patlim = pattern + size;
- char *n = re;
- char const *p;
- char const *pnul;
+ char *n = (char *)re;
struct pcre_comp *pc = xcalloc (1, sizeof (*pc));
+ pcre2_compile_context *ccontext = pcre2_compile_context_create(NULL);
if (localeinfo.multibyte)
{
if (! localeinfo.using_utf8)
die (EXIT_TROUBLE, 0, _("-P supports only unibyte and UTF-8 locales"));
- flags |= PCRE_UTF8;
+ flags |= PCRE2_UTF;
+#if 0
+ /* do not match individual code units but only UTF-8 */
+ flags |= PCRE2_NEVER_BACKSLASH_C;
+#endif
+#ifdef PCRE2_MATCH_INVALID_UTF
+ /* consider invalid UTF-8 as a barrier, instead of error */
+ flags |= PCRE2_MATCH_INVALID_UTF;
+#endif
}
/* FIXME: Remove this restriction. */
@@ -151,56 +145,42 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
if (match_lines)
strcpy (n, xprefix);
n += strlen (n);
-
- /* The PCRE interface doesn't allow NUL bytes in the pattern, so
- replace each NUL byte in the pattern with the four characters
- "\000", removing a preceding backslash if there are an odd
- number of backslashes before the NUL. */
- *patlim = '\0';
- for (p = pattern; (pnul = p + strlen (p)) < patlim; p = pnul + 1)
+ memcpy (n, pattern, size);
+ n += size;
+ if (match_words && !match_lines)
{
- memcpy (n, p, pnul - p);
- n += pnul - p;
- for (p = pnul; pattern < p && p[-1] == '\\'; p--)
- continue;
- n -= (pnul - p) & 1;
- strcpy (n, "\\000");
- n += 4;
- }
- memcpy (n, p, patlim - p + 1);
- n += patlim - p;
- *patlim = '\n';
-
- if (match_words)
strcpy (n, wsuffix);
+ n += strlen(wsuffix);
+ }
if (match_lines)
+ {
strcpy (n, xsuffix);
+ n += strlen(xsuffix);
+ }
- pc->cre = pcre_compile (re, flags, &ep, &e, pcre_maketables ());
+ pcre2_set_character_tables (ccontext, pcre2_maketables (NULL));
+ pc->cre = pcre2_compile (re, n - (char *)re, flags, &ec, &e, ccontext);
if (!pc->cre)
- die (EXIT_TROUBLE, 0, "%s", ep);
-
- int pcre_study_flags = PCRE_STUDY_EXTRA_NEEDED | PCRE_STUDY_JIT_COMPILE;
- pc->extra = pcre_study (pc->cre, pcre_study_flags, &ep);
- if (ep)
- die (EXIT_TROUBLE, 0, "%s", ep);
+ {
+ pcre2_get_error_message (ec, ep, sizeof (ep));
+ die (EXIT_TROUBLE, 0, "%s", ep);
+ }
-#if PCRE_STUDY_JIT_COMPILE
- if (pcre_fullinfo (pc->cre, pc->extra, PCRE_INFO_JIT, &e))
- die (EXIT_TROUBLE, 0, _("internal error (should never happen)"));
+ pc->data = pcre2_match_data_create_from_pattern (pc->cre, NULL);
- /* The PCRE documentation says that a 32 KiB stack is the default. */
- if (e)
- pc->jit_stack_size = 32 << 10;
-#endif
+ ec = pcre2_jit_compile (pc->cre, PCRE2_JIT_COMPLETE);
+ if (ec && ec != PCRE2_ERROR_JIT_BADOPTION && ec != PCRE2_ERROR_NOMEMORY)
+ die (EXIT_TROUBLE, 0, _("JIT internal error: %d"), ec);
+ else
+ {
+ /* The PCRE documentation says that a 32 KiB stack is the default. */
+ pc->jit_stack_size = 32 << 10;
+ }
free (re);
- int sub[NSUB];
- pc->empty_match[false] = pcre_exec (pc->cre, pc->extra, "", 0, 0,
- PCRE_NOTBOL, sub, NSUB);
- pc->empty_match[true] = pcre_exec (pc->cre, pc->extra, "", 0, 0, 0, sub,
- NSUB);
+ pc->empty_match[false] = jit_exec (pc, "", 0, 0, PCRE2_NOTBOL);
+ pc->empty_match[true] = jit_exec (pc, "", 0, 0, 0);
return pc;
}
@@ -209,15 +189,15 @@ size_t
Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
char const *start_ptr)
{
- int sub[NSUB];
char const *p = start_ptr ? start_ptr : buf;
bool bol = p[-1] == eolbyte;
char const *line_start = buf;
- int e = PCRE_ERROR_NOMATCH;
+ int e = PCRE2_ERROR_NOMATCH;
char const *line_end;
struct pcre_comp *pc = vcp;
+ PCRE2_SIZE *sub = pcre2_get_ovector_pointer (pc->data);
- /* The search address to pass to pcre_exec. This is the start of
+ /* The search address to pass to PCRE. This is the start of
the buffer, or just past the most-recently discovered encoding
error or line end. */
char const *subject = buf;
@@ -229,14 +209,14 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
better and the correctness issues were too puzzling. See
Bug#22655. */
line_end = rawmemchr (p, eolbyte);
- if (INT_MAX < line_end - p)
+ if (PCRE2_SIZE_MAX < line_end - p)
die (EXIT_TROUBLE, 0, _("exceeded PCRE's line length limit"));
for (;;)
{
/* Skip past bytes that are easily determined to be encoding
errors, treating them as data that cannot match. This is
- faster than having pcre_exec check them. */
+ faster than having PCRE check them. */
while (localeinfo.sbclen[to_uchar (*p)] == -1)
{
p++;
@@ -244,10 +224,10 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
bol = false;
}
- int search_offset = p - subject;
+ PCRE2_SIZE search_offset = p - subject;
/* Check for an empty match; this is faster than letting
- pcre_exec do it. */
+ PCRE do it. */
if (p == line_end)
{
sub[0] = sub[1] = search_offset;
@@ -257,13 +237,14 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
int options = 0;
if (!bol)
- options |= PCRE_NOTBOL;
+ options |= PCRE2_NOTBOL;
- e = jit_exec (pc, subject, line_end - subject, search_offset,
- options, sub);
- if (e != PCRE_ERROR_BADUTF8)
+ e = jit_exec (pc, subject, line_end - subject,
+ search_offset, options);
+ /* PCRE2 provides 22 different error codes for bad UTF-8 */
+ if (! (PCRE2_ERROR_UTF8_ERR21 <= e && e < PCRE2_ERROR_UTF8_ERR1))
break;
- int valid_bytes = sub[0];
+ PCRE2_SIZE valid_bytes = pcre2_get_startchar (pc->data);
if (search_offset <= valid_bytes)
{
@@ -273,14 +254,15 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
/* Handle the empty-match case specially, for speed.
This optimization is valid if VALID_BYTES is zero,
which means SEARCH_OFFSET is also zero. */
+ sub[0] = valid_bytes;
sub[1] = 0;
e = pc->empty_match[bol];
}
else
e = jit_exec (pc, subject, valid_bytes, search_offset,
- options | PCRE_NO_UTF8_CHECK | PCRE_NOTEOL, sub);
+ options | PCRE2_NO_UTF_CHECK | PCRE2_NOTEOL);
- if (e != PCRE_ERROR_NOMATCH)
+ if (e != PCRE2_ERROR_NOMATCH)
break;
/* Treat the encoding error as data that cannot match. */
@@ -291,7 +273,7 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
subject += valid_bytes + 1;
}
- if (e != PCRE_ERROR_NOMATCH)
+ if (e != PCRE2_ERROR_NOMATCH)
break;
bol = true;
p = subject = line_start = line_end + 1;
@@ -302,26 +284,35 @@ Pexecute (void *vcp, char const *buf, size_t size, size_t *match_size,
{
switch (e)
{
- case PCRE_ERROR_NOMATCH:
+ case PCRE2_ERROR_NOMATCH:
break;
- case PCRE_ERROR_NOMEMORY:
+ case PCRE2_ERROR_NOMEMORY:
die (EXIT_TROUBLE, 0, _("%s: memory exhausted"), input_filename ());
-#if PCRE_STUDY_JIT_COMPILE
- case PCRE_ERROR_JIT_STACKLIMIT:
+ case PCRE2_ERROR_JIT_STACKLIMIT:
die (EXIT_TROUBLE, 0, _("%s: exhausted PCRE JIT stack"),
input_filename ());
-#endif
- case PCRE_ERROR_MATCHLIMIT:
+ case PCRE2_ERROR_MATCHLIMIT:
die (EXIT_TROUBLE, 0, _("%s: exceeded PCRE's backtracking limit"),
input_filename ());
- case PCRE_ERROR_RECURSIONLIMIT:
- die (EXIT_TROUBLE, 0, _("%s: exceeded PCRE's recursion limit"),
+ case PCRE2_ERROR_DEPTHLIMIT:
+ die (EXIT_TROUBLE, 0,
+ _("%s: exceeded PCRE's nested backtracking limit"),
input_filename ());
+ case PCRE2_ERROR_RECURSELOOP:
+ die (EXIT_TROUBLE, 0, _("%s: PCRE detected recurse loop"),
+ input_filename ());
+
+#ifdef PCRE2_ERROR_HEAPLIMIT
+ case PCRE2_ERROR_HEAPLIMIT:
+ die (EXIT_TROUBLE, 0, _("%s: exceeded PCRE's heap limit"),
+ input_filename ());
+#endif
+
default:
/* For now, we lump all remaining PCRE failures into this basket.
If anyone cares to provide sample grep usage that can trigger
diff --git a/tests/filename-lineno.pl b/tests/filename-lineno.pl
index 1e84b45..1ff3d6a 100755
--- a/tests/filename-lineno.pl
+++ b/tests/filename-lineno.pl
@@ -101,13 +101,13 @@ my @Tests =
],
['invalid-re-P-paren', '-P ")"', {EXIT=>2},
{ERR => $ENV{PCRE_WORKS} == 1
- ? "$prog: unmatched parentheses\n"
+ ? "$prog: unmatched closing parenthesis\n"
: $no_pcre
},
],
['invalid-re-P-star-paren', '-P "a.*)"', {EXIT=>2},
{ERR => $ENV{PCRE_WORKS} == 1
- ? "$prog: unmatched parentheses\n"
+ ? "$prog: unmatched closing parenthesis\n"
: $no_pcre
},
],
--
2.33.0

View File

@ -0,0 +1,54 @@
From 6e1450408a7921771c41973761995e06445ba18b Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Sat, 13 Nov 2021 13:52:23 -0800
Subject: [PATCH] grep: speed up, fix bad-UTF8 check with -P
* src/pcresearch.c (bad_utf8_from_pcre2): New function. Fix bug
where PCRE2_ERROR_UTF8_ERR1 was not treated as an encoding error.
Improve performance when PCRE2_MATCH_INVALID_UTF is defined.
(Pexecute): Use it.
Reference: https://git.savannah.gnu.org/cgit/grep.git/commit?id=6e1450408a7921771c41973761995e06445ba18b
Conflict:NA
---
src/pcresearch.c | 16 ++++++++++++++--
1 file changed, 14 insertions(+), 2 deletions(-)
diff --git a/src/pcresearch.c b/src/pcresearch.c
index 286e1dc..953aca2 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -104,6 +104,18 @@ jit_exec (struct pcre_comp *pc, char const *subject, PCRE2_SIZE search_bytes,
}
}
+/* Return true if E is an error code for bad UTF-8, and if pcre2_match
+ could return E because PCRE lacks PCRE2_MATCH_INVALID_UTF. */
+static bool
+bad_utf8_from_pcre2 (int e)
+{
+#ifdef PCRE2_MATCH_INVALID_UTF
+ return false;
+#else
+ return PCRE2_ERROR_UTF8_ERR21 <= e && e <= PCRE2_ERROR_UTF8_ERR1;
+#endif
+}
+
/* Compile the -P style PATTERN, containing SIZE bytes that are
followed by '\n'. Return a description of the compiled pattern. */
@@ -248,9 +260,9 @@ Pexecute (void *vcp, char const *buf, idx_t size, idx_t *match_size,
e = jit_exec (pc, subject, line_end - subject,
search_offset, options);
- /* PCRE2 provides 22 different error codes for bad UTF-8 */
- if (! (PCRE2_ERROR_UTF8_ERR21 <= e && e < PCRE2_ERROR_UTF8_ERR1))
+ if (!bad_utf8_from_pcre2 (e))
break;
+
PCRE2_SIZE valid_bytes = pcre2_get_startchar (pc->data);
if (search_offset <= valid_bytes)
--
2.23.0

View File

@ -0,0 +1,38 @@
From b3a85a1a8a816f4f6f9c01399c16efe92a86ca06 Mon Sep 17 00:00:00 2001
From: Paul Eggert <eggert@cs.ucla.edu>
Date: Tue, 9 Nov 2021 10:11:42 -0800
Subject: [PATCH] grep: work around PCRE bug
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Problem reported by Carlo Marcelo Arenas Belón (Bug#51710).
* src/pcresearch.c (jit_exec): Don’t attempt to grow the JIT stack
over INT_MAX - 8 * 1024.
Reference: https://git.savannah.gnu.org/cgit/grep.git/commit?id=b3a85a1a8a816f4f6f9c01399c16efe92a86ca06
Conflict: NA
---
src/pcresearch.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/src/pcresearch.c b/src/pcresearch.c
index 3bdaee9..09f92c8 100644
--- a/src/pcresearch.c
+++ b/src/pcresearch.c
@@ -72,8 +72,11 @@ jit_exec (struct pcre_comp *pc, char const *subject, int search_bytes,
search_offset, options, sub, NSUB);
#if PCRE_STUDY_JIT_COMPILE
+ /* Going over this would trigger an int overflow bug within PCRE. */
+ int jitstack_max = INT_MAX - 8 * 1024;
+
if (e == PCRE_ERROR_JIT_STACKLIMIT
- && 0 < pc->jit_stack_size && pc->jit_stack_size <= INT_MAX / 2)
+ && 0 < pc->jit_stack_size && pc->jit_stack_size <= jitstack_max / 2)
{
int old_size = pc->jit_stack_size;
int new_size = pc->jit_stack_size = old_size * 2;
--
2.27.0

View File

@ -52,11 +52,11 @@ index a107f4d..45b67ee 100644
{ {
if (! localeinfo.using_utf8) if (! localeinfo.using_utf8)
die (EXIT_TROUBLE, 0, _("-P supports only unibyte and UTF-8 locales")); die (EXIT_TROUBLE, 0, _("-P supports only unibyte and UTF-8 locales"));
- flags |= PCRE_UTF8; - flags |= PCRE2_UTF;
+ flags |= (PCRE_UTF8 | PCRE_UCP); + flags |= (PCRE2_UTF | PCRE2_UCP);
} #if 0
/* do not match individual code units but only UTF-8 */
/* FIXME: Remove this restriction. */ flags |= PCRE2_NEVER_BACKSLASH_C;
diff --git a/tests/Makefile.am b/tests/Makefile.am diff --git a/tests/Makefile.am b/tests/Makefile.am
index e0b0503..a47cf5c 100644 index e0b0503..a47cf5c 100644
--- a/tests/Makefile.am --- a/tests/Makefile.am
@ -106,4 +106,3 @@ index 0000000..4cd7db6
-- --
2.33.0 2.33.0

View File

@ -1,20 +1,26 @@
Name: grep Name: grep
Version: 3.7 Version: 3.7
Release: 6 Release: 9
Summary: A string search utility Summary: A string search utility
License: GPLv3+ License: GPLv3+
URL: http://www.gnu.org/software/grep/ URL: http://www.gnu.org/software/grep/
Source0: https://ftp.gnu.org/gnu/grep/grep-%{version}.tar.xz Source0: https://ftp.gnu.org/gnu/grep/grep-%{version}.tar.xz
Source1: colorgrep.sh Source1: color_grep.sh
Source2: colorgrep.csh Source2: colorgrep.csh
Source3: grepconf.sh Source3: grepconf.sh
Patch1: backport-grep-avoid-sticky-problem-with-f-f.patch Patch1: backport-grep-avoid-sticky-problem-with-f-f.patch
Patch2: backport-grep-s-does-not-suppress-binary-file-matches.patch Patch2: backport-grep-s-does-not-suppress-binary-file-matches.patch
Patch3: backport-grep-bug-backref-in-last-of-multiple-patter.patch Patch3: backport-grep-bug-backref-in-last-of-multiple-patter.patch
Patch4: backport-pcre-use-UCP-in-UTF-mode.patch Patch4: backport-fix-regex-compilation-memory-leaks.patch
Patch5: backport-grep-work-around-PCRE-bug.patch
Patch6: backport-grep-migrate-to-pcre2.patch
Patch7: backport-grep-Don-t-limit-jitstack_max-to-INT_MAX.patch
Patch8: backport-grep-speed-up-fix-bad-UTF8-check-with-P.patch
Patch9: backport-grep-fix-minor-P-memory-leak.patch
Patch10: backport-pcre-use-UCP-in-UTF-mode.patch
BuildRequires: gcc pcre-devel >= 3.9-10 texinfo gettext libsigsegv-devel automake BuildRequires: gcc pcre2-devel texinfo gettext libsigsegv-devel automake
Provides: /bin/egrep /bin/fgrep /bin/grep bundled(gnulib) Provides: /bin/egrep /bin/fgrep /bin/grep bundled(gnulib)
%description %description
@ -27,7 +33,7 @@ a specified pattern. By default, Grep outputs the matching lines.
%build %build
autoreconf autoreconf
%configure --disable-silent-rules \ %configure --disable-silent-rules \
CPPFLAGS="-I%{_includedir}/pcre" CFLAGS="$RPM_OPT_FLAGS -fsigned-char" CPPFLAGS="-I%{_includedir}/pcre2" CFLAGS="$RPM_OPT_FLAGS -fsigned-char"
%make_build %make_build
%install %install
@ -48,7 +54,8 @@ make check
%files %files
%{_datadir}/locale/* %{_datadir}/locale/*
%config(noreplace) %{_sysconfdir}/profile.d/colorgrep.*sh %config(noreplace) %{_sysconfdir}/profile.d/color_grep.sh
%config(noreplace) %{_sysconfdir}/profile.d/colorgrep.csh
%doc NEWS README THANKS TODO %doc NEWS README THANKS TODO
%license COPYING AUTHORS %license COPYING AUTHORS
%{_bindir}/*grep %{_bindir}/*grep
@ -58,9 +65,18 @@ make check
%changelog %changelog
* Mon Apr 24 2023 gaoruoshu <gaoruoshu@huawei.com> - 3.7-6 * Tue Nov 7 2023 zhoupengcheng <zhoupengcheng11@huawei.com> - 3.7-9
- rename colorgrep.sh to color_grep.sh
* Mon Nov 6 2023 zhoupengcheng <zhoupengcheng11@huawei.com> - 3.7-8
- Modify the dependency from pcre to pcre2 and fix bug
* Mon Apr 24 2023 gaoruoshu <gaoruoshu@huawei.com> - 3.7-7
- pcre: use UCP in UTF mode - pcre: use UCP in UTF mode
* Mon Apr 3 2023 renchunhui <renchunhui2@huawei.com> - 3.7-6
- backport patch from upstream
* Mon Dec 26 2022 gaoruoshu <gaoruoshu@huawei.com> - 3.7-5 * Mon Dec 26 2022 gaoruoshu <gaoruoshu@huawei.com> - 3.7-5
- backport patch from upstream - backport patch from upstream