grep:pcre use UCP in UTF mode
(cherry picked from commit fba0aecf8a7b94b77225555e041113846d85aad2)
This commit is contained in:
parent
da9c643827
commit
ee5ef74a98
109
backport-pcre-use-UCP-in-UTF-mode.patch
Normal file
109
backport-pcre-use-UCP-in-UTF-mode.patch
Normal file
@ -0,0 +1,109 @@
|
||||
From 5e3b760f65f13856e5717e5b9d935f5b4a615be3 Mon Sep 17 00:00:00 2001
|
||||
From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= <carenas@gmail.com>
|
||||
Date: Fri, 6 Jan 2023 19:34:56 -0800
|
||||
Subject: [PATCH] pcre: use UCP in UTF mode
|
||||
|
||||
This fixes a serious bug affecting word-boundary and word-constituent regular
|
||||
expressions when the desired match involves non-ASCII UTF-8 characters.
|
||||
* src/pcresearch.c: Set PCRE2_UCP together with PCRE2_UTF
|
||||
* tests/pcre-utf8-w: New file.
|
||||
* tests/Makefile.am (TESTS): Add it.
|
||||
* NEWS (Bug fixes): Mention this.
|
||||
* THANKS.in: Add Gro-Tsen and Karl Pettersson.
|
||||
Reported by Gro-Tsen https://twitter.com/gro_tsen/status/1610972356972875777
|
||||
via Karl Pettersson in https://github.com/PCRE2Project/pcre2/issues/185
|
||||
This bug was present from grep-2.5, when --perl-regexp (-P) support was added.
|
||||
|
||||
Reference: https://git.savannah.gnu.org/cgit/grep.git/commit?id=5e3b760f65f13856e5717e5b9d935f5b4a615be3
|
||||
Conflict: drop the NEWS hunk; adapt src/pcresearch.c to the PCRE1 API (PCRE_UTF8 | PCRE_UCP instead of PCRE2_UTF | PCRE2_UCP)
|
||||
---
|
||||
THANKS.in | 2 ++
|
||||
src/pcresearch.c | 2 +-
|
||||
tests/Makefile.am | 1 +
|
||||
tests/pcre-utf8-w | 28 ++++++++++++++++++++++++++++
|
||||
4 files changed, 32 insertions(+), 1 deletion(-)
|
||||
create mode 100755 tests/pcre-utf8-w
|
||||
|
||||
diff --git a/THANKS.in b/THANKS.in
|
||||
index 9872bfa..d0d6f92 100644
|
||||
--- a/THANKS.in
|
||||
+++ b/THANKS.in
|
||||
@@ -35,6 +35,7 @@ Gerald Stoller gerald_stoller@hotmail.com
|
||||
Grant McDorman grant@isgtec.com
|
||||
Greg Boyd gboyd.ccsf@gmail.com
|
||||
Greg Louis glouis@dynamicro.on.ca
|
||||
+Gro-Tsen https://twitter.com/gro_tsen
|
||||
Guglielmo 'bond' Bondioni g.bondioni@libero.it
|
||||
H. Merijn Brand h.m.brand@hccnet.nl
|
||||
Harald Hanche-Olsen hanche@math.ntnu.no
|
||||
@@ -50,6 +51,7 @@ Joel N. Weber II devnull@gnu.org
|
||||
John Hughes john@nitelite.calvacom.fr
|
||||
Jorge Stolfi stolfi@dcc.unicamp.br
|
||||
Karl Heuer kwzh@gnu.org
|
||||
+Karl Pettersson karl.pettersson@klpn.se
|
||||
Kaveh R. Ghazi ghazi@caip.rutgers.edu
|
||||
Kazuro Furukawa furukawa@apricot.kek.jp
|
||||
Keith Bostic bostic@bsdi.com
|
||||
diff --git a/src/pcresearch.c b/src/pcresearch.c
|
||||
index a107f4d..45b67ee 100644
|
||||
--- a/src/pcresearch.c
|
||||
+++ b/src/pcresearch.c
|
||||
@@ -141,7 +141,7 @@ Pcompile (char *pattern, size_t size, reg_syntax_t ignored, bool exact)
|
||||
{
|
||||
if (! localeinfo.using_utf8)
|
||||
die (EXIT_TROUBLE, 0, _("-P supports only unibyte and UTF-8 locales"));
|
||||
- flags |= PCRE_UTF8;
|
||||
+ flags |= (PCRE_UTF8 | PCRE_UCP);
|
||||
}
|
||||
|
||||
/* FIXME: Remove this restriction. */
|
||||
diff --git a/tests/Makefile.am b/tests/Makefile.am
|
||||
index e0b0503..a47cf5c 100644
|
||||
--- a/tests/Makefile.am
|
||||
+++ b/tests/Makefile.am
|
||||
@@ -147,6 +147,7 @@ TESTS = \
|
||||
pcre-jitstack \
|
||||
pcre-o \
|
||||
pcre-utf8 \
|
||||
+ pcre-utf8-w \
|
||||
pcre-w \
|
||||
pcre-wx-backref \
|
||||
pcre-z \
|
||||
diff --git a/tests/pcre-utf8-w b/tests/pcre-utf8-w
|
||||
new file mode 100755
|
||||
index 0000000..4cd7db6
|
||||
--- /dev/null
|
||||
+++ b/tests/pcre-utf8-w
|
||||
@@ -0,0 +1,28 @@
|
||||
+#!/bin/sh
|
||||
+# Ensure non-ASCII UTF-8 characters are correctly identified as word-constituent
|
||||
+#
|
||||
+# Copyright (C) 2023 Free Software Foundation, Inc.
|
||||
+#
|
||||
+# Copying and distribution of this file, with or without modification,
|
||||
+# are permitted in any medium without royalty provided the copyright
|
||||
+# notice and this notice are preserved.
|
||||
+
|
||||
+. "${srcdir=.}/init.sh"; path_prepend_ ../src
|
||||
+require_en_utf8_locale_
|
||||
+LC_ALL=en_US.UTF-8
|
||||
+export LC_ALL
|
||||
+require_pcre_
|
||||
+
|
||||
+fail=0
|
||||
+
|
||||
+echo 'Perú'> in || framework_failure_
|
||||
+
|
||||
+echo 'ú' > exp || framework_failure_
|
||||
+grep -Po '.\b' in > out || fail=1
|
||||
+compare exp out || fail=1
|
||||
+
|
||||
+echo 'rú' > exp || framework_failure_
|
||||
+grep -Po 'r\w' in > out || fail=1
|
||||
+compare exp out || fail=1
|
||||
+
|
||||
+Exit $fail
|
||||
--
|
||||
2.33.0
|
||||
|
||||
|
||||
@ -1,6 +1,6 @@
|
||||
Name: grep
|
||||
Version: 3.7
|
||||
Release: 5
|
||||
Release: 6
|
||||
Summary: A string search utility
|
||||
License: GPLv3+
|
||||
URL: http://www.gnu.org/software/grep/
|
||||
@ -12,6 +12,7 @@ Source3: grepconf.sh
|
||||
Patch1: backport-grep-avoid-sticky-problem-with-f-f.patch
|
||||
Patch2: backport-grep-s-does-not-suppress-binary-file-matches.patch
|
||||
Patch3: backport-grep-bug-backref-in-last-of-multiple-patter.patch
|
||||
Patch4: backport-pcre-use-UCP-in-UTF-mode.patch
|
||||
|
||||
BuildRequires: gcc pcre-devel >= 3.9-10 texinfo gettext libsigsegv-devel automake
|
||||
Provides: /bin/egrep /bin/fgrep /bin/grep bundled(gnulib)
|
||||
@ -24,6 +25,7 @@ a specified pattern. By default, Grep outputs the matching lines.
|
||||
%autosetup -n %{name}-%{version} -p1
|
||||
|
||||
%build
|
||||
autoreconf
|
||||
%configure --disable-silent-rules \
|
||||
CPPFLAGS="-I%{_includedir}/pcre" CFLAGS="$RPM_OPT_FLAGS -fsigned-char"
|
||||
%make_build
|
||||
@ -56,6 +58,9 @@ make check
|
||||
|
||||
|
||||
%changelog
|
||||
* Mon Apr 24 2023 gaoruoshu <gaoruoshu@huawei.com> - 3.7-6
|
||||
- pcre: use UCP in UTF mode
|
||||
|
||||
* Mon Dec 26 2022 gaoruoshu <gaoruoshu@huawei.com> - 3.7-5
|
||||
- backport patch from upstream
|
||||
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user