!26 [sync] PR-25: AArch64: Fix issue #I9DE8T

From: @openeuler-sync-bot 
Reviewed-by: @eastb233 
Signed-off-by: @eastb233
This commit is contained in:
openeuler-ci-bot 2024-04-09 11:12:27 +00:00 committed by Gitee
commit c664b36473
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
4 changed files with 175 additions and 1 deletions

View File

@ -0,0 +1,53 @@
From 58c3ee1f6886490fd8149147553ce3aac82a31eb Mon Sep 17 00:00:00 2001
From: Michael Collison <collison@rivosinc.com>
Date: Sat, 6 May 2023 12:37:50 -0600
Subject: [PATCH 1/3] RISC-V: autovec: Verify that GET_MODE_NUNITS is a
multiple of 2.
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=730909fa858bd691095bc23655077aa13b7941a9
While working on autovectorizing for the RISCV port I encountered an issue
where can_duplicate_and_interleave_p assumes that GET_MODE_NUNITS is
evenly divisible by two. The RISC-V target has vector modes (e.g. VNx1DImode),
where GET_MODE_NUNITS is equal to one.
Tested on RISCV and x86_64-linux-gnu. Okay?
gcc/
* tree-vect-slp.cc (can_duplicate_and_interleave_p):
Check that GET_MODE_NUNITS is a multiple of 2.
---
gcc/tree-vect-slp.cc | 7 +++++--
1 file changed, 5 insertions(+), 2 deletions(-)
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index af477c31a..39c0955e1 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -399,10 +399,13 @@ can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
(GET_MODE_BITSIZE (int_mode), 1);
tree vector_type
= get_vectype_for_scalar_type (vinfo, int_type, count);
+ poly_int64 half_nelts;
if (vector_type
&& VECTOR_MODE_P (TYPE_MODE (vector_type))
&& known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)),
- GET_MODE_SIZE (base_vector_mode)))
+ GET_MODE_SIZE (base_vector_mode))
+ && multiple_p (GET_MODE_NUNITS (TYPE_MODE (vector_type)),
+ 2, &half_nelts))
{
/* Try fusing consecutive sequences of COUNT / NVECTORS elements
together into elements of type INT_TYPE and using the result
@@ -410,7 +413,7 @@ can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type));
vec_perm_builder sel1 (nelts, 2, 3);
vec_perm_builder sel2 (nelts, 2, 3);
- poly_int64 half_nelts = exact_div (nelts, 2);
+
for (unsigned int i = 0; i < 3; ++i)
{
sel1.quick_push (i);
--
2.19.1

View File

@ -0,0 +1,41 @@
From 97fba4337709aaaaa08375e6990887ea314bf8e3 Mon Sep 17 00:00:00 2001
From: Richard Biener <rguenther@suse.de>
Date: Tue, 18 Apr 2023 16:58:26 +0200
Subject: [PATCH 2/3] Add operator* to gimple_stmt_iterator and gphi_iterator
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c39cdd9e654540f74cd2478019c40f1611554a44
This allows STL style iterator dereference. It's the same
as gsi_stmt () or .phi ().
* gimple-iterator.h (gimple_stmt_iterator::operator*): Add.
(gphi_iterator::operator*): Likewise.
---
gcc/gimple-iterator.h | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/gcc/gimple-iterator.h b/gcc/gimple-iterator.h
index 216ebee24..5d281e4f7 100644
--- a/gcc/gimple-iterator.h
+++ b/gcc/gimple-iterator.h
@@ -24,6 +24,8 @@ along with GCC; see the file COPYING3. If not see
struct gimple_stmt_iterator
{
+ gimple *operator * () const { return ptr; }
+
/* Sequence node holding the current statement. */
gimple_seq_node ptr;
@@ -38,6 +40,8 @@ struct gimple_stmt_iterator
/* Iterator over GIMPLE_PHI statements. */
struct gphi_iterator : public gimple_stmt_iterator
{
+ gphi *operator * () const { return as_a <gphi *> (ptr); }
+
gphi *phi () const
{
return as_a <gphi *> (ptr);
--
2.19.1

View File

@ -0,0 +1,74 @@
From 2379b38302ea3548d8c1ee19f90c28b411ba48b5 Mon Sep 17 00:00:00 2001
From: Richard Biener <rguenther@suse.de>
Date: Fri, 10 Nov 2023 12:39:11 +0100
Subject: [PATCH 3/3] tree-optimization/110221 - SLP and loop mask/len
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=e5f1956498251a4973d52c8aad3faf34d0443169
The following fixes the issue that when SLP stmts are internal defs
but appear invariant because they end up only using invariant defs
then they get scheduled outside of the loop. This nice optimization
breaks down when loop masks or lens are applied since those are not
explicitly tracked as dependences. The following makes sure to never
schedule internal defs outside of the vectorized loop when the
loop uses masks/lens.
PR tree-optimization/110221
* tree-vect-slp.cc (vect_schedule_slp_node): When loop
masking / len is applied make sure to not schedule
internal defs outside of the loop.
* gfortran.dg/pr110221.f: New testcase.
---
gcc/testsuite/gfortran.dg/pr110221.f | 17 +++++++++++++++++
gcc/tree-vect-slp.cc | 10 ++++++++++
2 files changed, 27 insertions(+)
create mode 100644 gcc/testsuite/gfortran.dg/pr110221.f
diff --git a/gcc/testsuite/gfortran.dg/pr110221.f b/gcc/testsuite/gfortran.dg/pr110221.f
new file mode 100644
index 000000000..8b5738431
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr110221.f
@@ -0,0 +1,17 @@
+C PR middle-end/68146
+C { dg-do compile }
+C { dg-options "-O2 -w" }
+C { dg-additional-options "-mavx512f --param vect-partial-vector-usage=2" { target avx512f } }
+ SUBROUTINE CJYVB(V,Z,V0,CBJ,CDJ,CBY,CYY)
+ IMPLICIT DOUBLE PRECISION (A,B,G,O-Y)
+ IMPLICIT COMPLEX*16 (C,Z)
+ DIMENSION CBJ(0:*),CDJ(0:*),CBY(0:*)
+ N=INT(V)
+ CALL GAMMA2(VG,GA)
+ DO 65 K=1,N
+ CBY(K)=CYY
+65 CONTINUE
+ CDJ(0)=V0/Z*CBJ(0)-CBJ(1)
+ DO 70 K=1,N
+70 CDJ(K)=-(K+V0)/Z*CBJ(K)+CBJ(K-1)
+ END
diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
index 39c0955e1..71f20cf56 100644
--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
@@ -7266,6 +7266,16 @@ vect_schedule_slp_node (vec_info *vinfo,
/* Emit other stmts after the children vectorized defs which is
earliest possible. */
gimple *last_stmt = NULL;
+ if (auto loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
+ if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+ || LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
+ {
+ /* But avoid scheduling internal defs outside of the loop when
+ we might have only implicitly tracked loop mask/len defs. */
+ gimple_stmt_iterator si
+ = gsi_after_labels (LOOP_VINFO_LOOP (loop_vinfo)->header);
+ last_stmt = *si;
+ }
bool seen_vector_def = false;
FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
--
2.19.1

View File

@ -86,7 +86,7 @@
Summary: Various compilers (C, C++, Objective-C, ...)
Name: %{?scl_prefix}gcc%{gcc_ver}
Version: 12.3.1
Release: 17
Release: 18
# libgcc, libgfortran, libgomp, libstdc++ and crtstuff have
# GCC Runtime Exception.
License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD
@ -288,6 +288,9 @@ Patch3144: SME-0141-Canonicalize-X-Y-as-X-Y-in-match.pd-when-Y-is-0-1.patch
Patch3145: SME-0142-middle-end-Add-new-tbranch-optab-to-add-support-for-.patch
Patch3146: SME-0143-explow-Allow-dynamic-allocations-after-vregs.patch
Patch3147: SME-0144-PR105169-Fix-references-to-discarded-sections.patch
Patch3148: SME-0145-RISC-V-autovec-Verify-that-GET_MODE_NUNITS-is-a-mult.patch
Patch3149: SME-0146-Add-operator-to-gimple_stmt_iterator-and-gphi_iterat.patch
Patch3150: SME-0147-tree-optimization-110221-SLP-and-loop-mask-len.patch
# Patch 5000 -
@ -2832,6 +2835,9 @@ end
%doc rpm.doc/changelogs/libcc1/ChangeLog*
%changelog
* Tue Apr 09 2024 eastb233 <xiezhiheng@huawei.com> 12.3.1-18
- AArch64: Fix issue https://gitee.com/src-openeuler/gcc-12/issues/I9DE8T
* Sat Mar 09 2024 eastb233 <xiezhiheng@huawei.com> 12.3.1-17
- AArch64: Support SME intrinsics