!26 [sync] PR-25: AArch64: Fix issue #I9DE8T

From: @openeuler-sync-bot Reviewed-by: @eastb233 Signed-off-by: @eastb233
2024-04-09 11:12:27 +00:00 · 2024-04-09 11:12:27 +00:00 · c664b36473
commit c664b36473
parent de06e39b02 788f9f4073
4 changed files with 175 additions and 1 deletions
--- a/SME-0145-RISC-V-autovec-Verify-that-GET_MODE_NUNITS-is-a-mult.patch
+++ b/SME-0145-RISC-V-autovec-Verify-that-GET_MODE_NUNITS-is-a-mult.patch
@ -0,0 +1,53 @@
+From 58c3ee1f6886490fd8149147553ce3aac82a31eb Mon Sep 17 00:00:00 2001
+From: Michael Collison <collison@rivosinc.com>
+Date: Sat, 6 May 2023 12:37:50 -0600
+Subject: [PATCH 1/3] RISC-V: autovec: Verify that GET_MODE_NUNITS is a
+ multiple of 2.
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=730909fa858bd691095bc23655077aa13b7941a9
+
+While working on autovectorizing for the RISCV port I encountered an issue
+where can_duplicate_and_interleave_p assumes that GET_MODE_NUNITS is
+evenly divisible by two. The RISC-V target has vector modes (e.g. VNx1DImode),
+where GET_MODE_NUNITS is equal to one.
+
+Tested on RISCV and x86_64-linux-gnu. Okay?
+
+gcc/
+	* tree-vect-slp.cc (can_duplicate_and_interleave_p):
+	Check that GET_MODE_NUNITS is a multiple of 2.
+---
+ gcc/tree-vect-slp.cc | 7 +++++--
+ 1 file changed, 5 insertions(+), 2 deletions(-)
+
+diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
+index af477c31a..39c0955e1 100644
+--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
+@@ -399,10 +399,13 @@ can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
+ 	    (GET_MODE_BITSIZE (int_mode), 1);
+ 	  tree vector_type
+ 	    = get_vectype_for_scalar_type (vinfo, int_type, count);
+	  poly_int64 half_nelts;
+ 	  if (vector_type
+ 	      && VECTOR_MODE_P (TYPE_MODE (vector_type))
+ 	      && known_eq (GET_MODE_SIZE (TYPE_MODE (vector_type)),
+-			   GET_MODE_SIZE (base_vector_mode)))
+			   GET_MODE_SIZE (base_vector_mode))
+	      && multiple_p (GET_MODE_NUNITS (TYPE_MODE (vector_type)),
+			     2, &half_nelts))
+ 	    {
+ 	      /* Try fusing consecutive sequences of COUNT / NVECTORS elements
+ 		 together into elements of type INT_TYPE and using the result
+@@ -410,7 +413,7 @@ can_duplicate_and_interleave_p (vec_info *vinfo, unsigned int count,
+ 	      poly_uint64 nelts = GET_MODE_NUNITS (TYPE_MODE (vector_type));
+ 	      vec_perm_builder sel1 (nelts, 2, 3);
+ 	      vec_perm_builder sel2 (nelts, 2, 3);
+-	      poly_int64 half_nelts = exact_div (nelts, 2);
+
+ 	      for (unsigned int i = 0; i < 3; ++i)
+ 		{
+ 		  sel1.quick_push (i);
+-- 
+2.19.1
+
--- a/SME-0146-Add-operator-to-gimple_stmt_iterator-and-gphi_iterat.patch
+++ b/SME-0146-Add-operator-to-gimple_stmt_iterator-and-gphi_iterat.patch
@ -0,0 +1,41 @@
+From 97fba4337709aaaaa08375e6990887ea314bf8e3 Mon Sep 17 00:00:00 2001
+From: Richard Biener <rguenther@suse.de>
+Date: Tue, 18 Apr 2023 16:58:26 +0200
+Subject: [PATCH 2/3] Add operator* to gimple_stmt_iterator and gphi_iterator
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=c39cdd9e654540f74cd2478019c40f1611554a44
+
+This allows STL style iterator dereference.  It's the same
+as gsi_stmt () or .phi ().
+
+	* gimple-iterator.h (gimple_stmt_iterator::operator*): Add.
+	(gphi_iterator::operator*): Likewise.
+---
+ gcc/gimple-iterator.h | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+diff --git a/gcc/gimple-iterator.h b/gcc/gimple-iterator.h
+index 216ebee24..5d281e4f7 100644
+--- a/gcc/gimple-iterator.h
+++ b/gcc/gimple-iterator.h
+@@ -24,6 +24,8 @@ along with GCC; see the file COPYING3.  If not see
+ 
+ struct gimple_stmt_iterator
+ {
+  gimple *operator * () const { return ptr; }
+
+   /* Sequence node holding the current statement.  */
+   gimple_seq_node ptr;
+ 
+@@ -38,6 +40,8 @@ struct gimple_stmt_iterator
+ /* Iterator over GIMPLE_PHI statements.  */
+ struct gphi_iterator : public gimple_stmt_iterator
+ {
+  gphi *operator * () const { return as_a <gphi *> (ptr); }
+
+   gphi *phi () const
+   {
+     return as_a <gphi *> (ptr);
+-- 
+2.19.1
+
--- a/SME-0147-tree-optimization-110221-SLP-and-loop-mask-len.patch
+++ b/SME-0147-tree-optimization-110221-SLP-and-loop-mask-len.patch
@ -0,0 +1,74 @@
+From 2379b38302ea3548d8c1ee19f90c28b411ba48b5 Mon Sep 17 00:00:00 2001
+From: Richard Biener <rguenther@suse.de>
+Date: Fri, 10 Nov 2023 12:39:11 +0100
+Subject: [PATCH 3/3] tree-optimization/110221 - SLP and loop mask/len
+
+Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=e5f1956498251a4973d52c8aad3faf34d0443169
+
+The following fixes the issue that when SLP stmts are internal defs
+but appear invariant because they end up only using invariant defs
+then they get scheduled outside of the loop.  This nice optimization
+breaks down when loop masks or lens are applied since those are not
+explicitly tracked as dependences.  The following makes sure to never
+schedule internal defs outside of the vectorized loop when the
+loop uses masks/lens.
+
+	PR tree-optimization/110221
+	* tree-vect-slp.cc (vect_schedule_slp_node): When loop
+	masking / len is applied make sure to not schedule
+	internal defs outside of the loop.
+
+	* gfortran.dg/pr110221.f: New testcase.
+---
+ gcc/testsuite/gfortran.dg/pr110221.f | 17 +++++++++++++++++
+ gcc/tree-vect-slp.cc                 | 10 ++++++++++
+ 2 files changed, 27 insertions(+)
+ create mode 100644 gcc/testsuite/gfortran.dg/pr110221.f
+
+diff --git a/gcc/testsuite/gfortran.dg/pr110221.f b/gcc/testsuite/gfortran.dg/pr110221.f
+new file mode 100644
+index 000000000..8b5738431
+--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/pr110221.f
+@@ -0,0 +1,17 @@
+C PR middle-end/68146
+C { dg-do compile }
+C { dg-options "-O2 -w" }
+C { dg-additional-options "-mavx512f --param vect-partial-vector-usage=2" { target avx512f } }
+      SUBROUTINE CJYVB(V,Z,V0,CBJ,CDJ,CBY,CYY)
+      IMPLICIT DOUBLE PRECISION (A,B,G,O-Y)
+      IMPLICIT COMPLEX*16 (C,Z)
+      DIMENSION CBJ(0:*),CDJ(0:*),CBY(0:*)
+      N=INT(V)
+      CALL GAMMA2(VG,GA)
+      DO 65 K=1,N
+        CBY(K)=CYY
+65    CONTINUE
+      CDJ(0)=V0/Z*CBJ(0)-CBJ(1)
+      DO 70 K=1,N
+70      CDJ(K)=-(K+V0)/Z*CBJ(K)+CBJ(K-1)
+      END
+diff --git a/gcc/tree-vect-slp.cc b/gcc/tree-vect-slp.cc
+index 39c0955e1..71f20cf56 100644
+--- a/gcc/tree-vect-slp.cc
+++ b/gcc/tree-vect-slp.cc
+@@ -7266,6 +7266,16 @@ vect_schedule_slp_node (vec_info *vinfo,
+       /* Emit other stmts after the children vectorized defs which is
+ 	 earliest possible.  */
+       gimple *last_stmt = NULL;
+      if (auto loop_vinfo = dyn_cast <loop_vec_info> (vinfo))
+	if (LOOP_VINFO_FULLY_MASKED_P (loop_vinfo)
+	    || LOOP_VINFO_FULLY_WITH_LENGTH_P (loop_vinfo))
+	  {
+	    /* But avoid scheduling internal defs outside of the loop when
+	       we might have only implicitly tracked loop mask/len defs.  */
+	    gimple_stmt_iterator si
+	      = gsi_after_labels (LOOP_VINFO_LOOP (loop_vinfo)->header);
+	    last_stmt = *si;
+	  }
+       bool seen_vector_def = false;
+       FOR_EACH_VEC_ELT (SLP_TREE_CHILDREN (node), i, child)
+ 	if (SLP_TREE_DEF_TYPE (child) == vect_internal_def)
+-- 
+2.19.1
+
--- a/gcc-12.spec
+++ b/gcc-12.spec
@ -86,7 +86,7 @@
 Summary: Various compilers (C, C++, Objective-C, ...)
 Name: %{?scl_prefix}gcc%{gcc_ver}
 Version: 12.3.1
-Release: 17
+Release: 18
 # libgcc, libgfortran, libgomp, libstdc++ and crtstuff have
 # GCC Runtime Exception.
 License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD
@ -288,6 +288,9 @@ Patch3144: SME-0141-Canonicalize-X-Y-as-X-Y-in-match.pd-when-Y-is-0-1.patch
 Patch3145: SME-0142-middle-end-Add-new-tbranch-optab-to-add-support-for-.patch
 Patch3146: SME-0143-explow-Allow-dynamic-allocations-after-vregs.patch
 Patch3147: SME-0144-PR105169-Fix-references-to-discarded-sections.patch
+Patch3148: SME-0145-RISC-V-autovec-Verify-that-GET_MODE_NUNITS-is-a-mult.patch
+Patch3149: SME-0146-Add-operator-to-gimple_stmt_iterator-and-gphi_iterat.patch
+Patch3150: SME-0147-tree-optimization-110221-SLP-and-loop-mask-len.patch

 # Patch 5000 -

@ -2832,6 +2835,9 @@ end
 %doc rpm.doc/changelogs/libcc1/ChangeLog*

 %changelog
+* Tue Apr 09 2024 eastb233 <xiezhiheng@huawei.com> 12.3.1-18
+- AArch64: Fix issue https://gitee.com/src-openeuler/gcc-12/issues/I9DE8T
+
 * Sat Mar 09 2024 eastb233 <xiezhiheng@huawei.com> 12.3.1-17
 - AArch64: Support SME intrinsics