gcc-12/SME-0073-aarch64-Add-r-m-and-m-r-alternatives-to-64-bit-vecto.patch
eastb233 dbbaf1198d AArch64: Support SME intrinsics
(cherry picked from commit 5a8b1f3fd450f440943ca1eabccdfe6e9abdf668)
2024-03-20 09:15:24 +08:00

169 lines
5.4 KiB
Diff

From fa4e2267ec2408f95f750167b3d542eeca288124 Mon Sep 17 00:00:00 2001
From: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date: Thu, 1 Jun 2023 09:37:06 +0100
Subject: [PATCH 073/144] aarch64: Add =r,m and =m,r alternatives to 64-bit
vector move patterns
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=12e71b593ea0c64d919df525cd75ea10b7be8a4b
We can use the X registers to load and store 64-bit vector modes; we just need to add the alternatives
to the mov patterns. This straightforward patch does that, and does the same for the pair variants.
For the testcase in the code we now generate the optimal assembly without any superfluous
GP<->SIMD moves.
Bootstrapped and tested on aarch64-none-linux-gnu and aarch64_be-none-elf.
gcc/ChangeLog:
* config/aarch64/aarch64-simd.md (*aarch64_simd_mov<VDMOV:mode>):
Add =r,m and =m,r alternatives.
(load_pair<DREG:mode><DREG2:mode>): Likewise.
(vec_store_pair<DREG:mode><DREG2:mode>): Likewise.
gcc/testsuite/ChangeLog:
* gcc.target/aarch64/xreg-vec-modes_1.c: New test.
---
gcc/config/aarch64/aarch64-simd.md | 40 ++++++++++--------
.../gcc.target/aarch64/xreg-vec-modes_1.c | 42 +++++++++++++++++++
2 files changed, 65 insertions(+), 17 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 845f0298e..a974b374b 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -116,26 +116,28 @@
(define_insn "*aarch64_simd_mov<VDMOV:mode>"
[(set (match_operand:VDMOV 0 "nonimmediate_operand"
- "=w, m, m, w, ?r, ?w, ?r, w, w")
+ "=w, r, m, m, m, w, ?r, ?w, ?r, w, w")
(match_operand:VDMOV 1 "general_operand"
- "m, Dz, w, w, w, r, r, Dn, Dz"))]
+ "m, m, Dz, w, r, w, w, r, r, Dn, Dz"))]
"TARGET_FLOAT
&& (register_operand (operands[0], <MODE>mode)
|| aarch64_simd_reg_or_zero (operands[1], <MODE>mode))"
"@
ldr\t%d0, %1
+ ldr\t%x0, %1
str\txzr, %0
str\t%d1, %0
+ str\t%x1, %0
* return TARGET_SIMD ? \"mov\t%0.<Vbtype>, %1.<Vbtype>\" : \"fmov\t%d0, %d1\";
* return TARGET_SIMD ? \"umov\t%0, %1.d[0]\" : \"fmov\t%x0, %d1\";
fmov\t%d0, %1
mov\t%0, %1
* return aarch64_output_simd_mov_immediate (operands[1], 64);
fmov\t%d0, xzr"
- [(set_attr "type" "neon_load1_1reg<q>, store_8, neon_store1_1reg<q>,\
- neon_logic<q>, neon_to_gp<q>, f_mcr,\
+ [(set_attr "type" "neon_load1_1reg<q>, load_8, store_8, neon_store1_1reg<q>,\
+ store_8, neon_logic<q>, neon_to_gp<q>, f_mcr,\
mov_reg, neon_move<q>, f_mcr")
- (set_attr "arch" "*,*,*,*,*,*,*,simd,*")]
+ (set_attr "arch" "*,*,*,*,*,*,*,*,*,simd,*")]
)
(define_insn "*aarch64_simd_mov<VQMOV:mode>"
@@ -177,31 +179,35 @@
)
(define_insn "load_pair<DREG:mode><DREG2:mode>"
- [(set (match_operand:DREG 0 "register_operand" "=w")
- (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump"))
- (set (match_operand:DREG2 2 "register_operand" "=w")
- (match_operand:DREG2 3 "memory_operand" "m"))]
+ [(set (match_operand:DREG 0 "register_operand" "=w,r")
+ (match_operand:DREG 1 "aarch64_mem_pair_operand" "Ump,Ump"))
+ (set (match_operand:DREG2 2 "register_operand" "=w,r")
+ (match_operand:DREG2 3 "memory_operand" "m,m"))]
"TARGET_FLOAT
&& rtx_equal_p (XEXP (operands[3], 0),
plus_constant (Pmode,
XEXP (operands[1], 0),
GET_MODE_SIZE (<DREG:MODE>mode)))"
- "ldp\\t%d0, %d2, %z1"
- [(set_attr "type" "neon_ldp")]
+ "@
+ ldp\t%d0, %d2, %z1
+ ldp\t%x0, %x2, %z1"
+ [(set_attr "type" "neon_ldp,load_16")]
)
(define_insn "vec_store_pair<DREG:mode><DREG2:mode>"
- [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump")
- (match_operand:DREG 1 "register_operand" "w"))
- (set (match_operand:DREG2 2 "memory_operand" "=m")
- (match_operand:DREG2 3 "register_operand" "w"))]
+ [(set (match_operand:DREG 0 "aarch64_mem_pair_operand" "=Ump,Ump")
+ (match_operand:DREG 1 "register_operand" "w,r"))
+ (set (match_operand:DREG2 2 "memory_operand" "=m,m")
+ (match_operand:DREG2 3 "register_operand" "w,r"))]
"TARGET_FLOAT
&& rtx_equal_p (XEXP (operands[2], 0),
plus_constant (Pmode,
XEXP (operands[0], 0),
GET_MODE_SIZE (<DREG:MODE>mode)))"
- "stp\\t%d1, %d3, %z0"
- [(set_attr "type" "neon_stp")]
+ "@
+ stp\t%d1, %d3, %z0
+ stp\t%x1, %x3, %z0"
+ [(set_attr "type" "neon_stp,store_16")]
)
(define_insn "load_pair<VQ:mode><VQ2:mode>"
diff --git a/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c b/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c
new file mode 100644
index 000000000..fc4dcb1ad
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/xreg-vec-modes_1.c
@@ -0,0 +1,42 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+/* { dg-final { check-function-bodies "**" "" "" } } */
+
+typedef unsigned int v2si __attribute__((vector_size (8)));
+
+#define force_gp(V1) asm volatile ("" \
+ : "=r"(V1) \
+ : "r"(V1) \
+ : /* No clobbers */);
+
+/*
+** foo:
+** ldr (x[0-9]+), \[x1\]
+** str \1, \[x0\]
+** ret
+*/
+
+void
+foo (v2si *a, v2si *b)
+{
+ v2si tmp = *b;
+ force_gp (tmp);
+ *a = tmp;
+}
+
+/*
+** foo2:
+** ldp (x[0-9]+), (x[0-9]+), \[x0\]
+** stp \1, \2, \[x1\]
+** ret
+*/
+void
+foo2 (v2si *a, v2si *b)
+{
+ v2si t1 = *a;
+ v2si t2 = a[1];
+ force_gp (t1);
+ force_gp (t2);
+ *b = t1;
+ b[1] = t2;
+}
--
2.19.1