gcc-12/SME-0101-aarch64-Put-LR-save-slot-first-in-more-cases.patch
eastb233 dbbaf1198d AArch64: Support SME intrinsics
(cherry picked from commit 5a8b1f3fd450f440943ca1eabccdfe6e9abdf668)
2024-03-20 09:15:24 +08:00

107 lines
4.8 KiB
Diff

From ced5ca8fbcf315c3a60aaeb539ada607a810b17f Mon Sep 17 00:00:00 2001
From: Richard Sandiford <richard.sandiford@arm.com>
Date: Tue, 17 Oct 2023 23:46:33 +0100
Subject: [PATCH 101/144] aarch64: Put LR save slot first in more cases
Reference: https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=773306e9ef4ea1407f89686eb513a50602493666
Now that the prologue and epilogue code iterates over saved
registers in offset order, we can put the LR save slot first
without compromising LDP/STP formation.
This isn't worthwhile when shadow call stacks are enabled, since the
first two registers are also push/pop candidates, and LR cannot be
popped when shadow call stacks are enabled. (LR is instead loaded
first and compared against the shadow stack's value.)
But otherwise, it seems better to put the LR save slot first,
to reduce unnecessary variation with the layout for stack clash
protection.
gcc/
* config/aarch64/aarch64.cc (aarch64_layout_frame): Don't make
the position of the LR save slot dependent on stack clash
protection unless shadow call stacks are enabled.
gcc/testsuite/
* gcc.target/aarch64/test_frame_2.c: Expect x30 to come before x19.
* gcc.target/aarch64/test_frame_4.c: Likewise.
* gcc.target/aarch64/test_frame_7.c: Likewise.
* gcc.target/aarch64/test_frame_10.c: Likewise.
---
gcc/config/aarch64/aarch64.cc | 2 +-
gcc/testsuite/gcc.target/aarch64/test_frame_10.c | 4 ++--
gcc/testsuite/gcc.target/aarch64/test_frame_2.c | 4 ++--
gcc/testsuite/gcc.target/aarch64/test_frame_4.c | 4 ++--
gcc/testsuite/gcc.target/aarch64/test_frame_7.c | 4 ++--
5 files changed, 9 insertions(+), 9 deletions(-)
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 24762bcfc..96809b03e 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -8703,7 +8703,7 @@ aarch64_layout_frame (void)
allocate_gpr_slot (R29_REGNUM);
allocate_gpr_slot (R30_REGNUM);
}
- else if (flag_stack_clash_protection
+ else if ((flag_stack_clash_protection || !frame.is_scs_enabled)
&& known_eq (frame.reg_offset[R30_REGNUM], SLOT_REQUIRED))
/* Put the LR save slot first, since it makes a good choice of probe
for stack clash purposes. The idea is that the link register usually
diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_10.c b/gcc/testsuite/gcc.target/aarch64/test_frame_10.c
index c19505082..c54ab2d0c 100644
--- a/gcc/testsuite/gcc.target/aarch64/test_frame_10.c
+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_10.c
@@ -14,6 +14,6 @@
t_frame_pattern_outgoing (test10, 480, "x19", 24, a[8], a[9], a[10])
t_frame_run (test10)
-/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, \[0-9\]+\\\]" 1 } } */
-/* { dg-final { scan-assembler "ldp\tx19, x30, \\\[sp, \[0-9\]+\\\]" } } */
+/* { dg-final { scan-assembler-times "stp\tx30, x19, \\\[sp, \[0-9\]+\\\]" 1 } } */
+/* { dg-final { scan-assembler "ldp\tx30, x19, \\\[sp, \[0-9\]+\\\]" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_2.c b/gcc/testsuite/gcc.target/aarch64/test_frame_2.c
index 7e5df84cf..0d715314c 100644
--- a/gcc/testsuite/gcc.target/aarch64/test_frame_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_2.c
@@ -14,6 +14,6 @@ t_frame_pattern (test2, 200, "x19")
t_frame_run (test2)
-/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
-/* { dg-final { scan-assembler "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" } } */
+/* { dg-final { scan-assembler-times "stp\tx30, x19, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
+/* { dg-final { scan-assembler "ldp\tx30, x19, \\\[sp\\\], \[0-9\]+" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_4.c b/gcc/testsuite/gcc.target/aarch64/test_frame_4.c
index ed13487a0..b41229c42 100644
--- a/gcc/testsuite/gcc.target/aarch64/test_frame_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_4.c
@@ -13,6 +13,6 @@
t_frame_pattern (test4, 400, "x19")
t_frame_run (test4)
-/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
-/* { dg-final { scan-assembler "ldp\tx19, x30, \\\[sp\\\], \[0-9\]+" } } */
+/* { dg-final { scan-assembler-times "stp\tx30, x19, \\\[sp, -\[0-9\]+\\\]!" 1 } } */
+/* { dg-final { scan-assembler "ldp\tx30, x19, \\\[sp\\\], \[0-9\]+" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/test_frame_7.c b/gcc/testsuite/gcc.target/aarch64/test_frame_7.c
index 964527949..5702656a5 100644
--- a/gcc/testsuite/gcc.target/aarch64/test_frame_7.c
+++ b/gcc/testsuite/gcc.target/aarch64/test_frame_7.c
@@ -13,6 +13,6 @@
t_frame_pattern (test7, 700, "x19")
t_frame_run (test7)
-/* { dg-final { scan-assembler-times "stp\tx19, x30, \\\[sp]" 1 } } */
-/* { dg-final { scan-assembler "ldp\tx19, x30, \\\[sp\\\]" } } */
+/* { dg-final { scan-assembler-times "stp\tx30, x19, \\\[sp]" 1 } } */
+/* { dg-final { scan-assembler "ldp\tx30, x19, \\\[sp\\\]" } } */
--
2.19.1