!444 Sync patch from openeuler/gcc

From: @zhenyu--zhao_admin 
Reviewed-by: @huang-xiaoquan 
Signed-off-by: @huang-xiaoquan
This commit is contained in:
openeuler-ci-bot 2024-06-15 01:22:09 +00:00 committed by Gitee
commit 478230ea66
No known key found for this signature in database
GPG Key ID: 173E9B9CA92EEF8F
4 changed files with 1488 additions and 2 deletions

View File

@ -0,0 +1,563 @@
From 591a1ed8489e2c230f19220599b4ce8bb89d6148 Mon Sep 17 00:00:00 2001
From: yzyssdd <yuzeyang4@huawei.com>
Date: Thu, 6 Jun 2024 15:42:52 +0800
Subject: [PATCH 2/2] Fix bugs in loop detection, add a filter on SSA statements
and the corresponding DejaGnu test cases. Fix bugs so that the llc pass can
detect re-entry into a loop after jumping out of it. Return directly when
processing a non-SSA statement while looking for references in a gimple call.
---
.../gcc.dg/llc-allocate/llc-filter-ssa.c | 30 ++++
.../gcc.dg/llc-allocate/llc-loop-generate.c | 168 ++++++++++++++++++
.../aarch64/sve/acle/general-c/prefetch_1.c | 10 +-
.../acle/general-c/prefetch_gather_index_1.c | 8 +-
.../acle/general-c/prefetch_gather_index_2.c | 8 +-
.../acle/general-c/prefetch_gather_offset_1.c | 7 +-
.../acle/general-c/prefetch_gather_offset_2.c | 7 +-
.../acle/general-c/prefetch_gather_offset_3.c | 7 +-
.../acle/general-c/prefetch_gather_offset_4.c | 7 +-
gcc/tree-ssa-llc-allocate.c | 70 +++++---
10 files changed, 277 insertions(+), 45 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-filter-ssa.c
create mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-loop-generate.c
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-filter-ssa.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-filter-ssa.c
new file mode 100644
index 000000000..4478f7531
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/llc-allocate/llc-filter-ssa.c
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-parallelize-loops=2 -fipa-pta -fllc-allocate -S -fdump-tree-llc_allocate-details-lineno" } */
+
+int a, b;
+int *d;
+void f(void)
+{
+ int c;
+ b %= 1;
+
+ if(1 - (b < 1))
+ {
+ int *q = 0;
+
+ if(a)
+ {
+ c = 0;
+lbl:
+ for(*d; *d; ++*d)
+ if(c ? : a ? : (c = 1) ? : 0)
+ *q &= 1;
+ return;
+ }
+
+ q = (int *)1;
+ }
+ goto lbl;
+}
+
+/* { dg-final { scan-tree-dump "Unhandled scenario for non-ssa pointer." "llc-allocate" } } */
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-loop-generate.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-loop-generate.c
new file mode 100644
index 000000000..dc1f0eadc
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/llc-allocate/llc-loop-generate.c
@@ -0,0 +1,168 @@
+/* { dg-require-effective-target label_values } */
+/* { dg-require-stack-size "4000" } */
+/* { dg-options "-O2 -fllc-allocate" } */
+
+#include <stdlib.h>
+
+#if __INT_MAX__ >= 2147483647
+typedef unsigned int uint32;
+typedef signed int sint32;
+
+typedef uint32 reg_t;
+
+typedef unsigned long int host_addr_t;
+typedef uint32 target_addr_t;
+typedef sint32 target_saddr_t;
+
+typedef union
+{
+ struct
+ {
+ signed int offset:18;
+ unsigned int ignore:4;
+ unsigned int s1:8;
+ int :2;
+ signed int simm:14;
+ unsigned int s3:8;
+ unsigned int s2:8;
+ int pad2:2;
+ } f1;
+ long long ll;
+ double d;
+} insn_t;
+
+typedef struct
+{
+ target_addr_t vaddr_tag;
+ unsigned long int rigged_paddr;
+} tlb_entry_t;
+
+typedef struct
+{
+ insn_t *pc;
+ reg_t registers[256];
+ insn_t *program;
+ tlb_entry_t tlb_tab[0x100];
+} environment_t;
+
+enum operations
+{
+ LOAD32_RR,
+ METAOP_DONE
+};
+
+host_addr_t
+f ()
+{
+ abort ();
+}
+
+reg_t
+simulator_kernel (int what, environment_t *env)
+{
+ register insn_t *pc = env->pc;
+ register reg_t *regs = env->registers;
+ register insn_t insn;
+ register int s1;
+ register reg_t r2;
+ register void *base_addr = &&sim_base_addr;
+ register tlb_entry_t *tlb = env->tlb_tab;
+
+ if (what != 0)
+ {
+ int i;
+ static void *op_map[] =
+ {
+ &&L_LOAD32_RR,
+ &&L_METAOP_DONE,
+ };
+ insn_t *program = env->program;
+ for (i = 0; i < what; i++)
+ program[i].f1.offset = op_map[program[i].f1.offset] - base_addr;
+ }
+
+ sim_base_addr:;
+
+ insn = *pc++;
+ r2 = (*(reg_t *) (((char *) regs) + (insn.f1.s2 << 2)));
+ s1 = (insn.f1.s1 << 2);
+ goto *(base_addr + insn.f1.offset);
+
+ L_LOAD32_RR:
+ {
+ target_addr_t vaddr_page = r2 / 4096;
+ unsigned int x = vaddr_page % 0x100;
+ insn = *pc++;
+
+ for (;;)
+ {
+ target_addr_t tag = tlb[x].vaddr_tag;
+ host_addr_t rigged_paddr = tlb[x].rigged_paddr;
+
+ if (tag == vaddr_page)
+ {
+ *(reg_t *) (((char *) regs) + s1) = *(uint32 *) (rigged_paddr + r2);
+ r2 = *(reg_t *) (((char *) regs) + (insn.f1.s2 << 2));
+ s1 = insn.f1.s1 << 2;
+ goto *(base_addr + insn.f1.offset);
+ }
+
+ if (((target_saddr_t) tag < 0))
+ {
+ *(reg_t *) (((char *) regs) + s1) = *(uint32 *) f ();
+ r2 = *(reg_t *) (((char *) regs) + (insn.f1.s2 << 2));
+ s1 = insn.f1.s1 << 2;
+ goto *(base_addr + insn.f1.offset);
+ }
+
+ x = (x - 1) % 0x100;
+ }
+
+ L_METAOP_DONE:
+ return (*(reg_t *) (((char *) regs) + s1));
+ }
+}
+
+insn_t program[2 + 1];
+
+void *malloc ();
+
+int
+main ()
+{
+ environment_t env;
+ insn_t insn;
+ int i, res;
+ host_addr_t a_page = (host_addr_t) malloc (2 * 4096);
+ target_addr_t a_vaddr = 0x123450;
+ target_addr_t vaddr_page = a_vaddr / 4096;
+ a_page = (a_page + 4096 - 1) & -4096;
+
+ env.tlb_tab[((vaddr_page) % 0x100)].vaddr_tag = vaddr_page;
+ env.tlb_tab[((vaddr_page) % 0x100)].rigged_paddr = a_page - vaddr_page * 4096;
+ insn.f1.offset = LOAD32_RR;
+ env.registers[0] = 0;
+ env.registers[2] = a_vaddr;
+ *(sint32 *) (a_page + a_vaddr % 4096) = 88;
+ insn.f1.s1 = 0;
+ insn.f1.s2 = 2;
+
+ for (i = 0; i < 2; i++)
+ program[i] = insn;
+
+ insn.f1.offset = METAOP_DONE;
+ insn.f1.s1 = 0;
+ program[2] = insn;
+
+ env.pc = program;
+ env.program = program;
+
+ res = simulator_kernel (2 + 1, &env);
+
+ if (res != 88)
+ abort ();
+ exit (0);
+}
+#else
+main(){ exit (0); }
+#endif
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_1.c
index 316f77fc7..fba3b7447 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_1.c
@@ -10,8 +10,12 @@ f1 (svbool_t pg, int32_t *s32_ptr, enum svprfop op)
svprfb (pg, s32_ptr, (enum svprfop) -1); /* { dg-error {passing 4294967295 to argument 3 of 'svprfb', which expects a valid 'enum svprfop' value} } */
svprfb (pg, s32_ptr, (enum svprfop) 0);
svprfb (pg, s32_ptr, (enum svprfop) 5);
- svprfb (pg, s32_ptr, (enum svprfop) 6); /* { dg-error {passing 6 to argument 3 of 'svprfb', which expects a valid 'enum svprfop' value} } */
- svprfb (pg, s32_ptr, (enum svprfop) 7); /* { dg-error {passing 7 to argument 3 of 'svprfb', which expects a valid 'enum svprfop' value} } */
+ svprfb (pg, s32_ptr, (enum svprfop) 6);
+ svprfb (pg, s32_ptr, (enum svprfop) 7);
svprfb (pg, s32_ptr, (enum svprfop) 8);
- svprfb (pg, s32_ptr, (enum svprfop) 14); /* { dg-error {passing 14 to argument 3 of 'svprfb', which expects a valid 'enum svprfop' value} } */
+ svprfb (pg, s32_ptr, (enum svprfop) 14);
+ svprfb (pg, s32_ptr, (enum svprfop) 15);
+ svprfb (pg, s32_ptr, (enum svprfop) 16); /* { dg-error {passing 16 to argument 3 of 'svprfb', which expects a valid 'enum svprfop' value} } */
+
+
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_index_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_index_1.c
index c33c95440..cf387bf92 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_index_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_index_1.c
@@ -46,8 +46,10 @@ f1 (svbool_t pg, int32_t *s32_ptr, void *void_ptr, void **ptr_ptr,
svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) -1); /* { dg-error {passing 4294967295 to argument 4 of 'svprfh_gather_index', which expects a valid 'enum svprfop' value} } */
svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) 0);
svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) 5);
- svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) 6); /* { dg-error {passing 6 to argument 4 of 'svprfh_gather_index', which expects a valid 'enum svprfop' value} } */
- svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) 7); /* { dg-error {passing 7 to argument 4 of 'svprfh_gather_index', which expects a valid 'enum svprfop' value} } */
+ svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) 6);
+ svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) 7);
svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) 8);
- svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) 14); /* { dg-error {passing 14 to argument 4 of 'svprfh_gather_index', which expects a valid 'enum svprfop' value} } */
+ svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) 14);
+ svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) 15);
+ svprfh_gather_index (pg, s32_ptr, s32, (enum svprfop) 16); /* { dg-error {passing 16 to argument 4 of 'svprfh_gather_index', which expects a valid 'enum svprfop' value} } */
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_index_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_index_2.c
index 3d7797305..bc99b29d1 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_index_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_index_2.c
@@ -10,8 +10,10 @@ f1 (svbool_t pg, int32_t *s32_ptr, svint32_t s32, enum svprfop op)
svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) -1); /* { dg-error {passing 4294967295 to argument 4 of 'svprfh_gather_s32index', which expects a valid 'enum svprfop' value} } */
svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) 0);
svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) 5);
- svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) 6); /* { dg-error {passing 6 to argument 4 of 'svprfh_gather_s32index', which expects a valid 'enum svprfop' value} } */
- svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) 7); /* { dg-error {passing 7 to argument 4 of 'svprfh_gather_s32index', which expects a valid 'enum svprfop' value} } */
+ svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) 6);
+ svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) 7);
svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) 8);
- svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) 14); /* { dg-error {passing 14 to argument 4 of 'svprfh_gather_s32index', which expects a valid 'enum svprfop' value} } */
+ svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) 14);
+ svprfh_gather_s32index (pg, s32_ptr, s32, (enum svprfop) 16); /* { dg-error {passing 16 to argument 4 of 'svprfh_gather_s32index', which expects a valid 'enum svprfop' value} } */
+
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_1.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_1.c
index cc61901cb..8b304ed89 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_1.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_1.c
@@ -46,8 +46,9 @@ f1 (svbool_t pg, int32_t *s32_ptr, void *void_ptr, void **ptr_ptr,
svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) -1); /* { dg-error {passing 4294967295 to argument 4 of 'svprfb_gather_offset', which expects a valid 'enum svprfop' value} } */
svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) 0);
svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) 5);
- svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) 6); /* { dg-error {passing 6 to argument 4 of 'svprfb_gather_offset', which expects a valid 'enum svprfop' value} } */
- svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) 7); /* { dg-error {passing 7 to argument 4 of 'svprfb_gather_offset', which expects a valid 'enum svprfop' value} } */
+ svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) 6);
+ svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) 7);
svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) 8);
- svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) 14); /* { dg-error {passing 14 to argument 4 of 'svprfb_gather_offset', which expects a valid 'enum svprfop' value} } */
+ svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) 14);
+ svprfb_gather_offset (pg, s32_ptr, s32, (enum svprfop) 16); /* { dg-error {passing 16 to argument 4 of 'svprfb_gather_offset', which expects a valid 'enum svprfop' value} } */
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_2.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_2.c
index b74721fad..64e55dd76 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_2.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_2.c
@@ -30,8 +30,9 @@ f1 (svbool_t pg, svint8_t s8, svuint8_t u8,
svprfb_gather (pg, u32, (enum svprfop) -1); /* { dg-error {passing 4294967295 to argument 3 of 'svprfb_gather', which expects a valid 'enum svprfop' value} } */
svprfb_gather (pg, u32, (enum svprfop) 0);
svprfb_gather (pg, u32, (enum svprfop) 5);
- svprfb_gather (pg, u32, (enum svprfop) 6); /* { dg-error {passing 6 to argument 3 of 'svprfb_gather', which expects a valid 'enum svprfop' value} } */
- svprfb_gather (pg, u32, (enum svprfop) 7); /* { dg-error {passing 7 to argument 3 of 'svprfb_gather', which expects a valid 'enum svprfop' value} } */
+ svprfb_gather (pg, u32, (enum svprfop) 6);
+ svprfb_gather (pg, u32, (enum svprfop) 7);
svprfb_gather (pg, u32, (enum svprfop) 8);
- svprfb_gather (pg, u32, (enum svprfop) 14); /* { dg-error {passing 14 to argument 3 of 'svprfb_gather', which expects a valid 'enum svprfop' value} } */
+ svprfb_gather (pg, u32, (enum svprfop) 14);
+ svprfb_gather (pg, u32, (enum svprfop) 16); /* { dg-error {passing 16 to argument 3 of 'svprfb_gather', which expects a valid 'enum svprfop' value} } */
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_3.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_3.c
index 24b4aa190..f400e91e8 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_3.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_3.c
@@ -10,8 +10,9 @@ f1 (svbool_t pg, int32_t *s32_ptr, svint32_t s32, enum svprfop op)
svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) -1); /* { dg-error {passing 4294967295 to argument 4 of 'svprfb_gather_s32offset', which expects a valid 'enum svprfop' value} } */
svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) 0);
svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) 5);
- svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) 6); /* { dg-error {passing 6 to argument 4 of 'svprfb_gather_s32offset', which expects a valid 'enum svprfop' value} } */
- svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) 7); /* { dg-error {passing 7 to argument 4 of 'svprfb_gather_s32offset', which expects a valid 'enum svprfop' value} } */
+ svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) 6);
+ svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) 7);
svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) 8);
- svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) 14); /* { dg-error {passing 14 to argument 4 of 'svprfb_gather_s32offset', which expects a valid 'enum svprfop' value} } */
+ svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) 14);
+ svprfb_gather_s32offset (pg, s32_ptr, s32, (enum svprfop) 16); /* { dg-error {passing 16 to argument 4 of 'svprfb_gather_s32offset', which expects a valid 'enum svprfop' value} } */
}
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_4.c b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_4.c
index 63ccdc5a4..7b91dbd2e 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve/acle/general-c/prefetch_gather_offset_4.c
@@ -10,8 +10,9 @@ f1 (svbool_t pg, svuint32_t u32, enum svprfop op)
svprfb_gather_u32base (pg, u32, (enum svprfop) -1); /* { dg-error {passing 4294967295 to argument 3 of 'svprfb_gather_u32base', which expects a valid 'enum svprfop' value} } */
svprfb_gather_u32base (pg, u32, (enum svprfop) 0);
svprfb_gather_u32base (pg, u32, (enum svprfop) 5);
- svprfb_gather_u32base (pg, u32, (enum svprfop) 6); /* { dg-error {passing 6 to argument 3 of 'svprfb_gather_u32base', which expects a valid 'enum svprfop' value} } */
- svprfb_gather_u32base (pg, u32, (enum svprfop) 7); /* { dg-error {passing 7 to argument 3 of 'svprfb_gather_u32base', which expects a valid 'enum svprfop' value} } */
+ svprfb_gather_u32base (pg, u32, (enum svprfop) 6);
+ svprfb_gather_u32base (pg, u32, (enum svprfop) 7);
svprfb_gather_u32base (pg, u32, (enum svprfop) 8);
- svprfb_gather_u32base (pg, u32, (enum svprfop) 14); /* { dg-error {passing 14 to argument 3 of 'svprfb_gather_u32base', which expects a valid 'enum svprfop' value} } */
+ svprfb_gather_u32base (pg, u32, (enum svprfop) 14);
+ svprfb_gather_u32base (pg, u32, (enum svprfop) 16); /* { dg-error {passing 16 to argument 3 of 'svprfb_gather_u32base', which expects a valid 'enum svprfop' value} } */
}
diff --git a/gcc/tree-ssa-llc-allocate.c b/gcc/tree-ssa-llc-allocate.c
index 75501f41c..3f6ff3623 100644
--- a/gcc/tree-ssa-llc-allocate.c
+++ b/gcc/tree-ssa-llc-allocate.c
@@ -1020,6 +1020,14 @@ trace_ptr_mem_ref (data_ref &mem_ref, std::set<gimple *> &traced_ref_stmt,
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Unhandled scenario for non-constant offset.\n");
+
+ return false;
+ }
+ if (TREE_CODE (pointer) != SSA_NAME)
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Unhandled scenario for non-ssa pointer.\n");
+
return false;
}
@@ -2330,7 +2338,7 @@ enum bb_traversal_state
bool
revisit_bb_abnormal_p (basic_block bb, std::vector<int> &bb_visited,
const std::set<int> &header_bb_idx_set,
- std::set<std::pair<int, int> > &backedges,
+ std::set<std::pair<int, int> > &unused_edges,
int src_bb_idx)
{
/* If the header bb has been already fully traversed, early exit
@@ -2340,19 +2348,20 @@ revisit_bb_abnormal_p (basic_block bb, std::vector<int> &bb_visited,
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Already visited bb index %d. Abort.\n",
bb->index);
+ unused_edges.insert (std::make_pair (src_bb_idx, bb->index));
return true;
}
/* If we revisit a non-header bb during next-bb traversal, we detect
an inner-loop cycle and dump warning info. Record this abnormal edge
- in `backedges` for special treatment in path weight update. */
+ in `unused_edges` for special treatment in path weight update. */
if (!header_bb_idx_set.count (bb->index)
&& bb_visited[bb->index] == UNDER_TRAVERSAL)
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Warning: Find cycle at bb index %d. Abort.\n",
bb->index);
- backedges.insert (std::make_pair (src_bb_idx, bb->index));
+ unused_edges.insert (std::make_pair (src_bb_idx, bb->index));
return true;
}
@@ -2397,7 +2406,7 @@ void
get_next_toposort_bb (basic_block bb, std::vector<int> &bb_visited,
std::list<basic_block> &bb_topo_order,
const std::set<int> &header_bb_idx_set,
- std::set<std::pair<int, int> > &backedges,
+ std::set<std::pair<int, int> > &unused_edges,
int src_bb_idx)
{
/* 1) Before bb returns to the loop header, bb will not go to the outer loop.
@@ -2412,7 +2421,7 @@ get_next_toposort_bb (basic_block bb, std::vector<int> &bb_visited,
if (bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
return;
- if (revisit_bb_abnormal_p (bb, bb_visited, header_bb_idx_set, backedges,
+ if (revisit_bb_abnormal_p (bb, bb_visited, header_bb_idx_set, unused_edges,
src_bb_idx))
return;
@@ -2431,7 +2440,7 @@ get_next_toposort_bb (basic_block bb, std::vector<int> &bb_visited,
FOR_EACH_VEC_ELT (exits, i, e)
{
get_next_toposort_bb (e->dest, bb_visited, bb_topo_order,
- header_bb_idx_set, backedges, bb->index);
+ header_bb_idx_set, unused_edges, src_bb_idx);
}
return;
}
@@ -2447,7 +2456,7 @@ get_next_toposort_bb (basic_block bb, std::vector<int> &bb_visited,
continue;
get_next_toposort_bb (e->dest, bb_visited, bb_topo_order,
- header_bb_idx_set, backedges, bb->index);
+ header_bb_idx_set, unused_edges, bb->index);
}
/* bb is marked as fully traversed and all its descendents have been
@@ -2526,7 +2535,8 @@ check_null_info_in_path_update (basic_block bb, edge e)
to header bb using a backedge. */
void
-update_backedge_path_weight (std::vector<weight> &bb_weights, basic_block bb)
+update_backedge_path_weight (std::vector<weight> &bb_weights, basic_block bb,
+ const std::set<std::pair<int, int> > &unused_edges)
{
unsigned i;
edge e_exit;
@@ -2542,6 +2552,11 @@ update_backedge_path_weight (std::vector<weight> &bb_weights, basic_block bb)
continue;
}
+ if (unused_edges.count (std::make_pair (bb->index, e_exit->dest->index)))
+ {
+ /* Inner-loop-cycle backedge case. */
+ continue;
+ }
update_path_weight (bb_weights, bb->index, e_exit->dest->index,
e_exit->dest->count.to_gcov_type ());
}
@@ -2553,7 +2568,7 @@ void
update_max_length_of_path (std::vector<weight> &bb_weights,
std::list<basic_block> &bb_topo_order,
const std::set<int> &header_bb_idx_set,
- const std::set<std::pair<int, int> > &backedges)
+ const std::set<std::pair<int, int> > &unused_edges)
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "Start update weight traversal:\n");
@@ -2573,22 +2588,22 @@ update_max_length_of_path (std::vector<weight> &bb_weights,
if (check_null_info_in_path_update (bb, e))
continue;
- if (header_bb_idx_set.count (e->dest->index)
- && bb->loop_father == e->dest->loop_father)
+ if (unused_edges.count (std::make_pair (bb->index, e->dest->index)))
{
- /* Backedge case. */
- update_backedge_path_weight (bb_weights, bb);
+ /* Inner-loop-cycle backedge case. */
+ continue;
}
- else if (bb->loop_father->num != 0
+ else if (bb->loop_father->num != 0
&& !flow_bb_inside_loop_p (bb->loop_father, e->dest))
{
/* Outer-loop edge case. */
continue;
}
- else if (backedges.count (std::make_pair (bb->index, e->dest->index)))
+ else if (header_bb_idx_set.count (e->dest->index)
+ && bb->loop_father == e->dest->loop_father)
{
- /* Inner-loop-cycle backedge case. */
- continue;
+ /* Backedge case. */
+ update_backedge_path_weight (bb_weights, bb, unused_edges);
}
else
{
@@ -2676,9 +2691,9 @@ filter_and_sort_kernels_feedback (std::vector<class loop *> &sorted_kernel,
basic_block bb_start = ENTRY_BLOCK_PTR_FOR_FN (cfun);
/* Step 1: Get topological order of bb during traversal. */
- std::set<std::pair<int, int> > backedges;
+ std::set<std::pair<int, int> > unused_edges;
get_next_toposort_bb (bb_start, bb_visited, bb_topo_order, header_bb_idx_set,
- backedges, -1);
+ unused_edges, -1);
if (dump_file && (dump_flags & TDF_DETAILS))
{
fprintf (dump_file, "\nCheck bbs in topological order:\n");
@@ -2693,7 +2708,7 @@ filter_and_sort_kernels_feedback (std::vector<class loop *> &sorted_kernel,
std::vector<weight> bb_weights = std::vector<weight>(bb_num_max, weight_init);
bb_weights[0].bb_count = 0; /* ENTRY bb has count 0 and prev bb as -1. */
update_max_length_of_path (bb_weights, bb_topo_order, header_bb_idx_set,
- backedges);
+ unused_edges);
/* Step 3: Backtrack a path from EXIT bb to ENTRY bb. */
if (dump_file && (dump_flags & TDF_DETAILS))
@@ -2706,6 +2721,13 @@ filter_and_sort_kernels_feedback (std::vector<class loop *> &sorted_kernel,
tmp_bb_idx = bb_weights[tmp_bb_idx].prev_bb_idx;
while (tmp_bb_idx > 0 && tmp_bb_idx < bb_num_max)
{
+ if (bb_pathset.count (tmp_bb_idx))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf(dump_file, "ERROR: already seen bb index %d\n",
+ tmp_bb_idx);
+ return false;
+ }
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, "%d: %ld, ", tmp_bb_idx,
bb_weights[tmp_bb_idx].bb_count);
@@ -3398,14 +3420,14 @@ issue_builtin_prefetch (data_ref &mem_ref)
if (param_llc_level == 3)
{
/* for simulation.
- BUILT_IN_PREFETCH (addr, rw, locality). */
+ BUILT_IN_PREFETCH (addr, rw, locality). */
call = gimple_build_call (builtin_decl_explicit (BUILT_IN_PREFETCH),
- 3, addr, integer_zero_node, integer_one_node);
+ 3, addr, integer_zero_node, integer_one_node);
}
else if (param_llc_level == 4)
{
- tree prfop = build_int_cst (TREE_TYPE (integer_zero_node), 6);
- call = gimple_build_call (builtin_decl_explicit (BUILT_IN_PREFETCH_FULL),
+ tree prfop = build_int_cst (TREE_TYPE (integer_zero_node), 6);
+ call = gimple_build_call (builtin_decl_explicit (BUILT_IN_PREFETCH_FULL),
3, addr, integer_zero_node, prfop);
}
else
--
2.33.0

View File

@ -0,0 +1,873 @@
From 8953134c07329ff0841f2fa18da0db07c94e0167 Mon Sep 17 00:00:00 2001
From: xingyushuai <xingyushuai@huawei.com>
Date: Fri, 3 Mar 2023 09:31:04 +0800
Subject: [PATCH 04/10] Add hip09 machine description
Here is the patch introducing the hip09 machine model
for the scheduler.
---
gcc/ChangeLog | 8 +
gcc/config/aarch64/aarch64-cores.def | 1 +
gcc/config/aarch64/aarch64-cost-tables.h | 104 +++++
gcc/config/aarch64/aarch64-tune.md | 2 +-
gcc/config/aarch64/aarch64.c | 83 ++++
gcc/config/aarch64/aarch64.md | 1 +
gcc/config/aarch64/hip09.md | 558 +++++++++++++++++++++++
7 files changed, 756 insertions(+), 1 deletion(-)
create mode 100644 gcc/config/aarch64/hip09.md
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 07aea9b86..59e5b8a3d 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,11 @@
+2023-03-17 xingyushuai <xingyushuai@huawei.com>
+ * config/aarch64/aarch64-cores.def: Add support for hip09 CPU
+ * config/aarch64/aarch64-cost-tables.h: Add cost tables for hip09 CPU
+ * config/aarch64/aarch64-tune.md: Regenerated
+ * config/aarch64/aarch64.c: Add tuning table for hip09 CPU
+ * config/aarch64/aarch64.md: Include the new model of hip09 CPU
+ * config/aarch64/hip09.md: New file to support hip09 CPU
+
2020-05-12 Richard Sandiford <richard.sandiford@arm.com>
PR tree-optimization/94980
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 53125f6bd..eb1c6c894 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -124,6 +124,7 @@ AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F
/* HiSilicon ('H') cores. */
AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
+AARCH64_CORE("hip09", hip09, hip09, 8_5A, AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_F32MM | AARCH64_FL_F64MM | AARCH64_FL_PROFILE | AARCH64_FL_PREDRES, hip09, 0x48, 0x1, 0xd01)
/* ARMv8.3-A Architecture Processors. */
diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h
index 377650be0..89b3c180f 100644
--- a/gcc/config/aarch64/aarch64-cost-tables.h
+++ b/gcc/config/aarch64/aarch64-cost-tables.h
@@ -645,6 +645,110 @@ const struct cpu_cost_table hip11_extra_costs =
}
};
+const struct cpu_cost_table hip09_extra_costs =
+{
+ /* ALU */
+ {
+ 0, /* arith. */
+ 0, /* logical. */
+ 0, /* shift. */
+ 0, /* shift_reg. */
+ COSTS_N_INSNS (1), /* arith_shift. */
+ COSTS_N_INSNS (1), /* arith_shift_reg. */
+ COSTS_N_INSNS (1), /* log_shift. */
+ COSTS_N_INSNS (1), /* log_shift_reg. */
+ 0, /* extend. */
+ COSTS_N_INSNS (1), /* extend_arith. */
+ 0, /* bfi. */
+ 0, /* bfx. */
+ 0, /* clz. */
+ 0, /* rev. */
+ 0, /* non_exec. */
+ true /* non_exec_costs_exec. */
+ },
+
+ {
+ /* MULT SImode */
+ {
+ COSTS_N_INSNS (2), /* simple. */
+ COSTS_N_INSNS (2), /* flag_setting. */
+ COSTS_N_INSNS (2), /* extend. */
+ COSTS_N_INSNS (2), /* add. */
+ COSTS_N_INSNS (2), /* extend_add. */
+ COSTS_N_INSNS (11) /* idiv. */
+ },
+ /* MULT DImode */
+ {
+ COSTS_N_INSNS (3), /* simple. */
+ 0, /* flag_setting (N/A). */
+ COSTS_N_INSNS (3), /* extend. */
+ COSTS_N_INSNS (3), /* add. */
+ COSTS_N_INSNS (3), /* extend_add. */
+ COSTS_N_INSNS (19) /* idiv. */
+ }
+ },
+ /* LD/ST */
+ {
+ COSTS_N_INSNS (3), /* load. */
+ COSTS_N_INSNS (4), /* load_sign_extend. */
+ COSTS_N_INSNS (3), /* ldrd. */
+ COSTS_N_INSNS (3), /* ldm_1st. */
+ 1, /* ldm_regs_per_insn_1st. */
+ 2, /* ldm_regs_per_insn_subsequent. */
+ COSTS_N_INSNS (4), /* loadf. */
+ COSTS_N_INSNS (4), /* loadd. */
+ COSTS_N_INSNS (4), /* load_unaligned. */
+ 0, /* store. */
+ 0, /* strd. */
+ 0, /* stm_1st. */
+ 1, /* stm_regs_per_insn_1st. */
+ 2, /* stm_regs_per_insn_subsequent. */
+ 0, /* storef. */
+ 0, /* stored. */
+ COSTS_N_INSNS (1), /* store_unaligned. */
+ COSTS_N_INSNS (4), /* loadv. */
+ COSTS_N_INSNS (4) /* storev. */
+ },
+ {
+ /* FP SFmode */
+ {
+ COSTS_N_INSNS (10), /* div. */
+ COSTS_N_INSNS (4), /* mult. */
+ COSTS_N_INSNS (4), /* mult_addsub. */
+ COSTS_N_INSNS (4), /* fma. */
+ COSTS_N_INSNS (4), /* addsub. */
+ COSTS_N_INSNS (1), /* fpconst. */
+ COSTS_N_INSNS (1), /* neg. */
+ COSTS_N_INSNS (1), /* compare. */
+ COSTS_N_INSNS (2), /* widen. */
+ COSTS_N_INSNS (2), /* narrow. */
+ COSTS_N_INSNS (2), /* toint. */
+ COSTS_N_INSNS (1), /* fromint. */
+ COSTS_N_INSNS (2) /* roundint. */
+ },
+ /* FP DFmode */
+ {
+ COSTS_N_INSNS (17), /* div. */
+ COSTS_N_INSNS (4), /* mult. */
+ COSTS_N_INSNS (6), /* mult_addsub. */
+ COSTS_N_INSNS (6), /* fma. */
+ COSTS_N_INSNS (3), /* addsub. */
+ COSTS_N_INSNS (1), /* fpconst. */
+ COSTS_N_INSNS (1), /* neg. */
+ COSTS_N_INSNS (1), /* compare. */
+ COSTS_N_INSNS (2), /* widen. */
+ COSTS_N_INSNS (2), /* narrow. */
+ COSTS_N_INSNS (2), /* toint. */
+ COSTS_N_INSNS (1), /* fromint. */
+ COSTS_N_INSNS (2) /* roundint. */
+ }
+ },
+ /* Vector */
+ {
+ COSTS_N_INSNS (1) /* alu. */
+ }
+};
+
const struct cpu_cost_table a64fx_extra_costs =
{
/* ALU */
diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
index f33a3330d..1349ec04b 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
;; -*- buffer-read-only: t -*-
;; Generated automatically by gentune.sh from aarch64-cores.def
(define_attr "tune"
- "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,saphira,neoversen2,hip11,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
+ "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,hip09,thunderx3t110,zeus,neoversev1,saphira,neoversen2,hip11,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 938948f29..4ef7bd8b3 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -464,6 +464,22 @@ static const struct cpu_addrcost_table hip11_addrcost_table =
0, /* imm_offset */
};
+static const struct cpu_addrcost_table hip09_addrcost_table =
+{
+ {
+ 1, /* hi */
+ 0, /* si */
+ 0, /* di */
+ 1, /* ti */
+ },
+ 0, /* pre_modify */
+ 0, /* post_modify */
+ 0, /* register_offset */
+ 1, /* register_sextend */
+ 1, /* register_zextend */
+ 0, /* imm_offset */
+};
+
static const struct cpu_addrcost_table qdf24xx_addrcost_table =
{
{
@@ -601,6 +617,16 @@ static const struct cpu_regmove_cost hip11_regmove_cost =
2 /* FP2FP */
};
+static const struct cpu_regmove_cost hip09_regmove_cost =
+{
+ 1, /* GP2GP */
+ /* Avoid the use of slow int<->fp moves for spilling by setting
+ their cost higher than memmov_cost. */
+ 2, /* GP2FP */
+ 3, /* FP2GP */
+ 2 /* FP2FP */
+};
+
static const struct cpu_regmove_cost a64fx_regmove_cost =
{
1, /* GP2GP */
@@ -709,6 +735,25 @@ static const struct cpu_vector_cost hip11_vector_cost =
1 /* cond_not_taken_branch_cost */
};
+static const struct cpu_vector_cost hip09_vector_cost =
+{
+ 1, /* scalar_int_stmt_cost */
+ 1, /* scalar_fp_stmt_cost */
+ 5, /* scalar_load_cost */
+ 1, /* scalar_store_cost */
+ 2, /* vec_int_stmt_cost */
+ 2, /* vec_fp_stmt_cost */
+ 2, /* vec_permute_cost */
+ 3, /* vec_to_scalar_cost */
+ 2, /* scalar_to_vec_cost */
+ 5, /* vec_align_load_cost */
+ 5, /* vec_unalign_load_cost */
+ 1, /* vec_unalign_store_cost */
+ 1, /* vec_store_cost */
+ 1, /* cond_taken_branch_cost */
+ 1 /* cond_not_taken_branch_cost */
+};
+
/* Generic costs for vector insn classes. */
static const struct cpu_vector_cost cortexa57_vector_cost =
{
@@ -958,6 +1003,17 @@ static const cpu_prefetch_tune hip11_prefetch_tune =
-1 /* default_opt_level */
};
+static const cpu_prefetch_tune hip09_prefetch_tune =
+{
+ 0, /* num_slots */
+ 64, /* l1_cache_size */
+ 64, /* l1_cache_line_size */
+ 512, /* l2_cache_size */
+ true, /* prefetch_dynamic_strides */
+ -1, /* minimum_stride */
+ -1 /* default_opt_level */
+};
+
static const cpu_prefetch_tune xgene1_prefetch_tune =
{
8, /* num_slots */
@@ -1252,6 +1308,33 @@ static const struct tune_params tsv110_tunings =
&tsv110_prefetch_tune
};
+static const struct tune_params hip09_tunings =
+{
+ &hip09_extra_costs,
+ &hip09_addrcost_table,
+ &hip09_regmove_cost,
+ &hip09_vector_cost,
+ &generic_branch_cost,
+ &generic_approx_modes,
+ SVE_128, /* sve_width */
+ 4, /* memmov_cost */
+ 4, /* issue_rate */
+ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_ALU_BRANCH
+ | AARCH64_FUSE_ALU_CBZ), /* fusible_ops */
+ "16", /* function_align. */
+ "4", /* jump_align. */
+ "8", /* loop_align. */
+ 2, /* int_reassoc_width. */
+ 4, /* fp_reassoc_width. */
+ 1, /* vec_reassoc_width. */
+ 2, /* min_div_recip_mul_sf. */
+ 2, /* min_div_recip_mul_df. */
+ 0, /* max_case_values. */
+ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
+ (AARCH64_EXTRA_TUNE_PREFER_ADVSIMD_AUTOVEC), /* tune_flags. */
+ &hip09_prefetch_tune
+};
+
static const struct tune_params hip11_tunings =
{
&hip11_extra_costs,
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 38af8d000..04d1e4ead 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -459,6 +459,7 @@
(include "thunderx2t99.md")
(include "tsv110.md")
(include "thunderx3t110.md")
+(include "hip09.md")
;; -------------------------------------------------------------------
;; Jumps and other miscellaneous insns
diff --git a/gcc/config/aarch64/hip09.md b/gcc/config/aarch64/hip09.md
new file mode 100644
index 000000000..25428de9a
--- /dev/null
+++ b/gcc/config/aarch64/hip09.md
@@ -0,0 +1,558 @@
+;; hip09 pipeline description
+;; Copyright (C) 2023 Free Software Foundation, Inc.
+;;
+;; Contributed by Yushuai Xing
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "hip09")
+(define_automaton "hip09_ldst")
+(define_automaton "hip09_fsu")
+
+(define_attr "hip09_type"
+ "hip09_neon_abs, hip09_neon_fp_arith, hip09_neon_mul, hip09_neon_mla,
+ hip09_neon_dot, hip09_neon_fp_div, hip09_neon_fp_sqrt,
+ hip09_neon_ins, hip09_neon_load1, hip09_neon_load1_lanes,
+ hip09_neon_load2and4, hip09_neon_load3_3reg,
+ hip09_neon_load4_4reg, hip09_neon_store1and2,
+ hip09_neon_store1_1reg, hip09_neon_store1_2reg,
+ hip09_neon_store1_3reg, hip09_neon_store1_4reg,
+ hip09_neon_store3and4_lane, hip09_neon_store3_3reg,
+ hip09_neon_store4_4reg, unknown"
+ (cond [
+ (eq_attr "type" "neon_abs,neon_abs_q,neon_add,neon_add_q,\
+ neon_neg,neon_neg_q,neon_sub,neon_sub_q,neon_add_widen,\
+ neon_sub_widen,neon_qadd,neon_qadd_q,\
+ neon_add_long,neon_sub_long,\
+ neon_qabs,neon_qabs_q,neon_qneg,\
+ neon_qneg_q,neon_qsub,neon_qsub_q,neon_compare,\
+ neon_compare_q,neon_compare_zero,\
+ neon_compare_zero_q,neon_logic,neon_logic_q,\
+ neon_minmax,neon_minmax_q,neon_tst,\
+ neon_tst_q,neon_bsl,neon_bsl_q,\
+ neon_cls,neon_cls_q,neon_ext,\
+ neon_ext_q,neon_rev,neon_rev_q,\
+ neon_tbl1,neon_tbl1_q,neon_fp_abs_s,\
+ neon_fp_abs_s_q,neon_fp_abs_d,\
+ neon_fp_neg_s,neon_fp_neg_s_q,\
+ neon_fp_neg_d,neon_fp_neg_d_q,\
+ neon_shift_imm_narrow_q,neon_move,neon_move_q")
+ (const_string "hip09_neon_abs")
+ (eq_attr "type" "neon_abd,neon_abd_q,\
+ neon_arith_acc,neon_arith_acc_q,\
+ neon_add_halve,neon_add_halve_q,\
+ neon_sub_halve,neon_sub_halve_q,\
+ neon_add_halve_narrow_q,\
+ neon_sub_halve_narrow_q,neon_reduc_add,\
+ neon_reduc_add_q,\
+ neon_sat_mul_b,neon_sat_mul_b_q,\
+ neon_sat_mul_b_long,neon_mul_b,neon_mul_b_q,\
+ neon_mul_b_long,neon_mla_b,neon_mla_b_q,\
+ neon_mla_b_long,neon_sat_mla_b_long,\
+ neon_sat_shift_imm,\
+ neon_sat_shift_imm_q,neon_shift_imm_long,\
+ neon_shift_imm,neon_shift_imm_q,neon_cnt,\
+ neon_cnt_q,neon_fp_recpe_s,neon_fp_recpe_s_q,\
+ neon_fp_recpe_d,neon_fp_recpe_d_q,\
+ neon_fp_rsqrte_s,neon_fp_rsqrte_s_q,\
+ neon_fp_rsqrte_d,neon_fp_rsqrte_d_q,\
+ neon_fp_recpx_s,neon_fp_recpx_s_q,\
+ neon_fp_recpx_d,neon_fp_recpx_d_q,\
+ neon_tbl2,neon_tbl2_q,neon_to_gp,\
+ neon_to_gp_q,neon_fp_abd_s,neon_fp_abd_s_q,\
+ neon_fp_abd_d,neon_fp_abd_d_q,\
+ neon_fp_addsub_s,neon_fp_addsub_s_q,\
+ neon_fp_addsub_d,neon_fp_addsub_d_q,\
+ neon_fp_compare_s,neon_fp_compare_s_q,\
+ neon_fp_compare_d,neon_fp_compare_d_q,\
+ neon_fp_cvt_widen_s,neon_fp_to_int_s,\
+ neon_fp_to_int_s_q,neon_fp_to_int_d,\
+ neon_fp_to_int_d_q,neon_fp_minmax_s,\
+ neon_fp_minmax_s_q,neon_fp_minmax_d,\
+ neon_fp_minmax_d_q,neon_fp_round_s,\
+ neon_fp_round_s_q,neon_fp_cvt_narrow_d_q,\
+ neon_fp_round_d,neon_fp_round_d_q,\
+ neon_fp_cvt_narrow_s_q")
+ (const_string "hip09_neon_fp_arith")
+ (eq_attr "type" "neon_sat_mul_h,neon_sat_mul_h_q,\
+ neon_sat_mul_s,neon_sat_mul_s_q,\
+ neon_sat_mul_h_scalar,neon_sat_mul_s_scalar,\
+ neon_sat_mul_h_scalar_q,neon_sat_mul_h_long,\
+ neon_sat_mul_s_long,neon_sat_mul_h_scalar_long,\
+ neon_sat_mul_s_scalar_long,neon_mul_h,neon_mul_h_q,\
+ neon_mul_s,neon_mul_s_q,neon_mul_h_long,\
+ neon_mul_s_long,neon_mul_h_scalar_long,\
+ neon_mul_s_scalar_long,neon_mla_h,neon_mla_h_q,\
+ neon_mla_s,neon_mla_h_scalar,\
+ neon_mla_h_scalar_q,neon_mla_s_scalar,\
+ neon_mla_h_long,\
+ neon_mla_s_long,neon_sat_mla_h_long,\
+ neon_sat_mla_s_long,neon_sat_mla_h_scalar_long,\
+ neon_sat_mla_s_scalar_long,neon_mla_s_scalar_long,\
+ neon_mla_h_scalar_long,neon_mla_s_scalar_q,\
+ neon_shift_acc,neon_shift_acc_q,neon_shift_reg,\
+ neon_shift_reg_q,neon_sat_shift_reg,\
+ neon_sat_shift_reg_q,neon_sat_shift_imm_narrow_q,\
+ neon_tbl3,neon_tbl3_q,neon_fp_reduc_add_s,\
+ neon_fp_reduc_add_s_q,neon_fp_reduc_add_d,\
+ neon_fp_reduc_add_d_q,neon_fp_reduc_minmax_s,\
+ neon_fp_reduc_minmax_d,neon_fp_reduc_minmax_s_q,\
+ neon_fp_reduc_minmax_d_q,\
+ neon_fp_mul_s_q,\
+ neon_fp_mul_d,neon_fp_mul_d_q,\
+ neon_fp_mul_d_scalar_q,neon_fp_mul_s_scalar,\
+ neon_fp_mul_s_scalar_q")
+ (const_string "hip09_neon_mul")
+ (eq_attr "type" "neon_mla_s_q,neon_reduc_minmax,\
+ neon_reduc_minmax_q,neon_fp_recps_s,\
+ neon_fp_recps_s_q,neon_fp_recps_d,\
+ neon_fp_recps_d_q,neon_tbl4,neon_tbl4_q,\
+ neon_fp_mla_s,\
+ neon_fp_mla_d,neon_fp_mla_d_q,\
+ neon_fp_mla_s_scalar,neon_fp_mla_s_scalar_q,\
+ neon_fp_mla_d_scalar_q")
+ (const_string "hip09_neon_mla")
+ (eq_attr "type" "neon_dot,neon_dot_q")
+ (const_string "hip09_neon_dot")
+ (eq_attr "type" "neon_fp_div_s,neon_fp_div_s_q,\
+ neon_fp_div_d,neon_fp_div_d_q")
+ (const_string "hip09_neon_fp_div")
+ (eq_attr "type" "neon_fp_sqrt_s,neon_fp_sqrt_s_q,\
+ neon_fp_sqrt_d,neon_fp_sqrt_d_q")
+ (const_string "hip09_neon_fp_sqrt")
+ (eq_attr "type" "neon_dup,neon_dup_q,\
+ neon_ins,neon_ins_q")
+ (const_string "hip09_neon_ins")
+ (eq_attr "type" "neon_load1_1reg,neon_load1_1reg_q,\
+ neon_load1_2reg,neon_load1_2reg_q,\
+ neon_load1_3reg,neon_load1_3reg_q,\
+ neon_load1_4reg,neon_load1_4reg_q")
+ (const_string "hip09_neon_load1")
+ (eq_attr "type" "neon_load1_one_lane,\
+ neon_load1_one_lane_q,\
+ neon_load1_all_lanes,neon_load1_all_lanes_q")
+ (const_string "hip09_neon_load1_lanes")
+ (eq_attr "type" "neon_load2_all_lanes,\
+ neon_load2_all_lanes_q,\
+ neon_load2_one_lane,neon_load2_2reg,\
+ neon_load2_2reg_q,neon_load3_one_lane,\
+ neon_load3_all_lanes,neon_load3_all_lanes_q,\
+ neon_load4_one_lane,neon_load4_all_lanes,\
+ neon_load4_all_lanes_q")
+ (const_string "hip09_neon_load2and4")
+ (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q")
+ (const_string "hip09_neon_load3_3reg")
+ (eq_attr "type" "neon_load4_4reg,neon_load4_4reg_q")
+ (const_string "hip09_neon_load4_4reg")
+ (eq_attr "type" "neon_store1_one_lane,\
+ neon_store1_one_lane_q,neon_store2_one_lane,\
+ neon_store2_one_lane_q,neon_store2_2reg,\
+ neon_store2_2reg_q")
+ (const_string "hip09_neon_store1and2")
+ (eq_attr "type" "neon_store1_1reg,neon_store1_1reg_q")
+ (const_string "hip09_neon_store1_1reg")
+ (eq_attr "type" "neon_store1_2reg,neon_store1_2reg_q")
+ (const_string "hip09_neon_store1_2reg")
+ (eq_attr "type" "neon_store1_3reg,neon_store1_3reg_q")
+ (const_string "hip09_neon_store1_3reg")
+ (eq_attr "type" "neon_store1_4reg,neon_store1_4reg_q")
+ (const_string "hip09_neon_store1_4reg")
+ (eq_attr "type" "neon_store3_one_lane,\
+ neon_store3_one_lane_q,neon_store4_one_lane,\
+ neon_store4_one_lane_q")
+ (const_string "hip09_neon_store3and4_lane")
+ (eq_attr "type" "neon_store3_3reg,\
+ neon_store3_3reg_q")
+ (const_string "hip09_neon_store3_3reg")
+ (eq_attr "type" "neon_store4_4reg,\
+ neon_store4_4reg_q")
+ (const_string "hip09_neon_store4_4reg")]
+ (const_string "unknown")))
+
+; The hip09 core is modelled as a multiple-issue pipeline that has
+; the following functional units.
+; 1. Two pipelines for branch micro operations: BRU1, BRU2
+
+(define_cpu_unit "hip09_bru0" "hip09")
+(define_cpu_unit "hip09_bru1" "hip09")
+
+(define_reservation "hip09_bru01" "hip09_bru0|hip09_bru1")
+
+; 2. Four pipelines for single-cycle integer micro operations: ALUs1, ALUs2, ALUs3, ALUs4
+
+(define_cpu_unit "hip09_alus0" "hip09")
+(define_cpu_unit "hip09_alus1" "hip09")
+(define_cpu_unit "hip09_alus2" "hip09")
+(define_cpu_unit "hip09_alus3" "hip09")
+
+(define_reservation "hip09_alus0123" "hip09_alus0|hip09_alus1|hip09_alus2|hip09_alus3")
+(define_reservation "hip09_alus01" "hip09_alus0|hip09_alus1")
+(define_reservation "hip09_alus23" "hip09_alus2|hip09_alus3")
+
+; 3. Two pipelines for multi-cycle integer micro operations: ALUm1, ALUm2
+
+(define_cpu_unit "hip09_alum0" "hip09")
+(define_cpu_unit "hip09_alum1" "hip09")
+
+(define_reservation "hip09_alum01" "hip09_alum0|hip09_alum1")
+
+; 4. Two pipelines for load micro operations: Load1, Load2
+
+(define_cpu_unit "hip09_load0" "hip09_ldst")
+(define_cpu_unit "hip09_load1" "hip09_ldst")
+
+(define_reservation "hip09_ld01" "hip09_load0|hip09_load1")
+
+; 5. Two pipelines for store micro operations: Store1, Store2
+
+(define_cpu_unit "hip09_store0" "hip09_ldst")
+(define_cpu_unit "hip09_store1" "hip09_ldst")
+
+(define_reservation "hip09_st01" "hip09_store0|hip09_store1")
+
+; 6. Two pipelines for store data micro operations: STD0, STD1
+
+(define_cpu_unit "hip09_store_data0" "hip09_ldst")
+(define_cpu_unit "hip09_store_data1" "hip09_ldst")
+
+(define_reservation "hip09_std01" "hip09_store_data0|hip09_store_data1")
+
+; 7. Four asymmetric pipelines for Asimd and FP micro operations: FSU1, FSU2, FSU3, FSU4
+
+(define_cpu_unit "hip09_fsu0" "hip09_fsu")
+(define_cpu_unit "hip09_fsu1" "hip09_fsu")
+(define_cpu_unit "hip09_fsu2" "hip09_fsu")
+(define_cpu_unit "hip09_fsu3" "hip09_fsu")
+
+(define_reservation "hip09_fsu0123" "hip09_fsu0|hip09_fsu1|hip09_fsu2|hip09_fsu3")
+(define_reservation "hip09_fsu02" "hip09_fsu0|hip09_fsu2")
+
+
+; 8. Two pipelines for SVE operations, which are the same as fsu1 and fsu3: SVE1, SVE2
+
+;; Simple Execution Unit:
+;
+;; Simple ALU without shift
+(define_insn_reservation "hip09_alu" 1
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "alu_imm,logic_imm,\
+ adc_imm,adc_reg,\
+ alu_sreg,logic_reg,\
+ mov_imm,mov_reg,\
+ csel,rotate_imm,bfm,mov_imm,\
+ clz,rbit,rev"))
+ "hip09_alus0123")
+
+(define_insn_reservation "hip09_alus" 1
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "alus_sreg,alus_imm,\
+ adcs_reg,adcs_imm,\
+ logics_imm,logics_reg,adr"))
+ "hip09_alus23")
+
+;; ALU ops with shift and extend
+(define_insn_reservation "hip09_alu_ext_shift" 2
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "alu_ext,alus_ext,\
+ logics_shift_imm,logics_shift_reg,\
+ logic_shift_reg,logic_shift_imm,\
+ "))
+ "hip09_alum01")
+
+;; Multiply instructions
+(define_insn_reservation "hip09_mult" 3
+ (and (eq_attr "tune" "hip09")
+ (ior (eq_attr "mul32" "yes")
+ (eq_attr "widen_mul64" "yes")))
+ "hip09_alum01")
+
+;; Integer divide
+(define_insn_reservation "hip09_div" 10
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "udiv,sdiv"))
+ "hip09_alum0")
+
+;; Branch execution Unit
+;
+; Branches take two issue slots.
+; No latency as there is no result
+(define_insn_reservation "hip09_branch" 2
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "branch,call"))
+ "hip09_bru01 + hip09_alus23")
+
+;; Load execution Unit
+;
+; Loads of up to two words.
+(define_insn_reservation "hip09_load1" 4
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "load_4,load_8"))
+ "hip09_ld01")
+
+; Stores of up to two words.
+(define_insn_reservation "hip09_store1" 1
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "store_4,store_8"))
+ "hip09_st01")
+
+;; FP data processing instructions.
+
+(define_insn_reservation "hip09_fp_arith" 1
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "ffariths,ffarithd,fmov,fconsts,fconstd,\
+ f_mrc"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_fp_cmp" 4
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "fcmps,fcmpd"))
+ "hip09_fsu0123+hip09_alus23")
+
+(define_insn_reservation "hip09_fp_ccmp" 7
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "fccmps,fccmpd"))
+ "hip09_alus01+hip09_fsu0123+hip09_alus23")
+
+(define_insn_reservation "hip09_fp_csel" 4
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "fcsel,f_mcr"))
+ "hip09_alus01+hip09_fsu0123")
+
+(define_insn_reservation "hip09_fp_divs" 7
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "fdivs"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_fp_divd" 10
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "fdivd"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_fp_sqrts" 9
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "fsqrts"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_fp_sqrtd" 15
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "fsqrtd"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_fp_mul" 3
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "fmuls,fmuld"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_fp_add" 2
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "fadds,faddd,f_minmaxs,f_minmaxd,f_cvt,\
+ f_rints,f_rintd"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_fp_mac" 4
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "fmacs,fmacd"))
+ "hip09_fsu0123")
+
+;; FP miscellaneous instructions.
+
+(define_insn_reservation "hip09_fp_cvt" 5
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "f_cvtf2i"))
+ "hip09_fsu0123+hip09_alus23")
+
+(define_insn_reservation "hip09_fp_cvt2" 5
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "f_cvti2f"))
+ "hip09_alus01+hip09_fsu0123")
+
+;; FP Load Instructions
+
+(define_insn_reservation "hip09_fp_load" 7
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "f_loads,f_loadd"))
+ "hip09_ld01")
+
+(define_insn_reservation "hip09_fp_load2" 6
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "neon_ldp_q,neon_ldp"))
+ "hip09_ld01+hip09_alus01")
+
+;; FP store instructions
+
+(define_insn_reservation "hip09_fp_store" 2
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "f_stores,f_stored"))
+ "hip09_st01+hip09_std01")
+
+;; ASIMD integer instructions
+
+(define_insn_reservation "hip09_asimd_base1" 1
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_abs"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_base2" 2
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_fp_arith"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_base3" 3
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_mul"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_base4" 4
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_mla"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_base5" 5
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "neon_fp_mul_s"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_dot" 6
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_dot"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_bfmmla" 9
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "neon_fp_mla_s_q"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_fdiv" 15
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_fp_div"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_fsqrt" 25
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_fp_sqrt"))
+ "hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_pmull" 2
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "crypto_pmull"))
+ "hip09_fsu2")
+
+(define_insn_reservation "hip09_asimd_dup" 4
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_ins"))
+ "hip09_alus01+hip09_fsu0123")
+
+;; ASIMD load instructions
+
+(define_insn_reservation "hip09_asimd_ld1_reg" 6
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_load1"))
+ "hip09_ld01")
+
+(define_insn_reservation "hip09_asimd_ld1_lane" 7
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_load1_lanes"))
+ "hip09_ld01+hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_ld23" 8
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_load2and4"))
+"hip09_ld01+hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_ld3_mtp" 9
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_load3_3reg"))
+ "hip09_ld01+hip09_fsu0123")
+
+(define_insn_reservation "hip09_asimd_ld4_mtp" 13
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_load4_4reg"))
+ "hip09_ld01+hip09_fsu0123")
+
+;; ASIMD store instructions
+
+(define_insn_reservation "hip09_asimd_st12" 1
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_store1and2"))
+ "hip09_st01+hip09_std01")
+
+(define_insn_reservation "hip09_asimd_st1_1reg" 2
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_store1_1reg"))
+ "hip09_st01+hip09_std01")
+
+(define_insn_reservation "hip09_asimd_st1_2reg" 3
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_store1_2reg"))
+ "hip09_st01+hip09_std01")
+
+(define_insn_reservation "hip09_asimd_st1_3reg" 4
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_store1_3reg"))
+ "hip09_st01+hip09_std01")
+
+(define_insn_reservation "hip09_asimd_st1_4reg" 5
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_store1_4reg"))
+ "hip09_st01+hip09_std01")
+
+(define_insn_reservation "hip09_asimd_st34_lane" 4
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_store3and4_lane"))
+ "hip09_fsu0123+hip09_st01+hip09_std01")
+
+(define_insn_reservation "hip09_asimd_st3_mtp" 7
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_store3_3reg"))
+ "hip09_fsu0123+hip09_st01+hip09_std01")
+
+(define_insn_reservation "hip09_asimd_st4_mtp" 10
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "hip09_type" "hip09_neon_store4_4reg"))
+ "hip09_fsu0123+hip09_st01+hip09_std01")
+
+;; Cryptography extensions
+
+(define_insn_reservation "hip09_asimd_aes" 2
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "crypto_aese,crypto_aesmc"))
+ "hip09_fsu02")
+
+(define_insn_reservation "hip09_asimd_sha3" 1
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "crypto_sha3"))
+ "hip09_fsu2")
+
+(define_insn_reservation "hip09_asimd_sha1" 2
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "crypto_sha1_fast,crypto_sha1_xor,\
+ crypto_sha256_fast,crypto_sha512,\
+ crypto_sm3"))
+ "hip09_fsu2")
+
+(define_insn_reservation "hip09_asimd_sha1_and256" 4
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "crypto_sha1_slow,crypto_sha256_slow,\
+ crypto_sm4"))
+ "hip09_fsu2")
+
+;; CRC extension.
+
+(define_insn_reservation "hip09_crc" 2
+ (and (eq_attr "tune" "hip09")
+ (eq_attr "type" "crc"))
+ "hip09_alum01")
--
2.33.0

View File

@ -0,0 +1,39 @@
From 8bfb0125f6c2aed9b1f5c2cd43563ce403c00d71 Mon Sep 17 00:00:00 2001
From: XingYuShuai <1150775134@qq.com>
Date: Tue, 11 Jun 2024 20:39:48 +0800
Subject: [PATCH 1/2] [bugfix] Modify the hip09 CPU information.
---
gcc/config/aarch64/aarch64-cores.def | 2 +-
gcc/config/aarch64/aarch64.c | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index eb1c6c894..a8f3376d4 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -124,7 +124,7 @@ AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F
/* HiSilicon ('H') cores. */
AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
-AARCH64_CORE("hip09", hip09, hip09, 8_5A, AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_F32MM | AARCH64_FL_F64MM | AARCH64_FL_PROFILE | AARCH64_FL_PREDRES, hip09, 0x48, 0x1, 0xd01)
+AARCH64_CORE("hip09", hip09, hip09, 8_5A, AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_SVE | AARCH64_FL_I8MM | AARCH64_FL_F32MM | AARCH64_FL_F64MM | AARCH64_FL_PROFILE | AARCH64_FL_PREDRES, hip09, 0x48, 0xd02, 0x0)
/* ARMv8.3-A Architecture Processors. */
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 4ef7bd8b3..2117326ba 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1316,7 +1316,7 @@ static const struct tune_params hip09_tunings =
&hip09_vector_cost,
&generic_branch_cost,
&generic_approx_modes,
- SVE_128, /* sve_width */
+ SVE_256, /* sve_width */
4, /* memmov_cost */
4, /* issue_rate */
(AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_ALU_BRANCH
--
2.33.0

View File

@ -61,7 +61,7 @@
Summary: Various compilers (C, C++, Objective-C, ...)
Name: gcc
Version: %{gcc_version}
Release: 57
Release: 58
License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD
URL: https://gcc.gnu.org
@ -303,7 +303,9 @@ Patch192: 0192-Add-feedback-directed-filter_and_sort_kernels-in-Pha.patch
Patch193: 0193-Add-prefetch-level-parameter-to-specify-the-last-lev.patch
Patch194: 0194-AutoFDO-avoid-accessing-dump_file-null-pointer.patch
Patch195: 0195-add-whitelist-feature-for-OneProfile.patch
Patch196: 0196-fix-bugs-in-loop-detections-add-filter-to-SSA-statem.patch
Patch197: 0197-Add-hip09-machine-discribtion.patch
Patch198: 0198-bugfix-Modify-the-hip09-CPU-information.patch
%global gcc_target_platform %{_arch}-linux-gnu
%if %{build_go}
@ -951,6 +953,9 @@ not stable, so plugins must be rebuilt any time GCC is updated.
%patch193 -p1
%patch194 -p1
%patch195 -p1
%patch196 -p1
%patch197 -p1
%patch198 -p1
%build
@ -2985,6 +2990,12 @@ end
%doc rpm.doc/changelogs/libcc1/ChangeLog*
%changelog
* Fri Jun 14 2024 zhenyu zhao <zhaozhenyu17@huawei.com> - 10.3.1-58
- Type:Sync
- ID:NA
- SUG:NA
- DESC: Sync patch from openeuler/gcc
* Thu Jun 13 2024 zhenyu zhao <zhaozhenyu17@huawei.com> - 10.3.1-57
- Type:Sync
- ID:NA