Sync from 22.03-LTS-SP3

This commit is contained in:
郑晨卉 2024-05-29 13:06:22 +08:00
commit e3ae83a05a
37 changed files with 18915 additions and 13 deletions

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,477 @@
From 1e886b98ff7ffdac023dcee8645717f2849d2eb7 Mon Sep 17 00:00:00 2001
From: Diachkov Ilia WX1215920 <diachkov.ilia1@huawei-partners.com>
Date: Wed, 25 Oct 2023 18:12:28 +0300
Subject: [PATCH 1/6] Add maxmin and uzp1/uzp2 combining
---
gcc/config/aarch64/aarch64-simd.md | 339 +++++++++++++++++++++++++-
gcc/config/aarch64/predicates.md | 19 ++
gcc/testsuite/gcc.dg/combine-maxmin.c | 46 ++++
3 files changed, 399 insertions(+), 5 deletions(-)
create mode 100755 gcc/testsuite/gcc.dg/combine-maxmin.c
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 6049adc3f..7f707de57 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1034,6 +1034,82 @@
[(set_attr "type" "neon_shift_imm<q>")]
)
+;; Simplify an extension followed by a truncation around a shift+neg operation.
+
+(define_insn_and_split "*aarch64_sshr_neg_v8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=w")
+ (vec_concat:V8HI
+ (truncate:V4HI
+ (ashiftrt:V4SI
+ (neg:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "register_operand")
+ (match_operand:V8HI 3 "vect_par_cnst_lo_half"))))
+ (match_operand:V4SI 2 "maxmin_arith_shift_operand")))
+ (truncate:V4HI
+ (ashiftrt:V4SI
+ (neg:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 1)
+ (match_operand:V8HI 4 "vect_par_cnst_hi_half"))))
+ (match_dup 2)))))]
+ "TARGET_SIMD"
+ "#"
+ "&& true"
+ [(set (match_operand:V8HI 0 "register_operand" "=w")
+ (ashiftrt:V8HI
+ (neg:V8HI
+ (match_operand:V8HI 1 "register_operand" "w"))
+ (match_operand:V8HI 2 "aarch64_simd_imm_minus_one")))]
+ {
+ /* Reduce the shift amount to suit the narrower element mode. */
+ int val = INTVAL (CONST_VECTOR_ENCODED_ELT (operands[2], 0))
+ - (GET_MODE_UNIT_BITSIZE (GET_MODE (operands[2])) / 2);
+ operands[2] = aarch64_simd_gen_const_vector_dup (V8HImode, val);
+ }
+ [(set_attr "type" "multiple")]
+)
+
+;; A helper definition that allows the combiner to use the previous pattern.
+
+(define_insn_and_split "*aarch64_sshr_neg_tmpv8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=w")
+ (vec_concat:V8HI
+ (truncate:V4HI
+ (ashiftrt:V4SI
+ (neg:V4SI
+ (match_operand:V4SI 1 "register_operand" "w"))
+ (match_operand:V4SI 2 "maxmin_arith_shift_operand")))
+ (truncate:V4HI
+ (ashiftrt:V4SI
+ (neg:V4SI
+ (match_operand:V4SI 3 "register_operand" "w"))
+ (match_dup 2)))))]
+ "TARGET_SIMD"
+ "#"
+ "&& true"
+ [(set (match_operand:V4SI 1 "register_operand" "=w")
+ (ashiftrt:V4SI
+ (neg:V4SI
+ (match_dup 1))
+ (match_operand:V4SI 2 "maxmin_arith_shift_operand")))
+ (set (match_operand:V4SI 3 "register_operand" "=w")
+ (ashiftrt:V4SI
+ (neg:V4SI
+ (match_dup 3))
+ (match_dup 2)))
+ (set (match_operand:V8HI 0 "register_operand" "=w")
+ (vec_concat:V8HI
+ (truncate:V4HI
+ (match_dup 1))
+ (truncate:V4HI
+ (match_dup 3))))]
+ ""
+ [(set_attr "type" "multiple")]
+)
+
(define_insn "*aarch64_simd_sra<mode>"
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
(plus:VDQ_I
@@ -1459,6 +1535,78 @@
[(set_attr "type" "neon_minmax<q>")]
)
+;; Use sequential smax+smin to replace vector arithmetic operations like this:
+;; a = ((x & ~((1 << 8)-1)) ? (-x)>>31 & ((1 << 8)-1) : x);
+;; TODO: maybe extend to scalar operations.
+
+(define_insn_and_split "*aarch64_maxmin_arith<mode>"
+ [(set (match_operand:VDQHSD 0 "register_operand" "=w")
+ (xor:VDQHSD
+ (and:VDQHSD
+ (xor:VDQHSD
+ (ashiftrt:VDQHSD
+ (neg:VDQHSD
+ (match_operand:VDQHSD 1 "register_operand"))
+ (match_operand:VDQHSD 2 "maxmin_arith_shift_operand"))
+ (match_dup 1))
+ (neg:VDQHSD
+ (eq:VDQHSD
+ (and:VDQHSD
+ (match_dup 1)
+ (match_operand:VDQHSD 3 "aarch64_bic_imm_for_maxmin"))
+ (match_operand:VDQHSD 4 "aarch64_simd_or_scalar_imm_zero"))))
+ (ashiftrt:VDQHSD
+ (neg:VDQHSD
+ (match_dup 1))
+ (match_dup 2))))]
+ "TARGET_SIMD && !reload_completed"
+ "#"
+ "&& true"
+ [(set (match_operand:VDQHSD 5 "register_operand" "w") (match_dup 3))
+ (set (match_operand:VDQHSD 6 "register_operand" "w") (match_dup 4))
+ (set (match_operand:VDQHSD 0 "register_operand" "=w")
+ (smax:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")
+ (match_operand:VDQHSD 6 "register_operand" "w")))
+ (set (match_operand:VDQHSD 0 "register_operand" "=w")
+ (smin:VDQHSD (match_operand:VDQHSD 0 "register_operand" "w")
+ (match_operand:VDQHSD 5 "register_operand" "w")))]
+ {
+ if (can_create_pseudo_p ())
+ {
+ int val = INTVAL (CONST_VECTOR_ENCODED_ELT (operands[3], 0));
+ operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+ ~val);
+ operands[5] = gen_reg_rtx (<MODE>mode);
+ operands[6] = gen_reg_rtx (<MODE>mode);
+ }
+ else
+ FAIL;
+ }
+ [(set_attr "type" "neon_minmax<q>")]
+)
+
+;; A helper definition that allows the combiner to use the previous pattern.
+
+(define_insn_and_split "*aarch64_maxmin_tmp<mode>"
+ [(set (match_operand:VDQHSD 0 "register_operand" "=w")
+ (ashiftrt:VDQHSD
+ (neg:VDQHSD
+ (match_operand:VDQHSD 1 "register_operand" "w"))
+ (match_operand:VDQHSD 2 "maxmin_arith_shift_operand")))]
+ "TARGET_SIMD"
+ "#"
+ "&& reload_completed"
+ [(set (match_operand:VDQHSD 0 "register_operand")
+ (neg:VDQHSD
+ (match_operand:VDQHSD 1 "register_operand" "w")))
+ (set (match_dup 0)
+ (ashiftrt:VDQHSD
+ (match_dup 0)
+ (match_operand:VDQHSD 2 "maxmin_arith_shift_operand")))]
+ ""
+ [(set_attr "type" "neon_minmax<q>")]
+)
+
;; Pairwise FP Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
@@ -1599,7 +1747,8 @@
DONE;
})
-;; For quads.
+;; For quads. Use UZP1 on the narrower type, which discards the high part of
+;; each wide element.
(define_insn "vec_pack_trunc_<mode>"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
@@ -1609,12 +1758,32 @@
"TARGET_SIMD"
{
if (BYTES_BIG_ENDIAN)
- return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
+ return "uzp1\\t%0.<V2ntype>, %2.<V2ntype>, %1.<V2ntype>";
else
- return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
+ return "uzp1\\t%0.<V2ntype>, %1.<V2ntype>, %2.<V2ntype>";
}
- [(set_attr "type" "multiple")
- (set_attr "length" "8")]
+ [(set_attr "type" "neon_permute<q>")
+ (set_attr "length" "4")]
+)
+
+(define_insn "vec_pack_trunc_shifted_<mode>"
+ [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
+ (vec_concat:<VNARROWQ2>
+ (truncate:<VNARROWQ>
+ (ashiftrt:VQN (match_operand:VQN 1 "register_operand" "w")
+ (match_operand:VQN 2 "half_size_operand" "w")))
+ (truncate:<VNARROWQ>
+ (ashiftrt:VQN (match_operand:VQN 3 "register_operand" "w")
+ (match_operand:VQN 4 "half_size_operand" "w")))))]
+ "TARGET_SIMD"
+ {
+ if (BYTES_BIG_ENDIAN)
+ return "uzp2\\t%0.<V2ntype>, %3.<V2ntype>, %1.<V2ntype>";
+ else
+ return "uzp2\\t%0.<V2ntype>, %1.<V2ntype>, %3.<V2ntype>";
+ }
+ [(set_attr "type" "neon_permute<q>")
+ (set_attr "length" "4")]
)
;; Widening operations.
@@ -4852,6 +5021,166 @@
[(set_attr "type" "neon_tst<q>")]
)
+;; Simplify an extension followed by a truncation around a cmtst-like operation.
+
+(define_insn_and_split "*aarch64_cmtst_arith_v8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=w")
+ (vec_concat:V8HI
+ (plus:V4HI
+ (truncate:V4HI
+ (eq:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (and:V8HI
+ (match_operand:V8HI 1 "register_operand")
+ (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin"))
+ (match_operand:V8HI 3 "vect_par_cnst_lo_half")))
+ (match_operand:V4SI 4 "aarch64_simd_or_scalar_imm_zero")))
+ (match_operand:V4HI 5 "aarch64_simd_imm_minus_one"))
+ (plus:V4HI
+ (truncate:V4HI
+ (eq:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (and:V8HI
+ (match_dup 1)
+ (match_dup 2))
+ (match_operand:V8HI 6 "vect_par_cnst_hi_half")))
+ (match_dup 4)))
+ (match_dup 5))))]
+ "TARGET_SIMD && !reload_completed"
+ "#"
+ "&& true"
+ [(set (match_operand:V8HI 6 "register_operand" "=w")
+ (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin"))
+ (set (match_operand:V8HI 0 "register_operand" "=w")
+ (plus:V8HI
+ (eq:V8HI
+ (and:V8HI
+ (match_operand:V8HI 1 "register_operand" "w")
+ (match_dup 6))
+ (match_operand:V8HI 4 "aarch64_simd_imm_zero"))
+ (match_operand:V8HI 5 "aarch64_simd_imm_minus_one")))]
+ {
+ if (can_create_pseudo_p ())
+ {
+ int val = INTVAL (CONST_VECTOR_ENCODED_ELT (operands[4], 0));
+ operands[4] = aarch64_simd_gen_const_vector_dup (V8HImode, val);
+ int val2 = INTVAL (CONST_VECTOR_ENCODED_ELT (operands[5], 0));
+ operands[5] = aarch64_simd_gen_const_vector_dup (V8HImode, val2);
+
+ operands[6] = gen_reg_rtx (V8HImode);
+ }
+ else
+ FAIL;
+ }
+ [(set_attr "type" "neon_tst_q")]
+)
+
+;; Three helper definitions that allow the combiner to use the previous pattern.
+
+(define_insn_and_split "*aarch64_cmtst_arith_tmp_lo_v8hi"
+ [(set (match_operand:V4SI 0 "register_operand" "=w")
+ (neg:V4SI
+ (eq:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (and:V8HI
+ (match_operand:V8HI 1 "register_operand")
+ (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin"))
+ (match_operand:V8HI 3 "vect_par_cnst_lo_half")))
+ (match_operand:V4SI 4 "aarch64_simd_or_scalar_imm_zero"))))]
+ "TARGET_SIMD && !reload_completed"
+ "#"
+ "&& true"
+ [(set (match_operand:V8HI 5 "register_operand" "=w")
+ (and:V8HI
+ (match_operand:V8HI 1 "register_operand")
+ (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin")))
+ (set (match_operand:V4SI 0 "register_operand" "=w")
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 5)
+ (match_operand:V8HI 3 "vect_par_cnst_lo_half"))))
+ (set (match_dup 0)
+ (neg:V4SI
+ (eq:V4SI
+ (match_dup 0)
+ (match_operand:V4SI 4 "aarch64_simd_or_scalar_imm_zero"))))]
+ {
+ if (can_create_pseudo_p ())
+ operands[5] = gen_reg_rtx (V8HImode);
+ else
+ FAIL;
+ }
+ [(set_attr "type" "multiple")]
+)
+
+(define_insn_and_split "*aarch64_cmtst_arith_tmp_hi_v8hi"
+ [(set (match_operand:V4SI 0 "register_operand" "=w")
+ (neg:V4SI
+ (eq:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (and:V8HI
+ (match_operand:V8HI 1 "register_operand")
+ (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin"))
+ (match_operand:V8HI 3 "vect_par_cnst_hi_half")))
+ (match_operand:V4SI 4 "aarch64_simd_or_scalar_imm_zero"))))]
+ "TARGET_SIMD && !reload_completed"
+ "#"
+ "&& true"
+ [(set (match_operand:V8HI 5 "register_operand" "=w")
+ (and:V8HI
+ (match_operand:V8HI 1 "register_operand")
+ (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin")))
+ (set (match_operand:V4SI 0 "register_operand" "=w")
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 5)
+ (match_operand:V8HI 3 "vect_par_cnst_hi_half"))))
+ (set (match_dup 0)
+ (neg:V4SI
+ (eq:V4SI
+ (match_dup 0)
+ (match_operand:V4SI 4 "aarch64_simd_or_scalar_imm_zero"))))]
+ {
+ if (can_create_pseudo_p ())
+ operands[5] = gen_reg_rtx (V8HImode);
+ else
+ FAIL;
+ }
+ [(set_attr "type" "multiple")]
+)
+
+(define_insn_and_split "*aarch64_cmtst_arith_tmpv8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=w")
+ (vec_concat:V8HI
+ (truncate:V4HI
+ (not:V4SI
+ (match_operand:V4SI 1 "register_operand" "w")))
+ (truncate:V4HI
+ (not:V4SI
+ (match_operand:V4SI 2 "register_operand" "w")))))]
+ "TARGET_SIMD"
+ "#"
+ "&& true"
+ [(set (match_operand:V4SI 1 "register_operand" "=w")
+ (not:V4SI
+ (match_dup 1)))
+ (set (match_operand:V4SI 2 "register_operand" "=w")
+ (not:V4SI
+ (match_dup 2)))
+ (set (match_operand:V8HI 0 "register_operand" "=w")
+ (vec_concat:V8HI
+ (truncate:V4HI
+ (match_dup 1))
+ (truncate:V4HI
+ (match_dup 2))))]
+ ""
+ [(set_attr "type" "multiple")]
+)
+
(define_insn_and_split "aarch64_cmtstdi"
[(set (match_operand:DI 0 "register_operand" "=w,r")
(neg:DI
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 1754b1eff..3cd83334b 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -91,6 +91,25 @@
(match_test "aarch64_simd_valid_immediate (op, NULL,
AARCH64_CHECK_ORR)"))))
+(define_predicate "aarch64_bic_imm_for_maxmin"
+ (match_code "const_vector")
+{
+ if (!aarch64_simd_valid_immediate (op, NULL, AARCH64_CHECK_BIC))
+ return false;
+ op = unwrap_const_vec_duplicate (op);
+ unsigned int size = GET_MODE_UNIT_BITSIZE (mode);
+ return CONST_INT_P (op)
+ && ((~UINTVAL (op)) < (((long unsigned int) 1 << size) - 1));
+})
+
+(define_predicate "maxmin_arith_shift_operand"
+ (match_code "const_vector")
+{
+ op = unwrap_const_vec_duplicate (op);
+ unsigned int size = GET_MODE_UNIT_BITSIZE (mode) - 1;
+ return CONST_INT_P (op) && (UINTVAL (op) == size);
+})
+
(define_predicate "aarch64_reg_or_bic_imm"
(ior (match_operand 0 "register_operand")
(and (match_code "const_vector")
diff --git a/gcc/testsuite/gcc.dg/combine-maxmin.c b/gcc/testsuite/gcc.dg/combine-maxmin.c
new file mode 100755
index 000000000..06bce7029
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/combine-maxmin.c
@@ -0,0 +1,46 @@
+/* { dg-do compile { target aarch64-*-* } } */
+/* { dg-options "-O3 -fdump-rtl-combine-all" } */
+
+/* This test checks that smax/smin insns are used for clip evaluation and
+ * uzp1/uzp2 insns for vector element narrowing. It is inspired by
+ * the sources of the x264 codec. */
+
+typedef unsigned char uint8_t;
+typedef long int intptr_t;
+typedef signed short int int16_t;
+
+static __attribute__((always_inline)) inline uint8_t clip (int x )
+{
+ return ( (x & ~((1 << 8)-1)) ? (-x)>>31 & ((1 << 8)-1) : x );
+}
+
+void hf (uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
+ intptr_t stride, int width, int height, int16_t *buf)
+{
+ const int pad = (8 > 9) ? (-10 * ((1 << 8)-1)) : 0;
+ for( int y = 0; y < height; y++ ) {
+ for( int x = -2; x < width+3; x++ ) {
+ int v = ((src)[x-2*stride] + (src)[x+3*stride] - 5*((src)[x-stride]
+ + (src)[x+2*stride]) + 20*((src)[x] + (src)[x+stride]));
+ dstv[x] = clip ( (v + 16) >> 5 );
+ buf[x+2] = v + pad;
+ }
+ for( int x = 0; x < width; x++ )
+ dstc[x] = clip ((((buf+2)[x-2*1] + (buf+2)[x+3*1] - 5*((buf+2)[x-1]
+ + (buf+2)[x+2*1]) + 20*((buf+2)[x] + (buf+2)[x+1]))
+ - 32*pad + 512) >> 10);
+ for( int x = 0; x < width; x++ )
+ dsth[x] = clip ((((src)[x-2*1] + (src)[x+3*1] - 5*((src)[x-1]
+ + (src)[x+2*1]) + 20*((src)[x] + (src)[x+1]))
+ + 16) >> 5);
+ dsth += stride;
+ dstv += stride;
+ dstc += stride;
+ src += stride;
+ }
+}
+
+/* { dg-final { scan-assembler-times {smax\t} 4 } } */
+/* { dg-final { scan-assembler-times {smin\t} 4 } } */
+/* { dg-final { scan-assembler-times {cmtst\t} 2 } } */
+/* { dg-final { scan-assembler-times {uzp1\t} 6 } } */
--
2.33.0

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,233 @@
From 3a48cd1be0915a0fabbfb3a30bd9b67ccd5c65d3 Mon Sep 17 00:00:00 2001
From: Diachkov Ilia WX1215920 <diachkov.ilia1@huawei-partners.com>
Date: Tue, 12 Dec 2023 10:41:12 +0800
Subject: [PATCH 6/6] Implement AES pattern matching
---
gcc/Makefile.in | 1 +
gcc/common.opt | 4 ++++
gcc/config/aarch64/aarch64.c | 24 +++++++++++++++++++++
gcc/doc/tm.texi | 29 +++++++++++++++++++++++++
gcc/doc/tm.texi.in | 12 +++++++++++
gcc/passes.def | 1 +
gcc/target.def | 41 ++++++++++++++++++++++++++++++++++++
gcc/timevar.def | 1 +
gcc/tree-pass.h | 1 +
9 files changed, 114 insertions(+)
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 31bf2cde2..75b28722e 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1288,6 +1288,7 @@ OBJS = \
cgraphunit.o \
cgraphclones.o \
combine.o \
+ crypto-accel.o \
combine-stack-adj.o \
compare-elim.o \
context.o \
diff --git a/gcc/common.opt b/gcc/common.opt
index 36b016253..eb995f701 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1069,6 +1069,10 @@ floop-crc
Common Report Var(flag_loop_crc) Optimization
Do the loop crc conversion.
+fcrypto-accel-aes
+Common Report Var(flag_crypto_accel_aes) Init(0) Optimization
+Perform crypto acceleration AES pattern matching.
+
fauto-inc-dec
Common Report Var(flag_auto_inc_dec) Init(1) Optimization
Generate auto-inc/dec instructions.
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index ae9e0802b..75efbcb97 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -23894,6 +23894,30 @@ is_aarch64_stp_insn (int icode)
return false;
}
+machine_mode
+aarch64_get_v16qi_mode ()
+{
+ return V16QImode;
+}
+
+#undef TARGET_GET_V16QI_MODE
+#define TARGET_GET_V16QI_MODE aarch64_get_v16qi_mode
+
+#undef TARGET_GEN_REV32V16QI
+#define TARGET_GEN_REV32V16QI gen_aarch64_rev32v16qi
+
+#undef TARGET_GEN_AESEV16QI
+#define TARGET_GEN_AESEV16QI gen_aarch64_crypto_aesev16qi
+
+#undef TARGET_GEN_AESDV16QI
+#define TARGET_GEN_AESDV16QI gen_aarch64_crypto_aesdv16qi
+
+#undef TARGET_GEN_AESMCV16QI
+#define TARGET_GEN_AESMCV16QI gen_aarch64_crypto_aesmcv16qi
+
+#undef TARGET_GEN_AESIMCV16QI
+#define TARGET_GEN_AESIMCV16QI gen_aarch64_crypto_aesimcv16qi
+
#undef TARGET_IS_LDP_INSN
#define TARGET_IS_LDP_INSN is_aarch64_ldp_insn
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index ac1d665c5..4a998aa76 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -11870,6 +11870,35 @@ object files that are not referenced from @code{main} and uses export
lists.
@end defmac
+@deftypefn {Target Hook} machine_mode TARGET_GET_V16QI_MODE ()
+This function get the 16 byte elements vector mode if target supports this.
+@end deftypefn
+
+@deftypefn {Target Hook} rtx TARGET_GEN_REV32V16QI (rtx @var{dest}, rtx @var{src})
+This function generate the byte reverse instruction
+ of 16 byte elements vector if target supports this.
+@end deftypefn
+
+@deftypefn {Target Hook} rtx TARGET_GEN_AESEV16QI (rtx @var{dest}, rtx @var{src1}, rtx @var{src2})
+This function generate the AES encryption instruction
+ of 16 byte elements vector if target supports this.
+@end deftypefn
+
+@deftypefn {Target Hook} rtx TARGET_GEN_AESDV16QI (rtx @var{dest}, rtx @var{src1}, rtx @var{src2})
+This function generate the AES decryption instruction
+ of 16 byte elements vector if target supports this.
+@end deftypefn
+
+@deftypefn {Target Hook} rtx TARGET_GEN_AESMCV16QI (rtx @var{dest}, rtx @var{src})
+This function generate the AES mix columns instruction
+ of 16 byte elements vector if target supports this.
+@end deftypefn
+
+@deftypefn {Target Hook} rtx TARGET_GEN_AESIMCV16QI (rtx @var{dest}, rtx @var{src})
+This function generate the AES inversed mix columns instruction
+ of 16 byte elements vector if target supports this.
+@end deftypefn
+
@deftypefn {Target Hook} bool TARGET_IS_LDP_INSN (int @var{icode})
Return true if icode is corresponding to any of the LDP instruction types.
@end deftypefn
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 0cd70dda4..f7094d8c2 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -8010,6 +8010,18 @@ object files that are not referenced from @code{main} and uses export
lists.
@end defmac
+@hook TARGET_GET_V16QI_MODE
+
+@hook TARGET_GEN_REV32V16QI
+
+@hook TARGET_GEN_AESEV16QI
+
+@hook TARGET_GEN_AESDV16QI
+
+@hook TARGET_GEN_AESMCV16QI
+
+@hook TARGET_GEN_AESIMCV16QI
+
@hook TARGET_IS_LDP_INSN
@hook TARGET_IS_STP_INSN
diff --git a/gcc/passes.def b/gcc/passes.def
index ba13d897c..da5d71646 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -448,6 +448,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_rtl_fwprop_addr);
NEXT_PASS (pass_inc_dec);
NEXT_PASS (pass_initialize_regs);
+ NEXT_PASS (pass_crypto_accel);
NEXT_PASS (pass_ud_rtl_dce);
NEXT_PASS (pass_combine);
NEXT_PASS (pass_if_after_combine);
diff --git a/gcc/target.def b/gcc/target.def
index 48c8a8234..b4dff78ea 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -2727,6 +2727,47 @@ modes and they have different conditional execution capability, such as ARM.",
bool, (void),
default_have_conditional_execution)
+DEFHOOK
+(get_v16qi_mode,
+ "This function get the 16 byte elements vector mode if target supports this.",
+ machine_mode, (),
+ NULL)
+
+DEFHOOK
+(gen_rev32v16qi,
+ "This function generate the byte reverse instruction\n\
+ of 16 byte elements vector if target supports this.",
+ rtx, (rtx dest, rtx src),
+ NULL)
+
+DEFHOOK
+(gen_aesev16qi,
+ "This function generate the AES encryption instruction\n\
+ of 16 byte elements vector if target supports this.",
+ rtx, (rtx dest, rtx src1, rtx src2),
+ NULL)
+
+DEFHOOK
+(gen_aesdv16qi,
+ "This function generate the AES decryption instruction\n\
+ of 16 byte elements vector if target supports this.",
+ rtx, (rtx dest, rtx src1, rtx src2),
+ NULL)
+
+DEFHOOK
+(gen_aesmcv16qi,
+ "This function generate the AES mix columns instruction\n\
+ of 16 byte elements vector if target supports this.",
+ rtx, (rtx dest, rtx src),
+ NULL)
+
+DEFHOOK
+(gen_aesimcv16qi,
+ "This function generate the AES inversed mix columns instruction\n\
+ of 16 byte elements vector if target supports this.",
+ rtx, (rtx dest, rtx src),
+ NULL)
+
DEFHOOK
(is_ldp_insn,
"Return true if icode is corresponding to any of the LDP instruction types.",
diff --git a/gcc/timevar.def b/gcc/timevar.def
index 24caf1b5d..9ca74dffe 100644
--- a/gcc/timevar.def
+++ b/gcc/timevar.def
@@ -258,6 +258,7 @@ DEFTIMEVAR (TV_AUTO_INC_DEC , "auto inc dec")
DEFTIMEVAR (TV_CSE2 , "CSE 2")
DEFTIMEVAR (TV_BRANCH_PROB , "branch prediction")
DEFTIMEVAR (TV_COMBINE , "combiner")
+DEFTIMEVAR (TV_CRYPTO_ACCEL , "crypto accel")
DEFTIMEVAR (TV_IFCVT , "if-conversion")
DEFTIMEVAR (TV_MODE_SWITCH , "mode switching")
DEFTIMEVAR (TV_SMS , "sms modulo scheduling")
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 232a3fdf6..29dc7e34b 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -570,6 +570,7 @@ extern rtl_opt_pass *make_pass_cse2 (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_df_initialize_opt (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_df_initialize_no_opt (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_reginfo_init (gcc::context *ctxt);
+extern rtl_opt_pass *make_pass_crypto_accel (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_inc_dec (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_stack_ptr_mod (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_initialize_regs (gcc::context *ctxt);
--
2.33.0

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,51 @@
From 885c6fbfa6412a81740a8c806fa82273b7114b24 Mon Sep 17 00:00:00 2001
From: Pronin Alexander 00812787 <pronin.alexander@huawei.com>
Date: Wed, 13 Dec 2023 18:38:33 +0800
Subject: [PATCH 1/2] Fix lost ftree-fold-phiopt option in tests
---
gcc/testsuite/gcc.dg/double_sized_mul-1.c | 2 +-
gcc/testsuite/gcc.dg/double_sized_mul-2.c | 2 +-
gcc/testsuite/gcc.dg/ifcvt-gimple.c | 2 +-
3 files changed, 3 insertions(+), 3 deletions(-)
diff --git a/gcc/testsuite/gcc.dg/double_sized_mul-1.c b/gcc/testsuite/gcc.dg/double_sized_mul-1.c
index 4d475cc8a..bdb503bc4 100644
--- a/gcc/testsuite/gcc.dg/double_sized_mul-1.c
+++ b/gcc/testsuite/gcc.dg/double_sized_mul-1.c
@@ -1,7 +1,7 @@
/* { dg-do compile } */
/* fif-conversion-gimple and fuaddsub-overflow-match-all are required for
proper overflow detection in some cases. */
-/* { dg-options "-O2 -fif-conversion-gimple -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */
+/* { dg-options "-O2 -fif-conversion-gimple -fuaddsub-overflow-match-all -ftree-fold-phiopt -fdump-tree-widening_mul-stats" } */
#include <stdint.h>
typedef unsigned __int128 uint128_t;
diff --git a/gcc/testsuite/gcc.dg/double_sized_mul-2.c b/gcc/testsuite/gcc.dg/double_sized_mul-2.c
index cc6e5af25..f9d58a2f6 100644
--- a/gcc/testsuite/gcc.dg/double_sized_mul-2.c
+++ b/gcc/testsuite/gcc.dg/double_sized_mul-2.c
@@ -1,7 +1,7 @@
/* { dg-do compile } */
/* fif-conversion-gimple is required for proper overflow detection
in some cases. */
-/* { dg-options "-O2 -fif-conversion-gimple -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */
+/* { dg-options "-O2 -fif-conversion-gimple -fuaddsub-overflow-match-all -ftree-fold-phiopt -fdump-tree-widening_mul-stats" } */
#include <stdint.h>
typedef unsigned __int128 uint128_t;
diff --git a/gcc/testsuite/gcc.dg/ifcvt-gimple.c b/gcc/testsuite/gcc.dg/ifcvt-gimple.c
index 0f7c87e5c..4dc0f9206 100644
--- a/gcc/testsuite/gcc.dg/ifcvt-gimple.c
+++ b/gcc/testsuite/gcc.dg/ifcvt-gimple.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fif-conversion-gimple -fdump-tree-optimized" } */
+/* { dg-options "-O2 -fif-conversion-gimple -fdump-tree-optimized -ftree-fold-phiopt" } */
int test_int (int optimizable_int) {
if (optimizable_int > 5)
--
2.33.0

View File

@ -0,0 +1,25 @@
From 126bd5722f96733e7fbe433062861d5c3534911a Mon Sep 17 00:00:00 2001
From: vchernon <chernonog.vyacheslav@huawei.com>
Date: Wed, 13 Dec 2023 21:52:03 +0800
Subject: [PATCH 2/2] [rtl-ifcvt] Free dominance info before cleanup_cfg. Stale
dominance info can cause an infinite loop in cleanup_cfg
---
gcc/ifcvt.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index 209987ebc..04086c560 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -5624,6 +5624,7 @@ if_convert (bool after_combine)
df_live_add_problem ();
df_live_set_all_dirty ();
}
+ free_dominance_info (CDI_DOMINATORS);
cleanup_cfg (CLEANUP_EXPENSIVE);
/* Record whether we are after combine pass. */
--
2.33.0

View File

@ -0,0 +1,42 @@
From 3281cef37191a800d4fcc916c0e9d5c7a43802a4 Mon Sep 17 00:00:00 2001
From: XingYuShuai <1150775134@qq.com>
Date: Thu, 14 Dec 2023 20:11:35 +0800
Subject: [PATCH 1/2] [Loop CRC] Solving the problem of insufficient CRC table
validation
---
gcc/tree-ssa-loop-crc.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/gcc/tree-ssa-loop-crc.c b/gcc/tree-ssa-loop-crc.c
index 9878363eb..2dd9e1e3b 100644
--- a/gcc/tree-ssa-loop-crc.c
+++ b/gcc/tree-ssa-loop-crc.c
@@ -336,11 +336,14 @@ only_one_array_read (class loop *loop, tree &crc_table)
&& TREE_CODE (gimple_assign_lhs (stmt)) == ARRAY_REF)
return false;
+ /* Only one-dimensional integer arrays meet the condition. */
if (gimple_code (stmt) == GIMPLE_ASSIGN
- && TREE_CODE (gimple_assign_rhs1 (stmt)) == ARRAY_REF)
+ && TREE_CODE (gimple_assign_rhs1 (stmt)) == ARRAY_REF
+ && TREE_CODE (TREE_OPERAND (gimple_assign_rhs1 (stmt), 0)) == VAR_DECL
+ && TREE_CODE (TREE_TYPE (gimple_assign_rhs1 (stmt))) == INTEGER_TYPE)
{
if (crc_table == NULL
- && gimple_assign_rhs1 (stmt)->base.readonly_flag)
+ && TREE_READONLY (gimple_assign_rhs1 (stmt)))
{
crc_table = gimple_assign_rhs1 (stmt);
crc_table_read_stmt = stmt;
@@ -438,7 +441,6 @@ match_crc_table (tree crc_table)
return true;
}
-
/* Check the crc table. The loop should have only one data reference.
And match the data reference with the predefined array. */
static bool
--
2.33.0

View File

@ -0,0 +1,924 @@
From 1722afc51311a6bb0b892df50602f660c706162f Mon Sep 17 00:00:00 2001
From: liuf9 <liufeiyang6@huawei.com>
Date: Fri, 15 Dec 2023 11:25:48 +0800
Subject: [PATCH 2/2] [LLC Allocation] Fix some bugs and remove variable
prefetch tool. After outer loop analysis, it is possible to get nested loops
for kernel candidates and this situation has conflicts with the early exiting
criterion for kernel filtering process and we restrict this criterion for
innermost loops only. We also fix some pass configuration bugs in common.opt
and params.opt. We remove variable prefetch tool due to the consideration of
unsafe inputs from users.
---
gcc/common.opt | 2 +-
gcc/params.opt | 24 +-
gcc/testsuite/gcc.dg/llc-allocate/llc-2.c | 2 +-
.../llc-allocate/llc-issue-builtin-prefetch.c | 48 ----
.../llc-allocate/llc-tool-insertion-1.c | 48 ----
.../llc-allocate/llc-tool-insertion-2.c | 48 ----
.../llc-allocate/llc-tool-insertion-3.c | 48 ----
.../llc-allocate/llc-tool-insertion-4.c | 47 ---
.../llc-allocate/llc-tool-insertion-5.c | 48 ----
.../llc-allocate/llc-tool-insertion-6.c | 47 ---
.../llc-tool-insertion-7-null-var-name.c | 52 ----
.../llc-tool-insertion-8-tmp-var-name.c | 54 ----
gcc/tree-ssa-llc-allocate.c | 267 +-----------------
13 files changed, 11 insertions(+), 724 deletions(-)
delete mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-issue-builtin-prefetch.c
delete mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-1.c
delete mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-2.c
delete mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-3.c
delete mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-4.c
delete mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-5.c
delete mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-6.c
delete mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-7-null-var-name.c
delete mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-8-tmp-var-name.c
diff --git a/gcc/common.opt b/gcc/common.opt
index 56ad9a378..a8a2264ee 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2255,7 +2255,7 @@ Generate prefetch instructions, if available, for arrays in loops. The prefetch
level can control the optimize level to array prefetch.
fllc-allocate
-Common Report Var(flag_llc_allocate) Init(-1) Optimization
+Common Report Var(flag_llc_allocate) Optimization
Generate LLC hint instructions.
fipa-prefetch
diff --git a/gcc/params.opt b/gcc/params.opt
index 792ca5c35..ef7bea311 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -1030,13 +1030,11 @@ Common Joined UInteger Var(param_mem_access_num) Init(3) Param Optimization
Memory access num.
-param=prefetch-offset=
-Common Joined UInteger Var(param_prefetch_offset) Init(1024)
-IntegerRange(1, 999999) Param Optimization
+Common Joined UInteger Var(param_prefetch_offset) Init(1024) IntegerRange(1, 999999) Param Optimization
Prefetch Offset, which is usually a power of two due to cache line size.
-param=branch-prob-threshold=
-Common Joined UInteger Var(param_branch_prob_threshold) Init(80) IntegerRange(50, 100)
-Param Optimization
+Common Joined UInteger Var(param_branch_prob_threshold) Init(80) IntegerRange(50, 100) Param Optimization
High Execution Rate Branch Threshold.
-param=issue-topn=
@@ -1051,24 +1049,6 @@ Force issue the topn LLC mem_ref hint, without generating dynamic multi-branches
Common Joined UInteger Var(param_llc_capacity_per_core) Init(114) IntegerRange(0, 999999) Param
LLC capacity per core.
--param=target-variables=
-Common Joined Var(param_target_variables) Init("") Param Optimization
---param=target-variables=<var>[,<var>,...] Target variables for prefetching, separated by comma,
-without space. The representation of a variable can be complex and containing space, please surround
-it by quotation marks and escape special characters in Linux. The input length should be no more
-than 512 characters.
-
--param=use-ref-group-index=
-Common Joined UInteger Var(param_use_ref_group_index) Init(0) IntegerRange(0, 1) Param Optimization
-Prefetch the target variables by their indices in sorted ref_groups, use together with parameter
-target-variables.
-
--param=mem-ref-index=
-Common Joined Var(param_mem_ref_index) Init("") Param Optimization
---param=mem-ref-index=<idx>[,<idx>,...] Prefetch the target variable at the memory reference
-location with the index of customized order, separated by comma, without space. The input length
-should be no more than 512 characters.
-
-param=filter-kernels=
Common Joined UInteger Var(param_filter_kernels) Init(1) IntegerRange(0, 1) Param
Allow LLC allocate pass to greedily filter kernels by traversing the corresponding basic blocks
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-2.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-2.c
index 9f8a5c307..f8b1cc5c1 100644
--- a/gcc/testsuite/gcc.dg/llc-allocate/llc-2.c
+++ b/gcc/testsuite/gcc.dg/llc-allocate/llc-2.c
@@ -45,7 +45,7 @@ main (int argc, char *argv[])
/* { dg-final { scan-tree-dump-not "static_data_size:" "llc_allocate" } } */
/* { dg-final { scan-tree-dump-times "\{ (?:\\d+\\(\\d+\\) ){1}\}" 2 "llc_allocate" } } */
/* { dg-final { scan-tree-dump-not ", size: (?!(0\.000000))" "llc_allocate" } } */
-/* { dg-final { scan-tree-dump-times ", size: 0\.000000" 8 "llc_allocate" } } */
+/* { dg-final { scan-tree-dump-times ", size: 0\.000000" 6 "llc_allocate" } } */
/* { dg-final { scan-tree-dump-times "\\d x_data \\(0.000000, 1, 0\\) : 3" 2 "llc_allocate" } } */
/* { dg-final { scan-tree-dump-times "\\d A_j \\(0.000000, 1, 0\\) : 2" 2 "llc_allocate" } } */
/* { dg-final { scan-tree-dump-times "\\d A_data \\(0.000000, 1, 0\\) : 2" 2 "llc_allocate" } } */
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-issue-builtin-prefetch.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-issue-builtin-prefetch.c
deleted file mode 100644
index 2a58c501f..000000000
--- a/gcc/testsuite/gcc.dg/llc-allocate/llc-issue-builtin-prefetch.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/* { dg-do compile { target { aarch64*-*-linux* } } } */
-/* { dg-options "-O3 -march=armv8.2-a+sve -funroll-loops -ffast-math -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param target-variables=uPtr" } */
-
-#include <stdio.h>
-
-#define N 131590
-#define F 384477
-
-double diagPtr[N];
-double psiPtr[N];
-double ApsiPtr[N];
-int lPtr[F];
-int uPtr[F];
-double lowerPtr[F];
-double upperPtr[F];
-
-void
-AMUL (double *diagPtr, double *psiPtr, double *ApsiPtr, int *lPtr,
- int *uPtr, double *lowerPtr, double *upperPtr, int nCells, int nFaces)
-{
- for (int cell=0; cell<nCells; cell++)
- ApsiPtr[cell] = diagPtr[cell]*psiPtr[cell];
-
- for (int face=0; face<nFaces; face++)
- {
- ApsiPtr[uPtr[face]] += lowerPtr[face]*psiPtr[lPtr[face]];
- ApsiPtr[lPtr[face]] += upperPtr[face]*psiPtr[uPtr[face]];
- }
-}
-
-int
-main (int argc, char *argv[])
-{
- int nCells = N;
- int nFaces = F;
- int testIter = 2;
-
- for (int i=0; i<testIter; i++)
- AMUL (diagPtr,psiPtr,ApsiPtr,lPtr,uPtr,lowerPtr,upperPtr,nCells,nFaces);
-
- return 0;
-}
-
-/* { dg-final { scan-tree-dump-not "runtime issue" "llc_allocate" } } */
-/* { dg-final { scan-tree-dump-times "static issue" 2 "llc_allocate" } } */
-/* { dg-final { scan-tree-dump-times "insert prfm" 2 "llc_allocate" } } */
-/* { dg-final { scan-tree-dump-not "\[&\]?uPtr(?:_\\d+\\(D\\))? \\+ \\d{4};" "llc_allocate" } } */
-/* { dg-final { scan-tree-dump-times "\[&\]?uPtr(?:_\\d+\\(D\\))? \\+ \[_\]\\d{1,4};" 2 "llc_allocate" } } */
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-1.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-1.c
deleted file mode 100644
index 276781c4f..000000000
--- a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-1.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/* { dg-do compile { target { aarch64*-*-linux* } } } */
-/* { dg-options "-O3 -march=armv8.2-a+sve -funroll-loops -ffast-math -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param target-variables=lPtr" } */
-
-#include <stdio.h>
-
-#define N 131590
-#define F 384477
-
-double diagPtr[N];
-double psiPtr[N];
-double ApsiPtr[N];
-int lPtr[F];
-int uPtr[F];
-double lowerPtr[F];
-double upperPtr[F];
-
-void
-AMUL (double *diagPtr, double *psiPtr, double *ApsiPtr, int *lPtr,
- int *uPtr, double *lowerPtr, double *upperPtr, int nCells, int nFaces)
-{
- for (int cell=0; cell<nCells; cell++)
- ApsiPtr[cell] = diagPtr[cell]*psiPtr[cell];
-
- for (int face=0; face<nFaces; face++)
- {
- ApsiPtr[uPtr[face]] += lowerPtr[face]*psiPtr[lPtr[face]];
- ApsiPtr[lPtr[face]] += upperPtr[face]*psiPtr[uPtr[face]];
- }
-}
-
-int
-main (int argc, char *argv[])
-{
- int nCells = N;
- int nFaces = F;
- int testIter = 2;
-
- for (int i=0; i<testIter; i++)
- AMUL (diagPtr,psiPtr,ApsiPtr,lPtr,uPtr,lowerPtr,upperPtr,nCells,nFaces);
-
- return 0;
-}
-
-/* { dg-final { scan-tree-dump-times "NOTICE: Prefetching target variable \""
- " lPtr \"" 2 "llc_allocate" } } */
-/* { dg-final { scan-tree-dump-not "runtime issue" "llc_allocate" } } */
-/* { dg-final { scan-tree-dump-times "static issue" 2 "llc_allocate" } } */
-/* { dg-final { scan-tree-dump-times "insert prfm" 2 "llc_allocate" } } */
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-2.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-2.c
deleted file mode 100644
index 57c76f4a6..000000000
--- a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-2.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/* { dg-do compile { target { aarch64*-*-linux* } } } */
-/* { dg-options "-O3 -march=armv8.2-a+sve -funroll-loops -ffast-math -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param target-variables=,lPtr, --param mem-ref-index=5" } */
-
-#include <stdio.h>
-
-#define N 131590
-#define F 384477
-
-double diagPtr[N];
-double psiPtr[N];
-double ApsiPtr[N];
-int lPtr[F];
-int uPtr[F];
-double lowerPtr[F];
-double upperPtr[F];
-
-void
-AMUL (double *diagPtr, double *psiPtr, double *ApsiPtr, int *lPtr,
- int *uPtr, double *lowerPtr, double *upperPtr, int nCells, int nFaces)
-{
- for (int cell=0; cell<nCells; cell++)
- ApsiPtr[cell] = diagPtr[cell]*psiPtr[cell];
-
- for (int face=0; face<nFaces; face++)
- {
- ApsiPtr[uPtr[face]] += lowerPtr[face]*psiPtr[lPtr[face]];
- ApsiPtr[lPtr[face]] += upperPtr[face]*psiPtr[uPtr[face]];
- }
-}
-
-int
-main (int argc, char *argv[])
-{
- int nCells = N;
- int nFaces = F;
- int testIter = 2;
-
- for (int i=0; i<testIter; i++)
- AMUL (diagPtr,psiPtr,ApsiPtr,lPtr,uPtr,lowerPtr,upperPtr,nCells,nFaces);
-
- return 0;
-}
-
-/* { dg-final { scan-tree-dump-times "WARNING: The target data_ref index is "
- "out of range." 2 "llc_allocate" } } */
-/* { dg-final { scan-tree-dump-not "runtime issue" "llc_allocate" } } */
-/* { dg-final { scan-tree-dump-times "static issue" 2 "llc_allocate" } } */
-/* { dg-final { scan-tree-dump-times "insert prfm" 2 "llc_allocate" } } */
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-3.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-3.c
deleted file mode 100644
index d9c053566..000000000
--- a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-3.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/* { dg-do compile { target { aarch64*-*-linux* } } } */
-/* { dg-options "-O3 -march=armv8.2-a+sve -funroll-loops -ffast-math -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param target-variables=lPtr,uPtr,, --param mem-ref-index=5" } */
-
-#include <stdio.h>
-
-#define N 131590
-#define F 384477
-
-double diagPtr[N];
-double psiPtr[N];
-double ApsiPtr[N];
-int lPtr[F];
-int uPtr[F];
-double lowerPtr[F];
-double upperPtr[F];
-
-void
-AMUL (double *diagPtr, double *psiPtr, double *ApsiPtr, int *lPtr,
- int *uPtr, double *lowerPtr, double *upperPtr, int nCells, int nFaces)
-{
- for (int cell=0; cell<nCells; cell++)
- ApsiPtr[cell] = diagPtr[cell]*psiPtr[cell];
-
- for (int face=0; face<nFaces; face++)
- {
- ApsiPtr[uPtr[face]] += lowerPtr[face]*psiPtr[lPtr[face]];
- ApsiPtr[lPtr[face]] += upperPtr[face]*psiPtr[uPtr[face]];
- }
-}
-
-int
-main (int argc, char *argv[])
-{
- int nCells = N;
- int nFaces = F;
- int testIter = 2;
-
- for (int i=0; i<testIter; i++)
- AMUL (diagPtr,psiPtr,ApsiPtr,lPtr,uPtr,lowerPtr,upperPtr,nCells,nFaces);
-
- return 0;
-}
-
-/* { dg-final { scan-tree-dump-not "WARNING: The number of provided memory "
- "reference indices is less" "llc_allocate" } } */
-/* { dg-final { scan-tree-dump-not "runtime issue" "llc_allocate" } } */
-/* { dg-final { scan-tree-dump-times "static issue" 2 "llc_allocate" } } */
-/* { dg-final { scan-tree-dump-times "insert prfm" 4 "llc_allocate" } } */
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-4.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-4.c
deleted file mode 100644
index b87f9903d..000000000
--- a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-4.c
+++ /dev/null
@@ -1,47 +0,0 @@
-/* { dg-do compile { target { aarch64*-*-linux* } } } */
-/* { dg-options "-O3 -march=armv8.2-a+sve -funroll-loops -ffast-math -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param target-variables=lPtr --param use-ref-group-index=1" } */
-
-#include <stdio.h>
-
-#define N 131590
-#define F 384477
-
-double diagPtr[N];
-double psiPtr[N];
-double ApsiPtr[N];
-int lPtr[F];
-int uPtr[F];
-double lowerPtr[F];
-double upperPtr[F];
-
-void
-AMUL (double *diagPtr, double *psiPtr, double *ApsiPtr, int *lPtr,
- int *uPtr, double *lowerPtr, double *upperPtr, int nCells, int nFaces)
-{
- for (int cell=0; cell<nCells; cell++)
- ApsiPtr[cell] = diagPtr[cell]*psiPtr[cell];
-
- for (int face=0; face<nFaces; face++)
- {
- ApsiPtr[uPtr[face]] += lowerPtr[face]*psiPtr[lPtr[face]];
- ApsiPtr[lPtr[face]] += upperPtr[face]*psiPtr[uPtr[face]];
- }
-}
-
-int
-main (int argc, char *argv[])
-{
- int nCells = N;
- int nFaces = F;
- int testIter = 2;
-
- for (int i=0; i<testIter; i++)
- AMUL (diagPtr,psiPtr,ApsiPtr,lPtr,uPtr,lowerPtr,upperPtr,nCells,nFaces);
-
- return 0;
-}
-
-/* { dg-final { scan-tree-dump-times "ERROR: not an unsigned integer" 1
- "llc_allocate" } } */
-/* { dg-final { scan-tree-dump-not "runtime issue" "llc_allocate" } } */
-/* { dg-final { scan-tree-dump-not "static issue" "llc_allocate" } } */
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-5.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-5.c
deleted file mode 100644
index d07836765..000000000
--- a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-5.c
+++ /dev/null
@@ -1,48 +0,0 @@
-/* { dg-do compile { target { aarch64*-*-linux* } } } */
-/* { dg-options "-O3 -march=armv8.2-a+sve -funroll-loops -ffast-math -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param target-variables=1 --param use-ref-group-index=1" } */
-
-#include <stdio.h>
-
-#define N 131590
-#define F 384477
-
-double diagPtr[N];
-double psiPtr[N];
-double ApsiPtr[N];
-int lPtr[F];
-int uPtr[F];
-double lowerPtr[F];
-double upperPtr[F];
-
-void
-AMUL (double *diagPtr, double *psiPtr, double *ApsiPtr, int *lPtr,
- int *uPtr, double *lowerPtr, double *upperPtr, int nCells, int nFaces)
-{
- for (int cell=0; cell<nCells; cell++)
- ApsiPtr[cell] = diagPtr[cell]*psiPtr[cell];
-
- for (int face=0; face<nFaces; face++)
- {
- ApsiPtr[uPtr[face]] += lowerPtr[face]*psiPtr[lPtr[face]];
- ApsiPtr[lPtr[face]] += upperPtr[face]*psiPtr[uPtr[face]];
- }
-}
-
-int
-main (int argc, char *argv[])
-{
- int nCells = N;
- int nFaces = F;
- int testIter = 2;
-
- for (int i=0; i<testIter; i++)
- AMUL (diagPtr,psiPtr,ApsiPtr,lPtr,uPtr,lowerPtr,upperPtr,nCells,nFaces);
-
- return 0;
-}
-
-/* { dg-final { scan-tree-dump-times "NOTICE: Prefetching target variable \""
- " psiPtr \"" 2 "llc_allocate" } } */
-/* { dg-final { scan-tree-dump-not "runtime issue" "llc_allocate" } } */
-/* { dg-final { scan-tree-dump-times "static issue" 2 "llc_allocate" } } */
-/* { dg-final { scan-tree-dump-times "insert svprfd" 2 "llc_allocate" } } */
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-6.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-6.c
deleted file mode 100644
index c0a6afe5b..000000000
--- a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-6.c
+++ /dev/null
@@ -1,47 +0,0 @@
-/* { dg-do compile { target { aarch64*-*-linux* } } } */
-/* { dg-options "-O3 -march=armv8.2-a+sve -funroll-loops -ffast-math -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param target-variables=3,a --param use-ref-group-index=1" } */
-
-#include <stdio.h>
-
-#define N 131590
-#define F 384477
-
-double diagPtr[N];
-double psiPtr[N];
-double ApsiPtr[N];
-int lPtr[F];
-int uPtr[F];
-double lowerPtr[F];
-double upperPtr[F];
-
-void
-AMUL (double *diagPtr, double *psiPtr, double *ApsiPtr, int *lPtr,
- int *uPtr, double *lowerPtr, double *upperPtr, int nCells, int nFaces)
-{
- for (int cell=0; cell<nCells; cell++)
- ApsiPtr[cell] = diagPtr[cell]*psiPtr[cell];
-
- for (int face=0; face<nFaces; face++)
- {
- ApsiPtr[uPtr[face]] += lowerPtr[face]*psiPtr[lPtr[face]];
- ApsiPtr[lPtr[face]] += upperPtr[face]*psiPtr[uPtr[face]];
- }
-}
-
-int
-main (int argc, char *argv[])
-{
- int nCells = N;
- int nFaces = F;
- int testIter = 2;
-
- for (int i=0; i<testIter; i++)
- AMUL (diagPtr,psiPtr,ApsiPtr,lPtr,uPtr,lowerPtr,upperPtr,nCells,nFaces);
-
- return 0;
-}
-
-/* { dg-final { scan-tree-dump-times "ERROR: not an unsigned integer" 1
- "llc_allocate" } } */
-/* { dg-final { scan-tree-dump-not "runtime issue" "llc_allocate" } } */
-/* { dg-final { scan-tree-dump-not "static issue" "llc_allocate" } } */
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-7-null-var-name.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-7-null-var-name.c
deleted file mode 100644
index 4ad331626..000000000
--- a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-7-null-var-name.c
+++ /dev/null
@@ -1,52 +0,0 @@
-/* { dg-do compile { target { aarch64*-*-linux* } } } */
-/* { dg-options "-O3 -c -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param filter-kernels=0 --param target-variables=\"bb_16(D)->aux\"" } */
-
-#include <stdio.h>
-
-typedef struct stack_def
-{
- int top; /* index to top stack element */
- unsigned long reg_set; /* set of live registers */
- unsigned char reg[128]; /* register - stack mapping */
-} *stack;
-
-typedef struct block_info_def
-{
- struct stack_def stack_in; /* Input stack configuration. */
- struct stack_def stack_out; /* Output stack configuration. */
- unsigned long out_reg_set; /* Stack regs live on output. */
- int done; /* True if block already converted. */
- int predecessors; /* Number of predecessors that need
- to be visited. */
-} *block_info;
-
-typedef struct basic_block_def
-{
- void *aux;
-} *basic_block;
-
-unsigned char
-convert_regs_exit (basic_block bb, int value_reg_low, int value_reg_high)
-{
- stack output_stack;
-
- output_stack = &(((block_info) bb->aux)->stack_in);
- if (value_reg_low == -1)
- output_stack->top = -1;
- else
- {
- int reg;
- output_stack->top = value_reg_high - value_reg_low;
- for (reg = value_reg_low; reg <= value_reg_high; ++reg)
- {
- (output_stack->reg + 16)[value_reg_high - reg] = reg;
- output_stack->reg_set |= (unsigned long) 1 << reg;
- }
- }
- return output_stack->reg[0];
-}
-
-/* { dg-final { scan-tree-dump-not "Unrecognizable variable name"
- "llc_allocate" } } */
-/* { dg-final { scan-tree-dump-not "runtime issue" "llc_allocate" } } */
-/* { dg-final { scan-tree-dump-not "static issue" "llc_allocate" } } */
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-8-tmp-var-name.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-8-tmp-var-name.c
deleted file mode 100644
index 09a525ce1..000000000
--- a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-8-tmp-var-name.c
+++ /dev/null
@@ -1,54 +0,0 @@
-/* { dg-do compile { target { aarch64*-*-linux* } } } */
-/* { dg-options "-O3 -c -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param filter-kernels=0 --param target-variables=tmp_var_0" } */
-
-#include <stdio.h>
-
-typedef struct stack_def
-{
- int top; /* index to top stack element */
- unsigned long reg_set; /* set of live registers */
- unsigned char reg[128]; /* register - stack mapping */
-} *stack;
-
-typedef struct block_info_def
-{
- struct stack_def stack_in; /* Input stack configuration. */
- struct stack_def stack_out; /* Output stack configuration. */
- unsigned long out_reg_set; /* Stack regs live on output. */
- int done; /* True if block already converted. */
- int predecessors; /* Number of predecessors that need
- to be visited. */
-} *block_info;
-
-typedef struct basic_block_def
-{
- void *aux;
-} *basic_block;
-
-unsigned char
-convert_regs_exit (basic_block bb, int value_reg_low, int value_reg_high)
-{
- stack output_stack;
-
- output_stack = &(((block_info) bb->aux)->stack_in);
- if (value_reg_low == -1)
- output_stack->top = -1;
- else
- {
- int reg;
- output_stack->top = value_reg_high - value_reg_low;
- for (reg = value_reg_low; reg <= value_reg_high; ++reg)
- {
- (output_stack->reg + 16)[value_reg_high - reg] = reg;
- output_stack->reg_set |= (unsigned long) 1 << reg;
- }
- }
- return output_stack->reg[0];
-}
-
-/* { dg-final { scan-tree-dump-not "Unrecognizable variable name"
- "llc_allocate" } } */
-/* { dg-final { scan-tree-dump-times "NOTICE: Prefetching target variable \""
- " bb_16(D)->aux \"" 1 "llc_allocate" } } */
-/* { dg-final { scan-tree-dump-not "runtime issue" "llc_allocate" } } */
-/* { dg-final { scan-tree-dump-times "static issue" 1 "llc_allocate" } } */
diff --git a/gcc/tree-ssa-llc-allocate.c b/gcc/tree-ssa-llc-allocate.c
index 890f66e54..fa8979401 100644
--- a/gcc/tree-ssa-llc-allocate.c
+++ b/gcc/tree-ssa-llc-allocate.c
@@ -23,7 +23,6 @@ along with GCC; see the file COPYING3. If not see
#define INCLUDE_VECTOR
#define INCLUDE_LIST
#define INCLUDE_ALGORITHM
-#define INCLUDE_STRING
#include "system.h"
#include "coretypes.h"
#include "backend.h"
@@ -1866,7 +1865,10 @@ filter_and_sort_kernels (vector<class loop *> &sorted_kernels,
list<basic_block> walked_header_bb; /* Used to record nested loops. */
for (unsigned i = 0; i < kernels.size (); ++i)
- end_bb.insert (kernels[i]->header);
+ {
+ if (kernels[i]->inner == NULL)
+ end_bb.insert (kernels[i]->header);
+ }
dump_loop_headers ("kernels", kernels);
@@ -2380,30 +2382,6 @@ issue_builtin_prefetch (data_ref &mem_ref)
update_ssa (TODO_update_ssa_only_virtuals);
}
-/* Retrieve memory reference at the specific index. */
-
-data_ref
-get_data_ref_at_idx (ref_group &var_ref_group)
-{
- unsigned int mem_ref_size = static_cast<unsigned int>(
- var_ref_group.ref_scores.size ());
- if (strlen (param_mem_ref_index) == 0)
- return var_ref_group.first_use;
- else
- {
- /* Insert prefetch hint at highly-likely-used location with the given
- index. */
- if (var_ref_group.mem_ref_index >= mem_ref_size)
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "WARNING: The target data_ref index is out "
- "of range. Use top index instead!\n");
- return var_ref_group.ref_scores[0].d_ref;
- }
- return var_ref_group.ref_scores[var_ref_group.mem_ref_index].d_ref;
- }
-}
-
/* Static form insertion and issue instruction. We may check the
determination of the ARM SVE architecture before SVE hint insertion. */
@@ -2415,7 +2393,7 @@ static_issue (vector<ref_group> &ref_groups, int num_issue_var)
for (int i = 0; i < num_issue_var; ++i)
{
- data_ref mem_ref = get_data_ref_at_idx (ref_groups[i]);
+ data_ref mem_ref = ref_groups[i].first_use;
if (mem_ref.vectorize_p)
{
enum internal_fn ifn_code = gimple_call_internal_fn
@@ -2591,10 +2569,7 @@ issue_llc_hint (vector<ref_group> &ref_groups)
}
if (param_force_issue)
{
- if (strlen (param_target_variables) > 0)
- static_issue (ref_groups, static_cast<int>(ref_groups.size ()));
- else
- static_issue (ref_groups, num_issue_var);
+ static_issue (ref_groups, num_issue_var);
return;
}
calc_type topn_calc_type = STATIC_CALC;
@@ -2626,224 +2601,6 @@ issue_llc_hint (vector<ref_group> &ref_groups)
}
/* ==================== phase entry ==================== */
-/* Check whether a string can be converted to an unsigned integer. */
-
-bool is_unsigned_int (const string &s)
-{
- if (s.empty () || s.size () > PREFETCH_TOOL_NUM_MAX_LEN)
- return false;
-
- for (unsigned int i = 0; i < s.size (); ++i)
- {
- if (s[i] < '0' || s[i] > '9')
- return false;
- }
- return true;
-}
-
-/* Parse a substring separated by comma. If the substring is valid and
- non-empty, store it as a parsed element. */
-
-bool
-parse_string_helper (const string &substr, vector<string>& str_elts,
- bool check_unsigned, size_t start, size_t end)
-{
- if (substr == "" && dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "WARNING: The input string from %lu to %lu is "
- "empty.\n", start, end);
- else if (check_unsigned && !is_unsigned_int (substr))
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "ERROR: not an unsigned integer: %s\n",
- substr.c_str ());
- str_elts.clear ();
- return false;
- }
- else
- str_elts.push_back (substr);
- return true;
-}
-
-/* Parse a user input string, separated by comma. */
-
-void
-parse_string (const string &s, vector<string>& str_elts,
- bool check_unsigned = false)
-{
- string delim = ",";
- size_t start = 0;
- size_t end = s.find (delim);
- string substr = s.substr (start, end - start);
- while (end != string::npos)
- {
- if (!parse_string_helper (substr, str_elts, check_unsigned, start, end))
- return;
- start = end + delim.size ();
- end = s.find (delim, start);
- substr = s.substr (start, end - start);
- }
- parse_string_helper (substr, str_elts, check_unsigned, start, end);
-}
-
-/* Parse user input of target variables and memory indices and create a map
- that assigns a target variable to a memory index. */
-
-void
-parse_param_inputs (map<string, unsigned int> &var2mem_idx)
-{
- /* The user input length should have an input length limit. */
- if ((strlen (param_target_variables) >= PREFETCH_TOOL_INPUT_MAX_LEN
- || strlen (param_mem_ref_index) >= PREFETCH_TOOL_INPUT_MAX_LEN)
- && dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "INVALID INPUT: The user inputs for target variables "
- "and/or memory reference indices are too long for parsing.\n");
-
- vector<string> var_names;
- string target_variables = param_target_variables;
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Start parsing target variables:\n");
- if (param_use_ref_group_index)
- parse_string (target_variables, var_names, true);
- else
- parse_string (target_variables, var_names, false);
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Finish parsing target variables.\n\n");
-
- vector<string> var_mem_indices;
- string mem_indices = param_mem_ref_index;
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Start parsing memory reference indices:\n");
- parse_string (mem_indices, var_mem_indices, true);
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Finish parsing memory reference indices.\n\n");
-
- /* Construct a map of var_name: var_mem_index. */
- if (var_names.size () > 0)
- {
- if (var_mem_indices.size () < var_names.size ())
- {
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "WARNING: The number of provided memory "
- "reference indices is less than that of target "
- "variables.\nUse the top index for all variables "
- "instead.\n");
- for (string& var_name : var_names)
- var2mem_idx[var_name] = 0;
- }
- else
- {
- if (var_mem_indices.size () > var_names.size ()
- && dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "WARNING: The number of target variables is "
- "less than that of memory reference indices.\n");
- for (unsigned int i = 0; i < var_names.size (); ++i)
- {
- var2mem_idx[var_names[i]] = static_cast<unsigned int>(
- atoi (var_mem_indices[i].c_str ()));
- }
- }
- }
-}
-
-/* Filter reference groups by only selecting target variables from the user
- input. There are two options for prefetching target variables:
- 1. Specify variable name parsed by the pass, which you can double-check at
- "sorted ref_groups" section in the dump file.
- 2. Specify variable rank exhibited at "sorted ref_groups" section in the
- dump file.
-*/
-
-void
-prefetch_variables (const vector<ref_group>& ref_groups,
- vector<ref_group>& reduced_ref_groups)
-{
- map<unsigned int, unsigned int> ref_group2mem_idx;
-
- map<string, unsigned int> var2mem_idx; /* externally defined. */
- parse_param_inputs (var2mem_idx);
-
- if (param_use_ref_group_index)
- {
- /* Use ref_group index at "sorted ref_groups" section to specify
- variable. */
- /* Collect the variables in "reduced_ref_group" only if their indices
- show up at "sorted ref_groups" section. */
- for (const pair<string, unsigned int> &var_mem_idx : var2mem_idx)
- {
- unsigned int var_idx = static_cast<unsigned int>(atoi (
- var_mem_idx.first.c_str ()));
- if (var_idx < ref_groups.size ())
- ref_group2mem_idx[var_idx] = var_mem_idx.second;
- else if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "WARNING: The index \"%u\" does not show "
- "up in the ref_groups.\n", var_idx);
- }
- }
- else
- {
- /* Use variable name shown up at "sorted ref_groups" section to specify
- variable:
- var2ref_group_idx + var2mem_idx -> ref_group2mem_idx. */
- /* Create a map that assigns the variable name to its corresponding
- ref_group index. */
- map<string, unsigned int> var2ref_group_idx; /* internally detected. */
- for (unsigned int i = 0; i < ref_groups.size (); ++i)
- {
- const ref_group &curr_ref_group = ref_groups[i];
- const int UINT_MAX_DIGIT = 10;
- /* Unrecognizable variable name related to ref_group. */
- if (!get_name (curr_ref_group.var))
- {
- /* If the variable name does not have a string representation,
- we can rename it by "tmp_var_" + <sorted_ref_group_index>. */
- char group_idx[UINT_MAX_DIGIT];
- sprintf (group_idx, "%u", i);
- string tmp_var_name = "tmp_var_" + std::string (group_idx);
- fprintf (dump_file, "Unrecognizable variable name at ref_group "
- "index %u.\nThe tree expression for variable is: ", i);
- print_generic_expr (dump_file, curr_ref_group.var, TDF_SLIM);
- fprintf (dump_file, "\n");
- var2ref_group_idx[tmp_var_name] = i;
- }
- else
- var2ref_group_idx[std::string (get_name (curr_ref_group.var))] = i;
- }
- /* Collect the variables in "reduced_ref_group" only if they show up in
- the ref_groups. */
- for (const pair<string, unsigned int> &var_mem_idx : var2mem_idx)
- {
- if (var2ref_group_idx.count (var_mem_idx.first))
- {
- unsigned int ref_group_idx = var2ref_group_idx[var_mem_idx.first];
- ref_group2mem_idx[ref_group_idx] = var_mem_idx.second;
- }
- else if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "WARNING: Target variable \" %s \" does "
- "not show up in the ref_groups. Check whether it needs "
- "temporary variable name.\n",
- var_mem_idx.first.c_str ());
- }
- }
-
- for (const pair<unsigned int, unsigned int> &ref_group_mem_idx :
- ref_group2mem_idx)
- {
- ref_group curr_ref_group = ref_groups[ref_group_mem_idx.first];
- curr_ref_group.mem_ref_index = ref_group_mem_idx.second;
- reduced_ref_groups.push_back (curr_ref_group);
- if (dump_file && (dump_flags & TDF_DETAILS))
- {
- fprintf (dump_file, "\nNOTICE: Prefetching target variable \" ");
- print_generic_expr (dump_file, curr_ref_group.var, TDF_SLIM);
- fprintf (dump_file, " \" at ref_group index %u and memory location "
- "index %u.\n", ref_group_mem_idx.first,
- ref_group_mem_idx.second);
- }
- }
- if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "\n\n");
-}
-
/* The LLC intelligent allocation consists of 6 steps. */
@@ -2869,17 +2626,7 @@ llc_allocate (void)
if (!record_and_sort_ref_groups (ref_groups, sorted_kernels, kernels_refs))
return;
- if (strlen (param_target_variables) > 0)
- {
- /* If "param_target_variables" is not empty, we will issue parsed target
- variables compulsorily. */
- param_force_issue = true;
- vector<ref_group> reduced_ref_groups;
- prefetch_variables (ref_groups, reduced_ref_groups);
- issue_llc_hint (reduced_ref_groups);
- }
- else
- issue_llc_hint (ref_groups);
+ issue_llc_hint (ref_groups);
}
/* Check whether the function is an operator reloading function. */
--
2.33.0

View File

@ -0,0 +1,30 @@
From 506eb1ff5ca27cd8d741ddf1894a32645919f773 Mon Sep 17 00:00:00 2001
From: vchernon <chernonog.vyacheslav@huawei.com>
Date: Sun, 17 Dec 2023 01:47:02 +0800
Subject: [PATCH] [rtl-ifcvt][BugFix] change def selection logic in noce_arith
---
gcc/ifcvt.c | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index 04086c560..a55ac16f3 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -1985,11 +1985,10 @@ bbs_ok_for_cmove_arith (basic_block bb_a,
if (!sset_a)
goto end_cmove_arith_check_and_fail;
- if (a_insn == last_a)
- continue;
/* Record all registers that BB_A sets. */
FOR_EACH_INSN_DEF (def, a_insn)
- bitmap_set_bit (bba_sets, DF_REF_REGNO (def));
+ if (!(to_rename && DF_REF_REG (def) == to_rename && a_insn == last_a))
+ bitmap_set_bit (bba_sets, DF_REF_REGNO (def));
}
bitmap_and (intersections, df_get_live_in (bb_b), bba_sets);
--
2.33.0

View File

@ -0,0 +1,57 @@
From 43b6906c94ce6a683d325b8789267b7ee2d9bf15 Mon Sep 17 00:00:00 2001
From: Diachkov Ilia WX1215920 <diachkov.ilia1@huawei-partners.com>
Date: Sat, 16 Dec 2023 11:56:30 +0300
Subject: [PATCH] [perm propagation][Bugfix] Check that the arithmetic
operations follow each other
---
gcc/tree-ssa-forwprop.c | 20 +++++++++++++++++---
1 file changed, 17 insertions(+), 3 deletions(-)
diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
index 92ef5d036..d5e9ca9bb 100644
--- a/gcc/tree-ssa-forwprop.c
+++ b/gcc/tree-ssa-forwprop.c
@@ -2662,14 +2662,28 @@ check_def_use_order (vec<gimple *> &first_stmts, vec<gimple *> &second_stmts)
/* Check similarity of stmts in the block of arithmetic operations. */
static bool
-check_arithmetic_block (vec<gimple *> &initial_perm_stmts, unsigned nstmts)
+check_arithmetic_block (auto_vec<gimple *> &all_arith_stmts,
+ vec<gimple *> &initial_perm_stmts, unsigned nstmts)
{
auto_vec<gimple *> next_stmts (nstmts);
auto_vec<gimple *> prev_stmts (nstmts);
+ hash_set<gimple *> arith_stmt_set;
enum tree_code code;
unsigned i;
- gimple *stmt_it;
+ gimple *stmt_it, *last_stmt;
+ /* Check that the arithmetic operations follow each other. LAST_STMT is
+ taken after sorting so the walk spans the whole uid-ordered range. */
+ all_arith_stmts.qsort (gimple_uid_cmp);
+ last_stmt = all_arith_stmts.last ();
+ FOR_EACH_VEC_ELT (all_arith_stmts, i, stmt_it)
+ arith_stmt_set.add (stmt_it);
+ gimple_stmt_iterator gsi;
+ for (gsi = gsi_for_stmt (all_arith_stmts[0]); gsi_stmt (gsi) != last_stmt;
+ gsi_next (&gsi))
+ if (!arith_stmt_set.contains (gsi_stmt (gsi)))
+ return false;
+
FOR_EACH_VEC_ELT (initial_perm_stmts, i, stmt_it)
prev_stmts.quick_push (stmt_it);
@@ -2778,7 +2792,7 @@ analyze_perm_fwprop (tree type, unsigned HOST_WIDE_INT nelts,
}
/* Check that all results has the same arithmetic patterns. */
- if (!check_arithmetic_block (final_arith_stmts, nelts))
+ if (!check_arithmetic_block (all_arith_stmts, final_arith_stmts, nelts))
return false;
if (final_arith_stmts.length () < nelts)
--
2.33.0

View File

@ -0,0 +1,62 @@
From bed123b58aaf435653e01692830def8d564cf51f Mon Sep 17 00:00:00 2001
From: Diachkov Ilia WX1215920 <diachkov.ilia1@huawei-partners.com>
Date: Mon, 18 Dec 2023 22:49:54 +0300
Subject: [PATCH] [perm propagation][Bugfix] Fix shll/shll2 patterns for perm
prop
---
gcc/config/aarch64/aarch64-simd.md | 8 ++++----
gcc/config/aarch64/predicates.md | 7 +++++++
2 files changed, 11 insertions(+), 4 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 66fcf0074..c7503561f 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -4791,10 +4791,10 @@
(vec_select:V4HI
(match_operand:V8HI 1 "register_operand" "w")
(match_operand:V8HI 2 "vect_par_cnst_lo_half" "")))
- (match_operand:V4SI 3 "aarch64_simd_rshift_imm" "Dr")))]
+ (match_operand:V4SI 3 "aarch64_simd_shift_imm_bitsize_v4si" "i")))]
"TARGET_SIMD"
"shll\t%0.4s, %1.4h, #%3"
- [(set_attr "type" "neon_compare_zero")]
+ [(set_attr "type" "neon_shift_imm_long")]
)
;; vshll_high_n
@@ -4821,10 +4821,10 @@
(vec_select:V4HI
(match_operand:V8HI 1 "register_operand" "w")
(match_operand:V8HI 2 "vect_par_cnst_hi_half" "")))
- (match_operand:V4SI 3 "aarch64_simd_rshift_imm" "Dr")))]
+ (match_operand:V4SI 3 "aarch64_simd_shift_imm_bitsize_v4si" "i")))]
"TARGET_SIMD"
"shll2\t%0.4s, %1.8h, #%3"
- [(set_attr "type" "neon_compare_zero")]
+ [(set_attr "type" "neon_shift_imm_long")]
)
;; vrshr_n
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index b1b3cf82c..90db0efba 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -618,6 +618,13 @@
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), 0, 64)")))
+(define_predicate "aarch64_simd_shift_imm_bitsize_v4si"
+ (match_code "const_vector")
+{
+ HOST_WIDE_INT val = INTVAL (unwrap_const_vec_duplicate (op));
+ return val == 8 || val == 16 || val == 32;
+})
+
(define_predicate "aarch64_constant_pool_symref"
(and (match_code "symbol_ref")
(match_test "CONSTANT_POOL_ADDRESS_P (op)")))
--
2.33.0

View File

@ -0,0 +1,175 @@
From 4369e823f0883c079c0681bef68cead870d02063 Mon Sep 17 00:00:00 2001
From: Feiyang Liu <liufeiyang6@huawei.com>
Date: Wed, 20 Dec 2023 09:48:02 +0800
Subject: [PATCH] [LLC Allocation][Bugfix] Terminate kernel filtering for
same-loop cycle.
---
.../gcc.dg/llc-allocate/llc-same-loop-cycle.c | 125 ++++++++++++++++++
gcc/tree-ssa-llc-allocate.c | 11 +-
2 files changed, 135 insertions(+), 1 deletion(-)
create mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-same-loop-cycle.c
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-same-loop-cycle.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-same-loop-cycle.c
new file mode 100644
index 000000000..ba5b5b0c8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/llc-allocate/llc-same-loop-cycle.c
@@ -0,0 +1,125 @@
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
+/* { dg-options "-O3 -fwhole-program -flto-partition=one -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param filter-kernels=1 --param=branch-prob-threshold=50 -c -w" } */
+
+typedef unsigned long size_t;
+typedef long scalar_t__;
+
+typedef struct TYPE_13__ TYPE_3__ ;
+typedef struct TYPE_12__ TYPE_2__ ;
+typedef struct TYPE_11__ TYPE_1__ ;
+
+struct dom_info {int nodes; int* dfs_parent; int* dfs_order; int* key; int* next_bucket; int* bucket; int* dom; int fake_exit_edge; TYPE_3__** dfs_to_bb; } ;
+typedef enum cdi_direction { ____Placeholder_cdi_direction } cdi_direction ;
+struct TYPE_11__ {scalar_t__ index; } ;
+typedef TYPE_1__ edge_iterator ;
+typedef TYPE_2__* edge ;
+typedef TYPE_3__* basic_block ;
+struct TYPE_13__ {size_t index; int preds; int succs; } ;
+struct TYPE_12__ {TYPE_3__* src; TYPE_3__* dest; } ;
+typedef int TBB ;
+
+basic_block ENTRY_BLOCK_PTR ;
+basic_block EXIT_BLOCK_PTR ;
+scalar_t__ bitmap_bit_p (int,size_t) ;
+edge ei_edge (edge_iterator) ;
+int ei_end_p (edge_iterator) ;
+int ei_next (edge_iterator*) ;
+edge_iterator ei_start (int) ;
+size_t eval (struct dom_info*,int) ;
+size_t last_basic_block ;
+int link_roots (struct dom_info*,int,int) ;
+
+__attribute__((used)) static void
+calc_idoms (struct dom_info *di, enum cdi_direction reverse)
+{
+ TBB v, w, k, par;
+ basic_block en_block;
+ edge_iterator ei, einext;
+
+ if (reverse)
+ en_block = EXIT_BLOCK_PTR;
+ else
+ en_block = ENTRY_BLOCK_PTR;
+
+ /* Go backwards in DFS order, to first look at the leafs. */
+ v = di->nodes;
+ while (v > 1)
+ {
+ basic_block bb = di->dfs_to_bb[v];
+ edge e;
+
+ par = di->dfs_parent[v];
+ k = v;
+
+ ei = (reverse) ? ei_start (bb->succs) : ei_start (bb->preds);
+
+ if (reverse)
+ {
+ /* If this block has a fake edge to exit, process that first. */
+ if (bitmap_bit_p (di->fake_exit_edge, bb->index))
+ {
+ einext = ei;
+ einext.index = 0;
+ goto do_fake_exit_edge;
+ }
+ }
+
+ /* Search all direct predecessors for the smallest node with a path
+ to them. That way we have the smallest node with also a path to
+ us only over nodes behind us. In effect we search for our
+ semidominator. */
+ while (!ei_end_p (ei))
+ {
+ basic_block b;
+ TBB k1;
+
+ e = ei_edge (ei);
+ b = (reverse) ? e->dest : e->src;
+ einext = ei;
+ ei_next (&einext);
+
+ if (b == en_block)
+ {
+ do_fake_exit_edge:
+ k1 = di->dfs_order[last_basic_block];
+ }
+ else
+ k1 = di->dfs_order[b->index];
+
+ /* Call eval() only if really needed. If k1 is above V in DFS tree,
+ then we know, that eval(k1) == k1 and key[k1] == k1. */
+ if (k1 > v)
+ k1 = di->key[eval (di, k1)];
+ if (k1 < k)
+ k = k1;
+
+ ei = einext;
+ }
+
+ di->key[v] = k;
+ link_roots (di, par, v);
+ di->next_bucket[v] = di->bucket[k];
+ di->bucket[k] = v;
+
+ /* Transform semidominators into dominators. */
+ for (w = di->bucket[par]; w; w = di->next_bucket[w])
+ {
+ k = eval (di, w);
+ if (di->key[k] < di->key[w])
+ di->dom[w] = k;
+ else
+ di->dom[w] = par;
+ }
+ /* We don't need to cleanup next_bucket[]. */
+ di->bucket[par] = 0;
+ v--;
+ }
+
+ /* Explicitly define the dominators. */
+ di->dom[1] = 0;
+ for (v = 2; v <= di->nodes; v++)
+ if (di->dom[v] != di->key[v])
+ di->dom[v] = di->dom[di->dom[v]];
+}
+
+/* { dg-final { scan-tree-dump "Find same-loop cycle." "llc_allocate" } } */
diff --git a/gcc/tree-ssa-llc-allocate.c b/gcc/tree-ssa-llc-allocate.c
index fa8979401..62b5f18ad 100644
--- a/gcc/tree-ssa-llc-allocate.c
+++ b/gcc/tree-ssa-llc-allocate.c
@@ -1863,6 +1863,7 @@ filter_and_sort_kernels (vector<class loop *> &sorted_kernels,
set<basic_block> end_bb;
list<basic_block> walked_header_bb; /* Used to record nested loops. */
+ set<int> walked_non_header_bb_idx;
for (unsigned i = 0; i < kernels.size (); ++i)
{
@@ -1895,7 +1896,15 @@ filter_and_sort_kernels (vector<class loop *> &sorted_kernels,
/* bb is not the head of the loop, go to the next. */
if (bb != bb->loop_father->header)
{
- bb = next_high_probability_bb (bb);
+ if (walked_non_header_bb_idx.count (bb->index))
+ {
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Find same-loop cycle. "
+ "Abort filtering process.\n");
+ return false;
+ }
+ walked_non_header_bb_idx.insert (bb->index);
+ bb = next_high_probability_bb (bb);
continue;
}
--
2.33.0

View File

@ -0,0 +1,183 @@
From 708ffe6f132ee39441b66b6ab6b98847d35916b7 Mon Sep 17 00:00:00 2001
From: eastb233 <xiezhiheng@huawei.com>
Date: Tue, 19 Dec 2023 17:03:12 +0800
Subject: [PATCH 1/2] [Struct Reorg] Fix several bugs
---
gcc/ipa-struct-reorg/ipa-struct-reorg.c | 50 ++++++-------------
gcc/testsuite/gcc.dg/struct/struct_reorg-10.c | 29 +++++++++++
gcc/testsuite/gcc.dg/struct/struct_reorg-11.c | 16 ++++++
gcc/testsuite/gcc.dg/struct/struct_reorg-12.c | 26 ++++++++++
4 files changed, 85 insertions(+), 36 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-10.c
create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-11.c
create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-12.c
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
index 7aba74ff1..0064811ac 100644
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
@@ -4105,6 +4105,12 @@ ipa_struct_reorg::maybe_record_assign (cgraph_node *node, gassign *stmt)
maybe_mark_or_record_other_side (rhs, lhs, stmt);
if (TREE_CODE (lhs) == SSA_NAME)
maybe_mark_or_record_other_side (lhs, rhs, stmt);
+
+ /* Handle missing ARRAY_REF cases. */
+ if (TREE_CODE (lhs) == ARRAY_REF)
+ mark_type_as_escape (TREE_TYPE (lhs), escape_array, stmt);
+ if (TREE_CODE (rhs) == ARRAY_REF)
+ mark_type_as_escape (TREE_TYPE (rhs), escape_array, stmt);
}
}
@@ -6169,6 +6175,7 @@ ipa_struct_reorg::rewrite_expr (tree expr, tree newexpr[max_split], bool ignore_
bool escape_from_base = false;
tree newbase[max_split];
+ memset (newbase, 0, sizeof (tree[max_split]));
memset (newexpr, 0, sizeof(tree[max_split]));
if (TREE_CODE (expr) == CONSTRUCTOR)
@@ -8162,43 +8169,14 @@ ipa_struct_reorg::rewrite_cond (gcond *stmt,
should be removed. */
bool
-ipa_struct_reorg::rewrite_debug (gimple *stmt, gimple_stmt_iterator *)
+ipa_struct_reorg::rewrite_debug (gimple *, gimple_stmt_iterator *)
{
- if (current_layout_opt_level >= STRUCT_REORDER_FIELDS)
- {
- /* Delete debug gimple now. */
- return true;
- }
- bool remove = false;
- if (gimple_debug_bind_p (stmt))
- {
- tree var = gimple_debug_bind_get_var (stmt);
- tree newvar[max_split];
- if (rewrite_expr (var, newvar, true))
- remove = true;
- if (gimple_debug_bind_has_value_p (stmt))
- {
- var = gimple_debug_bind_get_value (stmt);
- if (TREE_CODE (var) == POINTER_PLUS_EXPR)
- var = TREE_OPERAND (var, 0);
- if (rewrite_expr (var, newvar, true))
- remove = true;
- }
- }
- else if (gimple_debug_source_bind_p (stmt))
- {
- tree var = gimple_debug_source_bind_get_var (stmt);
- tree newvar[max_split];
- if (rewrite_expr (var, newvar, true))
- remove = true;
- var = gimple_debug_source_bind_get_value (stmt);
- if (TREE_CODE (var) == POINTER_PLUS_EXPR)
- var = TREE_OPERAND (var, 0);
- if (rewrite_expr (var, newvar, true))
- remove = true;
- }
-
- return remove;
+ /* Debug statements may still refer to statements that have been
+ optimized out of the regular gimple. Sometimes these would need to be
+ analyzed and marked as escaping, but that should not happen in the
+ rewrite stage. Handling such cases takes a lot of care for little
+ benefit, so for now we simply delete all debug statements. */
+ return true;
}
/* Rewrite PHI nodes, return true if the PHI was replaced. */
diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-10.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-10.c
new file mode 100644
index 000000000..ec422f76f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-10.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-w -g -O3 -flto-partition=one -fipa-struct-reorg -fwhole-program -S" } */
+
+struct a {
+ int b;
+ char c;
+};
+struct {
+ double d;
+ _Bool e;
+} * f;
+struct g {
+ struct a h;
+} i;
+long j;
+void k();
+void l() { k(i); }
+void k(struct a m) {
+ f->e = 0;
+ for (;;)
+ l();
+}
+int main() {
+ for (; j; f = 0) {
+ struct g *n = 0;
+ char o = n->h.c;
+ }
+ l();
+}
diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-11.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-11.c
new file mode 100644
index 000000000..3e42aa84a
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-11.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-w -g -O3 -flto-partition=one -fipa-struct-reorg -fwhole-program -S" } */
+
+struct a {
+ int b;
+ double c;
+};
+struct d {
+ struct a e;
+};
+int f;
+int main() {
+ _Bool g;
+ struct d **h = 0;
+ g = *h += f;
+}
diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-12.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-12.c
new file mode 100644
index 000000000..d434f9fe0
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-12.c
@@ -0,0 +1,26 @@
+/* { dg-do compile } */
+/* { dg-options "-w -g -O3 -flto-partition=one -fipa-struct-reorg -fwhole-program -S" } */
+
+struct foo {
+ long element1;
+ long element2;
+};
+
+struct goo {
+ struct foo element_foo;
+};
+
+struct goo g1;
+
+void func () {
+ struct foo (*local)[] = 0;
+ long idx;
+ (g1).element_foo = (*local)[idx];
+}
+
+struct foo g2;
+int main () {
+ func ();
+ g2 = g1.element_foo;
+ return 0;
+}
--
2.33.0

View File

@ -0,0 +1,104 @@
From e875e4e7f3716aa268ffbbf55ee199ec82b6aeba Mon Sep 17 00:00:00 2001
From: Mingchuan Wu <wumingchuan1992@foxmail.com>
Date: Thu, 21 Dec 2023 15:50:34 +0800
Subject: [PATCH 2/2] [DFE] Add escape check. Fields with escape risks should
not be processed.
---
gcc/ipa-struct-reorg/ipa-struct-reorg.c | 15 +++++--
gcc/testsuite/gcc.dg/struct/dfe_escape.c | 50 ++++++++++++++++++++++++
2 files changed, 62 insertions(+), 3 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_escape.c
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
index 0064811ac..dcfa7cd95 100644
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
@@ -444,8 +444,13 @@ srtype::has_dead_field (void)
if (!(this_field->field_access & READ_FIELD)
&& !FUNCTION_POINTER_TYPE_P (this_field->fieldtype))
{
- may_dfe = true;
- break;
+ /* Fields with escape risks should not be processed. */
+ if (this_field->type == NULL
+ || (this_field->type->escapes == does_not_escape))
+ {
+ may_dfe = true;
+ break;
+ }
}
}
return may_dfe;
@@ -1030,7 +1035,11 @@ srtype::create_new_type (void)
if (current_layout_opt_level & DEAD_FIELD_ELIMINATION
&& !(f->field_access & READ_FIELD)
&& !FUNCTION_POINTER_TYPE_P (f->fieldtype))
- continue;
+ {
+ /* Fields with escape risks should not be processed. */
+ if (f->type == NULL || (f->type->escapes == does_not_escape))
+ continue;
+ }
f->create_new_fields (newtype, newfields, newlast);
}
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_escape.c b/gcc/testsuite/gcc.dg/struct/dfe_escape.c
new file mode 100644
index 000000000..1b143cd26
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/struct/dfe_escape.c
@@ -0,0 +1,50 @@
+/* { dg-do compile } */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+typedef struct arc arc_t;
+typedef struct arc *arc_p;
+
+typedef struct network
+{
+ int x;
+} network_t;
+
+struct arc
+{
+ int flow;
+ network_t* net_add;
+};
+
+const int MAX = 100;
+
+/* Make the type escape as escape_array, "Type is used in an array [not handled yet]". */
+network_t* net[2];
+arc_p stop_arcs = NULL;
+
+int
+main ()
+{
+ net[0] = (network_t*) calloc (1, sizeof(network_t));
+ stop_arcs = (arc_p) calloc (MAX, sizeof (arc_t));
+
+ net[0]->x = 100;
+
+ for (unsigned i = 0; i < 3; i++)
+ {
+ net[0]->x = net[0]->x + 2;
+ stop_arcs->flow = net[0]->x / 2;
+ stop_arcs->flow = stop_arcs->flow + 20;
+ stop_arcs->net_add = net[0];
+ stop_arcs++;
+ }
+
+ if( net[1] != 0 && stop_arcs != 0)
+ {
+ return -1;
+ }
+ return 0;
+}
+
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_reorg" } } */
--
2.33.0

View File

@ -0,0 +1,80 @@
From 1f4d422fd8008f0af015df53f496c6dce3534b26 Mon Sep 17 00:00:00 2001
From: Mingchuan Wu <wumingchuan1992@foxmail.com>
Date: Fri, 22 Dec 2023 11:38:15 +0800
Subject: [PATCH] [phiopt][testsuite] Add -ftree-fold-phiopt option to 5 test
cases.
Modified test cases include:
1.gcc.dg/pr45416.c
2.gcc.target/i386/pr65871-3.c
3.g++.dg/opt/pr99305.C
4.gcc.dg/pr107190.c
5.g++.dg/tree-ssa/mull64.C
---
gcc/testsuite/g++.dg/opt/pr99305.C | 2 +-
gcc/testsuite/g++.dg/tree-ssa/mull64.C | 2 +-
gcc/testsuite/gcc.dg/pr107190.c | 2 +-
gcc/testsuite/gcc.dg/pr45416.c | 2 +-
gcc/testsuite/gcc.target/i386/pr65871-3.c | 2 +-
5 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/gcc/testsuite/g++.dg/opt/pr99305.C b/gcc/testsuite/g++.dg/opt/pr99305.C
index 6fcdef391..06295116f 100644
--- a/gcc/testsuite/g++.dg/opt/pr99305.C
+++ b/gcc/testsuite/g++.dg/opt/pr99305.C
@@ -1,6 +1,6 @@
// PR tree-optimization/99305
// { dg-do compile }
-// { dg-options "-O3 -fno-ipa-icf -fdump-tree-optimized" }
+// { dg-options "-O3 -ftree-fold-phiopt -fno-ipa-icf -fdump-tree-optimized" }
// { dg-final { scan-tree-dump-times " = \\\(unsigned char\\\) c_\[0-9]*\\\(D\\\);" 3 "optimized" { target { ! unsigned_char } } } }
// { dg-final { scan-tree-dump-times " = \[^\n\r]* \\+ \[0-9]*;" 3 "optimized" } }
// { dg-final { scan-tree-dump-times " = \[^\n\r]* <= 9;" 3 "optimized" } }
diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
index cad891e62..ec359f2ba 100644
--- a/gcc/testsuite/g++.dg/tree-ssa/mull64.C
+++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fmerge-mull -Wno-psabi -fdump-tree-forwprop1-details -fdump-tree-forwprop4-details" } */
+/* { dg-options "-O2 -ftree-fold-phiopt -fmerge-mull -Wno-psabi -fdump-tree-forwprop1-details -fdump-tree-forwprop4-details" } */
# define BN_BITS4 32
# define BN_MASK2 (0xffffffffffffffffL)
diff --git a/gcc/testsuite/gcc.dg/pr107190.c b/gcc/testsuite/gcc.dg/pr107190.c
index d1e72e5df..d4e5fa0d0 100644
--- a/gcc/testsuite/gcc.dg/pr107190.c
+++ b/gcc/testsuite/gcc.dg/pr107190.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -fmerge-mull -fexpensive-optimizations -fdump-tree-phiopt2-details" } */
+/* { dg-options "-O2 -ftree-fold-phiopt -fmerge-mull -fexpensive-optimizations -fdump-tree-phiopt2-details" } */
# define BN_BITS4 32
# define BN_MASK2 (0xffffffffffffffffL)
diff --git a/gcc/testsuite/gcc.dg/pr45416.c b/gcc/testsuite/gcc.dg/pr45416.c
index a3f6a759d..dd37ec534 100644
--- a/gcc/testsuite/gcc.dg/pr45416.c
+++ b/gcc/testsuite/gcc.dg/pr45416.c
@@ -1,6 +1,6 @@
/* { dg-do compile } */
/* { dg-skip-if "Skip for Thumb1." { { arm*-*-* } && { arm_thumb1_ok } } } */
-/* { dg-options "-O2" } */
+/* { dg-options "-O2 -ftree-fold-phiopt" } */
int foo(long long a)
{
diff --git a/gcc/testsuite/gcc.target/i386/pr65871-3.c b/gcc/testsuite/gcc.target/i386/pr65871-3.c
index c7d9bdd96..4fd3b48f8 100644
--- a/gcc/testsuite/gcc.target/i386/pr65871-3.c
+++ b/gcc/testsuite/gcc.target/i386/pr65871-3.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-options "-O2 -mbmi" } */
+/* { dg-options "-O2 -ftree-fold-phiopt -mbmi" } */
int foo (int x, int y)
{
--
2.33.0

View File

@ -0,0 +1,323 @@
From df88d29c355c59e262397fdf3b22ee9099ce40c2 Mon Sep 17 00:00:00 2001
From: Pronin Alexander 00812787 <pronin.alexander@huawei.com>
Date: Tue, 19 Dec 2023 12:19:14 +0300
Subject: [PATCH 1/5] [minmax] Move minmax pattern to gimple.
---
gcc/common.opt | 4 +
gcc/config/aarch64/aarch64-simd.md | 72 ----------------
gcc/match.pd | 104 ++++++++++++++++++++++++
gcc/testsuite/gcc.dg/combine-maxmin-1.c | 15 ++++
gcc/testsuite/gcc.dg/combine-maxmin-2.c | 14 ++++
gcc/testsuite/gcc.dg/combine-maxmin.c | 19 +++--
6 files changed, 151 insertions(+), 77 deletions(-)
create mode 100644 gcc/testsuite/gcc.dg/combine-maxmin-1.c
create mode 100644 gcc/testsuite/gcc.dg/combine-maxmin-2.c
diff --git a/gcc/common.opt b/gcc/common.opt
index a8a2264ee..73234dcc3 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1750,6 +1750,10 @@ fif-conversion-gimple
Common Report Var(flag_if_conversion_gimple) Optimization
Perform conversion of conditional jumps to branchless equivalents during gimple transformations.
+fconvert-minmax
+Common Report Var(flag_convert_minmax) Optimization
+Convert saturating clipping to min max.
+
fstack-reuse=
Common Joined RejectNegative Enum(stack_reuse_level) Var(flag_stack_reuse) Init(SR_ALL) Optimization
-fstack-reuse=[all|named_vars|none] Set stack reuse level for local variables.
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index c7503561f..754343abc 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1535,78 +1535,6 @@
[(set_attr "type" "neon_minmax<q>")]
)
-;; Use sequential smax+smin to replace vector arithmetic operations like this:
-;; a = ((x & ~((1 << 8)-1)) ? (-x)>>31 & ((1 << 8)-1) : x);
-;; TODO: maybe extend to scalar operations.
-
-(define_insn_and_split "*aarch64_maxmin_arith<mode>"
- [(set (match_operand:VDQHSD 0 "register_operand" "=w")
- (xor:VDQHSD
- (and:VDQHSD
- (xor:VDQHSD
- (ashiftrt:VDQHSD
- (neg:VDQHSD
- (match_operand:VDQHSD 1 "register_operand"))
- (match_operand:VDQHSD 2 "maxmin_arith_shift_operand"))
- (match_dup 1))
- (neg:VDQHSD
- (eq:VDQHSD
- (and:VDQHSD
- (match_dup 1)
- (match_operand:VDQHSD 3 "aarch64_bic_imm_for_maxmin"))
- (match_operand:VDQHSD 4 "aarch64_simd_or_scalar_imm_zero"))))
- (ashiftrt:VDQHSD
- (neg:VDQHSD
- (match_dup 1))
- (match_dup 2))))]
- "TARGET_SIMD && !reload_completed"
- "#"
- "&& true"
- [(set (match_operand:VDQHSD 5 "register_operand" "w") (match_dup 3))
- (set (match_operand:VDQHSD 6 "register_operand" "w") (match_dup 4))
- (set (match_operand:VDQHSD 0 "register_operand" "=w")
- (smax:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")
- (match_operand:VDQHSD 6 "register_operand" "w")))
- (set (match_operand:VDQHSD 0 "register_operand" "=w")
- (smin:VDQHSD (match_operand:VDQHSD 0 "register_operand" "w")
- (match_operand:VDQHSD 5 "register_operand" "w")))]
- {
- if (can_create_pseudo_p ())
- {
- int val = INTVAL (CONST_VECTOR_ENCODED_ELT (operands[3], 0));
- operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
- ~val);
- operands[5] = gen_reg_rtx (<MODE>mode);
- operands[6] = gen_reg_rtx (<MODE>mode);
- }
- else
- FAIL;
- }
- [(set_attr "type" "neon_minmax<q>")]
-)
-
-;; The helper definition that allows combiner to use the previous pattern.
-
-(define_insn_and_split "*aarch64_maxmin_tmp<mode>"
- [(set (match_operand:VDQHSD 0 "register_operand" "=w")
- (ashiftrt:VDQHSD
- (neg:VDQHSD
- (match_operand:VDQHSD 1 "register_operand" "w"))
- (match_operand:VDQHSD 2 "maxmin_arith_shift_operand")))]
- "TARGET_SIMD"
- "#"
- "&& reload_completed"
- [(set (match_operand:VDQHSD 0 "register_operand")
- (neg:VDQHSD
- (match_operand:VDQHSD 1 "register_operand" "w")))
- (set (match_dup 0)
- (ashiftrt:VDQHSD
- (match_dup 0)
- (match_operand:VDQHSD 2 "maxmin_arith_shift_operand")))]
- ""
- [(set_attr "type" "neon_minmax<q>")]
-)
-
;; Pairwise FP Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
diff --git a/gcc/match.pd b/gcc/match.pd
index 24ae157af..1097cd926 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -6595,3 +6595,107 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(plus:c@4 (op2:c @0 @1)
(plus:c@5 (double_size_mul_overflow_check_lo @0 @1 @3) (op3:c @0 @1))))
(if (single_use (@4) && single_use (@5)))))
+
+/* MinMax pattern matching helpers. More info on the transformation below. */
+
+/* Match (a & 0b11..100..0) pattern. */
+(match (minmax_cmp_arg @0 @1)
+ (bit_and @0 INTEGER_CST@1)
+ (if (wi::popcount (~wi::to_widest (@1) + 1) == 1)))
+
+/* Match (inverted_sign_bit >> sign_bit_pos) pattern.
+ The match fails for unsigned types, so the transformation is never
+ applied to unsigned integers. Checking here avoids duplicated checks. */
+(match (minmax_sat_arg @0)
+ (rshift (negate @0) INTEGER_CST@1)
+ (if (!TYPE_UNSIGNED (TREE_TYPE (@0))
+ && wi::eq_p (wi::to_widest (@1), TYPE_PRECISION (TREE_TYPE (@0)) - 1))))
+
+/* Transform ((x & ~mask) ? (-x)>>31 & mask : x) to (min (max (x, 0), mask)).
+ The matched pattern can be described as saturated clipping.
+
+ The pattern supports truncation via both casts and bit_and.
+ Also there are patterns for possible inverted conditions. */
+(if (flag_convert_minmax)
+/* Truncation via casts. Unfortunately convert? cannot be applied here
+ because convert and cond take a different number of arguments. */
+ (simplify
+ (convert
+ (cond
+ (ne (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
+ (convert? (minmax_sat_arg @0))
+ (convert? @0)))
+ (if (wi::geu_p (~wi::to_widest (@1) + 1, TYPE_PRECISION (type)))
+ (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
+ (convert (min (max @0 { integer_zero_node; })
+ { mask; })))))
+ (simplify
+ (cond
+ (ne (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
+ (convert? (minmax_sat_arg @0))
+ (convert? @0))
+ (if (wi::geu_p (~wi::to_widest (@1) + 1, TYPE_PRECISION (type)))
+ (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
+ (convert (min (max @0 { integer_zero_node; })
+ { mask; })))))
+
+ (simplify
+ (convert
+ (cond
+ (eq (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
+ (convert? @0)
+ (convert? (minmax_sat_arg @0))))
+ (if (wi::geu_p (~wi::to_widest (@1) + 1, TYPE_PRECISION (type)))
+ (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
+ (convert (min (max @0 { integer_zero_node; })
+ { mask; })))))
+ (simplify
+ (cond
+ (eq (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
+ (convert? @0)
+ (convert? (minmax_sat_arg @0)))
+ (if (wi::geu_p (~wi::to_widest (@1) + 1, TYPE_PRECISION (type)))
+ (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
+ (convert (min (max @0 { integer_zero_node; })
+ { mask; })))))
+
+ /* Truncation via bit_and with mask. Same concerns on convert? here. */
+ (simplify
+ (convert
+ (cond
+ (ne (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
+ (convert? (bit_and (minmax_sat_arg @0) INTEGER_CST@2))
+ (convert? @0)))
+ (if (wi::to_widest (@2) == ~wi::to_widest (@1))
+ (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
+ (convert (min (max @0 { integer_zero_node; })
+ { mask; })))))
+ (simplify
+ (cond
+ (ne (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
+ (convert? (bit_and (minmax_sat_arg @0) INTEGER_CST@2))
+ (convert? @0))
+ (if (wi::to_widest (@2) == ~wi::to_widest (@1))
+ (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
+ (convert (min (max @0 { integer_zero_node; })
+ { mask; })))))
+
+ (simplify
+ (convert
+ (cond
+ (eq (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
+ (convert? @0)
+ (convert? (bit_and (minmax_sat_arg @0) INTEGER_CST@2))))
+ (if (wi::to_widest (@2) == ~wi::to_widest (@1))
+ (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
+ (convert (min (max @0 { integer_zero_node; })
+ { mask; })))))
+ (simplify
+ (cond
+ (eq (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
+ (convert? @0)
+ (convert? (bit_and (minmax_sat_arg @0) INTEGER_CST@2)))
+ (if (wi::to_widest (@2) == ~wi::to_widest (@1))
+ (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
+ (convert (min (max @0 { integer_zero_node; })
+ { mask; }))))))
diff --git a/gcc/testsuite/gcc.dg/combine-maxmin-1.c b/gcc/testsuite/gcc.dg/combine-maxmin-1.c
new file mode 100644
index 000000000..859ff7df8
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/combine-maxmin-1.c
@@ -0,0 +1,15 @@
+/* { dg-do compile { target aarch64-*-* } } */
+/* { dg-options "-O3 -fconvert-minmax" } */
+
+#include <inttypes.h>
+
+__attribute__((noinline))
+void test (int32_t *restrict a, int32_t *restrict x)
+{
+ for (int i = 0; i < 4; i++)
+ a[i] = ((((-x[i]) >> 31) ^ x[i])
+ & (-((int32_t)((x[i] & (~((1 << 8)-1))) == 0)))) ^ ((-x[i]) >> 31);
+}
+
+/* { dg-final { scan-assembler-not {smax\t} } } */
+/* { dg-final { scan-assembler-not {smin\t} } } */
diff --git a/gcc/testsuite/gcc.dg/combine-maxmin-2.c b/gcc/testsuite/gcc.dg/combine-maxmin-2.c
new file mode 100644
index 000000000..63d4d85b3
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/combine-maxmin-2.c
@@ -0,0 +1,14 @@
+/* { dg-do compile { target aarch64-*-* } } */
+/* { dg-options "-O3 -fconvert-minmax" } */
+
+#include <inttypes.h>
+
+__attribute__((noinline))
+void test (int8_t *restrict a, int32_t *restrict x)
+{
+ for (int i = 0; i < 8; i++)
+ a[i] = ((x[i] & ~((1 << 9)-1)) ? (-x[i])>>31 & ((1 << 9)-1) : x[i]);
+}
+
+/* { dg-final { scan-assembler-times {smax\t} 4 } } */
+/* { dg-final { scan-assembler-times {smin\t} 4 } } */
diff --git a/gcc/testsuite/gcc.dg/combine-maxmin.c b/gcc/testsuite/gcc.dg/combine-maxmin.c
index 06bce7029..a984fa560 100755
--- a/gcc/testsuite/gcc.dg/combine-maxmin.c
+++ b/gcc/testsuite/gcc.dg/combine-maxmin.c
@@ -1,5 +1,5 @@
/* { dg-do compile { target aarch64-*-* } } */
-/* { dg-options "-O3 -fdump-rtl-combine-all" } */
+/* { dg-options "-O3 -fconvert-minmax" } */
/* The test checks usage of smax/smin insns for clip evaluation and
* uzp1/uzp2 insns for vector element narrowing. It's inspired by
@@ -19,20 +19,26 @@ void hf (uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
{
const int pad = (8 > 9) ? (-10 * ((1 << 8)-1)) : 0;
for( int y = 0; y < height; y++ ) {
+ /* This loop is not being vectorized now. */
for( int x = -2; x < width+3; x++ ) {
int v = ((src)[x-2*stride] + (src)[x+3*stride] - 5*((src)[x-stride]
+ (src)[x+2*stride]) + 20*((src)[x] + (src)[x+stride]));
dstv[x] = clip ( (v + 16) >> 5 );
buf[x+2] = v + pad;
}
+
+ /* Produces two versions of the code: 3xUZP1/2xMAX/2xMIN + 1xUZP1/1xMAX/1xMIN. */
for( int x = 0; x < width; x++ )
dstc[x] = clip ((((buf+2)[x-2*1] + (buf+2)[x+3*1] - 5*((buf+2)[x-1]
+ (buf+2)[x+2*1]) + 20*((buf+2)[x] + (buf+2)[x+1]))
- 32*pad + 512) >> 10);
+
+ /* Produces two versions of the code: 1xUZP1/2xMAX/2xMIN + 0xUZP1/1xMAX/1xMIN. */
for( int x = 0; x < width; x++ )
dsth[x] = clip ((((src)[x-2*1] + (src)[x+3*1] - 5*((src)[x-1]
+ (src)[x+2*1]) + 20*((src)[x] + (src)[x+1]))
+ 16) >> 5);
+
dsth += stride;
dstv += stride;
dstc += stride;
@@ -40,7 +46,10 @@ void hf (uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
}
}
-/* { dg-final { scan-assembler-times {smax\t} 4 } } */
-/* { dg-final { scan-assembler-times {smin\t} 4 } } */
-/* { dg-final { scan-assembler-times {cmtst\t} 2 } } */
-/* { dg-final { scan-assembler-times {uzp1\t} 6 } } */
+/* Max is performed on 0 from signed values, match smax exactly. */
+/* { dg-final { scan-assembler-times {smax\t} 6 } } */
+/* Min is performed on signed val>0 and a mask, min sign doesn't matter. */
+/* { dg-final { scan-assembler-times {[us]min\t} 6 } } */
+/* All of the vectorized patterns are expected to be matched. */
+/* { dg-final { scan-assembler-not {cmtst\t} } } */
+/* { dg-final { scan-assembler-times {uzp1\t} 5 } } */
--
2.33.0

View File

@ -0,0 +1,24 @@
From d6ef1c0c182267d3ab68e3ae1d7f1a576a7bbb2a Mon Sep 17 00:00:00 2001
From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
Date: Wed, 20 Dec 2023 18:44:29 +0800
Subject: [PATCH 2/5] [IPA] Fix test completion-1.c
---
gcc/testsuite/gcc.dg/completion-1.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/gcc/testsuite/gcc.dg/completion-1.c b/gcc/testsuite/gcc.dg/completion-1.c
index 64da64f1c..df2319c76 100644
--- a/gcc/testsuite/gcc.dg/completion-1.c
+++ b/gcc/testsuite/gcc.dg/completion-1.c
@@ -2,6 +2,7 @@
/* { dg-options "--completion=-fipa-ic" } */
/* { dg-begin-multiline-output "" }
+-fipa-ic
-fipa-icf
-fipa-icf-functions
-fipa-icf-variables
--
2.33.0

View File

@ -0,0 +1,71 @@
From ed548cec9d8efe8ef742225c39f5d84aba4be81b Mon Sep 17 00:00:00 2001
From: Diachkov Ilia WX1215920 <diachkov.ilia1@huawei-partners.com>
Date: Wed, 20 Dec 2023 13:53:47 +0300
Subject: [PATCH 3/5] [IPA] Fix fails on checked build and comments from review
---
gcc/ipa-prefetch.c | 24 ++++++++++++++++++++++--
gcc/params.opt | 4 ++--
2 files changed, 24 insertions(+), 4 deletions(-)
diff --git a/gcc/ipa-prefetch.c b/gcc/ipa-prefetch.c
index 93483a6e8..d8bb9a251 100644
--- a/gcc/ipa-prefetch.c
+++ b/gcc/ipa-prefetch.c
@@ -167,6 +167,7 @@ analyse_cgraph ()
}
/* TODO: maybe remove loop info here. */
+ n->get_body ();
push_cfun (DECL_STRUCT_FUNCTION (n->decl));
calculate_dominance_info (CDI_DOMINATORS);
loop_optimizer_init (LOOPS_NORMAL);
@@ -1540,9 +1541,28 @@ optimize_function (cgraph_node *n, function *fn)
return 0;
}
else if (dump_file)
- fprintf (dump_file, "Dominator bb %d for MRs\n", dom_bb->index);
+ {
+ fprintf (dump_file, "Dominator bb %d for MRs:\n", dom_bb->index);
+ gimple_dump_bb (dump_file, dom_bb, 0, dump_flags);
+ fprintf (dump_file, "\n");
+ }
+
+ /* Try to find comp_mr's stmt in the dominator bb. */
+ gimple *last_used = NULL;
+ for (gimple_stmt_iterator si = gsi_last_bb (dom_bb); !gsi_end_p (si);
+ gsi_prev (&si))
+ if (comp_mr->stmts[0] == gsi_stmt (si))
+ {
+ last_used = gsi_stmt (si);
+ if (dump_file)
+ {
+ fprintf (dump_file, "Last used stmt in dominator bb:\n");
+ print_gimple_stmt (dump_file, last_used, 0);
+ }
+ break;
+ }
- split_block (dom_bb, (gimple *) NULL);
+ split_block (dom_bb, last_used);
gimple_stmt_iterator gsi = gsi_last_bb (dom_bb);
/* Create new inc var. Insert new_var = old_var + step * factor. */
diff --git a/gcc/params.opt b/gcc/params.opt
index ef7bea311..76ae925fd 100644
--- a/gcc/params.opt
+++ b/gcc/params.opt
@@ -251,8 +251,8 @@ Common Joined UInteger Var(param_ipa_prefetch_distance_factor) Init(4) Param Opt
The factor represents the number of inductive variable incrementations to evaluate an indirect memory address for IPA prefetch.
-param=ipa-prefetch-locality=
-Common Joined UInteger Var(param_ipa_prefetch_locality) Init(3) Param Optimization
-The flag represents temporal locality values in the following way: 0:pstl1strm, 1:pstl3keep, 2:pstl2keep, 3:pstl1keep.
+Common Joined UInteger Var(param_ipa_prefetch_locality) Init(3) IntegerRange(0, 3) Param Optimization
+The flag represents temporal locality value between 0 and 3, the higher value means the higher temporal locality in the data.
-param=ira-loop-reserved-regs=
Common Joined UInteger Var(param_ira_loop_reserved_regs) Init(2) Param Optimization
--
2.33.0

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,61 @@
From d2742041454dbd4c4c3c3e0a27b5fb26d1e05832 Mon Sep 17 00:00:00 2001
From: Diachkov Ilia WX1215920 <diachkov.ilia1@huawei-partners.com>
Date: Thu, 21 Dec 2023 11:14:06 +0300
Subject: [PATCH 5/5] Fix bugs in ICP (src-openEuler/gcc: I8PYBF, I8PYLL)
---
gcc/ipa-devirt.c | 17 +++++++++++++++--
1 file changed, 15 insertions(+), 2 deletions(-)
diff --git a/gcc/ipa-devirt.c b/gcc/ipa-devirt.c
index fbde7eb94..a18cbe36a 100644
--- a/gcc/ipa-devirt.c
+++ b/gcc/ipa-devirt.c
@@ -4399,6 +4399,11 @@ print_type_set(unsigned ftype_uid, type_alias_map *map)
if (!map->count (ftype_uid))
return;
type_set* s = (*map)[ftype_uid];
+ if (!s)
+ {
+ fprintf (dump_file, "%d (no set)", ftype_uid);
+ return;
+ }
for (type_set::const_iterator it = s->begin (); it != s->end (); it++)
fprintf (dump_file, it == s->begin () ? "%d" : ", %d", *it);
}
@@ -4966,7 +4971,8 @@ analyze_assign_stmt (gimple *stmt)
{
rhs = TREE_OPERAND (rhs, 0);
if (VAR_OR_FUNCTION_DECL_P (rhs) || TREE_CODE (rhs) == STRING_CST
- || TREE_CODE (rhs) == ARRAY_REF || TREE_CODE (rhs) == PARM_DECL)
+ || TREE_CODE (rhs) == ARRAY_REF || TREE_CODE (rhs) == PARM_DECL
+ || TREE_CODE (rhs) == LABEL_DECL)
rhs_type = build_pointer_type (TREE_TYPE (rhs));
else if (TREE_CODE (rhs) == COMPONENT_REF)
{
@@ -4980,7 +4986,12 @@ analyze_assign_stmt (gimple *stmt)
gcc_assert (POINTER_TYPE_P (rhs_type));
}
else
- gcc_unreachable();
+ {
+ fprintf (dump_file, "\nUnsupported rhs type %s in assign stmt: ",
+ get_tree_code_name (TREE_CODE (rhs)));
+ print_gimple_stmt (dump_file, stmt, 0);
+ gcc_unreachable ();
+ }
}
else
rhs_type = TREE_TYPE (rhs);
@@ -5678,6 +5689,8 @@ merge_fs_map_for_ftype_aliases ()
decl_set *d_set = it1->second;
tree type = (*type_uid_map)[it1->first];
type_set *set = (*fta_map)[it1->first];
+ if (!set)
+ continue;
for (type_set::const_iterator it2 = set->begin ();
it2 != set->end (); it2++)
{
--
2.33.0

168
0177-Fix-sqlite-build.patch Normal file
View File

@ -0,0 +1,168 @@
From 71a992aca88f63ec1afb1608619b82a857d8e297 Mon Sep 17 00:00:00 2001
From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
Date: Fri, 22 Dec 2023 10:11:24 +0800
Subject: [PATCH 1/4] Fix sqlite build
---
gcc/ipa-prefetch.c | 71 ++++++++++++++++++++++++++--------------------
gcc/ipa-sra.c | 7 +++++
2 files changed, 47 insertions(+), 31 deletions(-)
diff --git a/gcc/ipa-prefetch.c b/gcc/ipa-prefetch.c
index d8bb9a251..371702ad8 100644
--- a/gcc/ipa-prefetch.c
+++ b/gcc/ipa-prefetch.c
@@ -1092,6 +1092,15 @@ analyse_loops ()
memref_t *mr = it->first, *mr2 = it->second;
if (mr2 == NULL || !(*fmrs_map)[fn]->count (mr))
continue;
+ /* For now optimize only MRs whose mem is a MEM_REF.
+ TODO: support other MR types. */
+ if (TREE_CODE (mr->mem) != MEM_REF)
+ {
+ if (dump_file)
+ fprintf (dump_file, "Skip MR %d: unsupported tree code = %s\n",
+ mr->mr_id, get_tree_code_name (TREE_CODE (mr->mem)));
+ continue;
+ }
if (!optimize_mrs_map->count (fn))
(*optimize_mrs_map)[fn] = new memref_set;
(*optimize_mrs_map)[fn]->insert (mr);
@@ -1104,7 +1113,7 @@ analyse_loops ()
it != (*optimize_mrs_map)[fn]->end (); it++)
{
memref_t *mr = *it, *mr2 = (*mr_candidate_map)[mr];
- fprintf (dump_file, "MRs %d,%d with incremental offset ",
+ fprintf (dump_file, "MRs %d, %d with incremental offset ",
mr->mr_id, mr2->mr_id);
print_generic_expr (dump_file, mr2->offset);
fprintf (dump_file, "\n");
@@ -1437,6 +1446,27 @@ remap_gimple_op_r (tree *tp, int *walk_subtrees, void *data)
return NULL_TREE;
}
+/* Copy stmt and remap its operands. */
+
+static gimple *
+gimple_copy_and_remap (gimple *stmt)
+{
+ gimple *copy = gimple_copy (stmt);
+ gcc_checking_assert (!is_gimple_debug (copy));
+
+ /* Remap all the operands in COPY. */
+ struct walk_stmt_info wi;
+ memset (&wi, 0, sizeof (wi));
+ wi.info = copy;
+ walk_gimple_op (copy, remap_gimple_op_r, &wi);
+ if (dump_file)
+ {
+ fprintf (dump_file, "Stmt copy after remap:\n");
+ print_gimple_stmt (dump_file, copy, 0);
+ }
+ return copy;
+}
+
static void
create_cgraph_edge (cgraph_node *n, gimple *stmt)
{
@@ -1585,7 +1615,6 @@ optimize_function (cgraph_node *n, function *fn)
/* Create other new vars. Insert new stmts. */
struct walk_stmt_info wi;
stmt_set processed_stmts;
- memref_tree_map mr_new_trees;
for (memref_set::const_iterator it = used_mrs.begin ();
it != used_mrs.end (); it++)
{
@@ -1606,23 +1635,10 @@ optimize_function (cgraph_node *n, function *fn)
}
/* Create a new copy of STMT and duplicate STMT's virtual
operands. */
- gimple *copy = gimple_copy (mr->stmts[i]);
- gcc_checking_assert (!is_gimple_debug (copy));
-
- /* Remap all the operands in COPY. */
- memset (&wi, 0, sizeof (wi));
- last_stmt = copy;
- wi.info = copy;
- walk_gimple_op (copy, remap_gimple_op_r, &wi);
- if (dump_file)
- {
- fprintf (dump_file, "Stmt %d after remap:\n",i);
- print_gimple_stmt (dump_file, copy, 0);
- }
- gimple_seq_add_stmt (&stmts, copy);
+ last_stmt = gimple_copy_and_remap (mr->stmts[i]);
+ gimple_seq_add_stmt (&stmts, last_stmt);
}
gcc_assert (last_stmt);
- mr_new_trees[mr] = gimple_assign_lhs (last_stmt);
if (dump_file)
{
fprintf (dump_file, "MR (%d) new mem: ", mr->mr_id);
@@ -1664,23 +1680,11 @@ optimize_function (cgraph_node *n, function *fn)
continue;
processed_stmts.insert (mr->stmts[i]);
- gimple *copy = gimple_copy (mr->stmts[i]);
- gcc_checking_assert (!is_gimple_debug (copy));
-
- /* Remap all the operands in COPY. */
- memset (&wi, 0, sizeof (wi));
- wi.info = copy;
- walk_gimple_op (copy, remap_gimple_op_r, &wi);
- if (dump_file)
- {
- fprintf (dump_file, "Stmt %d after remap:\n",i);
- print_gimple_stmt (dump_file, copy, 0);
- }
+ gimple *copy = gimple_copy_and_remap (mr->stmts[i]);
gimple_seq_add_stmt (&stmts, copy);
}
gimple *last_stmt = mr->stmts[0];
gcc_assert (last_stmt);
- mr_new_trees[mr] = gimple_assign_lhs (last_stmt);
tree write_p = mr->is_store ? integer_one_node : integer_zero_node;
tree addr = get_mem_ref_address_ssa_name (mr->mem, NULL_TREE);
if (decl_map->count (addr))
@@ -1689,6 +1693,11 @@ optimize_function (cgraph_node *n, function *fn)
3, addr, write_p, local);
pcalls.safe_push (last_stmt);
gimple_seq_add_stmt (&stmts, last_stmt);
+ if (dump_file)
+ {
+ fprintf (dump_file, "Insert %d prefetch stmt:\n", j);
+ print_gimple_stmt (dump_file, last_stmt, 0);
+ }
}
gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
@@ -1827,7 +1836,7 @@ pass_ipa_prefetch::gate (function *)
/* Don't bother doing anything if the program has errors. */
&& !seen_error ()
&& flag_lto_partition == LTO_PARTITION_ONE
- /* Only enable struct optimizations in lto or whole_program. */
+ /* Only enable prefetch optimizations in lto or whole_program. */
&& (in_lto_p || flag_whole_program));
}
diff --git a/gcc/ipa-sra.c b/gcc/ipa-sra.c
index d7019ec42..ee927bf6a 100644
--- a/gcc/ipa-sra.c
+++ b/gcc/ipa-sra.c
@@ -3448,6 +3448,13 @@ param_splitting_across_edge (cgraph_edge *cs)
gcc_checking_assert (from_ifs && from_ifs->m_parameters);
isra_call_summary *csum = call_sums->get (cs);
+ /* TODO: implement better support for call edges inserted after summary
+ collection but before sra wpa invocation. */
+ if (!csum)
+ {
+ csum = call_sums->get_create (cs);
+ csum->m_return_ignored = true;
+ }
gcc_checking_assert (csum);
unsigned args_count = csum->m_arg_flow.length ();
isra_func_summary *to_ifs = func_sums->get (callee);
--
2.33.0

View File

@ -0,0 +1,52 @@
From b187b3043c5a7aa96e6d1106e4b0f37d14c914a6 Mon Sep 17 00:00:00 2001
From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
Date: Fri, 22 Dec 2023 11:39:09 +0800
Subject: [PATCH 2/4] Fix freetype build
---
gcc/ipa-prefetch.c | 17 +++++++++++++++--
1 file changed, 15 insertions(+), 2 deletions(-)
diff --git a/gcc/ipa-prefetch.c b/gcc/ipa-prefetch.c
index 371702ad8..f91ac3edc 100644
--- a/gcc/ipa-prefetch.c
+++ b/gcc/ipa-prefetch.c
@@ -1522,6 +1522,13 @@ optimize_function (cgraph_node *n, function *fn)
"Skip the case.\n");
return 0;
}
+ if (!tree_fits_shwi_p (inc_mr->step))
+ {
+ if (dump_file)
+ fprintf (dump_file, "Cannot represent incremental MR's step as "
+ "integer. Skip the case.\n");
+ return 0;
+ }
if (dump_file && !used_mrs.empty ())
print_mrs_ids (used_mrs, "Common list of used mrs:\n");
@@ -1607,13 +1614,19 @@ optimize_function (cgraph_node *n, function *fn)
else
inc_code = PLUS_EXPR;
tree step = inc_mr->step;
- unsigned dist_val = tree_to_uhwi (step) * param_ipa_prefetch_distance_factor;
+ HOST_WIDE_INT dist_val = tree_to_shwi (step)
+ * param_ipa_prefetch_distance_factor;
tree dist = build_int_cst (TREE_TYPE (step), dist_val);
tree new_inc_var = gimple_build (&stmts, inc_code, var_type, inc_var, dist);
(*decl_map)[inc_var] = new_inc_var;
+ if (dump_file)
+ {
+ fprintf (dump_file, "New distance value: %ld, new inc var: ", dist_val);
+ print_generic_expr (dump_file, new_inc_var);
+ fprintf (dump_file, "\n");
+ }
/* Create other new vars. Insert new stmts. */
- struct walk_stmt_info wi;
stmt_set processed_stmts;
for (memref_set::const_iterator it = used_mrs.begin ();
it != used_mrs.end (); it++)
--
2.33.0

View File

@ -0,0 +1,29 @@
From 3d1b0da292e383ce2a139c1612ec7e07336bbcd8 Mon Sep 17 00:00:00 2001
From: vchernon <chernonog.vyacheslav@huawei.com>
Date: Fri, 22 Dec 2023 22:05:27 +0800
Subject: [PATCH 3/4] [rtl-ifcvt] refuse to rename def in the last instruction
in BB
---
gcc/ifcvt.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index 3df0f6fdd..025eb6cd1 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -2176,7 +2176,10 @@ noce_rename_regs_in_bb (basic_block test_bb, bitmap rename_regs)
rtx x = SET_DEST (sset);
if (!REG_P (x) || !bitmap_bit_p (rename_regs, REGNO (x)))
continue;
-
+ /* No need to rename dest in the last instruction;
+ it will be renamed anyway. */
+ if (insn == last_insn)
+ continue;
machine_mode mode = GET_MODE (x);
rtx tmp = gen_reg_rtx (mode);
if (!validate_replace_rtx_part (x, tmp, &SET_DEST (sset), insn))
--
2.33.0

View File

@ -0,0 +1,25 @@
From aa66bcf2b684655d0fbcc6b4543ffef1b2e37288 Mon Sep 17 00:00:00 2001
From: vchernon <chernonog.vyacheslav@huawei.com>
Date: Thu, 28 Dec 2023 10:44:35 +0800
Subject: [PATCH] add optimization level requirement to the gate.
---
gcc/crypto-accel.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/gcc/crypto-accel.c b/gcc/crypto-accel.c
index f4e810a6b..e7766a585 100644
--- a/gcc/crypto-accel.c
+++ b/gcc/crypto-accel.c
@@ -2391,7 +2391,7 @@ public:
/* opt_pass methods: */
virtual bool gate (function *)
{
- if (flag_crypto_accel_aes <= 0)
+ if (flag_crypto_accel_aes <= 0 || optimize < 1)
return false;
return targetm.get_v16qi_mode
&& targetm.gen_rev32v16qi
--
2.33.0

115
0181-Fix-issue-I8QD9H.patch Normal file
View File

@ -0,0 +1,115 @@
From 25f1ebeb88a4eae247f58488cac9da878f188d9f Mon Sep 17 00:00:00 2001
From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
Date: Sat, 23 Dec 2023 10:05:10 +0800
Subject: [PATCH 4/4] Fix issue I8QD9H
---
gcc/ipa-prefetch.c | 64 +++++++++++++++++++++++++++-------------------
1 file changed, 37 insertions(+), 27 deletions(-)
diff --git a/gcc/ipa-prefetch.c b/gcc/ipa-prefetch.c
index f91ac3edc..a471b118e 100644
--- a/gcc/ipa-prefetch.c
+++ b/gcc/ipa-prefetch.c
@@ -1467,6 +1467,31 @@ gimple_copy_and_remap (gimple *stmt)
return copy;
}
+/* Copy and remap stmts of MR in reverse order down to LAST_IDX, skipping
+ ones in PROCESSED. Append the copies to STMTS; return the last copy. */
+
+static gimple *
+gimple_copy_and_remap_memref_stmts (memref_t *mr, gimple_seq &stmts,
+ int last_idx, stmt_set &processed)
+{
+ gimple *last_stmt = NULL;
+ for (int i = mr->stmts.length () - 1; i >= last_idx ; i--)
+ {
+ if (processed.count (mr->stmts[i]))
+ continue;
+ processed.insert (mr->stmts[i]);
+ if (dump_file)
+ {
+ fprintf (dump_file, "Copy stmt %d from used MR (%d):\n",
+ i, mr->mr_id);
+ print_gimple_stmt (dump_file, mr->stmts[i], 0);
+ }
+ last_stmt = gimple_copy_and_remap (mr->stmts[i]);
+ gimple_seq_add_stmt (&stmts, last_stmt);
+ }
+ return last_stmt;
+}
+
static void
create_cgraph_edge (cgraph_node *n, gimple *stmt)
{
@@ -1606,7 +1631,16 @@ optimize_function (cgraph_node *n, function *fn)
decl_map = new tree_map;
gcc_assert (comp_mr->stmts[0] && gimple_assign_single_p (comp_mr->stmts[0]));
tree inc_var = gimple_assign_lhs (comp_mr->stmts[0]);
+ /* If old_var definition dominates the current use, just use it, otherwise
+ evaluate it just before new inc var evaluation. */
gimple_seq stmts = NULL;
+ stmt_set processed_stmts;
+ if (!dominated_by_p (CDI_DOMINATORS, dom_bb, gimple_bb (comp_mr->stmts[0])))
+ {
+ gimple *tmp = gimple_copy_and_remap_memref_stmts (comp_mr, stmts, 0,
+ processed_stmts);
+ inc_var = gimple_assign_lhs (tmp);
+ }
tree var_type = TREE_TYPE (inc_var);
enum tree_code inc_code;
if (TREE_CODE (var_type) == POINTER_TYPE)
@@ -1627,30 +1661,14 @@ optimize_function (cgraph_node *n, function *fn)
}
/* Create other new vars. Insert new stmts. */
- stmt_set processed_stmts;
for (memref_set::const_iterator it = used_mrs.begin ();
it != used_mrs.end (); it++)
{
memref_t *mr = *it;
- gimple *last_stmt = NULL;
if (mr == comp_mr)
continue;
- for (int i = mr->stmts.length () - 1; i >= 0 ; i--)
- {
- if (processed_stmts.count (mr->stmts[i]))
- continue;
- processed_stmts.insert (mr->stmts[i]);
- if (dump_file)
- {
- fprintf (dump_file, "Copy stmt %d from used MR (%d):\n",
- i, mr->mr_id);
- print_gimple_stmt (dump_file, mr->stmts[i], 0);
- }
- /* Create a new copy of STMT and duplicate STMT's virtual
- operands. */
- last_stmt = gimple_copy_and_remap (mr->stmts[i]);
- gimple_seq_add_stmt (&stmts, last_stmt);
- }
+ gimple *last_stmt = gimple_copy_and_remap_memref_stmts (mr, stmts, 0,
+ processed_stmts);
gcc_assert (last_stmt);
if (dump_file)
{
@@ -1687,15 +1705,7 @@ optimize_function (cgraph_node *n, function *fn)
memref_t *mr = vmrs[j];
/* Don't need to copy the last stmt, since we insert prefetch insn
instead of it. */
- for (int i = mr->stmts.length () - 1; i >= 1 ; i--)
- {
- if (processed_stmts.count (mr->stmts[i]))
- continue;
- processed_stmts.insert (mr->stmts[i]);
-
- gimple *copy = gimple_copy_and_remap (mr->stmts[i]);
- gimple_seq_add_stmt (&stmts, copy);
- }
+ gimple_copy_and_remap_memref_stmts (mr, stmts, 1, processed_stmts);
gimple *last_stmt = mr->stmts[0];
gcc_assert (last_stmt);
tree write_p = mr->is_store ? integer_one_node : integer_zero_node;
--
2.33.0

View File

@ -0,0 +1,47 @@
From 1724319692f3c5443802e0cef44a81667cfcc0ae Mon Sep 17 00:00:00 2001
From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
Date: Wed, 27 Dec 2023 07:29:26 +0800
Subject: [PATCH 1/4] Fix bugs in ICP (src-openEuler/gcc: I8RKFJ)
---
gcc/ipa-devirt.c | 13 ++++++++++---
1 file changed, 10 insertions(+), 3 deletions(-)
diff --git a/gcc/ipa-devirt.c b/gcc/ipa-devirt.c
index a18cbe36a..987f15a15 100644
--- a/gcc/ipa-devirt.c
+++ b/gcc/ipa-devirt.c
@@ -4669,12 +4669,19 @@ maybe_register_aliases (tree type1, tree type2)
if (register_ailas_type (type1, type2, ta_map))
analyze_pointees (type1, type2);
}
+ unsigned type1_uid = TYPE_UID (type1);
+ unsigned type2_uid = TYPE_UID (type2);
+ if (type_uid_map->count (type1_uid) == 0)
+ (*type_uid_map)[type1_uid] = type1;
+ if (type_uid_map->count (type2_uid) == 0)
+ (*type_uid_map)[type2_uid] = type2;
+
/* If function and non-function type pointers alias,
the function type is unsafe. */
if (FUNCTION_POINTER_TYPE_P (type1) && !FUNCTION_POINTER_TYPE_P (type2))
- unsafe_types->insert (TYPE_UID (type1));
+ unsafe_types->insert (type1_uid);
if (FUNCTION_POINTER_TYPE_P (type2) && !FUNCTION_POINTER_TYPE_P (type1))
- unsafe_types->insert (TYPE_UID (type2));
+ unsafe_types->insert (type2_uid);
/* Try to figure out with pointers to incomplete types. */
if (POINTER_TYPE_P (type1) && POINTER_TYPE_P (type2))
@@ -4972,7 +4979,7 @@ analyze_assign_stmt (gimple *stmt)
rhs = TREE_OPERAND (rhs, 0);
if (VAR_OR_FUNCTION_DECL_P (rhs) || TREE_CODE (rhs) == STRING_CST
|| TREE_CODE (rhs) == ARRAY_REF || TREE_CODE (rhs) == PARM_DECL
- || TREE_CODE (rhs) == LABEL_DECL)
+ || TREE_CODE (rhs) == LABEL_DECL || TREE_CODE (rhs) == CONST_DECL)
rhs_type = build_pointer_type (TREE_TYPE (rhs));
else if (TREE_CODE (rhs) == COMPONENT_REF)
{
--
2.33.0

View File

@ -0,0 +1,26 @@
From 351d049f09b1e96e48c3038ab3a6a9c1d6a13f8d Mon Sep 17 00:00:00 2001
From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
Date: Thu, 28 Dec 2023 09:51:34 +0800
Subject: [PATCH 2/4] Fix fail in ICP (src-openEuler/gcc: I8RP4H)
---
gcc/ipa-devirt.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/gcc/ipa-devirt.c b/gcc/ipa-devirt.c
index 987f15a15..ed19a57a4 100644
--- a/gcc/ipa-devirt.c
+++ b/gcc/ipa-devirt.c
@@ -4979,7 +4979,8 @@ analyze_assign_stmt (gimple *stmt)
rhs = TREE_OPERAND (rhs, 0);
if (VAR_OR_FUNCTION_DECL_P (rhs) || TREE_CODE (rhs) == STRING_CST
|| TREE_CODE (rhs) == ARRAY_REF || TREE_CODE (rhs) == PARM_DECL
- || TREE_CODE (rhs) == LABEL_DECL || TREE_CODE (rhs) == CONST_DECL)
+ || TREE_CODE (rhs) == LABEL_DECL || TREE_CODE (rhs) == CONST_DECL
+ || TREE_CODE (rhs) == RESULT_DECL)
rhs_type = build_pointer_type (TREE_TYPE (rhs));
else if (TREE_CODE (rhs) == COMPONENT_REF)
{
--
2.33.0

View File

@ -0,0 +1,45 @@
From 2a5c250262ec0497a5efbbd1d0d67e7147696074 Mon Sep 17 00:00:00 2001
From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
Date: Thu, 28 Dec 2023 20:20:16 +0800
Subject: [PATCH 1/2] Fix fail in IPA prefetch (src-openEuler/gcc: I8RURA)
---
gcc/ipa-devirt.c | 9 ++++++---
gcc/ipa-prefetch.c | 1 +
2 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/gcc/ipa-devirt.c b/gcc/ipa-devirt.c
index ed19a57a4..9863084e4 100644
--- a/gcc/ipa-devirt.c
+++ b/gcc/ipa-devirt.c
@@ -4995,9 +4995,12 @@ analyze_assign_stmt (gimple *stmt)
}
else
{
- fprintf (dump_file, "\nUnsupported rhs type %s in assign stmt: ",
- get_tree_code_name (TREE_CODE (rhs)));
- print_gimple_stmt (dump_file, stmt, 0);
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ {
+ fprintf (dump_file, "\nUnsupported rhs type %s in assign stmt: ",
+ get_tree_code_name (TREE_CODE (rhs)));
+ print_gimple_stmt (dump_file, stmt, 0);
+ }
gcc_unreachable ();
}
}
diff --git a/gcc/ipa-prefetch.c b/gcc/ipa-prefetch.c
index a471b118e..24cb4424a 100644
--- a/gcc/ipa-prefetch.c
+++ b/gcc/ipa-prefetch.c
@@ -1730,6 +1730,7 @@ optimize_function (cgraph_node *n, function *fn)
for (unsigned i = 0; i < pcalls.length (); i++)
create_cgraph_edge (n, pcalls[i]);
ipa_update_overall_fn_summary (n);
+ renumber_gimple_stmt_uids (DECL_STRUCT_FUNCTION (n->decl));
return 1;
}
--
2.33.0

View File

@ -0,0 +1,26 @@
From 4014d651825c3e03e0ad2eabeddcfb94f5f00e68 Mon Sep 17 00:00:00 2001
From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
Date: Fri, 29 Dec 2023 05:59:00 +0800
Subject: [PATCH 2/2] Fix fail in IPA prefetch (src-openEuler/gcc: I8RV7T)
---
gcc/ipa-prefetch.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/gcc/ipa-prefetch.c b/gcc/ipa-prefetch.c
index 24cb4424a..d9456519c 100644
--- a/gcc/ipa-prefetch.c
+++ b/gcc/ipa-prefetch.c
@@ -943,6 +943,9 @@ compare_memrefs (memref_t* mr, memref_t* mr2)
(*mr_candidate_map)[mr] = mr2;
return;
}
+ /* Probably we shouldn't leave nulls in the map. */
+ if ((*mr_candidate_map)[mr] == NULL)
+ return;
/* TODO: support analysis with incrementation of different fields. */
if ((*mr_candidate_map)[mr]->offset != mr2->offset)
{
--
2.33.0

View File

@ -0,0 +1,26 @@
From 92dc99425b2566e8cc9cba7cec8774911db0c654 Mon Sep 17 00:00:00 2001
From: XingYuShuai <1150775134@qq.com>
Date: Fri, 2 Feb 2024 15:55:07 +0800
Subject: [PATCH 1/3] [Loop CRC] Solving the problem of insufficient CRC table.
---
gcc/tree-ssa-loop-crc.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/gcc/tree-ssa-loop-crc.c b/gcc/tree-ssa-loop-crc.c
index 2dd9e1e3b..26f8e64d1 100644
--- a/gcc/tree-ssa-loop-crc.c
+++ b/gcc/tree-ssa-loop-crc.c
@@ -421,7 +421,8 @@ match_crc_table (tree crc_table)
tree low_bound = array_ref_low_bound (crc_table);
tree up_bound = array_ref_up_bound (crc_table);
tree element_size = array_ref_element_size (crc_table);
- if (low_bound == NULL || up_bound == NULL || element_size == NULL)
+ if (!tree_fits_uhwi_p(low_bound) || !tree_fits_uhwi_p(up_bound) ||
+ !tree_fits_uhwi_p(element_size))
return false;
unsigned HOST_WIDE_INT lb = tree_to_uhwi (low_bound);
unsigned HOST_WIDE_INT ub = tree_to_uhwi (up_bound);
--
2.33.0

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,32 @@
From e7f50fc07c76b60b272cb97151b228d96b67938a Mon Sep 17 00:00:00 2001
From: Diachkov Ilia WX1215920 <diachkov.ilia1@huawei-partners.com>
Date: Mon, 19 Feb 2024 11:06:37 +0300
Subject: [PATCH 3/3] Fix fails in ICP (for src-openEuler/gcc: I90P7M, I91CZ8)
---
gcc/ipa-devirt.c | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/gcc/ipa-devirt.c b/gcc/ipa-devirt.c
index 9863084e4..194ad3f21 100644
--- a/gcc/ipa-devirt.c
+++ b/gcc/ipa-devirt.c
@@ -4805,10 +4805,12 @@ compare_block_and_init_type (tree block, tree t1)
static void
analyze_global_var (varpool_node *var)
{
- var->get_constructor();
tree decl = var->decl;
- if (TREE_CODE (decl) == SSA_NAME || !DECL_INITIAL (decl)
- || integer_zerop (DECL_INITIAL (decl)))
+ if (!decl || !DECL_INITIAL (decl))
+ return;
+ var->get_constructor ();
+ if (TREE_CODE (decl) == SSA_NAME || integer_zerop (DECL_INITIAL (decl))
+ || TREE_CODE (DECL_INITIAL (decl)) == ERROR_MARK)
return;
if (dump_file && (dump_flags & TDF_DETAILS))
--
2.33.0

View File

@ -0,0 +1,739 @@
From 431f80e6d3a323e3382f73a80bf7fc7ee7a73f02 Mon Sep 17 00:00:00 2001
From: XingYuShuai <1150775134@qq.com>
Date: Mon, 26 Feb 2024 20:34:06 +0800
Subject: [PATCH] Add hip11 CPU pipeline scheduling
This patch adds a new mcpu: hip11. It has been tested on aarch64
with no regressions from this patch.
---
gcc/config/aarch64/aarch64-cores.def | 3 +-
gcc/config/aarch64/aarch64-cost-tables.h | 104 ++++++
gcc/config/aarch64/aarch64-tune.md | 2 +-
gcc/config/aarch64/aarch64.c | 83 +++++
gcc/config/aarch64/aarch64.md | 1 +
gcc/config/aarch64/hip11.md | 418 +++++++++++++++++++++++
gcc/doc/invoke.texi | 2 +-
7 files changed, 610 insertions(+), 3 deletions(-)
create mode 100644 gcc/config/aarch64/hip11.md
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index 9c2902924..53125f6bd 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -123,7 +123,7 @@ AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, 8_2A, AARCH64_FL_FOR_
AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_SVE, a64fx, 0x46, 0x001, -1)
/* HiSilicon ('H') cores. */
-AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
+AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
/* ARMv8.3-A Architecture Processors. */
@@ -141,6 +141,7 @@ AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_ARCH8_
/* Armv8.5-A Architecture Processors. */
AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 8_5A, AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG, neoversen2, 0x41, 0xd49, -1)
+AARCH64_CORE("hip11", hip11, hip11, 8_5A, AARCH64_FL_FOR_ARCH8_5| AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_F16, hip11, 0x48, 0xd22, -1)
/* ARMv8-A big.LITTLE implementations. */
diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h
index c6805717f..377650be0 100644
--- a/gcc/config/aarch64/aarch64-cost-tables.h
+++ b/gcc/config/aarch64/aarch64-cost-tables.h
@@ -541,6 +541,110 @@ const struct cpu_cost_table tsv110_extra_costs =
}
};
+const struct cpu_cost_table hip11_extra_costs =
+{
+ /* ALU */
+ {
+ 0, /* arith. */
+ 0, /* logical. */
+ 0, /* shift. */
+ 0, /* shift_reg. */
+ COSTS_N_INSNS (1), /* arith_shift. */
+ COSTS_N_INSNS (1), /* arith_shift_reg. */
+ COSTS_N_INSNS (1), /* log_shift. */
+ COSTS_N_INSNS (1), /* log_shift_reg. */
+ 0, /* extend. */
+ COSTS_N_INSNS (1), /* extend_arith. */
+ 0, /* bfi. */
+ 0, /* bfx. */
+ 0, /* clz. */
+ 0, /* rev. */
+ 0, /* non_exec. */
+ true /* non_exec_costs_exec. */
+ },
+
+ {
+ /* MULT SImode */
+ {
+ COSTS_N_INSNS (2), /* simple. */
+ COSTS_N_INSNS (2), /* flag_setting. */
+ COSTS_N_INSNS (2), /* extend. */
+ COSTS_N_INSNS (2), /* add. */
+ COSTS_N_INSNS (2), /* extend_add. */
+ COSTS_N_INSNS (11) /* idiv. */
+ },
+ /* MULT DImode */
+ {
+ COSTS_N_INSNS (3), /* simple. */
+ 0, /* flag_setting (N/A). */
+ COSTS_N_INSNS (3), /* extend. */
+ COSTS_N_INSNS (3), /* add. */
+ COSTS_N_INSNS (3), /* extend_add. */
+ COSTS_N_INSNS (19) /* idiv. */
+ }
+ },
+ /* LD/ST */
+ {
+ COSTS_N_INSNS (3), /* load. */
+ COSTS_N_INSNS (4), /* load_sign_extend. */
+ COSTS_N_INSNS (3), /* ldrd. */
+ COSTS_N_INSNS (3), /* ldm_1st. */
+ 1, /* ldm_regs_per_insn_1st. */
+ 2, /* ldm_regs_per_insn_subsequent. */
+ COSTS_N_INSNS (4), /* loadf. */
+ COSTS_N_INSNS (4), /* loadd. */
+ COSTS_N_INSNS (4), /* load_unaligned. */
+ 0, /* store. */
+ 0, /* strd. */
+ 0, /* stm_1st. */
+ 1, /* stm_regs_per_insn_1st. */
+ 2, /* stm_regs_per_insn_subsequent. */
+ 0, /* storef. */
+ 0, /* stored. */
+ COSTS_N_INSNS (1), /* store_unaligned. */
+ COSTS_N_INSNS (4), /* loadv. */
+ COSTS_N_INSNS (4) /* storev. */
+ },
+ {
+ /* FP SFmode */
+ {
+ COSTS_N_INSNS (10), /* div. */
+ COSTS_N_INSNS (4), /* mult. */
+ COSTS_N_INSNS (4), /* mult_addsub. */
+ COSTS_N_INSNS (4), /* fma. */
+ COSTS_N_INSNS (4), /* addsub. */
+ COSTS_N_INSNS (1), /* fpconst. */
+ COSTS_N_INSNS (1), /* neg. */
+ COSTS_N_INSNS (1), /* compare. */
+ COSTS_N_INSNS (2), /* widen. */
+ COSTS_N_INSNS (2), /* narrow. */
+ COSTS_N_INSNS (2), /* toint. */
+ COSTS_N_INSNS (1), /* fromint. */
+ COSTS_N_INSNS (2) /* roundint. */
+ },
+ /* FP DFmode */
+ {
+ COSTS_N_INSNS (17), /* div. */
+ COSTS_N_INSNS (4), /* mult. */
+ COSTS_N_INSNS (6), /* mult_addsub. */
+ COSTS_N_INSNS (6), /* fma. */
+ COSTS_N_INSNS (3), /* addsub. */
+ COSTS_N_INSNS (1), /* fpconst. */
+ COSTS_N_INSNS (1), /* neg. */
+ COSTS_N_INSNS (1), /* compare. */
+ COSTS_N_INSNS (2), /* widen. */
+ COSTS_N_INSNS (2), /* narrow. */
+ COSTS_N_INSNS (2), /* toint. */
+ COSTS_N_INSNS (1), /* fromint. */
+ COSTS_N_INSNS (2) /* roundint. */
+ }
+ },
+ /* Vector */
+ {
+ COSTS_N_INSNS (1) /* alu. */
+ }
+};
+
const struct cpu_cost_table a64fx_extra_costs =
{
/* ALU */
diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
index 7fda2294b..f33a3330d 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
;; -*- buffer-read-only: t -*-
;; Generated automatically by gentune.sh from aarch64-cores.def
(define_attr "tune"
- "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,saphira,neoversen2,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
+ "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,saphira,neoversen2,hip11,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index da4983236..938948f29 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -448,6 +448,22 @@ static const struct cpu_addrcost_table tsv110_addrcost_table =
0, /* imm_offset */
};
+static const struct cpu_addrcost_table hip11_addrcost_table =
+{
+ {
+ 1, /* hi */
+ 0, /* si */
+ 0, /* di */
+ 1, /* ti */
+ },
+ 0, /* pre_modify */
+ 0, /* post_modify */
+ 0, /* register_offset */
+ 1, /* register_sextend */
+ 1, /* register_zextend */
+ 0, /* imm_offset */
+};
+
static const struct cpu_addrcost_table qdf24xx_addrcost_table =
{
{
@@ -575,6 +591,16 @@ static const struct cpu_regmove_cost tsv110_regmove_cost =
2 /* FP2FP */
};
+static const struct cpu_regmove_cost hip11_regmove_cost =
+{
+ 1, /* GP2GP */
+ /* Avoid the use of slow int<->fp moves for spilling by setting
+ their cost higher than memmov_cost. */
+ 2, /* GP2FP */
+ 3, /* FP2GP */
+ 2 /* FP2FP */
+};
+
static const struct cpu_regmove_cost a64fx_regmove_cost =
{
1, /* GP2GP */
@@ -664,6 +690,25 @@ static const struct cpu_vector_cost tsv110_vector_cost =
1 /* cond_not_taken_branch_cost */
};
+static const struct cpu_vector_cost hip11_vector_cost =
+{
+ 1, /* scalar_int_stmt_cost */
+ 1, /* scalar_fp_stmt_cost */
+ 5, /* scalar_load_cost */
+ 1, /* scalar_store_cost */
+ 2, /* vec_int_stmt_cost */
+ 2, /* vec_fp_stmt_cost */
+ 2, /* vec_permute_cost */
+ 5, /* vec_to_scalar_cost */
+ 5, /* scalar_to_vec_cost */
+ 5, /* vec_align_load_cost */
+ 5, /* vec_unalign_load_cost */
+ 1, /* vec_unalign_store_cost */
+ 1, /* vec_store_cost */
+ 1, /* cond_taken_branch_cost */
+ 1 /* cond_not_taken_branch_cost */
+};
+
/* Generic costs for vector insn classes. */
static const struct cpu_vector_cost cortexa57_vector_cost =
{
@@ -902,6 +947,17 @@ static const cpu_prefetch_tune tsv110_prefetch_tune =
-1 /* default_opt_level */
};
+static const cpu_prefetch_tune hip11_prefetch_tune =
+{
+ 0, /* num_slots */
+ 64, /* l1_cache_size */
+ 64, /* l1_cache_line_size */
+ 512, /* l2_cache_size */
+ true, /* prefetch_dynamic_strides */
+ -1, /* minimum_stride */
+ -1 /* default_opt_level */
+};
+
static const cpu_prefetch_tune xgene1_prefetch_tune =
{
8, /* num_slots */
@@ -1196,6 +1252,33 @@ static const struct tune_params tsv110_tunings =
&tsv110_prefetch_tune
};
+static const struct tune_params hip11_tunings =
+{
+ &hip11_extra_costs,
+ &hip11_addrcost_table,
+ &hip11_regmove_cost,
+ &hip11_vector_cost,
+ &generic_branch_cost,
+ &generic_approx_modes,
+ SVE_512, /* sve_width */
+ 4, /* memmov_cost */
+ 4, /* issue_rate */
+ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_ALU_BRANCH
+ | AARCH64_FUSE_ALU_CBZ), /* fusible_ops */
+ "16", /* function_align. */
+ "4", /* jump_align. */
+ "8", /* loop_align. */
+ 2, /* int_reassoc_width. */
+ 4, /* fp_reassoc_width. */
+ 1, /* vec_reassoc_width. */
+ 2, /* min_div_recip_mul_sf. */
+ 2, /* min_div_recip_mul_df. */
+ 0, /* max_case_values. */
+ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
+ (AARCH64_EXTRA_TUNE_PREFER_ADVSIMD_AUTOVEC), /* tune_flags. */
+ &hip11_prefetch_tune
+};
+
static const struct tune_params xgene1_tunings =
{
&xgene1_extra_costs,
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 7c2562f49..38af8d000 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -452,6 +452,7 @@
(include "../arm/cortex-a57.md")
(include "../arm/exynos-m1.md")
(include "falkor.md")
+(include "hip11.md")
(include "saphira.md")
(include "thunderx.md")
(include "../arm/xgene1.md")
diff --git a/gcc/config/aarch64/hip11.md b/gcc/config/aarch64/hip11.md
new file mode 100644
index 000000000..57944fbc2
--- /dev/null
+++ b/gcc/config/aarch64/hip11.md
@@ -0,0 +1,418 @@
+;; hip11 pipeline description
+;; Copyright (C) 2018-2024 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+;; General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "hip11")
+
+;; The hip11 core is modelled as an issue pipeline that has
+;; the following functional units.
+;; 1. Three pipelines for integer operations: ALU1, ALU2, ALU3
+
+(define_cpu_unit "hip11_alu1_issue" "hip11")
+(define_reservation "hip11_alu1" "hip11_alu1_issue")
+
+(define_cpu_unit "hip11_alu2_issue" "hip11")
+(define_reservation "hip11_alu2" "hip11_alu2_issue")
+
+(define_cpu_unit "hip11_alu3_issue" "hip11")
+(define_reservation "hip11_alu3" "hip11_alu3_issue")
+
+(define_reservation "hip11alu" "hip11_alu1|hip11_alu2|hip11_alu3")
+
+;; 2. One pipeline for complex integer operations: MDU
+
+(define_cpu_unit "hip11_mdu_issue" "hip11")
+(define_reservation "hip11_mdu" "hip11_mdu_issue")
+
+;; 3. Two asymmetric pipelines for ASIMD and FP operations: FSU1, FSU2
+(define_automaton "hip11_fsu")
+
+(define_cpu_unit "hip11_fsu1_issue"
+ "hip11_fsu")
+(define_cpu_unit "hip11_fsu2_issue"
+ "hip11_fsu")
+
+(define_reservation "hip11_fsu1" "hip11_fsu1_issue")
+(define_reservation "hip11_fsu2" "hip11_fsu2_issue")
+(define_reservation "hip11_fsu_pipe" "hip11_fsu1|hip11_fsu2")
+
+;; 4. Two pipelines for branch operations, shared with ALU2 and ALU3: BRU1, BRU2
+
+;; 5. Two pipelines for load and store operations: LS1, LS2.
+
+(define_cpu_unit "hip11_ls1_issue" "hip11")
+(define_cpu_unit "hip11_ls2_issue" "hip11")
+(define_reservation "hip11_ls1" "hip11_ls1_issue")
+(define_reservation "hip11_ls2" "hip11_ls2_issue")
+
+;; Block all issue queues.
+
+(define_reservation "hip11_block" "hip11_fsu1_issue + hip11_fsu2_issue
+ + hip11_mdu_issue + hip11_alu1_issue
+ + hip11_alu2_issue + hip11_alu3_issue + hip11_ls1_issue + hip11_ls2_issue")
+
+;; Branch execution Unit
+;;
+(define_insn_reservation "hip11_branch" 1
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "branch"))
+ "hip11_alu2|hip11_alu3")
+
+(define_insn_reservation "hip11_return_from_subroutine" 6
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "branch")
+ (eq_attr "sls_length" "retbr"))
+ "hip11_mdu,(hip11_alu2|hip11_alu3)")
+
+;; Simple Execution Unit:
+;;
+;; Simple ALU ops without a shifted operand (plain shifts included)
+(define_insn_reservation "hip11_alu" 1
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "alu_imm,logic_imm,\
+ alu_sreg,logic_reg,\
+ adc_imm,adc_reg,\
+ adr,bfm,clz,rbit,rev,\
+ shift_imm,shift_reg,\
+ mov_imm,mov_reg,\
+ mvn_imm,mvn_reg,\
+ mrs,multiple,csel,\
+ rotate_imm"))
+ "hip11_alu1|hip11_alu2|hip11_alu3")
+
+(define_insn_reservation "hip11_alus" 1
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "alus_imm,logics_imm,\
+ alus_sreg,logics_reg,\
+ adcs_imm,adcs_reg"))
+ "hip11_alu2|hip11_alu3")
+
+;; ALU ops with shift
+(define_insn_reservation "hip11_alu_shift" 2
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "extend,\
+ alu_shift_imm,alu_shift_reg,\
+ crc,logic_shift_imm,logic_shift_reg,\
+ mov_shift,mvn_shift,\
+ mov_shift_reg,mvn_shift_reg"))
+ "hip11_mdu")
+
+(define_insn_reservation "hip11_alus_shift" 2
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "alus_shift_imm,alus_shift_reg,\
+ logics_shift_imm,logics_shift_reg"))
+ "hip11_alu2|hip11_alu3")
+
+;; Multiply instructions
+(define_insn_reservation "hip11_mult" 3
+ (and (eq_attr "tune" "hip11")
+ (ior (eq_attr "mul32" "yes")
+ (eq_attr "widen_mul64" "yes")))
+ "hip11_mdu")
+
+;; Integer divide
+(define_insn_reservation "hip11_div" 10
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "udiv,sdiv"))
+ "hip11_mdu")
+
+(define_insn_reservation "hip11_mla" 4
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "mla,smlal,umlal,smull,umull"))
+ "hip11_mdu")
+
+;; Block all issue pipes for a cycle
+(define_insn_reservation "hip11_block" 1
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "block"))
+ "hip11_block")
+
+;; Load-store execution Unit
+;;
+(define_insn_reservation "hip11_load1" 4
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "load_4,load_8,load_16"))
+ "hip11_ls1|hip11_ls2")
+
+(define_insn_reservation "hip11_fp_load" 5
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "f_loads,f_loadd"))
+ "hip11_ls1|hip11_ls2")
+
+(define_insn_reservation "hip11_neon_ld1_single" 7
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\
+ neon_load1_all_lanes,neon_load1_all_lanes_q"))
+ "(hip11_ls1|hip11_ls2)+hip11_fsu1")
+
+(define_insn_reservation "hip11_neon_ld1_1reg" 5
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_load1_1reg,neon_load1_1reg_q"))
+ "hip11_ls1|hip11_ls2")
+
+(define_insn_reservation "hip11_neon_ld1_2reg" 6
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
+ "hip11_ls1|hip11_ls2")
+
+(define_insn_reservation "hip11_neon_ld1_3reg" 7
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
+ "hip11_ls1|hip11_ls2")
+
+(define_insn_reservation "hip11_neon_ld1_4reg" 8
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
+ "hip11_ls1|hip11_ls2")
+
+(define_insn_reservation "hip11_neon_ld2" 8
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_load2_one_lane,neon_load2_one_lane_q,\
+ neon_load2_all_lanes,neon_load2_all_lanes_q,\
+ neon_load2_2reg,neon_load2_2reg_q,\
+ neon_load2_4reg,neon_load2_4reg_q"))
+ "(hip11_ls1|hip11_ls2)+hip11_fsu1")
+
+(define_insn_reservation "hip11_neon_ld3_single" 9
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_load3_one_lane,neon_load3_one_lane_q,\
+ neon_load3_all_lanes,neon_load3_all_lanes_q"))
+ "(hip11_ls1|hip11_ls2)+hip11_fsu1")
+
+(define_insn_reservation "hip11_neon_ld3_multiple" 13
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q"))
+ "(hip11_ls1|hip11_ls2)+hip11_fsu1")
+
+(define_insn_reservation "hip11_neon_ld4_single" 10
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_load4_one_lane,neon_load4_one_lane_q,\
+ neon_load4_all_lanes,neon_load4_all_lanes_q"))
+ "(hip11_ls1|hip11_ls2)+hip11_fsu1")
+
+(define_insn_reservation "hip11_neon_ld4_multiple" 11
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_load4_4reg,neon_load4_4reg_q"))
+ "(hip11_ls1|hip11_ls2)+hip11_fsu1")
+
+;; Stores of up to two words.
+(define_insn_reservation "hip11_store1" 1
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "store_4,store_8,store_16,\
+ f_stored,f_stores"))
+ "hip11_ls1|hip11_ls2")
+
+;; Floating-Point Operations.
+(define_insn_reservation "hip11_fp_arith" 2
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "ffariths,ffarithd,f_minmaxs,\
+ f_minmaxd,fadds,faddd,neon_fcadd"))
+ "hip11_fsu_pipe")
+
+(define_insn_reservation "hip11_fp_mul" 3
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_fp_mul_d,neon_fp_mul_d_q,\
+ neon_fp_mul_s_scalar,neon_fp_mul_s_scalar_q,\
+ neon_fp_mul_d_scalar_q,fmuld,fmuls"))
+ "hip11_fsu_pipe")
+
+(define_insn_reservation "hip11_fp_cmp" 2
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "fccmpd,fccmps"))
+ "hip11alu,hip11_fsu_pipe")
+
+(define_insn_reservation "hip11_fp_csel" 2
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "fcsel"))
+ "hip11alu,hip11_fsu1")
+
+(define_insn_reservation "hip11_fp_fcmp" 1
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "fcmpd,fcmps"))
+ "hip11_fsu_pipe")
+
+(define_insn_reservation "hip11_fp_divs" 7
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "fdivs"))
+ "hip11_fsu1")
+
+(define_insn_reservation "hip11_fp_divd" 10
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "fdivd"))
+ "hip11_fsu1")
+
+(define_insn_reservation "hip11_fp_sqrts" 9
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "fsqrts"))
+ "hip11_fsu1")
+
+(define_insn_reservation "hip11_fp_sqrtd" 15
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "fsqrtd"))
+ "hip11_fsu1")
+
+(define_insn_reservation "hip11_fp_mac" 4
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
+ "hip11_fsu_pipe")
+
+(define_insn_reservation "hip11_fp_mov" 1
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "fmov,neon_dup,neon_dup_q,\
+ neon_from_gp,neon_from_gp_q,\
+ neon_ins,neon_ins_q,\
+ neon_to_gp,neon_to_gp_q,\
+ neon_move,neon_move_q,\
+ neon_rev,neon_rev_q,\
+ neon_permute,neon_permute_q,\
+ neon_shift_imm_narrow_q,\
+ neon_ext,neon_ext_q,\
+ neon_rbit,\
+ crypto_sha3,neon_tbl1,neon_tbl1_q,\
+ neon_tbl2_q,f_mcr,neon_tst,neon_tst_q,\
+ neon_move_narrow_q"))
+ "hip11_fsu1")
+
+;; ASIMD instructions
+(define_insn_reservation "hip11_asimd_simple_arithmetic" 2
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_abs,neon_abs_q,neon_neg,neon_neg_q,\
+ neon_abd,neon_abd_q,\
+ neon_add_long,neon_sub_long,neon_sub_widen,neon_add_widen,\
+ neon_add_halve_narrow_q,neon_sub_halve_narrow_q,\
+ neon_arith_acc,neon_arith_acc_q,\
+ neon_compare,neon_compare_q,\
+ neon_compare_zero,neon_compare_zero_q,\
+ neon_minmax,neon_minmax_q,\
+ neon_logic,neon_logic_q,\
+ neon_reduc_add,neon_reduc_add_q,\
+ neon_reduc_minmax,neon_reduc_minmax_q,\
+ neon_fp_to_int_s,neon_fp_to_int_s_q,\
+ neon_fp_to_int_d,neon_fp_to_int_d_q,\
+ neon_fp_cvt_widen_s,\
+ neon_fp_cvt_narrow_d_q,\
+ neon_cls,neon_cls_q,\
+ neon_cnt,neon_cnt_q,\
+ f_rints,f_rintd,f_cvtf2i,f_cvt,\
+ neon_tbl3,neon_fp_round_s,neon_fp_round_s_q,\
+ neon_fp_round_d,neon_fp_round_d_q,\
+ neon_int_to_fp_s,neon_fp_recpe_s,neon_fp_recpe_s_q,\
+ neon_fp_recpe_d,neon_fp_recpe_d_q,\
+ neon_fp_cvt_narrow_s_q,\
+ crypto_aese,crypto_aesmc,\
+ crypto_sha1_fast,crypto_sha1_xor,\
+ crypto_sha1_slow,\
+ crypto_sha256_fast,\
+ crypto_sha512,crypto_sm3,\
+ neon_qabs,neon_qabs_q,\
+ neon_qneg,neon_qneg_q,\
+ neon_qadd,neon_qadd_q,\
+ neon_qsub,neon_qsub_q,\
+ neon_add_halve,neon_add_halve_q,\
+ neon_sub_halve,neon_sub_halve_q,\
+ neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_s_q,\
+ neon_fp_reduc_minmax_d,neon_fp_reduc_minmax_d_q,\
+ neon_fp_rsqrte_s,neon_fp_rsqrte_s_q,\
+ neon_fp_rsqrte_d,neon_fp_rsqrte_d_q"))
+ "hip11_fsu1")
+
+(define_insn_reservation "hip11_asimd_complex_arithmetic" 4
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_mul_b,neon_mul_b_q,\
+ neon_mul_h,neon_mul_h_q,\
+ neon_mul_s,neon_mul_s_q,\
+ neon_mla_b,neon_mla_b_q,\
+ neon_mla_h,neon_mla_h_q,\
+ neon_mla_s,\
+ neon_mla_h_scalar,neon_mla_h_scalar_q,\
+ neon_mla_s_scalar,neon_mla_s_scalar_q,\
+ neon_sat_mul_h_scalar,neon_sat_mul_h_scalar_q,\
+ neon_sat_mul_s_scalar,neon_sat_mul_s_scalar_q,\
+ neon_sat_mul_b,neon_sat_mul_b_q,\
+ neon_sat_mul_h,neon_sat_mul_h_q,\
+ neon_sat_mul_s,neon_sat_mul_s_q,\
+ neon_mla_b_long,neon_mla_h_long,neon_mla_s_long,\
+ neon_mul_b_long,neon_mul_h_long,neon_mul_s_long,\
+ neon_sat_mla_b_long,neon_sat_mla_h_long,neon_sat_mla_s_long,\
+ neon_sat_mla_h_scalar_long,neon_sat_mla_s_scalar_long,\
+ neon_sat_mul_b_long,neon_sat_mul_h_long,neon_sat_mul_s_long,\
+ neon_sat_mul_h_scalar_long,neon_sat_mul_s_scalar_long,\
+ crypto_pmull,\
+ neon_sat_shift_reg,neon_sat_shift_reg_q,\
+ neon_shift_reg,neon_shift_reg_q,\
+ neon_shift_imm,neon_shift_imm_q,\
+ neon_shift_imm_long,\
+ neon_sat_shift_imm,neon_sat_shift_imm_q,\
+ neon_sat_shift_imm_narrow_q,\
+ neon_shift_acc,neon_shift_acc_q,\
+ crypto_sha256_slow"))
+ "hip11_fsu1")
+
+(define_insn_reservation "hip11_asimd_fp_compare" 2
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_fp_abs_s,neon_fp_abs_s_q,\
+ neon_fp_abs_d,neon_fp_abs_d_q,\
+ neon_fp_neg_s,neon_fp_neg_s_q,\
+ neon_fp_neg_d,neon_fp_neg_d_q,\
+ neon_fp_compare_s,neon_fp_compare_s_q,\
+ neon_fp_compare_d,neon_fp_compare_d_q,\
+ neon_fp_minmax_s,neon_fp_minmax_s_q,\
+ neon_fp_minmax_d,neon_fp_minmax_d_q,\
+ neon_fp_addsub_s,neon_fp_addsub_s_q,\
+ neon_fp_addsub_d,neon_fp_addsub_d_q,\
+ neon_fp_reduc_add_s,neon_fp_reduc_add_s_q,\
+ neon_fp_reduc_add_d,neon_fp_reduc_add_d_q,\
+ neon_fp_abd_s,neon_fp_abd_s_q,\
+ neon_fp_abd_d,neon_fp_abd_d_q"))
+ "hip11_fsu_pipe")
+
+(define_insn_reservation "hip11_asimd_fdiv" 10
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_fp_div_s,neon_fp_div_s_q,\
+ neon_fp_div_d,neon_fp_div_d_q"))
+ "hip11_fsu1")
+
+(define_insn_reservation "hip11_asimd_fsqrt" 15
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_fp_sqrt_s,neon_fp_sqrt_s_q,\
+ neon_fp_sqrt_d,neon_fp_sqrt_d_q"))
+ "hip11_fsu1")
+
+(define_insn_reservation "hip11_asimd_fp_multiply_add" 4
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_fp_mla_s,neon_fp_mla_s_q,\
+ neon_fp_mla_d,neon_fp_mla_d_q,\
+ neon_fp_mla_s_scalar,neon_fp_mla_s_scalar_q,\
+ neon_fp_mul_s,neon_fp_mul_s_q,neon_fcmla,\
+ neon_fp_recps_s,neon_fp_recps_s_q,\
+ neon_fp_recps_d,neon_fp_recps_d_q,\
+ neon_fp_rsqrts_s,neon_fp_rsqrts_s_q,\
+ neon_fp_rsqrts_d,neon_fp_rsqrts_d_q"))
+ "hip11_fsu_pipe")
+
+(define_insn_reservation "hip11_asimd_frecpx" 3
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_fp_recpx_s,neon_fp_recpx_s_q,\
+ neon_fp_recpx_d,neon_fp_recpx_d_q,neon_tbl4,\
+ neon_dot,neon_dot_q"))
+ "hip11_fsu1")
+
+(define_insn_reservation "hip11_asimd_mmla" 6
+ (and (eq_attr "tune" "hip11")
+ (eq_attr "type" "neon_mla_s_q"))
+ "hip11_fsu1")
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index a0a84c20b..ce0dd2ff6 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -17049,7 +17049,7 @@ performance of the code. Permissible values for this option are:
@samp{octeontx2}, @samp{octeontx2t98}, @samp{octeontx2t96}
@samp{octeontx2t93}, @samp{octeontx2f95}, @samp{octeontx2f95n},
@samp{octeontx2f95mm},
-@samp{a64fx},
+@samp{a64fx}, @samp{hip11},
@samp{thunderx}, @samp{thunderxt88},
@samp{thunderxt88p1}, @samp{thunderxt81}, @samp{tsv110},
@samp{thunderxt83}, @samp{thunderx2t99}, @samp{thunderx3t110}, @samp{zeus},
--
2.33.0

170
gcc.spec
View File

@ -46,7 +46,7 @@
%else
%global build_libitm 0
%endif
%global build_isl 1
%global build_isl 0
%global build_libstdcxx_docs 0
%ifarch %{ix86} x86_64 ppc ppc64 ppc64le ppc64p7 s390 s390x %{arm} aarch64 %{mips}
%global attr_ifunc 1
@ -61,7 +61,7 @@
Summary: Various compilers (C, C++, Objective-C, ...)
Name: gcc
Version: %{gcc_version}
Release: 42
Release: 53
License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD
URL: https://gcc.gnu.org
@ -261,6 +261,42 @@ Patch150: 0150-Implement-propagation-of-permutations-in-fwprop.patch
Patch151: 0151-Fix-bugs-and-add-tests-for-RTL-ifcvt.patch
Patch152: 0152-Add-LLC-Allocation-Pass.patch
Patch153: 0153-LLC-add-extending-outer-loop.patch
Patch154: 0154-Loop-CRC32-Judge-null-on-pointers-and-solving-coding.patch
Patch155: 0155-Add-maxmin-and-uzp1-uzp2-combining.patch
Patch156: 0156-add-icp-optimization.patch
Patch157: 0157-Add-split-complex-instructions-pass.patch
Patch158: 0158-Implement-IPA-prefetch-optimization.patch
Patch159: 0159-Implement-AES-pattern-matching.patch
Patch160: 0160-AES-Add-lost-files.patch
Patch161: 0161-Fix-lost-ftree-fold-phiopt-option-in-tests.patch
Patch162: 0162-rtl-ifcvt-free-dominance-info-before-cleanup_cfg.patch
Patch163: 0163-Loop-CRC-Solving-the-problem-of-insufficient-CRC-tab.patch
Patch164: 0164-LLC-Allocation-Fix-some-bugs-and-remove-variable-pre.patch
Patch165: 0165-rtl-ifcvt-BugFix-change-def-selection-logic-in-noce_.patch
Patch166: 0166-perm-propagation-Bugfix-Check-that-the-arithmetic-op.patch
Patch167: 0167-perm-propagation-Bugfix-Fix-shll-shll2-patterns-for-.patch
Patch168: 0168-LLC-Allocation-Bugfix-Terminate-kernel-filtering-for.patch
Patch169: 0169-Struct-Reorg-Fix-several-bugs.patch
Patch170: 0170-DFE-Add-escape-check.patch
Patch171: 0171-phiopt-testsuite-Add-ftree-fold-phiopt-option-to-5-t.patch
Patch172: 0172-minmax-Move-minmax-pattern-to-gimple.patch
Patch173: 0173-IPA-Fix-test-completion-1.c.patch
Patch174: 0174-IPA-Fix-fails-on-checked-build-and-comments-from-rev.patch
Patch175: 0175-split-ldp-stp-Extending-and-refactoring-of-pass_spli.patch
Patch176: 0176-Fix-bugs-in-ICP-src-openEuler-gcc-I8PYBF-I8PYLL.patch
Patch177: 0177-Fix-sqlite-build.patch
Patch178: 0178-Fix-freetype-build.patch
Patch179: 0179-rtl-ifcvt-refuse-to-rename-def-in-the-last-instructi.patch
Patch180: 0180-add-optimization-level-requirement-to-the-gate.patch
Patch181: 0181-Fix-issue-I8QD9H.patch
Patch182: 0182-Fix-bugs-in-ICP-src-openEuler-gcc-I8RKFJ.patch
Patch183: 0183-Fix-fail-in-ICP-src-openEuler-gcc-I8RP4H.patch
Patch184: 0184-Fix-fail-in-IPA-prefetch-src-openEuler-gcc-I8RURA.patch
Patch185: 0185-Fix-fail-in-IPA-prefetch-src-openEuler-gcc-I8RV7T.patch
Patch186: 0186-Loop-CRC-Solving-the-problem-of-insufficient-CRC-tab.patch
Patch187: 0187-Add-IPA-prefetch-test.patch
Patch188: 0188-Fix-fails-in-ICP-for-src-openEuler-gcc-I90P7M-I91CZ8.patch
Patch189: 0189-Add-hip11-CPU-pipeline-scheduling.patch
%global gcc_target_platform %{_arch}-linux-gnu
@ -867,6 +903,42 @@ not stable, so plugins must be rebuilt any time GCC is updated.
%patch151 -p1
%patch152 -p1
%patch153 -p1
%patch154 -p1
%patch155 -p1
%patch156 -p1
%patch157 -p1
%patch158 -p1
%patch159 -p1
%patch160 -p1
%patch161 -p1
%patch162 -p1
%patch163 -p1
%patch164 -p1
%patch165 -p1
%patch166 -p1
%patch167 -p1
%patch168 -p1
%patch169 -p1
%patch170 -p1
%patch171 -p1
%patch172 -p1
%patch173 -p1
%patch174 -p1
%patch175 -p1
%patch176 -p1
%patch177 -p1
%patch178 -p1
%patch179 -p1
%patch180 -p1
%patch181 -p1
%patch182 -p1
%patch183 -p1
%patch184 -p1
%patch185 -p1
%patch186 -p1
%patch187 -p1
%patch188 -p1
%patch189 -p1
%build
@ -932,10 +1004,15 @@ CC="$CC" CFLAGS="$OPT_FLAGS" \
--with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions \
--enable-gnu-unique-object --enable-linker-build-id --with-linker-hash-style=gnu \
--enable-languages=c,c++,fortran${enablelobjc}${enablelada}${enablelgo}${enableld},lto --enable-plugin \
--enable-initfini-array --disable-libgcj --with-isl --without-cloog \
--enable-initfini-array --disable-libgcj --without-cloog \
--enable-gnu-indirect-function --build=%{gcc_target_platform} \
--with-stage1-ldflags="$OPT_LDFLAGS" \
--with-boot-ldflags="$OPT_LDFLAGS" --disable-bootstrap \
%if %{build_isl}
--with-isl \
%else
--without-isl \
%endif
%ifarch x86_64
--with-tune=generic \
--with-arch_32=x86-64 \
@ -2891,23 +2968,90 @@ end
%doc rpm.doc/changelogs/libcc1/ChangeLog*
%changelog
* Wed May 29 2024 zhengchenhui <zhengchenhui1@huawei.com> - 10.3.1-42
- Type:Spec
- ID:NA
- SUG:NA
- DESC: Revert last two commits about isl and ppc64le, and Sync patch from openeuler/gcc
* Mon Apr 15 2024 huyubiao <huyubiao@huawei.com> - 10.3.1-41
* Mon Apr 15 2024 huyubiao <huyubiao@huawei.com> - 10.3.1-53
- Type:SPEC
- ID:NA
- SUG:NA
- DESC:disable isl
* Thu Mar 14 2024 chenyuanfeng <yuanfeng.chen@shingroup.cn> - 10.3.1-40
- Type: Spec
* Mon Apr 8 2024 Chenhui Zheng <zhengchenhui1@huawei.com> - 10.3.1-52
- Type:Sync
- ID:NA
- SUG:NA
- DESC: Set default configuration for the ppc64le
- DESC: Sync patch from openeuler/gcc
* Fri Feb 23 2024 Chenhui Zheng <zhengchenhui1@huawei.com> - 10.3.1-51
- Type:Sync
- ID:NA
- SUG:NA
- DESC: Sync patch from openeuler/gcc
* Thu Jan 4 2024 Chenhui Zheng <zhengchenhui1@huawei.com> - 10.3.1-50
- Type:Sync
- ID:NA
- SUG:NA
- DESC: Sync patch from openeuler/gcc
* Thu Dec 28 2023 Xiong Zhou <xiongzhou4@huawei.com> - 10.3.1-49
- Type:Revert & sync
- ID:NA
- SUG:NA
- DESC: Revert ICP and IPA prefetch related patches.
Sync patch from openeuler/gcc.
* Sat Dec 23 2023 Chenhui Zheng <zhengchenhui1@huawei.com> - 10.3.1-48
- Type:Sync
- ID:NA
- SUG:NA
- DESC: Sync patch from openeuler/gcc
* Fri Dec 22 2023 Feiyang Liu <liufeiyang6@huawei.com> - 10.3.1-47
- Type:Sync
- ID:NA
- SUG:NA
- DESC: Sync patch from openeuler/gcc
* Thu Dec 21 2023 Xiong Zhou <xiongzhou4@huawei.com> - 10.3.1-46
- Type:Sync
- ID:NA
- SUG:NA
- DESC: Sync patch from openeuler/gcc
* Mon Dec 18 2023 Xiong Zhou <xiongzhou4@huawei.com> - 10.3.1-45
- Type:Sync
- ID:NA
- SUG:NA
- DESC: Sync patches from openeuler/gcc
* Fri Dec 15 2023 Xiong Zhou <xiongzhou4@huawei.com> - 10.3.1-44
- Type:Sync
- ID:NA
- SUG:NA
- DESC: Sync patches from openeuler/gcc
* Tue Dec 12 2023 Xiong Zhou <xiongzhou4@huawei.com> - 10.3.1-43
- Type:Spec
- ID:NA
- SUG:NA
- DESC: Sync patches from openeuler/gcc
* Tue Dec 12 2023 Shujian Zhao <zhaoshujian@huawei.com> - 10.3.1-42
- Type:Spec
- ID:NA
- SUG:NA
- DESC: Sync patch from openeuler/gcc, add LLC extending outer loop.
* Mon Dec 11 2023 Feiyang Liu <liufeiyang6@huawei.com> - 10.3.1-41
- Type:Spec
- ID:NA
- SUG:NA
- DESC: Sync patch from openeuler/gcc
* Wed Dec 6 2023 Wang Ding <wangding16@huawei.com> - 10.3.1-40
- Type:Spec
- ID:NA
- SUG:NA
- DESC: Sync patch from openeuler/gcc
* Wed Nov 29 2023 Mingchuan Wu <wumingchuan1992@foxmail.com> - 10.3.1-39
- Type:Spec