Sync from 22.03-LTS-SP3
This commit is contained in:
commit
e3ae83a05a
1772
0154-Loop-CRC32-Judge-null-on-pointers-and-solving-coding.patch
Normal file
1772
0154-Loop-CRC32-Judge-null-on-pointers-and-solving-coding.patch
Normal file
File diff suppressed because it is too large
Load Diff
477
0155-Add-maxmin-and-uzp1-uzp2-combining.patch
Normal file
477
0155-Add-maxmin-and-uzp1-uzp2-combining.patch
Normal file
@ -0,0 +1,477 @@
|
||||
From 1e886b98ff7ffdac023dcee8645717f2849d2eb7 Mon Sep 17 00:00:00 2001
|
||||
From: Diachkov Ilia WX1215920 <diachkov.ilia1@huawei-partners.com>
|
||||
Date: Wed, 25 Oct 2023 18:12:28 +0300
|
||||
Subject: [PATCH 1/6] Add maxmin and uzp1/uzp2 combining
|
||||
|
||||
---
|
||||
gcc/config/aarch64/aarch64-simd.md | 339 +++++++++++++++++++++++++-
|
||||
gcc/config/aarch64/predicates.md | 19 ++
|
||||
gcc/testsuite/gcc.dg/combine-maxmin.c | 46 ++++
|
||||
3 files changed, 399 insertions(+), 5 deletions(-)
|
||||
create mode 100755 gcc/testsuite/gcc.dg/combine-maxmin.c
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
|
||||
index 6049adc3f..7f707de57 100644
|
||||
--- a/gcc/config/aarch64/aarch64-simd.md
|
||||
+++ b/gcc/config/aarch64/aarch64-simd.md
|
||||
@@ -1034,6 +1034,82 @@
|
||||
[(set_attr "type" "neon_shift_imm<q>")]
|
||||
)
|
||||
|
||||
+;; Simplify an extension followed by a truncation for the shift+neg operation.
|
||||
+
|
||||
+(define_insn_and_split "*aarch64_sshr_neg_v8hi"
|
||||
+ [(set (match_operand:V8HI 0 "register_operand" "=w")
|
||||
+ (vec_concat:V8HI
|
||||
+ (truncate:V4HI
|
||||
+ (ashiftrt:V4SI
|
||||
+ (neg:V4SI
|
||||
+ (sign_extend:V4SI
|
||||
+ (vec_select:V4HI
|
||||
+ (match_operand:V8HI 1 "register_operand")
|
||||
+ (match_operand:V8HI 3 "vect_par_cnst_lo_half"))))
|
||||
+ (match_operand:V4SI 2 "maxmin_arith_shift_operand")))
|
||||
+ (truncate:V4HI
|
||||
+ (ashiftrt:V4SI
|
||||
+ (neg:V4SI
|
||||
+ (sign_extend:V4SI
|
||||
+ (vec_select:V4HI
|
||||
+ (match_dup 1)
|
||||
+ (match_operand:V8HI 4 "vect_par_cnst_hi_half"))))
|
||||
+ (match_dup 2)))))]
|
||||
+ "TARGET_SIMD"
|
||||
+ "#"
|
||||
+ "&& true"
|
||||
+ [(set (match_operand:V8HI 0 "register_operand" "=w")
|
||||
+ (ashiftrt:V8HI
|
||||
+ (neg:V8HI
|
||||
+ (match_operand:V8HI 1 "register_operand" "w"))
|
||||
+ (match_operand:V8HI 2 "aarch64_simd_imm_minus_one")))]
|
||||
+ {
|
||||
+ /* Reduce the shift amount to smaller mode. */
|
||||
+ int val = INTVAL (CONST_VECTOR_ENCODED_ELT (operands[2], 0))
|
||||
+ - (GET_MODE_UNIT_BITSIZE (GET_MODE (operands[2])) / 2);
|
||||
+ operands[2] = aarch64_simd_gen_const_vector_dup (V8HImode, val);
|
||||
+ }
|
||||
+ [(set_attr "type" "multiple")]
|
||||
+)
|
||||
+
|
||||
+;; The helper definition that allows combiner to use the previous pattern.
|
||||
+
|
||||
+(define_insn_and_split "*aarch64_sshr_neg_tmpv8hi"
|
||||
+ [(set (match_operand:V8HI 0 "register_operand" "=w")
|
||||
+ (vec_concat:V8HI
|
||||
+ (truncate:V4HI
|
||||
+ (ashiftrt:V4SI
|
||||
+ (neg:V4SI
|
||||
+ (match_operand:V4SI 1 "register_operand" "w"))
|
||||
+ (match_operand:V4SI 2 "maxmin_arith_shift_operand")))
|
||||
+ (truncate:V4HI
|
||||
+ (ashiftrt:V4SI
|
||||
+ (neg:V4SI
|
||||
+ (match_operand:V4SI 3 "register_operand" "w"))
|
||||
+ (match_dup 2)))))]
|
||||
+ "TARGET_SIMD"
|
||||
+ "#"
|
||||
+ "&& true"
|
||||
+ [(set (match_operand:V4SI 1 "register_operand" "=w")
|
||||
+ (ashiftrt:V4SI
|
||||
+ (neg:V4SI
|
||||
+ (match_dup 1))
|
||||
+ (match_operand:V4SI 2 "maxmin_arith_shift_operand")))
|
||||
+ (set (match_operand:V4SI 3 "register_operand" "=w")
|
||||
+ (ashiftrt:V4SI
|
||||
+ (neg:V4SI
|
||||
+ (match_dup 3))
|
||||
+ (match_dup 2)))
|
||||
+ (set (match_operand:V8HI 0 "register_operand" "=w")
|
||||
+ (vec_concat:V8HI
|
||||
+ (truncate:V4HI
|
||||
+ (match_dup 1))
|
||||
+ (truncate:V4HI
|
||||
+ (match_dup 3))))]
|
||||
+ ""
|
||||
+ [(set_attr "type" "multiple")]
|
||||
+)
|
||||
+
|
||||
(define_insn "*aarch64_simd_sra<mode>"
|
||||
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
|
||||
(plus:VDQ_I
|
||||
@@ -1459,6 +1535,78 @@
|
||||
[(set_attr "type" "neon_minmax<q>")]
|
||||
)
|
||||
|
||||
+;; Use sequential smax+smin to replace vector arithmetic operations like this:
|
||||
+;; a = ((x & ~((1 << 8)-1)) ? (-x)>>31 & ((1 << 8)-1) : x);
|
||||
+;; TODO: maybe extend to scalar operations.
|
||||
+
|
||||
+(define_insn_and_split "*aarch64_maxmin_arith<mode>"
|
||||
+ [(set (match_operand:VDQHSD 0 "register_operand" "=w")
|
||||
+ (xor:VDQHSD
|
||||
+ (and:VDQHSD
|
||||
+ (xor:VDQHSD
|
||||
+ (ashiftrt:VDQHSD
|
||||
+ (neg:VDQHSD
|
||||
+ (match_operand:VDQHSD 1 "register_operand"))
|
||||
+ (match_operand:VDQHSD 2 "maxmin_arith_shift_operand"))
|
||||
+ (match_dup 1))
|
||||
+ (neg:VDQHSD
|
||||
+ (eq:VDQHSD
|
||||
+ (and:VDQHSD
|
||||
+ (match_dup 1)
|
||||
+ (match_operand:VDQHSD 3 "aarch64_bic_imm_for_maxmin"))
|
||||
+ (match_operand:VDQHSD 4 "aarch64_simd_or_scalar_imm_zero"))))
|
||||
+ (ashiftrt:VDQHSD
|
||||
+ (neg:VDQHSD
|
||||
+ (match_dup 1))
|
||||
+ (match_dup 2))))]
|
||||
+ "TARGET_SIMD && !reload_completed"
|
||||
+ "#"
|
||||
+ "&& true"
|
||||
+ [(set (match_operand:VDQHSD 5 "register_operand" "w") (match_dup 3))
|
||||
+ (set (match_operand:VDQHSD 6 "register_operand" "w") (match_dup 4))
|
||||
+ (set (match_operand:VDQHSD 0 "register_operand" "=w")
|
||||
+ (smax:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")
|
||||
+ (match_operand:VDQHSD 6 "register_operand" "w")))
|
||||
+ (set (match_operand:VDQHSD 0 "register_operand" "=w")
|
||||
+ (smin:VDQHSD (match_operand:VDQHSD 0 "register_operand" "w")
|
||||
+ (match_operand:VDQHSD 5 "register_operand" "w")))]
|
||||
+ {
|
||||
+ if (can_create_pseudo_p ())
|
||||
+ {
|
||||
+ int val = INTVAL (CONST_VECTOR_ENCODED_ELT (operands[3], 0));
|
||||
+ operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
|
||||
+ ~val);
|
||||
+ operands[5] = gen_reg_rtx (<MODE>mode);
|
||||
+ operands[6] = gen_reg_rtx (<MODE>mode);
|
||||
+ }
|
||||
+ else
|
||||
+ FAIL;
|
||||
+ }
|
||||
+ [(set_attr "type" "neon_minmax<q>")]
|
||||
+)
|
||||
+
|
||||
+;; The helper definition that allows combiner to use the previous pattern.
|
||||
+
|
||||
+(define_insn_and_split "*aarch64_maxmin_tmp<mode>"
|
||||
+ [(set (match_operand:VDQHSD 0 "register_operand" "=w")
|
||||
+ (ashiftrt:VDQHSD
|
||||
+ (neg:VDQHSD
|
||||
+ (match_operand:VDQHSD 1 "register_operand" "w"))
|
||||
+ (match_operand:VDQHSD 2 "maxmin_arith_shift_operand")))]
|
||||
+ "TARGET_SIMD"
|
||||
+ "#"
|
||||
+ "&& reload_completed"
|
||||
+ [(set (match_operand:VDQHSD 0 "register_operand")
|
||||
+ (neg:VDQHSD
|
||||
+ (match_operand:VDQHSD 1 "register_operand" "w")))
|
||||
+ (set (match_dup 0)
|
||||
+ (ashiftrt:VDQHSD
|
||||
+ (match_dup 0)
|
||||
+ (match_operand:VDQHSD 2 "maxmin_arith_shift_operand")))]
|
||||
+ ""
|
||||
+ [(set_attr "type" "neon_minmax<q>")]
|
||||
+)
|
||||
+
|
||||
;; Pairwise FP Max/Min operations.
|
||||
(define_insn "aarch64_<maxmin_uns>p<mode>"
|
||||
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
||||
@@ -1599,7 +1747,8 @@
|
||||
DONE;
|
||||
})
|
||||
|
||||
-;; For quads.
|
||||
+;; For quads. Use UZP1 on the narrower type, which discards the high part of
|
||||
+;; each wide element.
|
||||
|
||||
(define_insn "vec_pack_trunc_<mode>"
|
||||
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
|
||||
@@ -1609,12 +1758,32 @@
|
||||
"TARGET_SIMD"
|
||||
{
|
||||
if (BYTES_BIG_ENDIAN)
|
||||
- return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
|
||||
+ return "uzp1\\t%0.<V2ntype>, %2.<V2ntype>, %1.<V2ntype>";
|
||||
else
|
||||
- return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
|
||||
+ return "uzp1\\t%0.<V2ntype>, %1.<V2ntype>, %2.<V2ntype>";
|
||||
}
|
||||
- [(set_attr "type" "multiple")
|
||||
- (set_attr "length" "8")]
|
||||
+ [(set_attr "type" "neon_permute<q>")
|
||||
+ (set_attr "length" "4")]
|
||||
+)
|
||||
+
|
||||
+(define_insn "vec_pack_trunc_shifted_<mode>"
|
||||
+ [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
|
||||
+ (vec_concat:<VNARROWQ2>
|
||||
+ (truncate:<VNARROWQ>
|
||||
+ (ashiftrt:VQN (match_operand:VQN 1 "register_operand" "w")
|
||||
+ (match_operand:VQN 2 "half_size_operand" "w")))
|
||||
+ (truncate:<VNARROWQ>
|
||||
+ (ashiftrt:VQN (match_operand:VQN 3 "register_operand" "w")
|
||||
+ (match_operand:VQN 4 "half_size_operand" "w")))))]
|
||||
+ "TARGET_SIMD"
|
||||
+ {
|
||||
+ if (BYTES_BIG_ENDIAN)
|
||||
+ return "uzp2\\t%0.<V2ntype>, %3.<V2ntype>, %1.<V2ntype>";
|
||||
+ else
|
||||
+ return "uzp2\\t%0.<V2ntype>, %1.<V2ntype>, %3.<V2ntype>";
|
||||
+ }
|
||||
+ [(set_attr "type" "neon_permute<q>")
|
||||
+ (set_attr "length" "4")]
|
||||
)
|
||||
|
||||
;; Widening operations.
|
||||
@@ -4852,6 +5021,166 @@
|
||||
[(set_attr "type" "neon_tst<q>")]
|
||||
)
|
||||
|
||||
+;; Simplify an extension followed by a truncation for the cmtst-like operation.
|
||||
+
|
||||
+(define_insn_and_split "*aarch64_cmtst_arith_v8hi"
|
||||
+ [(set (match_operand:V8HI 0 "register_operand" "=w")
|
||||
+ (vec_concat:V8HI
|
||||
+ (plus:V4HI
|
||||
+ (truncate:V4HI
|
||||
+ (eq:V4SI
|
||||
+ (sign_extend:V4SI
|
||||
+ (vec_select:V4HI
|
||||
+ (and:V8HI
|
||||
+ (match_operand:V8HI 1 "register_operand")
|
||||
+ (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin"))
|
||||
+ (match_operand:V8HI 3 "vect_par_cnst_lo_half")))
|
||||
+ (match_operand:V4SI 4 "aarch64_simd_or_scalar_imm_zero")))
|
||||
+ (match_operand:V4HI 5 "aarch64_simd_imm_minus_one"))
|
||||
+ (plus:V4HI
|
||||
+ (truncate:V4HI
|
||||
+ (eq:V4SI
|
||||
+ (sign_extend:V4SI
|
||||
+ (vec_select:V4HI
|
||||
+ (and:V8HI
|
||||
+ (match_dup 1)
|
||||
+ (match_dup 2))
|
||||
+ (match_operand:V8HI 6 "vect_par_cnst_hi_half")))
|
||||
+ (match_dup 4)))
|
||||
+ (match_dup 5))))]
|
||||
+ "TARGET_SIMD && !reload_completed"
|
||||
+ "#"
|
||||
+ "&& true"
|
||||
+ [(set (match_operand:V8HI 6 "register_operand" "=w")
|
||||
+ (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin"))
|
||||
+ (set (match_operand:V8HI 0 "register_operand" "=w")
|
||||
+ (plus:V8HI
|
||||
+ (eq:V8HI
|
||||
+ (and:V8HI
|
||||
+ (match_operand:V8HI 1 "register_operand" "w")
|
||||
+ (match_dup 6))
|
||||
+ (match_operand:V8HI 4 "aarch64_simd_imm_zero"))
|
||||
+ (match_operand:V8HI 5 "aarch64_simd_imm_minus_one")))]
|
||||
+ {
|
||||
+ if (can_create_pseudo_p ())
|
||||
+ {
|
||||
+ int val = INTVAL (CONST_VECTOR_ENCODED_ELT (operands[4], 0));
|
||||
+ operands[4] = aarch64_simd_gen_const_vector_dup (V8HImode, val);
|
||||
+ int val2 = INTVAL (CONST_VECTOR_ENCODED_ELT (operands[5], 0));
|
||||
+ operands[5] = aarch64_simd_gen_const_vector_dup (V8HImode, val2);
|
||||
+
|
||||
+ operands[6] = gen_reg_rtx (V8HImode);
|
||||
+ }
|
||||
+ else
|
||||
+ FAIL;
|
||||
+ }
|
||||
+ [(set_attr "type" "neon_tst_q")]
|
||||
+)
|
||||
+
|
||||
+;; Three helper definitions that allow combiner to use the previous pattern.
|
||||
+
|
||||
+(define_insn_and_split "*aarch64_cmtst_arith_tmp_lo_v8hi"
|
||||
+ [(set (match_operand:V4SI 0 "register_operand" "=w")
|
||||
+ (neg:V4SI
|
||||
+ (eq:V4SI
|
||||
+ (sign_extend:V4SI
|
||||
+ (vec_select:V4HI
|
||||
+ (and:V8HI
|
||||
+ (match_operand:V8HI 1 "register_operand")
|
||||
+ (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin"))
|
||||
+ (match_operand:V8HI 3 "vect_par_cnst_lo_half")))
|
||||
+ (match_operand:V4SI 4 "aarch64_simd_or_scalar_imm_zero"))))]
|
||||
+ "TARGET_SIMD && !reload_completed"
|
||||
+ "#"
|
||||
+ "&& true"
|
||||
+ [(set (match_operand:V8HI 5 "register_operand" "=w")
|
||||
+ (and:V8HI
|
||||
+ (match_operand:V8HI 1 "register_operand")
|
||||
+ (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin")))
|
||||
+ (set (match_operand:V4SI 0 "register_operand" "=w")
|
||||
+ (sign_extend:V4SI
|
||||
+ (vec_select:V4HI
|
||||
+ (match_dup 5)
|
||||
+ (match_operand:V8HI 3 "vect_par_cnst_lo_half"))))
|
||||
+ (set (match_dup 0)
|
||||
+ (neg:V4SI
|
||||
+ (eq:V4SI
|
||||
+ (match_dup 0)
|
||||
+ (match_operand:V4SI 4 "aarch64_simd_or_scalar_imm_zero"))))]
|
||||
+ {
|
||||
+ if (can_create_pseudo_p ())
|
||||
+ operands[5] = gen_reg_rtx (V8HImode);
|
||||
+ else
|
||||
+ FAIL;
|
||||
+ }
|
||||
+ [(set_attr "type" "multiple")]
|
||||
+)
|
||||
+
|
||||
+(define_insn_and_split "*aarch64_cmtst_arith_tmp_hi_v8hi"
|
||||
+ [(set (match_operand:V4SI 0 "register_operand" "=w")
|
||||
+ (neg:V4SI
|
||||
+ (eq:V4SI
|
||||
+ (sign_extend:V4SI
|
||||
+ (vec_select:V4HI
|
||||
+ (and:V8HI
|
||||
+ (match_operand:V8HI 1 "register_operand")
|
||||
+ (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin"))
|
||||
+ (match_operand:V8HI 3 "vect_par_cnst_hi_half")))
|
||||
+ (match_operand:V4SI 4 "aarch64_simd_or_scalar_imm_zero"))))]
|
||||
+ "TARGET_SIMD && !reload_completed"
|
||||
+ "#"
|
||||
+ "&& true"
|
||||
+ [(set (match_operand:V8HI 5 "register_operand" "=w")
|
||||
+ (and:V8HI
|
||||
+ (match_operand:V8HI 1 "register_operand")
|
||||
+ (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin")))
|
||||
+ (set (match_operand:V4SI 0 "register_operand" "=w")
|
||||
+ (sign_extend:V4SI
|
||||
+ (vec_select:V4HI
|
||||
+ (match_dup 5)
|
||||
+ (match_operand:V8HI 3 "vect_par_cnst_hi_half"))))
|
||||
+ (set (match_dup 0)
|
||||
+ (neg:V4SI
|
||||
+ (eq:V4SI
|
||||
+ (match_dup 0)
|
||||
+ (match_operand:V4SI 4 "aarch64_simd_or_scalar_imm_zero"))))]
|
||||
+ {
|
||||
+ if (can_create_pseudo_p ())
|
||||
+ operands[5] = gen_reg_rtx (V8HImode);
|
||||
+ else
|
||||
+ FAIL;
|
||||
+ }
|
||||
+ [(set_attr "type" "multiple")]
|
||||
+)
|
||||
+
|
||||
+(define_insn_and_split "*aarch64_cmtst_arith_tmpv8hi"
|
||||
+ [(set (match_operand:V8HI 0 "register_operand" "=w")
|
||||
+ (vec_concat:V8HI
|
||||
+ (truncate:V4HI
|
||||
+ (not:V4SI
|
||||
+ (match_operand:V4SI 1 "register_operand" "w")))
|
||||
+ (truncate:V4HI
|
||||
+ (not:V4SI
|
||||
+ (match_operand:V4SI 2 "register_operand" "w")))))]
|
||||
+ "TARGET_SIMD"
|
||||
+ "#"
|
||||
+ "&& true"
|
||||
+ [(set (match_operand:V4SI 1 "register_operand" "=w")
|
||||
+ (not:V4SI
|
||||
+ (match_dup 1)))
|
||||
+ (set (match_operand:V4SI 2 "register_operand" "=w")
|
||||
+ (not:V4SI
|
||||
+ (match_dup 2)))
|
||||
+ (set (match_operand:V8HI 0 "register_operand" "=w")
|
||||
+ (vec_concat:V8HI
|
||||
+ (truncate:V4HI
|
||||
+ (match_dup 1))
|
||||
+ (truncate:V4HI
|
||||
+ (match_dup 2))))]
|
||||
+ ""
|
||||
+ [(set_attr "type" "multiple")]
|
||||
+)
|
||||
+
|
||||
(define_insn_and_split "aarch64_cmtstdi"
|
||||
[(set (match_operand:DI 0 "register_operand" "=w,r")
|
||||
(neg:DI
|
||||
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
|
||||
index 1754b1eff..3cd83334b 100644
|
||||
--- a/gcc/config/aarch64/predicates.md
|
||||
+++ b/gcc/config/aarch64/predicates.md
|
||||
@@ -91,6 +91,25 @@
|
||||
(match_test "aarch64_simd_valid_immediate (op, NULL,
|
||||
AARCH64_CHECK_ORR)"))))
|
||||
|
||||
+(define_predicate "aarch64_bic_imm_for_maxmin"
|
||||
+ (match_code "const_vector")
|
||||
+{
|
||||
+ if (!aarch64_simd_valid_immediate (op, NULL, AARCH64_CHECK_BIC))
|
||||
+ return false;
|
||||
+ op = unwrap_const_vec_duplicate (op);
|
||||
+ unsigned int size = GET_MODE_UNIT_BITSIZE (mode);
|
||||
+ return CONST_INT_P (op)
|
||||
+ && ((~UINTVAL (op)) < (((long unsigned int) 1 << size) - 1));
|
||||
+})
|
||||
+
|
||||
+(define_predicate "maxmin_arith_shift_operand"
|
||||
+ (match_code "const_vector")
|
||||
+{
|
||||
+ op = unwrap_const_vec_duplicate (op);
|
||||
+ unsigned int size = GET_MODE_UNIT_BITSIZE (mode) - 1;
|
||||
+ return CONST_INT_P (op) && (UINTVAL (op) == size);
|
||||
+})
|
||||
+
|
||||
(define_predicate "aarch64_reg_or_bic_imm"
|
||||
(ior (match_operand 0 "register_operand")
|
||||
(and (match_code "const_vector")
|
||||
diff --git a/gcc/testsuite/gcc.dg/combine-maxmin.c b/gcc/testsuite/gcc.dg/combine-maxmin.c
|
||||
new file mode 100755
|
||||
index 000000000..06bce7029
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/combine-maxmin.c
|
||||
@@ -0,0 +1,46 @@
|
||||
+/* { dg-do compile { target aarch64-*-* } } */
|
||||
+/* { dg-options "-O3 -fdump-rtl-combine-all" } */
|
||||
+
|
||||
+/* The test checks usage of smax/smin insns for clip evaluation and
|
||||
+ * uzp1/uzp2 insns for vector element narrowing. It's inspired by
|
||||
+ * sources of x264 codec. */
|
||||
+
|
||||
+typedef unsigned char uint8_t;
|
||||
+typedef long int intptr_t;
|
||||
+typedef signed short int int16_t;
|
||||
+
|
||||
+static __attribute__((always_inline)) inline uint8_t clip (int x )
|
||||
+{
|
||||
+ return ( (x & ~((1 << 8)-1)) ? (-x)>>31 & ((1 << 8)-1) : x );
|
||||
+}
|
||||
+
|
||||
+void hf (uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
|
||||
+ intptr_t stride, int width, int height, int16_t *buf)
|
||||
+{
|
||||
+ const int pad = (8 > 9) ? (-10 * ((1 << 8)-1)) : 0;
|
||||
+ for( int y = 0; y < height; y++ ) {
|
||||
+ for( int x = -2; x < width+3; x++ ) {
|
||||
+ int v = ((src)[x-2*stride] + (src)[x+3*stride] - 5*((src)[x-stride]
|
||||
+ + (src)[x+2*stride]) + 20*((src)[x] + (src)[x+stride]));
|
||||
+ dstv[x] = clip ( (v + 16) >> 5 );
|
||||
+ buf[x+2] = v + pad;
|
||||
+ }
|
||||
+ for( int x = 0; x < width; x++ )
|
||||
+ dstc[x] = clip ((((buf+2)[x-2*1] + (buf+2)[x+3*1] - 5*((buf+2)[x-1]
|
||||
+ + (buf+2)[x+2*1]) + 20*((buf+2)[x] + (buf+2)[x+1]))
|
||||
+ - 32*pad + 512) >> 10);
|
||||
+ for( int x = 0; x < width; x++ )
|
||||
+ dsth[x] = clip ((((src)[x-2*1] + (src)[x+3*1] - 5*((src)[x-1]
|
||||
+ + (src)[x+2*1]) + 20*((src)[x] + (src)[x+1]))
|
||||
+ + 16) >> 5);
|
||||
+ dsth += stride;
|
||||
+ dstv += stride;
|
||||
+ dstc += stride;
|
||||
+ src += stride;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler-times {smax\t} 4 } } */
|
||||
+/* { dg-final { scan-assembler-times {smin\t} 4 } } */
|
||||
+/* { dg-final { scan-assembler-times {cmtst\t} 2 } } */
|
||||
+/* { dg-final { scan-assembler-times {uzp1\t} 6 } } */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
2387
0156-add-icp-optimization.patch
Normal file
2387
0156-add-icp-optimization.patch
Normal file
File diff suppressed because it is too large
Load Diff
1241
0157-Add-split-complex-instructions-pass.patch
Normal file
1241
0157-Add-split-complex-instructions-pass.patch
Normal file
File diff suppressed because it is too large
Load Diff
2072
0158-Implement-IPA-prefetch-optimization.patch
Normal file
2072
0158-Implement-IPA-prefetch-optimization.patch
Normal file
File diff suppressed because it is too large
Load Diff
233
0159-Implement-AES-pattern-matching.patch
Normal file
233
0159-Implement-AES-pattern-matching.patch
Normal file
@ -0,0 +1,233 @@
|
||||
From 3a48cd1be0915a0fabbfb3a30bd9b67ccd5c65d3 Mon Sep 17 00:00:00 2001
|
||||
From: Diachkov Ilia WX1215920 <diachkov.ilia1@huawei-partners.com>
|
||||
Date: Tue, 12 Dec 2023 10:41:12 +0800
|
||||
Subject: [PATCH 6/6] Implement AES pattern matching
|
||||
|
||||
---
|
||||
gcc/Makefile.in | 1 +
|
||||
gcc/common.opt | 4 ++++
|
||||
gcc/config/aarch64/aarch64.c | 24 +++++++++++++++++++++
|
||||
gcc/doc/tm.texi | 29 +++++++++++++++++++++++++
|
||||
gcc/doc/tm.texi.in | 12 +++++++++++
|
||||
gcc/passes.def | 1 +
|
||||
gcc/target.def | 41 ++++++++++++++++++++++++++++++++++++
|
||||
gcc/timevar.def | 1 +
|
||||
gcc/tree-pass.h | 1 +
|
||||
9 files changed, 114 insertions(+)
|
||||
|
||||
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
|
||||
index 31bf2cde2..75b28722e 100644
|
||||
--- a/gcc/Makefile.in
|
||||
+++ b/gcc/Makefile.in
|
||||
@@ -1288,6 +1288,7 @@ OBJS = \
|
||||
cgraphunit.o \
|
||||
cgraphclones.o \
|
||||
combine.o \
|
||||
+ crypto-accel.o \
|
||||
combine-stack-adj.o \
|
||||
compare-elim.o \
|
||||
context.o \
|
||||
diff --git a/gcc/common.opt b/gcc/common.opt
|
||||
index 36b016253..eb995f701 100644
|
||||
--- a/gcc/common.opt
|
||||
+++ b/gcc/common.opt
|
||||
@@ -1069,6 +1069,10 @@ floop-crc
|
||||
Common Report Var(flag_loop_crc) Optimization
|
||||
Do the loop crc conversion.
|
||||
|
||||
+fcrypto-accel-aes
|
||||
+Common Report Var(flag_crypto_accel_aes) Init(0) Optimization
|
||||
+Perform crypto acceleration AES pattern matching.
|
||||
+
|
||||
fauto-inc-dec
|
||||
Common Report Var(flag_auto_inc_dec) Init(1) Optimization
|
||||
Generate auto-inc/dec instructions.
|
||||
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
|
||||
index ae9e0802b..75efbcb97 100644
|
||||
--- a/gcc/config/aarch64/aarch64.c
|
||||
+++ b/gcc/config/aarch64/aarch64.c
|
||||
@@ -23894,6 +23894,30 @@ is_aarch64_stp_insn (int icode)
|
||||
return false;
|
||||
}
|
||||
|
||||
+machine_mode
|
||||
+aarch64_get_v16qi_mode ()
|
||||
+{
|
||||
+ return V16QImode;
|
||||
+}
|
||||
+
|
||||
+#undef TARGET_GET_V16QI_MODE
|
||||
+#define TARGET_GET_V16QI_MODE aarch64_get_v16qi_mode
|
||||
+
|
||||
+#undef TARGET_GEN_REV32V16QI
|
||||
+#define TARGET_GEN_REV32V16QI gen_aarch64_rev32v16qi
|
||||
+
|
||||
+#undef TARGET_GEN_AESEV16QI
|
||||
+#define TARGET_GEN_AESEV16QI gen_aarch64_crypto_aesev16qi
|
||||
+
|
||||
+#undef TARGET_GEN_AESDV16QI
|
||||
+#define TARGET_GEN_AESDV16QI gen_aarch64_crypto_aesdv16qi
|
||||
+
|
||||
+#undef TARGET_GEN_AESMCV16QI
|
||||
+#define TARGET_GEN_AESMCV16QI gen_aarch64_crypto_aesmcv16qi
|
||||
+
|
||||
+#undef TARGET_GEN_AESIMCV16QI
|
||||
+#define TARGET_GEN_AESIMCV16QI gen_aarch64_crypto_aesimcv16qi
|
||||
+
|
||||
#undef TARGET_IS_LDP_INSN
|
||||
#define TARGET_IS_LDP_INSN is_aarch64_ldp_insn
|
||||
|
||||
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
|
||||
index ac1d665c5..4a998aa76 100644
|
||||
--- a/gcc/doc/tm.texi
|
||||
+++ b/gcc/doc/tm.texi
|
||||
@@ -11870,6 +11870,35 @@ object files that are not referenced from @code{main} and uses export
|
||||
lists.
|
||||
@end defmac
|
||||
|
||||
+@deftypefn {Target Hook} machine_mode TARGET_GET_V16QI_MODE ()
|
||||
+This function get the 16 byte elements vector mode if target supports this.
|
||||
+@end deftypefn
|
||||
+
|
||||
+@deftypefn {Target Hook} rtx TARGET_GEN_REV32V16QI (rtx @var{dest}, rtx @var{src})
|
||||
+This function generate the byte reverse instruction
|
||||
+ of 16 byte elements vector if target supports this.
|
||||
+@end deftypefn
|
||||
+
|
||||
+@deftypefn {Target Hook} rtx TARGET_GEN_AESEV16QI (rtx @var{dest}, rtx @var{src1}, rtx @var{src2})
|
||||
+This function generate the AES encryption instruction
|
||||
+ of 16 byte elements vector if target supports this.
|
||||
+@end deftypefn
|
||||
+
|
||||
+@deftypefn {Target Hook} rtx TARGET_GEN_AESDV16QI (rtx @var{dest}, rtx @var{src1}, rtx @var{src2})
|
||||
+This function generate the AES decryption instruction
|
||||
+ of 16 byte elements vector if target supports this.
|
||||
+@end deftypefn
|
||||
+
|
||||
+@deftypefn {Target Hook} rtx TARGET_GEN_AESMCV16QI (rtx @var{dest}, rtx @var{src})
|
||||
+This function generate the AES mix columns instruction
|
||||
+ of 16 byte elements vector if target supports this.
|
||||
+@end deftypefn
|
||||
+
|
||||
+@deftypefn {Target Hook} rtx TARGET_GEN_AESIMCV16QI (rtx @var{dest}, rtx @var{src})
|
||||
+This function generate the AES inversed mix columns instruction
|
||||
+ of 16 byte elements vector if target supports this.
|
||||
+@end deftypefn
|
||||
+
|
||||
@deftypefn {Target Hook} bool TARGET_IS_LDP_INSN (int @var{icode})
|
||||
Return true if icode is corresponding to any of the LDP instruction types.
|
||||
@end deftypefn
|
||||
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
|
||||
index 0cd70dda4..f7094d8c2 100644
|
||||
--- a/gcc/doc/tm.texi.in
|
||||
+++ b/gcc/doc/tm.texi.in
|
||||
@@ -8010,6 +8010,18 @@ object files that are not referenced from @code{main} and uses export
|
||||
lists.
|
||||
@end defmac
|
||||
|
||||
+@hook TARGET_GET_V16QI_MODE
|
||||
+
|
||||
+@hook TARGET_GEN_REV32V16QI
|
||||
+
|
||||
+@hook TARGET_GEN_AESEV16QI
|
||||
+
|
||||
+@hook TARGET_GEN_AESDV16QI
|
||||
+
|
||||
+@hook TARGET_GEN_AESMCV16QI
|
||||
+
|
||||
+@hook TARGET_GEN_AESIMCV16QI
|
||||
+
|
||||
@hook TARGET_IS_LDP_INSN
|
||||
|
||||
@hook TARGET_IS_STP_INSN
|
||||
diff --git a/gcc/passes.def b/gcc/passes.def
|
||||
index ba13d897c..da5d71646 100644
|
||||
--- a/gcc/passes.def
|
||||
+++ b/gcc/passes.def
|
||||
@@ -448,6 +448,7 @@ along with GCC; see the file COPYING3. If not see
|
||||
NEXT_PASS (pass_rtl_fwprop_addr);
|
||||
NEXT_PASS (pass_inc_dec);
|
||||
NEXT_PASS (pass_initialize_regs);
|
||||
+ NEXT_PASS (pass_crypto_accel);
|
||||
NEXT_PASS (pass_ud_rtl_dce);
|
||||
NEXT_PASS (pass_combine);
|
||||
NEXT_PASS (pass_if_after_combine);
|
||||
diff --git a/gcc/target.def b/gcc/target.def
|
||||
index 48c8a8234..b4dff78ea 100644
|
||||
--- a/gcc/target.def
|
||||
+++ b/gcc/target.def
|
||||
@@ -2727,6 +2727,47 @@ modes and they have different conditional execution capability, such as ARM.",
|
||||
bool, (void),
|
||||
default_have_conditional_execution)
|
||||
|
||||
+DEFHOOK
|
||||
+(get_v16qi_mode,
|
||||
+ "This function get the 16 byte elements vector mode if target supports this.",
|
||||
+ machine_mode, (),
|
||||
+ NULL)
|
||||
+
|
||||
+DEFHOOK
|
||||
+(gen_rev32v16qi,
|
||||
+ "This function generate the byte reverse instruction\n\
|
||||
+ of 16 byte elements vector if target supports this.",
|
||||
+ rtx, (rtx dest, rtx src),
|
||||
+ NULL)
|
||||
+
|
||||
+DEFHOOK
|
||||
+(gen_aesev16qi,
|
||||
+ "This function generate the AES encryption instruction\n\
|
||||
+ of 16 byte elements vector if target supports this.",
|
||||
+ rtx, (rtx dest, rtx src1, rtx src2),
|
||||
+ NULL)
|
||||
+
|
||||
+DEFHOOK
|
||||
+(gen_aesdv16qi,
|
||||
+ "This function generate the AES decryption instruction\n\
|
||||
+ of 16 byte elements vector if target supports this.",
|
||||
+ rtx, (rtx dest, rtx src1, rtx src2),
|
||||
+ NULL)
|
||||
+
|
||||
+DEFHOOK
|
||||
+(gen_aesmcv16qi,
|
||||
+ "This function generate the AES mix columns instruction\n\
|
||||
+ of 16 byte elements vector if target supports this.",
|
||||
+ rtx, (rtx dest, rtx src),
|
||||
+ NULL)
|
||||
+
|
||||
+DEFHOOK
|
||||
+(gen_aesimcv16qi,
|
||||
+ "This function generate the AES inversed mix columns instruction\n\
|
||||
+ of 16 byte elements vector if target supports this.",
|
||||
+ rtx, (rtx dest, rtx src),
|
||||
+ NULL)
|
||||
+
|
||||
DEFHOOK
|
||||
(is_ldp_insn,
|
||||
"Return true if icode is corresponding to any of the LDP instruction types.",
|
||||
diff --git a/gcc/timevar.def b/gcc/timevar.def
|
||||
index 24caf1b5d..9ca74dffe 100644
|
||||
--- a/gcc/timevar.def
|
||||
+++ b/gcc/timevar.def
|
||||
@@ -258,6 +258,7 @@ DEFTIMEVAR (TV_AUTO_INC_DEC , "auto inc dec")
|
||||
DEFTIMEVAR (TV_CSE2 , "CSE 2")
|
||||
DEFTIMEVAR (TV_BRANCH_PROB , "branch prediction")
|
||||
DEFTIMEVAR (TV_COMBINE , "combiner")
|
||||
+DEFTIMEVAR (TV_CRYPTO_ACCEL , "crypto accel")
|
||||
DEFTIMEVAR (TV_IFCVT , "if-conversion")
|
||||
DEFTIMEVAR (TV_MODE_SWITCH , "mode switching")
|
||||
DEFTIMEVAR (TV_SMS , "sms modulo scheduling")
|
||||
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
|
||||
index 232a3fdf6..29dc7e34b 100644
|
||||
--- a/gcc/tree-pass.h
|
||||
+++ b/gcc/tree-pass.h
|
||||
@@ -570,6 +570,7 @@ extern rtl_opt_pass *make_pass_cse2 (gcc::context *ctxt);
|
||||
extern rtl_opt_pass *make_pass_df_initialize_opt (gcc::context *ctxt);
|
||||
extern rtl_opt_pass *make_pass_df_initialize_no_opt (gcc::context *ctxt);
|
||||
extern rtl_opt_pass *make_pass_reginfo_init (gcc::context *ctxt);
|
||||
+extern rtl_opt_pass *make_pass_crypto_accel (gcc::context *ctxt);
|
||||
extern rtl_opt_pass *make_pass_inc_dec (gcc::context *ctxt);
|
||||
extern rtl_opt_pass *make_pass_stack_ptr_mod (gcc::context *ctxt);
|
||||
extern rtl_opt_pass *make_pass_initialize_regs (gcc::context *ctxt);
|
||||
--
|
||||
2.33.0
|
||||
|
||||
3746
0160-AES-Add-lost-files.patch
Normal file
3746
0160-AES-Add-lost-files.patch
Normal file
File diff suppressed because it is too large
Load Diff
51
0161-Fix-lost-ftree-fold-phiopt-option-in-tests.patch
Normal file
51
0161-Fix-lost-ftree-fold-phiopt-option-in-tests.patch
Normal file
@ -0,0 +1,51 @@
|
||||
From 885c6fbfa6412a81740a8c806fa82273b7114b24 Mon Sep 17 00:00:00 2001
|
||||
From: Pronin Alexander 00812787 <pronin.alexander@huawei.com>
|
||||
Date: Wed, 13 Dec 2023 18:38:33 +0800
|
||||
Subject: [PATCH 1/2] Fix lost ftree-fold-phiopt option in tests
|
||||
|
||||
---
|
||||
gcc/testsuite/gcc.dg/double_sized_mul-1.c | 2 +-
|
||||
gcc/testsuite/gcc.dg/double_sized_mul-2.c | 2 +-
|
||||
gcc/testsuite/gcc.dg/ifcvt-gimple.c | 2 +-
|
||||
3 files changed, 3 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.dg/double_sized_mul-1.c b/gcc/testsuite/gcc.dg/double_sized_mul-1.c
|
||||
index 4d475cc8a..bdb503bc4 100644
|
||||
--- a/gcc/testsuite/gcc.dg/double_sized_mul-1.c
|
||||
+++ b/gcc/testsuite/gcc.dg/double_sized_mul-1.c
|
||||
@@ -1,7 +1,7 @@
|
||||
/* { dg-do compile } */
|
||||
/* fif-conversion-gimple and fuaddsub-overflow-match-all are required for
|
||||
proper overflow detection in some cases. */
|
||||
-/* { dg-options "-O2 -fif-conversion-gimple -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */
|
||||
+/* { dg-options "-O2 -fif-conversion-gimple -fuaddsub-overflow-match-all -ftree-fold-phiopt -fdump-tree-widening_mul-stats" } */
|
||||
#include <stdint.h>
|
||||
|
||||
typedef unsigned __int128 uint128_t;
|
||||
diff --git a/gcc/testsuite/gcc.dg/double_sized_mul-2.c b/gcc/testsuite/gcc.dg/double_sized_mul-2.c
|
||||
index cc6e5af25..f9d58a2f6 100644
|
||||
--- a/gcc/testsuite/gcc.dg/double_sized_mul-2.c
|
||||
+++ b/gcc/testsuite/gcc.dg/double_sized_mul-2.c
|
||||
@@ -1,7 +1,7 @@
|
||||
/* { dg-do compile } */
|
||||
/* fif-conversion-gimple is required for proper overflow detection
|
||||
in some cases. */
|
||||
-/* { dg-options "-O2 -fif-conversion-gimple -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */
|
||||
+/* { dg-options "-O2 -fif-conversion-gimple -fuaddsub-overflow-match-all -ftree-fold-phiopt -fdump-tree-widening_mul-stats" } */
|
||||
#include <stdint.h>
|
||||
|
||||
typedef unsigned __int128 uint128_t;
|
||||
diff --git a/gcc/testsuite/gcc.dg/ifcvt-gimple.c b/gcc/testsuite/gcc.dg/ifcvt-gimple.c
|
||||
index 0f7c87e5c..4dc0f9206 100644
|
||||
--- a/gcc/testsuite/gcc.dg/ifcvt-gimple.c
|
||||
+++ b/gcc/testsuite/gcc.dg/ifcvt-gimple.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O2 -fif-conversion-gimple -fdump-tree-optimized" } */
|
||||
+/* { dg-options "-O2 -fif-conversion-gimple -fdump-tree-optimized -ftree-fold-phiopt" } */
|
||||
|
||||
int test_int (int optimizable_int) {
|
||||
if (optimizable_int > 5)
|
||||
--
|
||||
2.33.0
|
||||
|
||||
25
0162-rtl-ifcvt-free-dominance-info-before-cleanup_cfg.patch
Normal file
25
0162-rtl-ifcvt-free-dominance-info-before-cleanup_cfg.patch
Normal file
@ -0,0 +1,25 @@
|
||||
From 126bd5722f96733e7fbe433062861d5c3534911a Mon Sep 17 00:00:00 2001
|
||||
From: vchernon <chernonog.vyacheslav@huawei.com>
|
||||
Date: Wed, 13 Dec 2023 21:52:03 +0800
|
||||
Subject: [PATCH 2/2] [rtl-ifcvt] free dominance info before cleanup_cfg not
|
||||
cleaned dominance info can cause infinite loop in cleanup_cfg
|
||||
|
||||
---
|
||||
gcc/ifcvt.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
|
||||
index 209987ebc..04086c560 100644
|
||||
--- a/gcc/ifcvt.c
|
||||
+++ b/gcc/ifcvt.c
|
||||
@@ -5624,6 +5624,7 @@ if_convert (bool after_combine)
|
||||
df_live_add_problem ();
|
||||
df_live_set_all_dirty ();
|
||||
}
|
||||
+ free_dominance_info (CDI_DOMINATORS);
|
||||
cleanup_cfg (CLEANUP_EXPENSIVE);
|
||||
|
||||
/* Record whether we are after combine pass. */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,42 @@
|
||||
From 3281cef37191a800d4fcc916c0e9d5c7a43802a4 Mon Sep 17 00:00:00 2001
|
||||
From: XingYuShuai <1150775134@qq.com>
|
||||
Date: Thu, 14 Dec 2023 20:11:35 +0800
|
||||
Subject: [PATCH 1/2] [Loop CRC] Solving the problem of insufficient CRC table
|
||||
validation
|
||||
|
||||
---
|
||||
gcc/tree-ssa-loop-crc.c | 8 +++++---
|
||||
1 file changed, 5 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/gcc/tree-ssa-loop-crc.c b/gcc/tree-ssa-loop-crc.c
|
||||
index 9878363eb..2dd9e1e3b 100644
|
||||
--- a/gcc/tree-ssa-loop-crc.c
|
||||
+++ b/gcc/tree-ssa-loop-crc.c
|
||||
@@ -336,11 +336,14 @@ only_one_array_read (class loop *loop, tree &crc_table)
|
||||
&& TREE_CODE (gimple_assign_lhs (stmt)) == ARRAY_REF)
|
||||
return false;
|
||||
|
||||
+ /* Only one-dimensional integer arrays meet the condition. */
|
||||
if (gimple_code (stmt) == GIMPLE_ASSIGN
|
||||
- && TREE_CODE (gimple_assign_rhs1 (stmt)) == ARRAY_REF)
|
||||
+ && TREE_CODE (gimple_assign_rhs1 (stmt)) == ARRAY_REF
|
||||
+ && TREE_CODE (TREE_OPERAND (gimple_assign_rhs1 (stmt), 0)) == VAR_DECL
|
||||
+ && TREE_CODE (TREE_TYPE (gimple_assign_rhs1 (stmt))) == INTEGER_TYPE)
|
||||
{
|
||||
if (crc_table == NULL
|
||||
- && gimple_assign_rhs1 (stmt)->base.readonly_flag)
|
||||
+ && TREE_READONLY (gimple_assign_rhs1 (stmt)))
|
||||
{
|
||||
crc_table = gimple_assign_rhs1 (stmt);
|
||||
crc_table_read_stmt = stmt;
|
||||
@@ -438,7 +441,6 @@ match_crc_table (tree crc_table)
|
||||
return true;
|
||||
}
|
||||
|
||||
-
|
||||
/* Check the crc table. The loop should have only one data reference.
|
||||
And match the data reference with the predefined array. */
|
||||
static bool
|
||||
--
|
||||
2.33.0
|
||||
|
||||
924
0164-LLC-Allocation-Fix-some-bugs-and-remove-variable-pre.patch
Normal file
924
0164-LLC-Allocation-Fix-some-bugs-and-remove-variable-pre.patch
Normal file
@ -0,0 +1,924 @@
|
||||
From 1722afc51311a6bb0b892df50602f660c706162f Mon Sep 17 00:00:00 2001
|
||||
From: liuf9 <liufeiyang6@huawei.com>
|
||||
Date: Fri, 15 Dec 2023 11:25:48 +0800
|
||||
Subject: [PATCH 2/2] [LLC Allocation] Fix some bugs and remove variable
|
||||
prefetch tool. After outer loop analysis, it is possible to get nested loops
|
||||
for kernel candidates and this situation has conflicts with the early exiting
|
||||
criterion for kernel filtering process and we restrict this criterion for
|
||||
innermost loops only. We also fix some pass configuration bugs in common.opt
|
||||
and params.opt. We remove variable prefetch tool due to the consideration of
|
||||
unsafe inputs from users.
|
||||
|
||||
---
|
||||
gcc/common.opt | 2 +-
|
||||
gcc/params.opt | 24 +-
|
||||
gcc/testsuite/gcc.dg/llc-allocate/llc-2.c | 2 +-
|
||||
.../llc-allocate/llc-issue-builtin-prefetch.c | 48 ----
|
||||
.../llc-allocate/llc-tool-insertion-1.c | 48 ----
|
||||
.../llc-allocate/llc-tool-insertion-2.c | 48 ----
|
||||
.../llc-allocate/llc-tool-insertion-3.c | 48 ----
|
||||
.../llc-allocate/llc-tool-insertion-4.c | 47 ---
|
||||
.../llc-allocate/llc-tool-insertion-5.c | 48 ----
|
||||
.../llc-allocate/llc-tool-insertion-6.c | 47 ---
|
||||
.../llc-tool-insertion-7-null-var-name.c | 52 ----
|
||||
.../llc-tool-insertion-8-tmp-var-name.c | 54 ----
|
||||
gcc/tree-ssa-llc-allocate.c | 267 +-----------------
|
||||
13 files changed, 11 insertions(+), 724 deletions(-)
|
||||
delete mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-issue-builtin-prefetch.c
|
||||
delete mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-1.c
|
||||
delete mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-2.c
|
||||
delete mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-3.c
|
||||
delete mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-4.c
|
||||
delete mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-5.c
|
||||
delete mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-6.c
|
||||
delete mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-7-null-var-name.c
|
||||
delete mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-8-tmp-var-name.c
|
||||
|
||||
diff --git a/gcc/common.opt b/gcc/common.opt
|
||||
index 56ad9a378..a8a2264ee 100644
|
||||
--- a/gcc/common.opt
|
||||
+++ b/gcc/common.opt
|
||||
@@ -2255,7 +2255,7 @@ Generate prefetch instructions, if available, for arrays in loops. The prefetch
|
||||
level can control the optimize level to array prefetch.
|
||||
|
||||
fllc-allocate
|
||||
-Common Report Var(flag_llc_allocate) Init(-1) Optimization
|
||||
+Common Report Var(flag_llc_allocate) Optimization
|
||||
Generate LLC hint instructions.
|
||||
|
||||
fipa-prefetch
|
||||
diff --git a/gcc/params.opt b/gcc/params.opt
|
||||
index 792ca5c35..ef7bea311 100644
|
||||
--- a/gcc/params.opt
|
||||
+++ b/gcc/params.opt
|
||||
@@ -1030,13 +1030,11 @@ Common Joined UInteger Var(param_mem_access_num) Init(3) Param Optimization
|
||||
Memory access num.
|
||||
|
||||
-param=prefetch-offset=
|
||||
-Common Joined UInteger Var(param_prefetch_offset) Init(1024)
|
||||
-IntegerRange(1, 999999) Param Optimization
|
||||
+Common Joined UInteger Var(param_prefetch_offset) Init(1024) IntegerRange(1, 999999) Param Optimization
|
||||
Prefetch Offset, which is usually a power of two due to cache line size.
|
||||
|
||||
-param=branch-prob-threshold=
|
||||
-Common Joined UInteger Var(param_branch_prob_threshold) Init(80) IntegerRange(50, 100)
|
||||
-Param Optimization
|
||||
+Common Joined UInteger Var(param_branch_prob_threshold) Init(80) IntegerRange(50, 100) Param Optimization
|
||||
High Execution Rate Branch Threshold.
|
||||
|
||||
-param=issue-topn=
|
||||
@@ -1051,24 +1049,6 @@ Force issue the topn LLC mem_ref hint, without generating dynamic multi-branches
|
||||
Common Joined UInteger Var(param_llc_capacity_per_core) Init(114) IntegerRange(0, 999999) Param
|
||||
LLC capacity per core.
|
||||
|
||||
--param=target-variables=
|
||||
-Common Joined Var(param_target_variables) Init("") Param Optimization
|
||||
---param=target-variables=<var>[,<var>,...] Target variables for prefetching, separated by comma,
|
||||
-without space. The representation of a variable can be complex and containing space, please surround
|
||||
-it by quotation marks and escape special characters in Linux. The input length should be no more
|
||||
-than 512 characters.
|
||||
-
|
||||
--param=use-ref-group-index=
|
||||
-Common Joined UInteger Var(param_use_ref_group_index) Init(0) IntegerRange(0, 1) Param Optimization
|
||||
-Prefetch the target variables by their indices in sorted ref_groups, use together with parameter
|
||||
-target-variables.
|
||||
-
|
||||
--param=mem-ref-index=
|
||||
-Common Joined Var(param_mem_ref_index) Init("") Param Optimization
|
||||
---param=mem-ref-index=<idx>[,<idx>,...] Prefetch the target variable at the memory reference
|
||||
-location with the index of customized order, separated by comma, without space. The input length
|
||||
-should be no more than 512 characters.
|
||||
-
|
||||
-param=filter-kernels=
|
||||
Common Joined UInteger Var(param_filter_kernels) Init(1) IntegerRange(0, 1) Param
|
||||
Allow LLC allocate pass to greedily filter kernels by traversing the corresponding basic blocks
|
||||
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-2.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-2.c
|
||||
index 9f8a5c307..f8b1cc5c1 100644
|
||||
--- a/gcc/testsuite/gcc.dg/llc-allocate/llc-2.c
|
||||
+++ b/gcc/testsuite/gcc.dg/llc-allocate/llc-2.c
|
||||
@@ -45,7 +45,7 @@ main (int argc, char *argv[])
|
||||
/* { dg-final { scan-tree-dump-not "static_data_size:" "llc_allocate" } } */
|
||||
/* { dg-final { scan-tree-dump-times "\{ (?:\\d+\\(\\d+\\) ){1}\}" 2 "llc_allocate" } } */
|
||||
/* { dg-final { scan-tree-dump-not ", size: (?!(0\.000000))" "llc_allocate" } } */
|
||||
-/* { dg-final { scan-tree-dump-times ", size: 0\.000000" 8 "llc_allocate" } } */
|
||||
+/* { dg-final { scan-tree-dump-times ", size: 0\.000000" 6 "llc_allocate" } } */
|
||||
/* { dg-final { scan-tree-dump-times "\\d x_data \\(0.000000, 1, 0\\) : 3" 2 "llc_allocate" } } */
|
||||
/* { dg-final { scan-tree-dump-times "\\d A_j \\(0.000000, 1, 0\\) : 2" 2 "llc_allocate" } } */
|
||||
/* { dg-final { scan-tree-dump-times "\\d A_data \\(0.000000, 1, 0\\) : 2" 2 "llc_allocate" } } */
|
||||
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-issue-builtin-prefetch.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-issue-builtin-prefetch.c
|
||||
deleted file mode 100644
|
||||
index 2a58c501f..000000000
|
||||
--- a/gcc/testsuite/gcc.dg/llc-allocate/llc-issue-builtin-prefetch.c
|
||||
+++ /dev/null
|
||||
@@ -1,48 +0,0 @@
|
||||
-/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
||||
-/* { dg-options "-O3 -march=armv8.2-a+sve -funroll-loops -ffast-math -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param target-variables=uPtr" } */
|
||||
-
|
||||
-#include <stdio.h>
|
||||
-
|
||||
-#define N 131590
|
||||
-#define F 384477
|
||||
-
|
||||
-double diagPtr[N];
|
||||
-double psiPtr[N];
|
||||
-double ApsiPtr[N];
|
||||
-int lPtr[F];
|
||||
-int uPtr[F];
|
||||
-double lowerPtr[F];
|
||||
-double upperPtr[F];
|
||||
-
|
||||
-void
|
||||
-AMUL (double *diagPtr, double *psiPtr, double *ApsiPtr, int *lPtr,
|
||||
- int *uPtr, double *lowerPtr, double *upperPtr, int nCells, int nFaces)
|
||||
-{
|
||||
- for (int cell=0; cell<nCells; cell++)
|
||||
- ApsiPtr[cell] = diagPtr[cell]*psiPtr[cell];
|
||||
-
|
||||
- for (int face=0; face<nFaces; face++)
|
||||
- {
|
||||
- ApsiPtr[uPtr[face]] += lowerPtr[face]*psiPtr[lPtr[face]];
|
||||
- ApsiPtr[lPtr[face]] += upperPtr[face]*psiPtr[uPtr[face]];
|
||||
- }
|
||||
-}
|
||||
-
|
||||
-int
|
||||
-main (int argc, char *argv[])
|
||||
-{
|
||||
- int nCells = N;
|
||||
- int nFaces = F;
|
||||
- int testIter = 2;
|
||||
-
|
||||
- for (int i=0; i<testIter; i++)
|
||||
- AMUL (diagPtr,psiPtr,ApsiPtr,lPtr,uPtr,lowerPtr,upperPtr,nCells,nFaces);
|
||||
-
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
-/* { dg-final { scan-tree-dump-not "runtime issue" "llc_allocate" } } */
|
||||
-/* { dg-final { scan-tree-dump-times "static issue" 2 "llc_allocate" } } */
|
||||
-/* { dg-final { scan-tree-dump-times "insert prfm" 2 "llc_allocate" } } */
|
||||
-/* { dg-final { scan-tree-dump-not "\[&\]?uPtr(?:_\\d+\\(D\\))? \\+ \\d{4};" "llc_allocate" } } */
|
||||
-/* { dg-final { scan-tree-dump-times "\[&\]?uPtr(?:_\\d+\\(D\\))? \\+ \[_\]\\d{1,4};" 2 "llc_allocate" } } */
|
||||
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-1.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-1.c
|
||||
deleted file mode 100644
|
||||
index 276781c4f..000000000
|
||||
--- a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-1.c
|
||||
+++ /dev/null
|
||||
@@ -1,48 +0,0 @@
|
||||
-/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
||||
-/* { dg-options "-O3 -march=armv8.2-a+sve -funroll-loops -ffast-math -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param target-variables=lPtr" } */
|
||||
-
|
||||
-#include <stdio.h>
|
||||
-
|
||||
-#define N 131590
|
||||
-#define F 384477
|
||||
-
|
||||
-double diagPtr[N];
|
||||
-double psiPtr[N];
|
||||
-double ApsiPtr[N];
|
||||
-int lPtr[F];
|
||||
-int uPtr[F];
|
||||
-double lowerPtr[F];
|
||||
-double upperPtr[F];
|
||||
-
|
||||
-void
|
||||
-AMUL (double *diagPtr, double *psiPtr, double *ApsiPtr, int *lPtr,
|
||||
- int *uPtr, double *lowerPtr, double *upperPtr, int nCells, int nFaces)
|
||||
-{
|
||||
- for (int cell=0; cell<nCells; cell++)
|
||||
- ApsiPtr[cell] = diagPtr[cell]*psiPtr[cell];
|
||||
-
|
||||
- for (int face=0; face<nFaces; face++)
|
||||
- {
|
||||
- ApsiPtr[uPtr[face]] += lowerPtr[face]*psiPtr[lPtr[face]];
|
||||
- ApsiPtr[lPtr[face]] += upperPtr[face]*psiPtr[uPtr[face]];
|
||||
- }
|
||||
-}
|
||||
-
|
||||
-int
|
||||
-main (int argc, char *argv[])
|
||||
-{
|
||||
- int nCells = N;
|
||||
- int nFaces = F;
|
||||
- int testIter = 2;
|
||||
-
|
||||
- for (int i=0; i<testIter; i++)
|
||||
- AMUL (diagPtr,psiPtr,ApsiPtr,lPtr,uPtr,lowerPtr,upperPtr,nCells,nFaces);
|
||||
-
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
-/* { dg-final { scan-tree-dump-times "NOTICE: Prefetching target variable \""
|
||||
- " lPtr \"" 2 "llc_allocate" } } */
|
||||
-/* { dg-final { scan-tree-dump-not "runtime issue" "llc_allocate" } } */
|
||||
-/* { dg-final { scan-tree-dump-times "static issue" 2 "llc_allocate" } } */
|
||||
-/* { dg-final { scan-tree-dump-times "insert prfm" 2 "llc_allocate" } } */
|
||||
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-2.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-2.c
|
||||
deleted file mode 100644
|
||||
index 57c76f4a6..000000000
|
||||
--- a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-2.c
|
||||
+++ /dev/null
|
||||
@@ -1,48 +0,0 @@
|
||||
-/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
||||
-/* { dg-options "-O3 -march=armv8.2-a+sve -funroll-loops -ffast-math -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param target-variables=,lPtr, --param mem-ref-index=5" } */
|
||||
-
|
||||
-#include <stdio.h>
|
||||
-
|
||||
-#define N 131590
|
||||
-#define F 384477
|
||||
-
|
||||
-double diagPtr[N];
|
||||
-double psiPtr[N];
|
||||
-double ApsiPtr[N];
|
||||
-int lPtr[F];
|
||||
-int uPtr[F];
|
||||
-double lowerPtr[F];
|
||||
-double upperPtr[F];
|
||||
-
|
||||
-void
|
||||
-AMUL (double *diagPtr, double *psiPtr, double *ApsiPtr, int *lPtr,
|
||||
- int *uPtr, double *lowerPtr, double *upperPtr, int nCells, int nFaces)
|
||||
-{
|
||||
- for (int cell=0; cell<nCells; cell++)
|
||||
- ApsiPtr[cell] = diagPtr[cell]*psiPtr[cell];
|
||||
-
|
||||
- for (int face=0; face<nFaces; face++)
|
||||
- {
|
||||
- ApsiPtr[uPtr[face]] += lowerPtr[face]*psiPtr[lPtr[face]];
|
||||
- ApsiPtr[lPtr[face]] += upperPtr[face]*psiPtr[uPtr[face]];
|
||||
- }
|
||||
-}
|
||||
-
|
||||
-int
|
||||
-main (int argc, char *argv[])
|
||||
-{
|
||||
- int nCells = N;
|
||||
- int nFaces = F;
|
||||
- int testIter = 2;
|
||||
-
|
||||
- for (int i=0; i<testIter; i++)
|
||||
- AMUL (diagPtr,psiPtr,ApsiPtr,lPtr,uPtr,lowerPtr,upperPtr,nCells,nFaces);
|
||||
-
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
-/* { dg-final { scan-tree-dump-times "WARNING: The target data_ref index is "
|
||||
- "out of range." 2 "llc_allocate" } } */
|
||||
-/* { dg-final { scan-tree-dump-not "runtime issue" "llc_allocate" } } */
|
||||
-/* { dg-final { scan-tree-dump-times "static issue" 2 "llc_allocate" } } */
|
||||
-/* { dg-final { scan-tree-dump-times "insert prfm" 2 "llc_allocate" } } */
|
||||
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-3.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-3.c
|
||||
deleted file mode 100644
|
||||
index d9c053566..000000000
|
||||
--- a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-3.c
|
||||
+++ /dev/null
|
||||
@@ -1,48 +0,0 @@
|
||||
-/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
||||
-/* { dg-options "-O3 -march=armv8.2-a+sve -funroll-loops -ffast-math -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param target-variables=lPtr,uPtr,, --param mem-ref-index=5" } */
|
||||
-
|
||||
-#include <stdio.h>
|
||||
-
|
||||
-#define N 131590
|
||||
-#define F 384477
|
||||
-
|
||||
-double diagPtr[N];
|
||||
-double psiPtr[N];
|
||||
-double ApsiPtr[N];
|
||||
-int lPtr[F];
|
||||
-int uPtr[F];
|
||||
-double lowerPtr[F];
|
||||
-double upperPtr[F];
|
||||
-
|
||||
-void
|
||||
-AMUL (double *diagPtr, double *psiPtr, double *ApsiPtr, int *lPtr,
|
||||
- int *uPtr, double *lowerPtr, double *upperPtr, int nCells, int nFaces)
|
||||
-{
|
||||
- for (int cell=0; cell<nCells; cell++)
|
||||
- ApsiPtr[cell] = diagPtr[cell]*psiPtr[cell];
|
||||
-
|
||||
- for (int face=0; face<nFaces; face++)
|
||||
- {
|
||||
- ApsiPtr[uPtr[face]] += lowerPtr[face]*psiPtr[lPtr[face]];
|
||||
- ApsiPtr[lPtr[face]] += upperPtr[face]*psiPtr[uPtr[face]];
|
||||
- }
|
||||
-}
|
||||
-
|
||||
-int
|
||||
-main (int argc, char *argv[])
|
||||
-{
|
||||
- int nCells = N;
|
||||
- int nFaces = F;
|
||||
- int testIter = 2;
|
||||
-
|
||||
- for (int i=0; i<testIter; i++)
|
||||
- AMUL (diagPtr,psiPtr,ApsiPtr,lPtr,uPtr,lowerPtr,upperPtr,nCells,nFaces);
|
||||
-
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
-/* { dg-final { scan-tree-dump-not "WARNING: The number of provided memory "
|
||||
- "reference indices is less" "llc_allocate" } } */
|
||||
-/* { dg-final { scan-tree-dump-not "runtime issue" "llc_allocate" } } */
|
||||
-/* { dg-final { scan-tree-dump-times "static issue" 2 "llc_allocate" } } */
|
||||
-/* { dg-final { scan-tree-dump-times "insert prfm" 4 "llc_allocate" } } */
|
||||
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-4.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-4.c
|
||||
deleted file mode 100644
|
||||
index b87f9903d..000000000
|
||||
--- a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-4.c
|
||||
+++ /dev/null
|
||||
@@ -1,47 +0,0 @@
|
||||
-/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
||||
-/* { dg-options "-O3 -march=armv8.2-a+sve -funroll-loops -ffast-math -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param target-variables=lPtr --param use-ref-group-index=1" } */
|
||||
-
|
||||
-#include <stdio.h>
|
||||
-
|
||||
-#define N 131590
|
||||
-#define F 384477
|
||||
-
|
||||
-double diagPtr[N];
|
||||
-double psiPtr[N];
|
||||
-double ApsiPtr[N];
|
||||
-int lPtr[F];
|
||||
-int uPtr[F];
|
||||
-double lowerPtr[F];
|
||||
-double upperPtr[F];
|
||||
-
|
||||
-void
|
||||
-AMUL (double *diagPtr, double *psiPtr, double *ApsiPtr, int *lPtr,
|
||||
- int *uPtr, double *lowerPtr, double *upperPtr, int nCells, int nFaces)
|
||||
-{
|
||||
- for (int cell=0; cell<nCells; cell++)
|
||||
- ApsiPtr[cell] = diagPtr[cell]*psiPtr[cell];
|
||||
-
|
||||
- for (int face=0; face<nFaces; face++)
|
||||
- {
|
||||
- ApsiPtr[uPtr[face]] += lowerPtr[face]*psiPtr[lPtr[face]];
|
||||
- ApsiPtr[lPtr[face]] += upperPtr[face]*psiPtr[uPtr[face]];
|
||||
- }
|
||||
-}
|
||||
-
|
||||
-int
|
||||
-main (int argc, char *argv[])
|
||||
-{
|
||||
- int nCells = N;
|
||||
- int nFaces = F;
|
||||
- int testIter = 2;
|
||||
-
|
||||
- for (int i=0; i<testIter; i++)
|
||||
- AMUL (diagPtr,psiPtr,ApsiPtr,lPtr,uPtr,lowerPtr,upperPtr,nCells,nFaces);
|
||||
-
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
-/* { dg-final { scan-tree-dump-times "ERROR: not an unsigned integer" 1
|
||||
- "llc_allocate" } } */
|
||||
-/* { dg-final { scan-tree-dump-not "runtime issue" "llc_allocate" } } */
|
||||
-/* { dg-final { scan-tree-dump-not "static issue" "llc_allocate" } } */
|
||||
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-5.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-5.c
|
||||
deleted file mode 100644
|
||||
index d07836765..000000000
|
||||
--- a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-5.c
|
||||
+++ /dev/null
|
||||
@@ -1,48 +0,0 @@
|
||||
-/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
||||
-/* { dg-options "-O3 -march=armv8.2-a+sve -funroll-loops -ffast-math -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param target-variables=1 --param use-ref-group-index=1" } */
|
||||
-
|
||||
-#include <stdio.h>
|
||||
-
|
||||
-#define N 131590
|
||||
-#define F 384477
|
||||
-
|
||||
-double diagPtr[N];
|
||||
-double psiPtr[N];
|
||||
-double ApsiPtr[N];
|
||||
-int lPtr[F];
|
||||
-int uPtr[F];
|
||||
-double lowerPtr[F];
|
||||
-double upperPtr[F];
|
||||
-
|
||||
-void
|
||||
-AMUL (double *diagPtr, double *psiPtr, double *ApsiPtr, int *lPtr,
|
||||
- int *uPtr, double *lowerPtr, double *upperPtr, int nCells, int nFaces)
|
||||
-{
|
||||
- for (int cell=0; cell<nCells; cell++)
|
||||
- ApsiPtr[cell] = diagPtr[cell]*psiPtr[cell];
|
||||
-
|
||||
- for (int face=0; face<nFaces; face++)
|
||||
- {
|
||||
- ApsiPtr[uPtr[face]] += lowerPtr[face]*psiPtr[lPtr[face]];
|
||||
- ApsiPtr[lPtr[face]] += upperPtr[face]*psiPtr[uPtr[face]];
|
||||
- }
|
||||
-}
|
||||
-
|
||||
-int
|
||||
-main (int argc, char *argv[])
|
||||
-{
|
||||
- int nCells = N;
|
||||
- int nFaces = F;
|
||||
- int testIter = 2;
|
||||
-
|
||||
- for (int i=0; i<testIter; i++)
|
||||
- AMUL (diagPtr,psiPtr,ApsiPtr,lPtr,uPtr,lowerPtr,upperPtr,nCells,nFaces);
|
||||
-
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
-/* { dg-final { scan-tree-dump-times "NOTICE: Prefetching target variable \""
|
||||
- " psiPtr \"" 2 "llc_allocate" } } */
|
||||
-/* { dg-final { scan-tree-dump-not "runtime issue" "llc_allocate" } } */
|
||||
-/* { dg-final { scan-tree-dump-times "static issue" 2 "llc_allocate" } } */
|
||||
-/* { dg-final { scan-tree-dump-times "insert svprfd" 2 "llc_allocate" } } */
|
||||
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-6.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-6.c
|
||||
deleted file mode 100644
|
||||
index c0a6afe5b..000000000
|
||||
--- a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-6.c
|
||||
+++ /dev/null
|
||||
@@ -1,47 +0,0 @@
|
||||
-/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
||||
-/* { dg-options "-O3 -march=armv8.2-a+sve -funroll-loops -ffast-math -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param target-variables=3,a --param use-ref-group-index=1" } */
|
||||
-
|
||||
-#include <stdio.h>
|
||||
-
|
||||
-#define N 131590
|
||||
-#define F 384477
|
||||
-
|
||||
-double diagPtr[N];
|
||||
-double psiPtr[N];
|
||||
-double ApsiPtr[N];
|
||||
-int lPtr[F];
|
||||
-int uPtr[F];
|
||||
-double lowerPtr[F];
|
||||
-double upperPtr[F];
|
||||
-
|
||||
-void
|
||||
-AMUL (double *diagPtr, double *psiPtr, double *ApsiPtr, int *lPtr,
|
||||
- int *uPtr, double *lowerPtr, double *upperPtr, int nCells, int nFaces)
|
||||
-{
|
||||
- for (int cell=0; cell<nCells; cell++)
|
||||
- ApsiPtr[cell] = diagPtr[cell]*psiPtr[cell];
|
||||
-
|
||||
- for (int face=0; face<nFaces; face++)
|
||||
- {
|
||||
- ApsiPtr[uPtr[face]] += lowerPtr[face]*psiPtr[lPtr[face]];
|
||||
- ApsiPtr[lPtr[face]] += upperPtr[face]*psiPtr[uPtr[face]];
|
||||
- }
|
||||
-}
|
||||
-
|
||||
-int
|
||||
-main (int argc, char *argv[])
|
||||
-{
|
||||
- int nCells = N;
|
||||
- int nFaces = F;
|
||||
- int testIter = 2;
|
||||
-
|
||||
- for (int i=0; i<testIter; i++)
|
||||
- AMUL (diagPtr,psiPtr,ApsiPtr,lPtr,uPtr,lowerPtr,upperPtr,nCells,nFaces);
|
||||
-
|
||||
- return 0;
|
||||
-}
|
||||
-
|
||||
-/* { dg-final { scan-tree-dump-times "ERROR: not an unsigned integer" 1
|
||||
- "llc_allocate" } } */
|
||||
-/* { dg-final { scan-tree-dump-not "runtime issue" "llc_allocate" } } */
|
||||
-/* { dg-final { scan-tree-dump-not "static issue" "llc_allocate" } } */
|
||||
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-7-null-var-name.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-7-null-var-name.c
|
||||
deleted file mode 100644
|
||||
index 4ad331626..000000000
|
||||
--- a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-7-null-var-name.c
|
||||
+++ /dev/null
|
||||
@@ -1,52 +0,0 @@
|
||||
-/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
||||
-/* { dg-options "-O3 -c -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param filter-kernels=0 --param target-variables=\"bb_16(D)->aux\"" } */
|
||||
-
|
||||
-#include <stdio.h>
|
||||
-
|
||||
-typedef struct stack_def
|
||||
-{
|
||||
- int top; /* index to top stack element */
|
||||
- unsigned long reg_set; /* set of live registers */
|
||||
- unsigned char reg[128]; /* register - stack mapping */
|
||||
-} *stack;
|
||||
-
|
||||
-typedef struct block_info_def
|
||||
-{
|
||||
- struct stack_def stack_in; /* Input stack configuration. */
|
||||
- struct stack_def stack_out; /* Output stack configuration. */
|
||||
- unsigned long out_reg_set; /* Stack regs live on output. */
|
||||
- int done; /* True if block already converted. */
|
||||
- int predecessors; /* Number of predecessors that need
|
||||
- to be visited. */
|
||||
-} *block_info;
|
||||
-
|
||||
-typedef struct basic_block_def
|
||||
-{
|
||||
- void *aux;
|
||||
-} *basic_block;
|
||||
-
|
||||
-unsigned char
|
||||
-convert_regs_exit (basic_block bb, int value_reg_low, int value_reg_high)
|
||||
-{
|
||||
- stack output_stack;
|
||||
-
|
||||
- output_stack = &(((block_info) bb->aux)->stack_in);
|
||||
- if (value_reg_low == -1)
|
||||
- output_stack->top = -1;
|
||||
- else
|
||||
- {
|
||||
- int reg;
|
||||
- output_stack->top = value_reg_high - value_reg_low;
|
||||
- for (reg = value_reg_low; reg <= value_reg_high; ++reg)
|
||||
- {
|
||||
- (output_stack->reg + 16)[value_reg_high - reg] = reg;
|
||||
- output_stack->reg_set |= (unsigned long) 1 << reg;
|
||||
- }
|
||||
- }
|
||||
- return output_stack->reg[0];
|
||||
-}
|
||||
-
|
||||
-/* { dg-final { scan-tree-dump-not "Unrecognizable variable name"
|
||||
- "llc_allocate" } } */
|
||||
-/* { dg-final { scan-tree-dump-not "runtime issue" "llc_allocate" } } */
|
||||
-/* { dg-final { scan-tree-dump-not "static issue" "llc_allocate" } } */
|
||||
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-8-tmp-var-name.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-8-tmp-var-name.c
|
||||
deleted file mode 100644
|
||||
index 09a525ce1..000000000
|
||||
--- a/gcc/testsuite/gcc.dg/llc-allocate/llc-tool-insertion-8-tmp-var-name.c
|
||||
+++ /dev/null
|
||||
@@ -1,54 +0,0 @@
|
||||
-/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
||||
-/* { dg-options "-O3 -c -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param filter-kernels=0 --param target-variables=tmp_var_0" } */
|
||||
-
|
||||
-#include <stdio.h>
|
||||
-
|
||||
-typedef struct stack_def
|
||||
-{
|
||||
- int top; /* index to top stack element */
|
||||
- unsigned long reg_set; /* set of live registers */
|
||||
- unsigned char reg[128]; /* register - stack mapping */
|
||||
-} *stack;
|
||||
-
|
||||
-typedef struct block_info_def
|
||||
-{
|
||||
- struct stack_def stack_in; /* Input stack configuration. */
|
||||
- struct stack_def stack_out; /* Output stack configuration. */
|
||||
- unsigned long out_reg_set; /* Stack regs live on output. */
|
||||
- int done; /* True if block already converted. */
|
||||
- int predecessors; /* Number of predecessors that need
|
||||
- to be visited. */
|
||||
-} *block_info;
|
||||
-
|
||||
-typedef struct basic_block_def
|
||||
-{
|
||||
- void *aux;
|
||||
-} *basic_block;
|
||||
-
|
||||
-unsigned char
|
||||
-convert_regs_exit (basic_block bb, int value_reg_low, int value_reg_high)
|
||||
-{
|
||||
- stack output_stack;
|
||||
-
|
||||
- output_stack = &(((block_info) bb->aux)->stack_in);
|
||||
- if (value_reg_low == -1)
|
||||
- output_stack->top = -1;
|
||||
- else
|
||||
- {
|
||||
- int reg;
|
||||
- output_stack->top = value_reg_high - value_reg_low;
|
||||
- for (reg = value_reg_low; reg <= value_reg_high; ++reg)
|
||||
- {
|
||||
- (output_stack->reg + 16)[value_reg_high - reg] = reg;
|
||||
- output_stack->reg_set |= (unsigned long) 1 << reg;
|
||||
- }
|
||||
- }
|
||||
- return output_stack->reg[0];
|
||||
-}
|
||||
-
|
||||
-/* { dg-final { scan-tree-dump-not "Unrecognizable variable name"
|
||||
- "llc_allocate" } } */
|
||||
-/* { dg-final { scan-tree-dump-times "NOTICE: Prefetching target variable \""
|
||||
- " bb_16(D)->aux \"" 1 "llc_allocate" } } */
|
||||
-/* { dg-final { scan-tree-dump-not "runtime issue" "llc_allocate" } } */
|
||||
-/* { dg-final { scan-tree-dump-times "static issue" 1 "llc_allocate" } } */
|
||||
diff --git a/gcc/tree-ssa-llc-allocate.c b/gcc/tree-ssa-llc-allocate.c
|
||||
index 890f66e54..fa8979401 100644
|
||||
--- a/gcc/tree-ssa-llc-allocate.c
|
||||
+++ b/gcc/tree-ssa-llc-allocate.c
|
||||
@@ -23,7 +23,6 @@ along with GCC; see the file COPYING3. If not see
|
||||
#define INCLUDE_VECTOR
|
||||
#define INCLUDE_LIST
|
||||
#define INCLUDE_ALGORITHM
|
||||
-#define INCLUDE_STRING
|
||||
#include "system.h"
|
||||
#include "coretypes.h"
|
||||
#include "backend.h"
|
||||
@@ -1866,7 +1865,10 @@ filter_and_sort_kernels (vector<class loop *> &sorted_kernels,
|
||||
list<basic_block> walked_header_bb; /* Used to record nested loops. */
|
||||
|
||||
for (unsigned i = 0; i < kernels.size (); ++i)
|
||||
- end_bb.insert (kernels[i]->header);
|
||||
+ {
|
||||
+ if (kernels[i]->inner == NULL)
|
||||
+ end_bb.insert (kernels[i]->header);
|
||||
+ }
|
||||
|
||||
dump_loop_headers ("kernels", kernels);
|
||||
|
||||
@@ -2380,30 +2382,6 @@ issue_builtin_prefetch (data_ref &mem_ref)
|
||||
update_ssa (TODO_update_ssa_only_virtuals);
|
||||
}
|
||||
|
||||
-/* Retrieve memory reference at the specific index. */
|
||||
-
|
||||
-data_ref
|
||||
-get_data_ref_at_idx (ref_group &var_ref_group)
|
||||
-{
|
||||
- unsigned int mem_ref_size = static_cast<unsigned int>(
|
||||
- var_ref_group.ref_scores.size ());
|
||||
- if (strlen (param_mem_ref_index) == 0)
|
||||
- return var_ref_group.first_use;
|
||||
- else
|
||||
- {
|
||||
- /* Insert prefetch hint at highly-likely-used location with the given
|
||||
- index. */
|
||||
- if (var_ref_group.mem_ref_index >= mem_ref_size)
|
||||
- {
|
||||
- if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
- fprintf (dump_file, "WARNING: The target data_ref index is out "
|
||||
- "of range. Use top index instead!\n");
|
||||
- return var_ref_group.ref_scores[0].d_ref;
|
||||
- }
|
||||
- return var_ref_group.ref_scores[var_ref_group.mem_ref_index].d_ref;
|
||||
- }
|
||||
-}
|
||||
-
|
||||
/* Static form insertion and issue instruction. We may check the
|
||||
determination of the ARM SVE architecture before SVE hint insertion. */
|
||||
|
||||
@@ -2415,7 +2393,7 @@ static_issue (vector<ref_group> &ref_groups, int num_issue_var)
|
||||
|
||||
for (int i = 0; i < num_issue_var; ++i)
|
||||
{
|
||||
- data_ref mem_ref = get_data_ref_at_idx (ref_groups[i]);
|
||||
+ data_ref mem_ref = ref_groups[i].first_use;
|
||||
if (mem_ref.vectorize_p)
|
||||
{
|
||||
enum internal_fn ifn_code = gimple_call_internal_fn
|
||||
@@ -2591,10 +2569,7 @@ issue_llc_hint (vector<ref_group> &ref_groups)
|
||||
}
|
||||
if (param_force_issue)
|
||||
{
|
||||
- if (strlen (param_target_variables) > 0)
|
||||
- static_issue (ref_groups, static_cast<int>(ref_groups.size ()));
|
||||
- else
|
||||
- static_issue (ref_groups, num_issue_var);
|
||||
+ static_issue (ref_groups, num_issue_var);
|
||||
return;
|
||||
}
|
||||
calc_type topn_calc_type = STATIC_CALC;
|
||||
@@ -2626,224 +2601,6 @@ issue_llc_hint (vector<ref_group> &ref_groups)
|
||||
}
|
||||
|
||||
/* ==================== phase entry ==================== */
|
||||
-/* Check whether a string can be converted to an unsigned integer. */
|
||||
-
|
||||
-bool is_unsigned_int (const string &s)
|
||||
-{
|
||||
- if (s.empty () || s.size () > PREFETCH_TOOL_NUM_MAX_LEN)
|
||||
- return false;
|
||||
-
|
||||
- for (unsigned int i = 0; i < s.size (); ++i)
|
||||
- {
|
||||
- if (s[i] < '0' || s[i] > '9')
|
||||
- return false;
|
||||
- }
|
||||
- return true;
|
||||
-}
|
||||
-
|
||||
-/* Parse a substring separated by comma. If the substring is valid and
|
||||
- non-empty, store it as a parsed element. */
|
||||
-
|
||||
-bool
|
||||
-parse_string_helper (const string &substr, vector<string>& str_elts,
|
||||
- bool check_unsigned, size_t start, size_t end)
|
||||
-{
|
||||
- if (substr == "" && dump_file && (dump_flags & TDF_DETAILS))
|
||||
- fprintf (dump_file, "WARNING: The input string from %lu to %lu is "
|
||||
- "empty.\n", start, end);
|
||||
- else if (check_unsigned && !is_unsigned_int (substr))
|
||||
- {
|
||||
- if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
- fprintf (dump_file, "ERROR: not an unsigned integer: %s\n",
|
||||
- substr.c_str ());
|
||||
- str_elts.clear ();
|
||||
- return false;
|
||||
- }
|
||||
- else
|
||||
- str_elts.push_back (substr);
|
||||
- return true;
|
||||
-}
|
||||
-
|
||||
-/* Parse a user input string, separated by comma. */
|
||||
-
|
||||
-void
|
||||
-parse_string (const string &s, vector<string>& str_elts,
|
||||
- bool check_unsigned = false)
|
||||
-{
|
||||
- string delim = ",";
|
||||
- size_t start = 0;
|
||||
- size_t end = s.find (delim);
|
||||
- string substr = s.substr (start, end - start);
|
||||
- while (end != string::npos)
|
||||
- {
|
||||
- if (!parse_string_helper (substr, str_elts, check_unsigned, start, end))
|
||||
- return;
|
||||
- start = end + delim.size ();
|
||||
- end = s.find (delim, start);
|
||||
- substr = s.substr (start, end - start);
|
||||
- }
|
||||
- parse_string_helper (substr, str_elts, check_unsigned, start, end);
|
||||
-}
|
||||
-
|
||||
-/* Parse user input of target variables and memory indices and create a map
|
||||
- that assigns a target variable to a memory index. */
|
||||
-
|
||||
-void
|
||||
-parse_param_inputs (map<string, unsigned int> &var2mem_idx)
|
||||
-{
|
||||
- /* The user input length should have an input length limit. */
|
||||
- if ((strlen (param_target_variables) >= PREFETCH_TOOL_INPUT_MAX_LEN
|
||||
- || strlen (param_mem_ref_index) >= PREFETCH_TOOL_INPUT_MAX_LEN)
|
||||
- && dump_file && (dump_flags & TDF_DETAILS))
|
||||
- fprintf (dump_file, "INVALID INPUT: The user inputs for target variables "
|
||||
- "and/or memory reference indices are too long for parsing.\n");
|
||||
-
|
||||
- vector<string> var_names;
|
||||
- string target_variables = param_target_variables;
|
||||
- if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
- fprintf (dump_file, "Start parsing target variables:\n");
|
||||
- if (param_use_ref_group_index)
|
||||
- parse_string (target_variables, var_names, true);
|
||||
- else
|
||||
- parse_string (target_variables, var_names, false);
|
||||
- if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
- fprintf (dump_file, "Finish parsing target variables.\n\n");
|
||||
-
|
||||
- vector<string> var_mem_indices;
|
||||
- string mem_indices = param_mem_ref_index;
|
||||
- if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
- fprintf (dump_file, "Start parsing memory reference indices:\n");
|
||||
- parse_string (mem_indices, var_mem_indices, true);
|
||||
- if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
- fprintf (dump_file, "Finish parsing memory reference indices.\n\n");
|
||||
-
|
||||
- /* Construct a map of var_name: var_mem_index. */
|
||||
- if (var_names.size () > 0)
|
||||
- {
|
||||
- if (var_mem_indices.size () < var_names.size ())
|
||||
- {
|
||||
- if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
- fprintf (dump_file, "WARNING: The number of provided memory "
|
||||
- "reference indices is less than that of target "
|
||||
- "variables.\nUse the top index for all variables "
|
||||
- "instead.\n");
|
||||
- for (string& var_name : var_names)
|
||||
- var2mem_idx[var_name] = 0;
|
||||
- }
|
||||
- else
|
||||
- {
|
||||
- if (var_mem_indices.size () > var_names.size ()
|
||||
- && dump_file && (dump_flags & TDF_DETAILS))
|
||||
- fprintf (dump_file, "WARNING: The number of target variables is "
|
||||
- "less than that of memory reference indices.\n");
|
||||
- for (unsigned int i = 0; i < var_names.size (); ++i)
|
||||
- {
|
||||
- var2mem_idx[var_names[i]] = static_cast<unsigned int>(
|
||||
- atoi (var_mem_indices[i].c_str ()));
|
||||
- }
|
||||
- }
|
||||
- }
|
||||
-}
|
||||
-
|
||||
-/* Filter reference groups by only selecting target variables from the user
|
||||
- input. There are two options for prefetching target variables:
|
||||
- 1. Specify variable name parsed by the pass, which you can double-check at
|
||||
- "sorted ref_groups" section in the dump file.
|
||||
- 2. Specify variable rank exhibited at "sorted ref_groups" section in the
|
||||
- dump file.
|
||||
-*/
|
||||
-
|
||||
-void
|
||||
-prefetch_variables (const vector<ref_group>& ref_groups,
|
||||
- vector<ref_group>& reduced_ref_groups)
|
||||
-{
|
||||
- map<unsigned int, unsigned int> ref_group2mem_idx;
|
||||
-
|
||||
- map<string, unsigned int> var2mem_idx; /* externally defined. */
|
||||
- parse_param_inputs (var2mem_idx);
|
||||
-
|
||||
- if (param_use_ref_group_index)
|
||||
- {
|
||||
- /* Use ref_group index at "sorted ref_groups" section to specify
|
||||
- variable. */
|
||||
- /* Collect the variables in "reduced_ref_group" only if their indices
|
||||
- show up at "sorted ref_groups" section. */
|
||||
- for (const pair<string, unsigned int> &var_mem_idx : var2mem_idx)
|
||||
- {
|
||||
- unsigned int var_idx = static_cast<unsigned int>(atoi (
|
||||
- var_mem_idx.first.c_str ()));
|
||||
- if (var_idx < ref_groups.size ())
|
||||
- ref_group2mem_idx[var_idx] = var_mem_idx.second;
|
||||
- else if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
- fprintf (dump_file, "WARNING: The index \"%u\" does not show "
|
||||
- "up in the ref_groups.\n", var_idx);
|
||||
- }
|
||||
- }
|
||||
- else
|
||||
- {
|
||||
- /* Use variable name shown up at "sorted ref_groups" section to specify
|
||||
- variable:
|
||||
- var2ref_group_idx + var2mem_idx -> ref_group2mem_idx. */
|
||||
- /* Create a map that assigns the variable name to its corresponding
|
||||
- ref_group index. */
|
||||
- map<string, unsigned int> var2ref_group_idx; /* internally detected. */
|
||||
- for (unsigned int i = 0; i < ref_groups.size (); ++i)
|
||||
- {
|
||||
- const ref_group &curr_ref_group = ref_groups[i];
|
||||
- const int UINT_MAX_DIGIT = 10;
|
||||
- /* Unrecognizable variable name related to ref_group. */
|
||||
- if (!get_name (curr_ref_group.var))
|
||||
- {
|
||||
- /* If the variable name does not have a string representation,
|
||||
- we can rename it by "tmp_var_" + <sorted_ref_group_index>. */
|
||||
- char group_idx[UINT_MAX_DIGIT];
|
||||
- sprintf (group_idx, "%u", i);
|
||||
- string tmp_var_name = "tmp_var_" + std::string (group_idx);
|
||||
- fprintf (dump_file, "Unrecognizable variable name at ref_group "
|
||||
- "index %u.\nThe tree expression for variable is: ", i);
|
||||
- print_generic_expr (dump_file, curr_ref_group.var, TDF_SLIM);
|
||||
- fprintf (dump_file, "\n");
|
||||
- var2ref_group_idx[tmp_var_name] = i;
|
||||
- }
|
||||
- else
|
||||
- var2ref_group_idx[std::string (get_name (curr_ref_group.var))] = i;
|
||||
- }
|
||||
- /* Collect the variables in "reduced_ref_group" only if they show up in
|
||||
- the ref_groups. */
|
||||
- for (const pair<string, unsigned int> &var_mem_idx : var2mem_idx)
|
||||
- {
|
||||
- if (var2ref_group_idx.count (var_mem_idx.first))
|
||||
- {
|
||||
- unsigned int ref_group_idx = var2ref_group_idx[var_mem_idx.first];
|
||||
- ref_group2mem_idx[ref_group_idx] = var_mem_idx.second;
|
||||
- }
|
||||
- else if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
- fprintf (dump_file, "WARNING: Target variable \" %s \" does "
|
||||
- "not show up in the ref_groups. Check whether it needs "
|
||||
- "temporary variable name.\n",
|
||||
- var_mem_idx.first.c_str ());
|
||||
- }
|
||||
- }
|
||||
-
|
||||
- for (const pair<unsigned int, unsigned int> &ref_group_mem_idx :
|
||||
- ref_group2mem_idx)
|
||||
- {
|
||||
- ref_group curr_ref_group = ref_groups[ref_group_mem_idx.first];
|
||||
- curr_ref_group.mem_ref_index = ref_group_mem_idx.second;
|
||||
- reduced_ref_groups.push_back (curr_ref_group);
|
||||
- if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
- {
|
||||
- fprintf (dump_file, "\nNOTICE: Prefetching target variable \" ");
|
||||
- print_generic_expr (dump_file, curr_ref_group.var, TDF_SLIM);
|
||||
- fprintf (dump_file, " \" at ref_group index %u and memory location "
|
||||
- "index %u.\n", ref_group_mem_idx.first,
|
||||
- ref_group_mem_idx.second);
|
||||
- }
|
||||
- }
|
||||
- if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
- fprintf (dump_file, "\n\n");
|
||||
-}
|
||||
-
|
||||
|
||||
/* The LLC intelligent allocation consists of 6 steps. */
|
||||
|
||||
@@ -2869,17 +2626,7 @@ llc_allocate (void)
|
||||
if (!record_and_sort_ref_groups (ref_groups, sorted_kernels, kernels_refs))
|
||||
return;
|
||||
|
||||
- if (strlen (param_target_variables) > 0)
|
||||
- {
|
||||
- /* If "param_target_variables" is not empty, we will issue parsed target
|
||||
- variables compulsorily. */
|
||||
- param_force_issue = true;
|
||||
- vector<ref_group> reduced_ref_groups;
|
||||
- prefetch_variables (ref_groups, reduced_ref_groups);
|
||||
- issue_llc_hint (reduced_ref_groups);
|
||||
- }
|
||||
- else
|
||||
- issue_llc_hint (ref_groups);
|
||||
+ issue_llc_hint (ref_groups);
|
||||
}
|
||||
|
||||
/* Check whether the function is an operator reloading function. */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,30 @@
|
||||
From 506eb1ff5ca27cd8d741ddf1894a32645919f773 Mon Sep 17 00:00:00 2001
|
||||
From: vchernon <chernonog.vyacheslav@huawei.com>
|
||||
Date: Sun, 17 Dec 2023 01:47:02 +0800
|
||||
Subject: [PATCH] [rtl-ifcvt][BugFix] change def selection logic in noce_arith
|
||||
|
||||
---
|
||||
gcc/ifcvt.c | 5 ++---
|
||||
1 file changed, 2 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
|
||||
index 04086c560..a55ac16f3 100644
|
||||
--- a/gcc/ifcvt.c
|
||||
+++ b/gcc/ifcvt.c
|
||||
@@ -1985,11 +1985,10 @@ bbs_ok_for_cmove_arith (basic_block bb_a,
|
||||
|
||||
if (!sset_a)
|
||||
goto end_cmove_arith_check_and_fail;
|
||||
- if (a_insn == last_a)
|
||||
- continue;
|
||||
/* Record all registers that BB_A sets. */
|
||||
FOR_EACH_INSN_DEF (def, a_insn)
|
||||
- bitmap_set_bit (bba_sets, DF_REF_REGNO (def));
|
||||
+ if (!(to_rename && DF_REF_REG (def) == to_rename && a_insn == last_a))
|
||||
+ bitmap_set_bit (bba_sets, DF_REF_REGNO (def));
|
||||
}
|
||||
|
||||
bitmap_and (intersections, df_get_live_in (bb_b), bba_sets);
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,57 @@
|
||||
From 43b6906c94ce6a683d325b8789267b7ee2d9bf15 Mon Sep 17 00:00:00 2001
|
||||
From: Diachkov Ilia WX1215920 <diachkov.ilia1@huawei-partners.com>
|
||||
Date: Sat, 16 Dec 2023 11:56:30 +0300
|
||||
Subject: [PATCH] [perm propagation][Bugfix] Check that the arithmetic
|
||||
operations follow each other
|
||||
|
||||
---
|
||||
gcc/tree-ssa-forwprop.c | 20 +++++++++++++++++---
|
||||
1 file changed, 17 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c
|
||||
index 92ef5d036..d5e9ca9bb 100644
|
||||
--- a/gcc/tree-ssa-forwprop.c
|
||||
+++ b/gcc/tree-ssa-forwprop.c
|
||||
@@ -2662,14 +2662,28 @@ check_def_use_order (vec<gimple *> &first_stmts, vec<gimple *> &second_stmts)
|
||||
/* Check similarity of stmts in the block of arithmetic operations. */
|
||||
|
||||
static bool
|
||||
-check_arithmetic_block (vec<gimple *> &initial_perm_stmts, unsigned nstmts)
|
||||
+check_arithmetic_block (auto_vec<gimple *> &all_arith_stmts,
|
||||
+ vec<gimple *> &initial_perm_stmts, unsigned nstmts)
|
||||
{
|
||||
auto_vec<gimple *> next_stmts (nstmts);
|
||||
auto_vec<gimple *> prev_stmts (nstmts);
|
||||
+ hash_set<gimple *> arith_stmt_set;
|
||||
|
||||
enum tree_code code;
|
||||
unsigned i;
|
||||
- gimple *stmt_it;
|
||||
+ gimple *stmt_it, *last_stmt = all_arith_stmts[all_arith_stmts.length () - 1];
|
||||
+
|
||||
+ /* Check that the arithmetic operations follow each other. */
|
||||
+ all_arith_stmts.qsort (gimple_uid_cmp);
|
||||
+ FOR_EACH_VEC_ELT (all_arith_stmts, i, stmt_it)
|
||||
+ arith_stmt_set.add (stmt_it);
|
||||
+
|
||||
+ gimple_stmt_iterator gsi;
|
||||
+ for (gsi = gsi_for_stmt (all_arith_stmts[0]); gsi_stmt (gsi) != last_stmt;
|
||||
+ gsi_next (&gsi))
|
||||
+ if (!arith_stmt_set.contains (gsi_stmt (gsi)))
|
||||
+ return false;
|
||||
+
|
||||
FOR_EACH_VEC_ELT (initial_perm_stmts, i, stmt_it)
|
||||
prev_stmts.quick_push (stmt_it);
|
||||
|
||||
@@ -2778,7 +2792,7 @@ analyze_perm_fwprop (tree type, unsigned HOST_WIDE_INT nelts,
|
||||
}
|
||||
|
||||
/* Check that all results has the same arithmetic patterns. */
|
||||
- if (!check_arithmetic_block (final_arith_stmts, nelts))
|
||||
+ if (!check_arithmetic_block (all_arith_stmts, final_arith_stmts, nelts))
|
||||
return false;
|
||||
|
||||
if (final_arith_stmts.length () < nelts)
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,62 @@
|
||||
From bed123b58aaf435653e01692830def8d564cf51f Mon Sep 17 00:00:00 2001
|
||||
From: Diachkov Ilia WX1215920 <diachkov.ilia1@huawei-partners.com>
|
||||
Date: Mon, 18 Dec 2023 22:49:54 +0300
|
||||
Subject: [PATCH] [perm propagation][Bugfix] Fix shll/shll2 patterns for perm
|
||||
prop
|
||||
|
||||
---
|
||||
gcc/config/aarch64/aarch64-simd.md | 8 ++++----
|
||||
gcc/config/aarch64/predicates.md | 7 +++++++
|
||||
2 files changed, 11 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
|
||||
index 66fcf0074..c7503561f 100644
|
||||
--- a/gcc/config/aarch64/aarch64-simd.md
|
||||
+++ b/gcc/config/aarch64/aarch64-simd.md
|
||||
@@ -4791,10 +4791,10 @@
|
||||
(vec_select:V4HI
|
||||
(match_operand:V8HI 1 "register_operand" "w")
|
||||
(match_operand:V8HI 2 "vect_par_cnst_lo_half" "")))
|
||||
- (match_operand:V4SI 3 "aarch64_simd_rshift_imm" "Dr")))]
|
||||
+ (match_operand:V4SI 3 "aarch64_simd_shift_imm_bitsize_v4si" "i")))]
|
||||
"TARGET_SIMD"
|
||||
"shll\t%0.4s, %1.4h, #%3"
|
||||
- [(set_attr "type" "neon_compare_zero")]
|
||||
+ [(set_attr "type" "neon_shift_imm_long")]
|
||||
)
|
||||
|
||||
;; vshll_high_n
|
||||
@@ -4821,10 +4821,10 @@
|
||||
(vec_select:V4HI
|
||||
(match_operand:V8HI 1 "register_operand" "w")
|
||||
(match_operand:V8HI 2 "vect_par_cnst_hi_half" "")))
|
||||
- (match_operand:V4SI 3 "aarch64_simd_rshift_imm" "Dr")))]
|
||||
+ (match_operand:V4SI 3 "aarch64_simd_shift_imm_bitsize_v4si" "i")))]
|
||||
"TARGET_SIMD"
|
||||
"shll2\t%0.4s, %1.8h, #%3"
|
||||
- [(set_attr "type" "neon_compare_zero")]
|
||||
+ [(set_attr "type" "neon_shift_imm_long")]
|
||||
)
|
||||
|
||||
;; vrshr_n
|
||||
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
|
||||
index b1b3cf82c..90db0efba 100644
|
||||
--- a/gcc/config/aarch64/predicates.md
|
||||
+++ b/gcc/config/aarch64/predicates.md
|
||||
@@ -618,6 +618,13 @@
|
||||
(and (match_code "const_int")
|
||||
(match_test "IN_RANGE (INTVAL (op), 0, 64)")))
|
||||
|
||||
+(define_predicate "aarch64_simd_shift_imm_bitsize_v4si"
|
||||
+ (match_code "const_vector")
|
||||
+{
|
||||
+ HOST_WIDE_INT val = INTVAL (unwrap_const_vec_duplicate (op));
|
||||
+ return val == 8 || val == 16 || val == 32;
|
||||
+})
|
||||
+
|
||||
(define_predicate "aarch64_constant_pool_symref"
|
||||
(and (match_code "symbol_ref")
|
||||
(match_test "CONSTANT_POOL_ADDRESS_P (op)")))
|
||||
--
|
||||
2.33.0
|
||||
|
||||
175
0168-LLC-Allocation-Bugfix-Terminate-kernel-filtering-for.patch
Normal file
175
0168-LLC-Allocation-Bugfix-Terminate-kernel-filtering-for.patch
Normal file
@ -0,0 +1,175 @@
|
||||
From 4369e823f0883c079c0681bef68cead870d02063 Mon Sep 17 00:00:00 2001
|
||||
From: Feiyang Liu <liufeiyang6@huawei.com>
|
||||
Date: Wed, 20 Dec 2023 09:48:02 +0800
|
||||
Subject: [PATCH] [LLC Allocation][Bugfix] Terminate kernel filtering for
|
||||
same-loop cycle.
|
||||
|
||||
---
|
||||
.../gcc.dg/llc-allocate/llc-same-loop-cycle.c | 125 ++++++++++++++++++
|
||||
gcc/tree-ssa-llc-allocate.c | 11 +-
|
||||
2 files changed, 135 insertions(+), 1 deletion(-)
|
||||
create mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-same-loop-cycle.c
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-same-loop-cycle.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-same-loop-cycle.c
|
||||
new file mode 100644
|
||||
index 000000000..ba5b5b0c8
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/llc-allocate/llc-same-loop-cycle.c
|
||||
@@ -0,0 +1,125 @@
|
||||
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
||||
+/* { dg-options "-O3 -fwhole-program -flto-partition=one -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param filter-kernels=1 --param=branch-prob-threshold=50 -c -w" } */
|
||||
+
|
||||
+typedef unsigned long size_t;
|
||||
+typedef long scalar_t__;
|
||||
+
|
||||
+typedef struct TYPE_13__ TYPE_3__ ;
|
||||
+typedef struct TYPE_12__ TYPE_2__ ;
|
||||
+typedef struct TYPE_11__ TYPE_1__ ;
|
||||
+
|
||||
+struct dom_info {int nodes; int* dfs_parent; int* dfs_order; int* key; int* next_bucket; int* bucket; int* dom; int fake_exit_edge; TYPE_3__** dfs_to_bb; } ;
|
||||
+typedef enum cdi_direction { ____Placeholder_cdi_direction } cdi_direction ;
|
||||
+struct TYPE_11__ {scalar_t__ index; } ;
|
||||
+typedef TYPE_1__ edge_iterator ;
|
||||
+typedef TYPE_2__* edge ;
|
||||
+typedef TYPE_3__* basic_block ;
|
||||
+struct TYPE_13__ {size_t index; int preds; int succs; } ;
|
||||
+struct TYPE_12__ {TYPE_3__* src; TYPE_3__* dest; } ;
|
||||
+typedef int TBB ;
|
||||
+
|
||||
+basic_block ENTRY_BLOCK_PTR ;
|
||||
+basic_block EXIT_BLOCK_PTR ;
|
||||
+scalar_t__ bitmap_bit_p (int,size_t) ;
|
||||
+edge ei_edge (edge_iterator) ;
|
||||
+int ei_end_p (edge_iterator) ;
|
||||
+int ei_next (edge_iterator*) ;
|
||||
+edge_iterator ei_start (int) ;
|
||||
+size_t eval (struct dom_info*,int) ;
|
||||
+size_t last_basic_block ;
|
||||
+int link_roots (struct dom_info*,int,int) ;
|
||||
+
|
||||
+__attribute__((used)) static void
|
||||
+calc_idoms (struct dom_info *di, enum cdi_direction reverse)
|
||||
+{
|
||||
+ TBB v, w, k, par;
|
||||
+ basic_block en_block;
|
||||
+ edge_iterator ei, einext;
|
||||
+
|
||||
+ if (reverse)
|
||||
+ en_block = EXIT_BLOCK_PTR;
|
||||
+ else
|
||||
+ en_block = ENTRY_BLOCK_PTR;
|
||||
+
|
||||
+ /* Go backwards in DFS order, to first look at the leafs. */
|
||||
+ v = di->nodes;
|
||||
+ while (v > 1)
|
||||
+ {
|
||||
+ basic_block bb = di->dfs_to_bb[v];
|
||||
+ edge e;
|
||||
+
|
||||
+ par = di->dfs_parent[v];
|
||||
+ k = v;
|
||||
+
|
||||
+ ei = (reverse) ? ei_start (bb->succs) : ei_start (bb->preds);
|
||||
+
|
||||
+ if (reverse)
|
||||
+ {
|
||||
+ /* If this block has a fake edge to exit, process that first. */
|
||||
+ if (bitmap_bit_p (di->fake_exit_edge, bb->index))
|
||||
+ {
|
||||
+ einext = ei;
|
||||
+ einext.index = 0;
|
||||
+ goto do_fake_exit_edge;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ /* Search all direct predecessors for the smallest node with a path
|
||||
+ to them. That way we have the smallest node with also a path to
|
||||
+ us only over nodes behind us. In effect we search for our
|
||||
+ semidominator. */
|
||||
+ while (!ei_end_p (ei))
|
||||
+ {
|
||||
+ basic_block b;
|
||||
+ TBB k1;
|
||||
+
|
||||
+ e = ei_edge (ei);
|
||||
+ b = (reverse) ? e->dest : e->src;
|
||||
+ einext = ei;
|
||||
+ ei_next (&einext);
|
||||
+
|
||||
+ if (b == en_block)
|
||||
+ {
|
||||
+ do_fake_exit_edge:
|
||||
+ k1 = di->dfs_order[last_basic_block];
|
||||
+ }
|
||||
+ else
|
||||
+ k1 = di->dfs_order[b->index];
|
||||
+
|
||||
+ /* Call eval() only if really needed. If k1 is above V in DFS tree,
|
||||
+ then we know, that eval(k1) == k1 and key[k1] == k1. */
|
||||
+ if (k1 > v)
|
||||
+ k1 = di->key[eval (di, k1)];
|
||||
+ if (k1 < k)
|
||||
+ k = k1;
|
||||
+
|
||||
+ ei = einext;
|
||||
+ }
|
||||
+
|
||||
+ di->key[v] = k;
|
||||
+ link_roots (di, par, v);
|
||||
+ di->next_bucket[v] = di->bucket[k];
|
||||
+ di->bucket[k] = v;
|
||||
+
|
||||
+ /* Transform semidominators into dominators. */
|
||||
+ for (w = di->bucket[par]; w; w = di->next_bucket[w])
|
||||
+ {
|
||||
+ k = eval (di, w);
|
||||
+ if (di->key[k] < di->key[w])
|
||||
+ di->dom[w] = k;
|
||||
+ else
|
||||
+ di->dom[w] = par;
|
||||
+ }
|
||||
+ /* We don't need to cleanup next_bucket[]. */
|
||||
+ di->bucket[par] = 0;
|
||||
+ v--;
|
||||
+ }
|
||||
+
|
||||
+ /* Explicitly define the dominators. */
|
||||
+ di->dom[1] = 0;
|
||||
+ for (v = 2; v <= di->nodes; v++)
|
||||
+ if (di->dom[v] != di->key[v])
|
||||
+ di->dom[v] = di->dom[di->dom[v]];
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump "Find same-loop cycle." "llc_allocate" } } */
|
||||
diff --git a/gcc/tree-ssa-llc-allocate.c b/gcc/tree-ssa-llc-allocate.c
|
||||
index fa8979401..62b5f18ad 100644
|
||||
--- a/gcc/tree-ssa-llc-allocate.c
|
||||
+++ b/gcc/tree-ssa-llc-allocate.c
|
||||
@@ -1863,6 +1863,7 @@ filter_and_sort_kernels (vector<class loop *> &sorted_kernels,
|
||||
|
||||
set<basic_block> end_bb;
|
||||
list<basic_block> walked_header_bb; /* Used to record nested loops. */
|
||||
+ set<int> walked_non_header_bb_idx;
|
||||
|
||||
for (unsigned i = 0; i < kernels.size (); ++i)
|
||||
{
|
||||
@@ -1895,7 +1896,15 @@ filter_and_sort_kernels (vector<class loop *> &sorted_kernels,
|
||||
/* bb is not the head of the loop, go to the next. */
|
||||
if (bb != bb->loop_father->header)
|
||||
{
|
||||
- bb = next_high_probability_bb (bb);
|
||||
+ if (walked_non_header_bb_idx.count (bb->index))
|
||||
+ {
|
||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
+ fprintf (dump_file, "Find same-loop cycle. "
|
||||
+ "Abort filtering process.\n");
|
||||
+ return false;
|
||||
+ }
|
||||
+ walked_non_header_bb_idx.insert (bb->index);
|
||||
+ bb = next_high_probability_bb (bb);
|
||||
continue;
|
||||
}
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
183
0169-Struct-Reorg-Fix-several-bugs.patch
Normal file
183
0169-Struct-Reorg-Fix-several-bugs.patch
Normal file
@ -0,0 +1,183 @@
|
||||
From 708ffe6f132ee39441b66b6ab6b98847d35916b7 Mon Sep 17 00:00:00 2001
|
||||
From: eastb233 <xiezhiheng@huawei.com>
|
||||
Date: Tue, 19 Dec 2023 17:03:12 +0800
|
||||
Subject: [PATCH 1/2] [Struct Reorg] Fix several bugs
|
||||
|
||||
---
|
||||
gcc/ipa-struct-reorg/ipa-struct-reorg.c | 50 ++++++-------------
|
||||
gcc/testsuite/gcc.dg/struct/struct_reorg-10.c | 29 +++++++++++
|
||||
gcc/testsuite/gcc.dg/struct/struct_reorg-11.c | 16 ++++++
|
||||
gcc/testsuite/gcc.dg/struct/struct_reorg-12.c | 26 ++++++++++
|
||||
4 files changed, 85 insertions(+), 36 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-10.c
|
||||
create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-11.c
|
||||
create mode 100644 gcc/testsuite/gcc.dg/struct/struct_reorg-12.c
|
||||
|
||||
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
||||
index 7aba74ff1..0064811ac 100644
|
||||
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
||||
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
||||
@@ -4105,6 +4105,12 @@ ipa_struct_reorg::maybe_record_assign (cgraph_node *node, gassign *stmt)
|
||||
maybe_mark_or_record_other_side (rhs, lhs, stmt);
|
||||
if (TREE_CODE (lhs) == SSA_NAME)
|
||||
maybe_mark_or_record_other_side (lhs, rhs, stmt);
|
||||
+
|
||||
+ /* Handle missing ARRAY_REF cases. */
|
||||
+ if (TREE_CODE (lhs) == ARRAY_REF)
|
||||
+ mark_type_as_escape (TREE_TYPE (lhs), escape_array, stmt);
|
||||
+ if (TREE_CODE (rhs) == ARRAY_REF)
|
||||
+ mark_type_as_escape (TREE_TYPE (rhs), escape_array, stmt);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6169,6 +6175,7 @@ ipa_struct_reorg::rewrite_expr (tree expr, tree newexpr[max_split], bool ignore_
|
||||
bool escape_from_base = false;
|
||||
|
||||
tree newbase[max_split];
|
||||
+ memset (newbase, 0, sizeof (tree[max_split]));
|
||||
memset (newexpr, 0, sizeof(tree[max_split]));
|
||||
|
||||
if (TREE_CODE (expr) == CONSTRUCTOR)
|
||||
@@ -8162,43 +8169,14 @@ ipa_struct_reorg::rewrite_cond (gcond *stmt,
|
||||
should be removed. */
|
||||
|
||||
bool
|
||||
-ipa_struct_reorg::rewrite_debug (gimple *stmt, gimple_stmt_iterator *)
|
||||
+ipa_struct_reorg::rewrite_debug (gimple *, gimple_stmt_iterator *)
|
||||
{
|
||||
- if (current_layout_opt_level >= STRUCT_REORDER_FIELDS)
|
||||
- {
|
||||
- /* Delete debug gimple now. */
|
||||
- return true;
|
||||
- }
|
||||
- bool remove = false;
|
||||
- if (gimple_debug_bind_p (stmt))
|
||||
- {
|
||||
- tree var = gimple_debug_bind_get_var (stmt);
|
||||
- tree newvar[max_split];
|
||||
- if (rewrite_expr (var, newvar, true))
|
||||
- remove = true;
|
||||
- if (gimple_debug_bind_has_value_p (stmt))
|
||||
- {
|
||||
- var = gimple_debug_bind_get_value (stmt);
|
||||
- if (TREE_CODE (var) == POINTER_PLUS_EXPR)
|
||||
- var = TREE_OPERAND (var, 0);
|
||||
- if (rewrite_expr (var, newvar, true))
|
||||
- remove = true;
|
||||
- }
|
||||
- }
|
||||
- else if (gimple_debug_source_bind_p (stmt))
|
||||
- {
|
||||
- tree var = gimple_debug_source_bind_get_var (stmt);
|
||||
- tree newvar[max_split];
|
||||
- if (rewrite_expr (var, newvar, true))
|
||||
- remove = true;
|
||||
- var = gimple_debug_source_bind_get_value (stmt);
|
||||
- if (TREE_CODE (var) == POINTER_PLUS_EXPR)
|
||||
- var = TREE_OPERAND (var, 0);
|
||||
- if (rewrite_expr (var, newvar, true))
|
||||
- remove = true;
|
||||
- }
|
||||
-
|
||||
- return remove;
|
||||
+ /* In debug statements, there might be some statements that have
|
||||
+ been optimized out in gimple but left in debug gimple. Sometimes
|
||||
+ these statements need to be analyzed to escape, but in rewrite
|
||||
+ stage it shouldn't happen. It needs to care a lot to handle these
|
||||
+ cases but seems useless. So now we just delete debug gimple. */
|
||||
+ return true;
|
||||
}
|
||||
|
||||
/* Rewrite PHI nodes, return true if the PHI was replaced. */
|
||||
diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-10.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-10.c
|
||||
new file mode 100644
|
||||
index 000000000..ec422f76f
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-10.c
|
||||
@@ -0,0 +1,29 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-w -g -O3 -flto-partition=one -fipa-struct-reorg -fwhole-program -S" } */
|
||||
+
|
||||
+struct a {
|
||||
+ int b;
|
||||
+ char c;
|
||||
+};
|
||||
+struct {
|
||||
+ double d;
|
||||
+ _Bool e;
|
||||
+} * f;
|
||||
+struct g {
|
||||
+ struct a h;
|
||||
+} i;
|
||||
+long j;
|
||||
+void k();
|
||||
+void l() { k(i); }
|
||||
+void k(struct a m) {
|
||||
+ f->e = 0;
|
||||
+ for (;;)
|
||||
+ l();
|
||||
+}
|
||||
+int main() {
|
||||
+ for (; j; f = 0) {
|
||||
+ struct g *n = 0;
|
||||
+ char o = n->h.c;
|
||||
+ }
|
||||
+ l();
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-11.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-11.c
|
||||
new file mode 100644
|
||||
index 000000000..3e42aa84a
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-11.c
|
||||
@@ -0,0 +1,16 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-w -g -O3 -flto-partition=one -fipa-struct-reorg -fwhole-program -S" } */
|
||||
+
|
||||
+struct a {
|
||||
+ int b;
|
||||
+ double c;
|
||||
+};
|
||||
+struct d {
|
||||
+ struct a e;
|
||||
+};
|
||||
+int f;
|
||||
+int main() {
|
||||
+ _Bool g;
|
||||
+ struct d **h = 0;
|
||||
+ g = *h += f;
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.dg/struct/struct_reorg-12.c b/gcc/testsuite/gcc.dg/struct/struct_reorg-12.c
|
||||
new file mode 100644
|
||||
index 000000000..d434f9fe0
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/struct/struct_reorg-12.c
|
||||
@@ -0,0 +1,26 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-w -g -O3 -flto-partition=one -fipa-struct-reorg -fwhole-program -S" } */
|
||||
+
|
||||
+struct foo {
|
||||
+ long element1;
|
||||
+ long element2;
|
||||
+};
|
||||
+
|
||||
+struct goo {
|
||||
+ struct foo element_foo;
|
||||
+};
|
||||
+
|
||||
+struct goo g1;
|
||||
+
|
||||
+void func () {
|
||||
+ struct foo (*local)[] = 0;
|
||||
+ long idx;
|
||||
+ (g1).element_foo = (*local)[idx];
|
||||
+}
|
||||
+
|
||||
+struct foo g2;
|
||||
+int main () {
|
||||
+ func ();
|
||||
+ g2 = g1.element_foo;
|
||||
+ return 0;
|
||||
+}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
104
0170-DFE-Add-escape-check.patch
Normal file
104
0170-DFE-Add-escape-check.patch
Normal file
@ -0,0 +1,104 @@
|
||||
From e875e4e7f3716aa268ffbbf55ee199ec82b6aeba Mon Sep 17 00:00:00 2001
|
||||
From: Mingchuan Wu <wumingchuan1992@foxmail.com>
|
||||
Date: Thu, 21 Dec 2023 15:50:34 +0800
|
||||
Subject: [PATCH 2/2] [DFE] Add escape check. Fields with escape risks should
|
||||
not be processed.
|
||||
|
||||
---
|
||||
gcc/ipa-struct-reorg/ipa-struct-reorg.c | 15 +++++--
|
||||
gcc/testsuite/gcc.dg/struct/dfe_escape.c | 50 ++++++++++++++++++++++++
|
||||
2 files changed, 62 insertions(+), 3 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.dg/struct/dfe_escape.c
|
||||
|
||||
diff --git a/gcc/ipa-struct-reorg/ipa-struct-reorg.c b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
||||
index 0064811ac..dcfa7cd95 100644
|
||||
--- a/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
||||
+++ b/gcc/ipa-struct-reorg/ipa-struct-reorg.c
|
||||
@@ -444,8 +444,13 @@ srtype::has_dead_field (void)
|
||||
if (!(this_field->field_access & READ_FIELD)
|
||||
&& !FUNCTION_POINTER_TYPE_P (this_field->fieldtype))
|
||||
{
|
||||
- may_dfe = true;
|
||||
- break;
|
||||
+ /* Fields with escape risks should not be processed. */
|
||||
+ if (this_field->type == NULL
|
||||
+ || (this_field->type->escapes == does_not_escape))
|
||||
+ {
|
||||
+ may_dfe = true;
|
||||
+ break;
|
||||
+ }
|
||||
}
|
||||
}
|
||||
return may_dfe;
|
||||
@@ -1030,7 +1035,11 @@ srtype::create_new_type (void)
|
||||
if (current_layout_opt_level & DEAD_FIELD_ELIMINATION
|
||||
&& !(f->field_access & READ_FIELD)
|
||||
&& !FUNCTION_POINTER_TYPE_P (f->fieldtype))
|
||||
- continue;
|
||||
+ {
|
||||
+ /* Fields with escape risks should not be processed. */
|
||||
+ if (f->type == NULL || (f->type->escapes == does_not_escape))
|
||||
+ continue;
|
||||
+ }
|
||||
f->create_new_fields (newtype, newfields, newlast);
|
||||
}
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.dg/struct/dfe_escape.c b/gcc/testsuite/gcc.dg/struct/dfe_escape.c
|
||||
new file mode 100644
|
||||
index 000000000..1b143cd26
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/struct/dfe_escape.c
|
||||
@@ -0,0 +1,50 @@
|
||||
+/* { dg-do compile } */
|
||||
+
|
||||
+#include <stdio.h>
|
||||
+#include <stdlib.h>
|
||||
+
|
||||
+typedef struct arc arc_t;
|
||||
+typedef struct arc *arc_p;
|
||||
+
|
||||
+typedef struct network
|
||||
+{
|
||||
+ int x;
|
||||
+} network_t;
|
||||
+
|
||||
+struct arc
|
||||
+{
|
||||
+ int flow;
|
||||
+ network_t* net_add;
|
||||
+};
|
||||
+
|
||||
+const int MAX = 100;
|
||||
+
|
||||
+/* let it escape_array, "Type is used in an array [not handled yet]". */
|
||||
+network_t* net[2];
|
||||
+arc_p stop_arcs = NULL;
|
||||
+
|
||||
+int
|
||||
+main ()
|
||||
+{
|
||||
+ net[0] = (network_t*) calloc (1, sizeof(network_t));
|
||||
+ stop_arcs = (arc_p) calloc (MAX, sizeof (arc_t));
|
||||
+
|
||||
+ net[0]->x = 100;
|
||||
+
|
||||
+ for (unsigned i = 0; i < 3; i++)
|
||||
+ {
|
||||
+ net[0]->x = net[0]->x + 2;
|
||||
+ stop_arcs->flow = net[0]->x / 2;
|
||||
+ stop_arcs->flow = stop_arcs->flow + 20;
|
||||
+ stop_arcs->net_add = net[0];
|
||||
+ stop_arcs++;
|
||||
+ }
|
||||
+
|
||||
+ if( net[1] != 0 && stop_arcs != 0)
|
||||
+ {
|
||||
+ return -1;
|
||||
+ }
|
||||
+ return 0;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-ipa-dump-times "Dead field elimination" 0 "struct_reorg" } } */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,80 @@
|
||||
From 1f4d422fd8008f0af015df53f496c6dce3534b26 Mon Sep 17 00:00:00 2001
|
||||
From: Mingchuan Wu <wumingchuan1992@foxmail.com>
|
||||
Date: Fri, 22 Dec 2023 11:38:15 +0800
|
||||
Subject: [PATCH] [phiopt][testsuite] Add -ftree-fold-phiopt option to 5 test
|
||||
cases.
|
||||
|
||||
Modified test cases include:
|
||||
1.gcc.dg/pr45416.c
|
||||
2.gcc.target/i386/pr65871-3.c
|
||||
3.g++.dg/opt/pr99305.C
|
||||
4.gcc.dg/pr107190.c
|
||||
5.g++.dg/tree-ssa/mull64.C
|
||||
---
|
||||
gcc/testsuite/g++.dg/opt/pr99305.C | 2 +-
|
||||
gcc/testsuite/g++.dg/tree-ssa/mull64.C | 2 +-
|
||||
gcc/testsuite/gcc.dg/pr107190.c | 2 +-
|
||||
gcc/testsuite/gcc.dg/pr45416.c | 2 +-
|
||||
gcc/testsuite/gcc.target/i386/pr65871-3.c | 2 +-
|
||||
5 files changed, 5 insertions(+), 5 deletions(-)
|
||||
|
||||
diff --git a/gcc/testsuite/g++.dg/opt/pr99305.C b/gcc/testsuite/g++.dg/opt/pr99305.C
|
||||
index 6fcdef391..06295116f 100644
|
||||
--- a/gcc/testsuite/g++.dg/opt/pr99305.C
|
||||
+++ b/gcc/testsuite/g++.dg/opt/pr99305.C
|
||||
@@ -1,6 +1,6 @@
|
||||
// PR tree-optimization/99305
|
||||
// { dg-do compile }
|
||||
-// { dg-options "-O3 -fno-ipa-icf -fdump-tree-optimized" }
|
||||
+// { dg-options "-O3 -ftree-fold-phiopt -fno-ipa-icf -fdump-tree-optimized" }
|
||||
// { dg-final { scan-tree-dump-times " = \\\(unsigned char\\\) c_\[0-9]*\\\(D\\\);" 3 "optimized" { target { ! unsigned_char } } } }
|
||||
// { dg-final { scan-tree-dump-times " = \[^\n\r]* \\+ \[0-9]*;" 3 "optimized" } }
|
||||
// { dg-final { scan-tree-dump-times " = \[^\n\r]* <= 9;" 3 "optimized" } }
|
||||
diff --git a/gcc/testsuite/g++.dg/tree-ssa/mull64.C b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
|
||||
index cad891e62..ec359f2ba 100644
|
||||
--- a/gcc/testsuite/g++.dg/tree-ssa/mull64.C
|
||||
+++ b/gcc/testsuite/g++.dg/tree-ssa/mull64.C
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O2 -fmerge-mull -Wno-psabi -fdump-tree-forwprop1-details -fdump-tree-forwprop4-details" } */
|
||||
+/* { dg-options "-O2 -ftree-fold-phiopt -fmerge-mull -Wno-psabi -fdump-tree-forwprop1-details -fdump-tree-forwprop4-details" } */
|
||||
|
||||
# define BN_BITS4 32
|
||||
# define BN_MASK2 (0xffffffffffffffffL)
|
||||
diff --git a/gcc/testsuite/gcc.dg/pr107190.c b/gcc/testsuite/gcc.dg/pr107190.c
|
||||
index d1e72e5df..d4e5fa0d0 100644
|
||||
--- a/gcc/testsuite/gcc.dg/pr107190.c
|
||||
+++ b/gcc/testsuite/gcc.dg/pr107190.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O2 -fmerge-mull -fexpensive-optimizations -fdump-tree-phiopt2-details" } */
|
||||
+/* { dg-options "-O2 -ftree-fold-phiopt -fmerge-mull -fexpensive-optimizations -fdump-tree-phiopt2-details" } */
|
||||
|
||||
# define BN_BITS4 32
|
||||
# define BN_MASK2 (0xffffffffffffffffL)
|
||||
diff --git a/gcc/testsuite/gcc.dg/pr45416.c b/gcc/testsuite/gcc.dg/pr45416.c
|
||||
index a3f6a759d..dd37ec534 100644
|
||||
--- a/gcc/testsuite/gcc.dg/pr45416.c
|
||||
+++ b/gcc/testsuite/gcc.dg/pr45416.c
|
||||
@@ -1,6 +1,6 @@
|
||||
/* { dg-do compile } */
|
||||
/* { dg-skip-if "Skip for Thumb1." { { arm*-*-* } && { arm_thumb1_ok } } } */
|
||||
-/* { dg-options "-O2" } */
|
||||
+/* { dg-options "-O2 -ftree-fold-phiopt" } */
|
||||
|
||||
int foo(long long a)
|
||||
{
|
||||
diff --git a/gcc/testsuite/gcc.target/i386/pr65871-3.c b/gcc/testsuite/gcc.target/i386/pr65871-3.c
|
||||
index c7d9bdd96..4fd3b48f8 100644
|
||||
--- a/gcc/testsuite/gcc.target/i386/pr65871-3.c
|
||||
+++ b/gcc/testsuite/gcc.target/i386/pr65871-3.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile } */
|
||||
-/* { dg-options "-O2 -mbmi" } */
|
||||
+/* { dg-options "-O2 -ftree-fold-phiopt -mbmi" } */
|
||||
|
||||
int foo (int x, int y)
|
||||
{
|
||||
--
|
||||
2.33.0
|
||||
|
||||
323
0172-minmax-Move-minmax-pattern-to-gimple.patch
Normal file
323
0172-minmax-Move-minmax-pattern-to-gimple.patch
Normal file
@ -0,0 +1,323 @@
|
||||
From df88d29c355c59e262397fdf3b22ee9099ce40c2 Mon Sep 17 00:00:00 2001
|
||||
From: Pronin Alexander 00812787 <pronin.alexander@huawei.com>
|
||||
Date: Tue, 19 Dec 2023 12:19:14 +0300
|
||||
Subject: [PATCH 1/5] [minmax] Move minmax pattern to gimple.
|
||||
|
||||
---
|
||||
gcc/common.opt | 4 +
|
||||
gcc/config/aarch64/aarch64-simd.md | 72 ----------------
|
||||
gcc/match.pd | 104 ++++++++++++++++++++++++
|
||||
gcc/testsuite/gcc.dg/combine-maxmin-1.c | 15 ++++
|
||||
gcc/testsuite/gcc.dg/combine-maxmin-2.c | 14 ++++
|
||||
gcc/testsuite/gcc.dg/combine-maxmin.c | 19 +++--
|
||||
6 files changed, 151 insertions(+), 77 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.dg/combine-maxmin-1.c
|
||||
create mode 100644 gcc/testsuite/gcc.dg/combine-maxmin-2.c
|
||||
|
||||
diff --git a/gcc/common.opt b/gcc/common.opt
|
||||
index a8a2264ee..73234dcc3 100644
|
||||
--- a/gcc/common.opt
|
||||
+++ b/gcc/common.opt
|
||||
@@ -1750,6 +1750,10 @@ fif-conversion-gimple
|
||||
Common Report Var(flag_if_conversion_gimple) Optimization
|
||||
Perform conversion of conditional jumps to branchless equivalents during gimple transformations.
|
||||
|
||||
+fconvert-minmax
|
||||
+Common Report Var(flag_convert_minmax) Optimization
|
||||
+Convert saturating clipping to min max.
|
||||
+
|
||||
fstack-reuse=
|
||||
Common Joined RejectNegative Enum(stack_reuse_level) Var(flag_stack_reuse) Init(SR_ALL) Optimization
|
||||
-fstack-reuse=[all|named_vars|none] Set stack reuse level for local variables.
|
||||
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
|
||||
index c7503561f..754343abc 100644
|
||||
--- a/gcc/config/aarch64/aarch64-simd.md
|
||||
+++ b/gcc/config/aarch64/aarch64-simd.md
|
||||
@@ -1535,78 +1535,6 @@
|
||||
[(set_attr "type" "neon_minmax<q>")]
|
||||
)
|
||||
|
||||
-;; Use sequential smax+smin to replace vector arithmetic operations like this:
|
||||
-;; a = ((x & ~((1 << 8)-1)) ? (-x)>>31 & ((1 << 8)-1) : x);
|
||||
-;; TODO: maybe extend to scalar operations.
|
||||
-
|
||||
-(define_insn_and_split "*aarch64_maxmin_arith<mode>"
|
||||
- [(set (match_operand:VDQHSD 0 "register_operand" "=w")
|
||||
- (xor:VDQHSD
|
||||
- (and:VDQHSD
|
||||
- (xor:VDQHSD
|
||||
- (ashiftrt:VDQHSD
|
||||
- (neg:VDQHSD
|
||||
- (match_operand:VDQHSD 1 "register_operand"))
|
||||
- (match_operand:VDQHSD 2 "maxmin_arith_shift_operand"))
|
||||
- (match_dup 1))
|
||||
- (neg:VDQHSD
|
||||
- (eq:VDQHSD
|
||||
- (and:VDQHSD
|
||||
- (match_dup 1)
|
||||
- (match_operand:VDQHSD 3 "aarch64_bic_imm_for_maxmin"))
|
||||
- (match_operand:VDQHSD 4 "aarch64_simd_or_scalar_imm_zero"))))
|
||||
- (ashiftrt:VDQHSD
|
||||
- (neg:VDQHSD
|
||||
- (match_dup 1))
|
||||
- (match_dup 2))))]
|
||||
- "TARGET_SIMD && !reload_completed"
|
||||
- "#"
|
||||
- "&& true"
|
||||
- [(set (match_operand:VDQHSD 5 "register_operand" "w") (match_dup 3))
|
||||
- (set (match_operand:VDQHSD 6 "register_operand" "w") (match_dup 4))
|
||||
- (set (match_operand:VDQHSD 0 "register_operand" "=w")
|
||||
- (smax:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")
|
||||
- (match_operand:VDQHSD 6 "register_operand" "w")))
|
||||
- (set (match_operand:VDQHSD 0 "register_operand" "=w")
|
||||
- (smin:VDQHSD (match_operand:VDQHSD 0 "register_operand" "w")
|
||||
- (match_operand:VDQHSD 5 "register_operand" "w")))]
|
||||
- {
|
||||
- if (can_create_pseudo_p ())
|
||||
- {
|
||||
- int val = INTVAL (CONST_VECTOR_ENCODED_ELT (operands[3], 0));
|
||||
- operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
|
||||
- ~val);
|
||||
- operands[5] = gen_reg_rtx (<MODE>mode);
|
||||
- operands[6] = gen_reg_rtx (<MODE>mode);
|
||||
- }
|
||||
- else
|
||||
- FAIL;
|
||||
- }
|
||||
- [(set_attr "type" "neon_minmax<q>")]
|
||||
-)
|
||||
-
|
||||
-;; The helper definition that allows combiner to use the previous pattern.
|
||||
-
|
||||
-(define_insn_and_split "*aarch64_maxmin_tmp<mode>"
|
||||
- [(set (match_operand:VDQHSD 0 "register_operand" "=w")
|
||||
- (ashiftrt:VDQHSD
|
||||
- (neg:VDQHSD
|
||||
- (match_operand:VDQHSD 1 "register_operand" "w"))
|
||||
- (match_operand:VDQHSD 2 "maxmin_arith_shift_operand")))]
|
||||
- "TARGET_SIMD"
|
||||
- "#"
|
||||
- "&& reload_completed"
|
||||
- [(set (match_operand:VDQHSD 0 "register_operand")
|
||||
- (neg:VDQHSD
|
||||
- (match_operand:VDQHSD 1 "register_operand" "w")))
|
||||
- (set (match_dup 0)
|
||||
- (ashiftrt:VDQHSD
|
||||
- (match_dup 0)
|
||||
- (match_operand:VDQHSD 2 "maxmin_arith_shift_operand")))]
|
||||
- ""
|
||||
- [(set_attr "type" "neon_minmax<q>")]
|
||||
-)
|
||||
-
|
||||
;; Pairwise FP Max/Min operations.
|
||||
(define_insn "aarch64_<maxmin_uns>p<mode>"
|
||||
[(set (match_operand:VHSDF 0 "register_operand" "=w")
|
||||
diff --git a/gcc/match.pd b/gcc/match.pd
|
||||
index 24ae157af..1097cd926 100644
|
||||
--- a/gcc/match.pd
|
||||
+++ b/gcc/match.pd
|
||||
@@ -6595,3 +6595,107 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||
(plus:c@4 (op2:c @0 @1)
|
||||
(plus:c@5 (double_size_mul_overflow_check_lo @0 @1 @3) (op3:c @0 @1))))
|
||||
(if (single_use (@4) && single_use (@5)))))
|
||||
+
|
||||
+/* MinMax pattern matching helpers. More info on the transformation below. */
|
||||
+
|
||||
+/* Match (a & 0b11..100..0) pattern. */
|
||||
+(match (minmax_cmp_arg @0 @1)
|
||||
+ (bit_and @0 INTEGER_CST@1)
|
||||
+ (if (wi::popcount (~wi::to_widest (@1) + 1) == 1)))
|
||||
+
|
||||
+/* Match (inversed_sign_bit >> sign_bit_pos) pattern.
|
||||
+ This statement is blocking for the transformation of unsigned integers.
|
||||
+ Do type check here to avoid unnecessary duplications. */
|
||||
+(match (minmax_sat_arg @0)
|
||||
+ (rshift (negate @0) INTEGER_CST@1)
|
||||
+ (if (!TYPE_UNSIGNED (TREE_TYPE (@0))
|
||||
+ && wi::eq_p (wi::to_widest (@1), TYPE_PRECISION (TREE_TYPE (@0)) - 1))))
|
||||
+
|
||||
+/* Transform ((x & ~mask) ? (-x)>>31 & mask : x) to (min (max (x, 0), mask)).
|
||||
+ The matched pattern can be described as saturated clipping.
|
||||
+
|
||||
+ The pattern supports truncation via both casts and bit_and.
|
||||
+ Also there are patterns for possible inverted conditions. */
|
||||
+(if (flag_convert_minmax)
|
||||
+/* Truncation via casts. Unfortunately convert? cannot be applied here
|
||||
+ because convert and cond take different number of arguments. */
|
||||
+ (simplify
|
||||
+ (convert
|
||||
+ (cond
|
||||
+ (ne (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
|
||||
+ (convert? (minmax_sat_arg @0))
|
||||
+ (convert? @0)))
|
||||
+ (if (wi::geu_p (~wi::to_widest (@1) + 1, TYPE_PRECISION (type)))
|
||||
+ (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
|
||||
+ (convert (min (max @0 { integer_zero_node; })
|
||||
+ { mask; })))))
|
||||
+ (simplify
|
||||
+ (cond
|
||||
+ (ne (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
|
||||
+ (convert? (minmax_sat_arg @0))
|
||||
+ (convert? @0))
|
||||
+ (if (wi::geu_p (~wi::to_widest (@1) + 1, TYPE_PRECISION (type)))
|
||||
+ (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
|
||||
+ (convert (min (max @0 { integer_zero_node; })
|
||||
+ { mask; })))))
|
||||
+
|
||||
+ (simplify
|
||||
+ (convert
|
||||
+ (cond
|
||||
+ (eq (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
|
||||
+ (convert? @0)
|
||||
+ (convert? (minmax_sat_arg @0))))
|
||||
+ (if (wi::geu_p (~wi::to_widest (@1) + 1, TYPE_PRECISION (type)))
|
||||
+ (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
|
||||
+ (convert (min (max @0 { integer_zero_node; })
|
||||
+ { mask; })))))
|
||||
+ (simplify
|
||||
+ (cond
|
||||
+ (eq (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
|
||||
+ (convert? @0)
|
||||
+ (convert? (minmax_sat_arg @0)))
|
||||
+ (if (wi::geu_p (~wi::to_widest (@1) + 1, TYPE_PRECISION (type)))
|
||||
+ (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
|
||||
+ (convert (min (max @0 { integer_zero_node; })
|
||||
+ { mask; })))))
|
||||
+
|
||||
+ /* Truncation via bit_and with mask. Same concerns on convert? here. */
|
||||
+ (simplify
|
||||
+ (convert
|
||||
+ (cond
|
||||
+ (ne (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
|
||||
+ (convert? (bit_and (minmax_sat_arg @0) INTEGER_CST@2))
|
||||
+ (convert? @0)))
|
||||
+ (if (wi::to_widest (@2) == ~wi::to_widest (@1))
|
||||
+ (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
|
||||
+ (convert (min (max @0 { integer_zero_node; })
|
||||
+ { mask; })))))
|
||||
+ (simplify
|
||||
+ (cond
|
||||
+ (ne (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
|
||||
+ (convert? (bit_and (minmax_sat_arg @0) INTEGER_CST@2))
|
||||
+ (convert? @0))
|
||||
+ (if (wi::to_widest (@2) == ~wi::to_widest (@1))
|
||||
+ (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
|
||||
+ (convert (min (max @0 { integer_zero_node; })
|
||||
+ { mask; })))))
|
||||
+
|
||||
+ (simplify
|
||||
+ (convert
|
||||
+ (cond
|
||||
+ (eq (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
|
||||
+ (convert? @0)
|
||||
+ (convert? (bit_and (minmax_sat_arg @0) INTEGER_CST@2))))
|
||||
+ (if (wi::to_widest (@2) == ~wi::to_widest (@1))
|
||||
+ (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
|
||||
+ (convert (min (max @0 { integer_zero_node; })
|
||||
+ { mask; })))))
|
||||
+ (simplify
|
||||
+ (cond
|
||||
+ (eq (minmax_cmp_arg @0 INTEGER_CST@1) integer_zerop)
|
||||
+ (convert? @0)
|
||||
+ (convert? (bit_and (minmax_sat_arg @0) INTEGER_CST@2)))
|
||||
+ (if (wi::to_widest (@2) == ~wi::to_widest (@1))
|
||||
+ (with { tree mask = build_int_cst (integer_type_node, ~tree_to_shwi (@1)); }
|
||||
+ (convert (min (max @0 { integer_zero_node; })
|
||||
+ { mask; }))))))
|
||||
diff --git a/gcc/testsuite/gcc.dg/combine-maxmin-1.c b/gcc/testsuite/gcc.dg/combine-maxmin-1.c
|
||||
new file mode 100644
|
||||
index 000000000..859ff7df8
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/combine-maxmin-1.c
|
||||
@@ -0,0 +1,15 @@
|
||||
+/* { dg-do compile { target aarch64-*-* } } */
|
||||
+/* { dg-options "-O3 -fconvert-minmax" } */
|
||||
+
|
||||
+#include <inttypes.h>
|
||||
+
|
||||
+__attribute__((noinline))
|
||||
+void test (int32_t *restrict a, int32_t *restrict x)
|
||||
+{
|
||||
+ for (int i = 0; i < 4; i++)
|
||||
+ a[i] = ((((-x[i]) >> 31) ^ x[i])
|
||||
+ & (-((int32_t)((x[i] & (~((1 << 8)-1))) == 0)))) ^ ((-x[i]) >> 31);
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler-not {smax\t} } } */
|
||||
+/* { dg-final { scan-assembler-not {smin\t} } } */
|
||||
diff --git a/gcc/testsuite/gcc.dg/combine-maxmin-2.c b/gcc/testsuite/gcc.dg/combine-maxmin-2.c
|
||||
new file mode 100644
|
||||
index 000000000..63d4d85b3
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/combine-maxmin-2.c
|
||||
@@ -0,0 +1,14 @@
|
||||
+/* { dg-do compile { target aarch64-*-* } } */
|
||||
+/* { dg-options "-O3 -fconvert-minmax" } */
|
||||
+
|
||||
+#include <inttypes.h>
|
||||
+
|
||||
+__attribute__((noinline))
|
||||
+void test (int8_t *restrict a, int32_t *restrict x)
|
||||
+{
|
||||
+ for (int i = 0; i < 8; i++)
|
||||
+ a[i] = ((x[i] & ~((1 << 9)-1)) ? (-x[i])>>31 & ((1 << 9)-1) : x[i]);
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler-times {smax\t} 4 } } */
|
||||
+/* { dg-final { scan-assembler-times {smin\t} 4 } } */
|
||||
diff --git a/gcc/testsuite/gcc.dg/combine-maxmin.c b/gcc/testsuite/gcc.dg/combine-maxmin.c
|
||||
index 06bce7029..a984fa560 100755
|
||||
--- a/gcc/testsuite/gcc.dg/combine-maxmin.c
|
||||
+++ b/gcc/testsuite/gcc.dg/combine-maxmin.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* { dg-do compile { target aarch64-*-* } } */
|
||||
-/* { dg-options "-O3 -fdump-rtl-combine-all" } */
|
||||
+/* { dg-options "-O3 -fconvert-minmax" } */
|
||||
|
||||
/* The test checks usage of smax/smin insns for clip evaluation and
|
||||
* uzp1/uzp2 insns for vector element narrowing. It's inspired by
|
||||
@@ -19,20 +19,26 @@ void hf (uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
|
||||
{
|
||||
const int pad = (8 > 9) ? (-10 * ((1 << 8)-1)) : 0;
|
||||
for( int y = 0; y < height; y++ ) {
|
||||
+ /* This loop is not being vectorized now. */
|
||||
for( int x = -2; x < width+3; x++ ) {
|
||||
int v = ((src)[x-2*stride] + (src)[x+3*stride] - 5*((src)[x-stride]
|
||||
+ (src)[x+2*stride]) + 20*((src)[x] + (src)[x+stride]));
|
||||
dstv[x] = clip ( (v + 16) >> 5 );
|
||||
buf[x+2] = v + pad;
|
||||
}
|
||||
+
|
||||
+ /* Produces two versions of the code: 3xUZP1/2xMAX/2xMIN + 1xUZP1/1xMAX/1xMIN. */
|
||||
for( int x = 0; x < width; x++ )
|
||||
dstc[x] = clip ((((buf+2)[x-2*1] + (buf+2)[x+3*1] - 5*((buf+2)[x-1]
|
||||
+ (buf+2)[x+2*1]) + 20*((buf+2)[x] + (buf+2)[x+1]))
|
||||
- 32*pad + 512) >> 10);
|
||||
+
|
||||
+ /* Produces two versions of the code: 1xUZP1/2xMAX/2xMIN + 0xUZP1/1xMAX/1xMIN. */
|
||||
for( int x = 0; x < width; x++ )
|
||||
dsth[x] = clip ((((src)[x-2*1] + (src)[x+3*1] - 5*((src)[x-1]
|
||||
+ (src)[x+2*1]) + 20*((src)[x] + (src)[x+1]))
|
||||
+ 16) >> 5);
|
||||
+
|
||||
dsth += stride;
|
||||
dstv += stride;
|
||||
dstc += stride;
|
||||
@@ -40,7 +46,10 @@ void hf (uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
|
||||
}
|
||||
}
|
||||
|
||||
-/* { dg-final { scan-assembler-times {smax\t} 4 } } */
|
||||
-/* { dg-final { scan-assembler-times {smin\t} 4 } } */
|
||||
-/* { dg-final { scan-assembler-times {cmtst\t} 2 } } */
|
||||
-/* { dg-final { scan-assembler-times {uzp1\t} 6 } } */
|
||||
+/* Max is performed on 0 from signed values, match smax exactly. */
|
||||
+/* { dg-final { scan-assembler-times {smax\t} 6 } } */
|
||||
+/* Min is performed on signed val>0 and a mask, min sign doesn't matter. */
|
||||
+/* { dg-final { scan-assembler-times {[us]min\t} 6 } } */
|
||||
+/* All of the vectorized patterns are expected to be matched. */
|
||||
+/* { dg-final { scan-assembler-not {cmtst\t} } } */
|
||||
+/* { dg-final { scan-assembler-times {uzp1\t} 5 } } */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
24
0173-IPA-Fix-test-completion-1.c.patch
Normal file
24
0173-IPA-Fix-test-completion-1.c.patch
Normal file
@ -0,0 +1,24 @@
|
||||
From d6ef1c0c182267d3ab68e3ae1d7f1a576a7bbb2a Mon Sep 17 00:00:00 2001
|
||||
From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
|
||||
Date: Wed, 20 Dec 2023 18:44:29 +0800
|
||||
Subject: [PATCH 2/5] [IPA] Fix test completion-1.c
|
||||
|
||||
---
|
||||
gcc/testsuite/gcc.dg/completion-1.c | 1 +
|
||||
1 file changed, 1 insertion(+)
|
||||
|
||||
diff --git a/gcc/testsuite/gcc.dg/completion-1.c b/gcc/testsuite/gcc.dg/completion-1.c
|
||||
index 64da64f1c..df2319c76 100644
|
||||
--- a/gcc/testsuite/gcc.dg/completion-1.c
|
||||
+++ b/gcc/testsuite/gcc.dg/completion-1.c
|
||||
@@ -2,6 +2,7 @@
|
||||
/* { dg-options "--completion=-fipa-ic" } */
|
||||
|
||||
/* { dg-begin-multiline-output "" }
|
||||
+-fipa-ic
|
||||
-fipa-icf
|
||||
-fipa-icf-functions
|
||||
-fipa-icf-variables
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,71 @@
|
||||
From ed548cec9d8efe8ef742225c39f5d84aba4be81b Mon Sep 17 00:00:00 2001
|
||||
From: Diachkov Ilia WX1215920 <diachkov.ilia1@huawei-partners.com>
|
||||
Date: Wed, 20 Dec 2023 13:53:47 +0300
|
||||
Subject: [PATCH 3/5] [IPA] Fix fails on checked build and comments from review
|
||||
|
||||
---
|
||||
gcc/ipa-prefetch.c | 24 ++++++++++++++++++++++--
|
||||
gcc/params.opt | 4 ++--
|
||||
2 files changed, 24 insertions(+), 4 deletions(-)
|
||||
|
||||
diff --git a/gcc/ipa-prefetch.c b/gcc/ipa-prefetch.c
|
||||
index 93483a6e8..d8bb9a251 100644
|
||||
--- a/gcc/ipa-prefetch.c
|
||||
+++ b/gcc/ipa-prefetch.c
|
||||
@@ -167,6 +167,7 @@ analyse_cgraph ()
|
||||
}
|
||||
|
||||
/* TODO: maybe remove loop info here. */
|
||||
+ n->get_body ();
|
||||
push_cfun (DECL_STRUCT_FUNCTION (n->decl));
|
||||
calculate_dominance_info (CDI_DOMINATORS);
|
||||
loop_optimizer_init (LOOPS_NORMAL);
|
||||
@@ -1540,9 +1541,28 @@ optimize_function (cgraph_node *n, function *fn)
|
||||
return 0;
|
||||
}
|
||||
else if (dump_file)
|
||||
- fprintf (dump_file, "Dominator bb %d for MRs\n", dom_bb->index);
|
||||
+ {
|
||||
+ fprintf (dump_file, "Dominator bb %d for MRs:\n", dom_bb->index);
|
||||
+ gimple_dump_bb (dump_file, dom_bb, 0, dump_flags);
|
||||
+ fprintf (dump_file, "\n");
|
||||
+ }
|
||||
+
|
||||
+ /* Try to find comp_mr's stmt in the dominator bb. */
|
||||
+ gimple *last_used = NULL;
|
||||
+ for (gimple_stmt_iterator si = gsi_last_bb (dom_bb); !gsi_end_p (si);
|
||||
+ gsi_prev (&si))
|
||||
+ if (comp_mr->stmts[0] == gsi_stmt (si))
|
||||
+ {
|
||||
+ last_used = gsi_stmt (si);
|
||||
+ if (dump_file)
|
||||
+ {
|
||||
+ fprintf (dump_file, "Last used stmt in dominator bb:\n");
|
||||
+ print_gimple_stmt (dump_file, last_used, 0);
|
||||
+ }
|
||||
+ break;
|
||||
+ }
|
||||
|
||||
- split_block (dom_bb, (gimple *) NULL);
|
||||
+ split_block (dom_bb, last_used);
|
||||
gimple_stmt_iterator gsi = gsi_last_bb (dom_bb);
|
||||
|
||||
/* Create new inc var. Insert new_var = old_var + step * factor. */
|
||||
diff --git a/gcc/params.opt b/gcc/params.opt
|
||||
index ef7bea311..76ae925fd 100644
|
||||
--- a/gcc/params.opt
|
||||
+++ b/gcc/params.opt
|
||||
@@ -251,8 +251,8 @@ Common Joined UInteger Var(param_ipa_prefetch_distance_factor) Init(4) Param Opt
|
||||
The factor represents the number of inductive variable incrementations to evaluate an indirect memory address for IPA prefetch.
|
||||
|
||||
-param=ipa-prefetch-locality=
|
||||
-Common Joined UInteger Var(param_ipa_prefetch_locality) Init(3) Param Optimization
|
||||
-The flag represents temporal locality values in the following way: 0:pstl1strm, 1:pstl3keep, 2:pstl2keep, 3:pstl1keep.
|
||||
+Common Joined UInteger Var(param_ipa_prefetch_locality) Init(3) IntegerRange(0, 3) Param Optimization
|
||||
+The flag represents temporal locality value between 0 and 3, the higher value means the higher temporal locality in the data.
|
||||
|
||||
-param=ira-loop-reserved-regs=
|
||||
Common Joined UInteger Var(param_ira_loop_reserved_regs) Init(2) Param Optimization
|
||||
--
|
||||
2.33.0
|
||||
|
||||
1426
0175-split-ldp-stp-Extending-and-refactoring-of-pass_spli.patch
Normal file
1426
0175-split-ldp-stp-Extending-and-refactoring-of-pass_spli.patch
Normal file
File diff suppressed because it is too large
Load Diff
61
0176-Fix-bugs-in-ICP-src-openEuler-gcc-I8PYBF-I8PYLL.patch
Normal file
61
0176-Fix-bugs-in-ICP-src-openEuler-gcc-I8PYBF-I8PYLL.patch
Normal file
@ -0,0 +1,61 @@
|
||||
From d2742041454dbd4c4c3c3e0a27b5fb26d1e05832 Mon Sep 17 00:00:00 2001
|
||||
From: Diachkov Ilia WX1215920 <diachkov.ilia1@huawei-partners.com>
|
||||
Date: Thu, 21 Dec 2023 11:14:06 +0300
|
||||
Subject: [PATCH 5/5] Fix bugs in ICP (src-openEuler/gcc: I8PYBF, I8PYLL)
|
||||
|
||||
---
|
||||
gcc/ipa-devirt.c | 17 +++++++++++++++--
|
||||
1 file changed, 15 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/gcc/ipa-devirt.c b/gcc/ipa-devirt.c
|
||||
index fbde7eb94..a18cbe36a 100644
|
||||
--- a/gcc/ipa-devirt.c
|
||||
+++ b/gcc/ipa-devirt.c
|
||||
@@ -4399,6 +4399,11 @@ print_type_set(unsigned ftype_uid, type_alias_map *map)
|
||||
if (!map->count (ftype_uid))
|
||||
return;
|
||||
type_set* s = (*map)[ftype_uid];
|
||||
+ if (!s)
|
||||
+ {
|
||||
+ fprintf (dump_file, "%d (no set)", ftype_uid);
|
||||
+ return;
|
||||
+ }
|
||||
for (type_set::const_iterator it = s->begin (); it != s->end (); it++)
|
||||
fprintf (dump_file, it == s->begin () ? "%d" : ", %d", *it);
|
||||
}
|
||||
@@ -4966,7 +4971,8 @@ analyze_assign_stmt (gimple *stmt)
|
||||
{
|
||||
rhs = TREE_OPERAND (rhs, 0);
|
||||
if (VAR_OR_FUNCTION_DECL_P (rhs) || TREE_CODE (rhs) == STRING_CST
|
||||
- || TREE_CODE (rhs) == ARRAY_REF || TREE_CODE (rhs) == PARM_DECL)
|
||||
+ || TREE_CODE (rhs) == ARRAY_REF || TREE_CODE (rhs) == PARM_DECL
|
||||
+ || TREE_CODE (rhs) == LABEL_DECL)
|
||||
rhs_type = build_pointer_type (TREE_TYPE (rhs));
|
||||
else if (TREE_CODE (rhs) == COMPONENT_REF)
|
||||
{
|
||||
@@ -4980,7 +4986,12 @@ analyze_assign_stmt (gimple *stmt)
|
||||
gcc_assert (POINTER_TYPE_P (rhs_type));
|
||||
}
|
||||
else
|
||||
- gcc_unreachable();
|
||||
+ {
|
||||
+ fprintf (dump_file, "\nUnsupported rhs type %s in assign stmt: ",
|
||||
+ get_tree_code_name (TREE_CODE (rhs)));
|
||||
+ print_gimple_stmt (dump_file, stmt, 0);
|
||||
+ gcc_unreachable ();
|
||||
+ }
|
||||
}
|
||||
else
|
||||
rhs_type = TREE_TYPE (rhs);
|
||||
@@ -5678,6 +5689,8 @@ merge_fs_map_for_ftype_aliases ()
|
||||
decl_set *d_set = it1->second;
|
||||
tree type = (*type_uid_map)[it1->first];
|
||||
type_set *set = (*fta_map)[it1->first];
|
||||
+ if (!set)
|
||||
+ continue;
|
||||
for (type_set::const_iterator it2 = set->begin ();
|
||||
it2 != set->end (); it2++)
|
||||
{
|
||||
--
|
||||
2.33.0
|
||||
|
||||
168
0177-Fix-sqlite-build.patch
Normal file
168
0177-Fix-sqlite-build.patch
Normal file
@ -0,0 +1,168 @@
|
||||
From 71a992aca88f63ec1afb1608619b82a857d8e297 Mon Sep 17 00:00:00 2001
|
||||
From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
|
||||
Date: Fri, 22 Dec 2023 10:11:24 +0800
|
||||
Subject: [PATCH 1/4] Fix sqlite build
|
||||
|
||||
---
|
||||
gcc/ipa-prefetch.c | 71 ++++++++++++++++++++++++++--------------------
|
||||
gcc/ipa-sra.c | 7 +++++
|
||||
2 files changed, 47 insertions(+), 31 deletions(-)
|
||||
|
||||
diff --git a/gcc/ipa-prefetch.c b/gcc/ipa-prefetch.c
|
||||
index d8bb9a251..371702ad8 100644
|
||||
--- a/gcc/ipa-prefetch.c
|
||||
+++ b/gcc/ipa-prefetch.c
|
||||
@@ -1092,6 +1092,15 @@ analyse_loops ()
|
||||
memref_t *mr = it->first, *mr2 = it->second;
|
||||
if (mr2 == NULL || !(*fmrs_map)[fn]->count (mr))
|
||||
continue;
|
||||
+ /* For now optimize only MRs whose mem is a MEM_REF.
|
||||
+ TODO: support other MR types. */
|
||||
+ if (TREE_CODE (mr->mem) != MEM_REF)
|
||||
+ {
|
||||
+ if (dump_file)
|
||||
+ fprintf (dump_file, "Skip MR %d: unsupported tree code = %s\n",
|
||||
+ mr->mr_id, get_tree_code_name (TREE_CODE (mr->mem)));
|
||||
+ continue;
|
||||
+ }
|
||||
if (!optimize_mrs_map->count (fn))
|
||||
(*optimize_mrs_map)[fn] = new memref_set;
|
||||
(*optimize_mrs_map)[fn]->insert (mr);
|
||||
@@ -1104,7 +1113,7 @@ analyse_loops ()
|
||||
it != (*optimize_mrs_map)[fn]->end (); it++)
|
||||
{
|
||||
memref_t *mr = *it, *mr2 = (*mr_candidate_map)[mr];
|
||||
- fprintf (dump_file, "MRs %d,%d with incremental offset ",
|
||||
+ fprintf (dump_file, "MRs %d, %d with incremental offset ",
|
||||
mr->mr_id, mr2->mr_id);
|
||||
print_generic_expr (dump_file, mr2->offset);
|
||||
fprintf (dump_file, "\n");
|
||||
@@ -1437,6 +1446,27 @@ remap_gimple_op_r (tree *tp, int *walk_subtrees, void *data)
|
||||
return NULL_TREE;
|
||||
}
|
||||
|
||||
+/* Copy stmt and remap its operands. */
|
||||
+
|
||||
+static gimple *
|
||||
+gimple_copy_and_remap (gimple *stmt)
|
||||
+{
|
||||
+ gimple *copy = gimple_copy (stmt);
|
||||
+ gcc_checking_assert (!is_gimple_debug (copy));
|
||||
+
|
||||
+ /* Remap all the operands in COPY. */
|
||||
+ struct walk_stmt_info wi;
|
||||
+ memset (&wi, 0, sizeof (wi));
|
||||
+ wi.info = copy;
|
||||
+ walk_gimple_op (copy, remap_gimple_op_r, &wi);
|
||||
+ if (dump_file)
|
||||
+ {
|
||||
+ fprintf (dump_file, "Stmt copy after remap:\n");
|
||||
+ print_gimple_stmt (dump_file, copy, 0);
|
||||
+ }
|
||||
+ return copy;
|
||||
+}
|
||||
+
|
||||
static void
|
||||
create_cgraph_edge (cgraph_node *n, gimple *stmt)
|
||||
{
|
||||
@@ -1585,7 +1615,6 @@ optimize_function (cgraph_node *n, function *fn)
|
||||
/* Create other new vars. Insert new stmts. */
|
||||
struct walk_stmt_info wi;
|
||||
stmt_set processed_stmts;
|
||||
- memref_tree_map mr_new_trees;
|
||||
for (memref_set::const_iterator it = used_mrs.begin ();
|
||||
it != used_mrs.end (); it++)
|
||||
{
|
||||
@@ -1606,23 +1635,10 @@ optimize_function (cgraph_node *n, function *fn)
|
||||
}
|
||||
/* Create a new copy of STMT and duplicate STMT's virtual
|
||||
operands. */
|
||||
- gimple *copy = gimple_copy (mr->stmts[i]);
|
||||
- gcc_checking_assert (!is_gimple_debug (copy));
|
||||
-
|
||||
- /* Remap all the operands in COPY. */
|
||||
- memset (&wi, 0, sizeof (wi));
|
||||
- last_stmt = copy;
|
||||
- wi.info = copy;
|
||||
- walk_gimple_op (copy, remap_gimple_op_r, &wi);
|
||||
- if (dump_file)
|
||||
- {
|
||||
- fprintf (dump_file, "Stmt %d after remap:\n",i);
|
||||
- print_gimple_stmt (dump_file, copy, 0);
|
||||
- }
|
||||
- gimple_seq_add_stmt (&stmts, copy);
|
||||
+ last_stmt = gimple_copy_and_remap (mr->stmts[i]);
|
||||
+ gimple_seq_add_stmt (&stmts, last_stmt);
|
||||
}
|
||||
gcc_assert (last_stmt);
|
||||
- mr_new_trees[mr] = gimple_assign_lhs (last_stmt);
|
||||
if (dump_file)
|
||||
{
|
||||
fprintf (dump_file, "MR (%d) new mem: ", mr->mr_id);
|
||||
@@ -1664,23 +1680,11 @@ optimize_function (cgraph_node *n, function *fn)
|
||||
continue;
|
||||
processed_stmts.insert (mr->stmts[i]);
|
||||
|
||||
- gimple *copy = gimple_copy (mr->stmts[i]);
|
||||
- gcc_checking_assert (!is_gimple_debug (copy));
|
||||
-
|
||||
- /* Remap all the operands in COPY. */
|
||||
- memset (&wi, 0, sizeof (wi));
|
||||
- wi.info = copy;
|
||||
- walk_gimple_op (copy, remap_gimple_op_r, &wi);
|
||||
- if (dump_file)
|
||||
- {
|
||||
- fprintf (dump_file, "Stmt %d after remap:\n",i);
|
||||
- print_gimple_stmt (dump_file, copy, 0);
|
||||
- }
|
||||
+ gimple *copy = gimple_copy_and_remap (mr->stmts[i]);
|
||||
gimple_seq_add_stmt (&stmts, copy);
|
||||
}
|
||||
gimple *last_stmt = mr->stmts[0];
|
||||
gcc_assert (last_stmt);
|
||||
- mr_new_trees[mr] = gimple_assign_lhs (last_stmt);
|
||||
tree write_p = mr->is_store ? integer_one_node : integer_zero_node;
|
||||
tree addr = get_mem_ref_address_ssa_name (mr->mem, NULL_TREE);
|
||||
if (decl_map->count (addr))
|
||||
@@ -1689,6 +1693,11 @@ optimize_function (cgraph_node *n, function *fn)
|
||||
3, addr, write_p, local);
|
||||
pcalls.safe_push (last_stmt);
|
||||
gimple_seq_add_stmt (&stmts, last_stmt);
|
||||
+ if (dump_file)
|
||||
+ {
|
||||
+ fprintf (dump_file, "Insert %d prefetch stmt:\n", j);
|
||||
+ print_gimple_stmt (dump_file, last_stmt, 0);
|
||||
+ }
|
||||
}
|
||||
|
||||
gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
|
||||
@@ -1827,7 +1836,7 @@ pass_ipa_prefetch::gate (function *)
|
||||
/* Don't bother doing anything if the program has errors. */
|
||||
&& !seen_error ()
|
||||
&& flag_lto_partition == LTO_PARTITION_ONE
|
||||
- /* Only enable struct optimizations in lto or whole_program. */
|
||||
+ /* Only enable prefetch optimizations in lto or whole_program. */
|
||||
&& (in_lto_p || flag_whole_program));
|
||||
}
|
||||
|
||||
diff --git a/gcc/ipa-sra.c b/gcc/ipa-sra.c
|
||||
index d7019ec42..ee927bf6a 100644
|
||||
--- a/gcc/ipa-sra.c
|
||||
+++ b/gcc/ipa-sra.c
|
||||
@@ -3448,6 +3448,13 @@ param_splitting_across_edge (cgraph_edge *cs)
|
||||
gcc_checking_assert (from_ifs && from_ifs->m_parameters);
|
||||
|
||||
isra_call_summary *csum = call_sums->get (cs);
|
||||
+ /* TODO: implement better support for call edges inserted after summary
|
||||
+ collection but before sra wpa invocation. */
|
||||
+ if (!csum)
|
||||
+ {
|
||||
+ csum = call_sums->get_create (cs);
|
||||
+ csum->m_return_ignored = true;
|
||||
+ }
|
||||
gcc_checking_assert (csum);
|
||||
unsigned args_count = csum->m_arg_flow.length ();
|
||||
isra_func_summary *to_ifs = func_sums->get (callee);
|
||||
--
|
||||
2.33.0
|
||||
|
||||
52
0178-Fix-freetype-build.patch
Normal file
52
0178-Fix-freetype-build.patch
Normal file
@ -0,0 +1,52 @@
|
||||
From b187b3043c5a7aa96e6d1106e4b0f37d14c914a6 Mon Sep 17 00:00:00 2001
|
||||
From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
|
||||
Date: Fri, 22 Dec 2023 11:39:09 +0800
|
||||
Subject: [PATCH 2/4] Fix freetype build
|
||||
|
||||
---
|
||||
gcc/ipa-prefetch.c | 17 +++++++++++++++--
|
||||
1 file changed, 15 insertions(+), 2 deletions(-)
|
||||
|
||||
diff --git a/gcc/ipa-prefetch.c b/gcc/ipa-prefetch.c
|
||||
index 371702ad8..f91ac3edc 100644
|
||||
--- a/gcc/ipa-prefetch.c
|
||||
+++ b/gcc/ipa-prefetch.c
|
||||
@@ -1522,6 +1522,13 @@ optimize_function (cgraph_node *n, function *fn)
|
||||
"Skip the case.\n");
|
||||
return 0;
|
||||
}
|
||||
+ if (!tree_fits_shwi_p (inc_mr->step))
|
||||
+ {
|
||||
+ if (dump_file)
|
||||
+ fprintf (dump_file, "Cannot represent incremental MR's step as "
|
||||
+ "integer. Skip the case.\n");
|
||||
+ return 0;
|
||||
+ }
|
||||
if (dump_file && !used_mrs.empty ())
|
||||
print_mrs_ids (used_mrs, "Common list of used mrs:\n");
|
||||
|
||||
@@ -1607,13 +1614,19 @@ optimize_function (cgraph_node *n, function *fn)
|
||||
else
|
||||
inc_code = PLUS_EXPR;
|
||||
tree step = inc_mr->step;
|
||||
- unsigned dist_val = tree_to_uhwi (step) * param_ipa_prefetch_distance_factor;
|
||||
+ HOST_WIDE_INT dist_val = tree_to_shwi (step)
|
||||
+ * param_ipa_prefetch_distance_factor;
|
||||
tree dist = build_int_cst (TREE_TYPE (step), dist_val);
|
||||
tree new_inc_var = gimple_build (&stmts, inc_code, var_type, inc_var, dist);
|
||||
(*decl_map)[inc_var] = new_inc_var;
|
||||
+ if (dump_file)
|
||||
+ {
|
||||
+ fprintf (dump_file, "New distance value: " HOST_WIDE_INT_PRINT_DEC ", new inc var: ", dist_val);
|
||||
+ print_generic_expr (dump_file, new_inc_var);
|
||||
+ fprintf (dump_file, "\n");
|
||||
+ }
|
||||
|
||||
/* Create other new vars. Insert new stmts. */
|
||||
- struct walk_stmt_info wi;
|
||||
stmt_set processed_stmts;
|
||||
for (memref_set::const_iterator it = used_mrs.begin ();
|
||||
it != used_mrs.end (); it++)
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,29 @@
|
||||
From 3d1b0da292e383ce2a139c1612ec7e07336bbcd8 Mon Sep 17 00:00:00 2001
|
||||
From: vchernon <chernonog.vyacheslav@huawei.com>
|
||||
Date: Fri, 22 Dec 2023 22:05:27 +0800
|
||||
Subject: [PATCH 3/4] [rtl-ifcvt] refuse to rename def in the last instruction
|
||||
in BB
|
||||
|
||||
---
|
||||
gcc/ifcvt.c | 5 ++++-
|
||||
1 file changed, 4 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
|
||||
index 3df0f6fdd..025eb6cd1 100644
|
||||
--- a/gcc/ifcvt.c
|
||||
+++ b/gcc/ifcvt.c
|
||||
@@ -2176,7 +2176,10 @@ noce_rename_regs_in_bb (basic_block test_bb, bitmap rename_regs)
|
||||
rtx x = SET_DEST (sset);
|
||||
if (!REG_P (x) || !bitmap_bit_p (rename_regs, REGNO (x)))
|
||||
continue;
|
||||
-
|
||||
+ /* There is no need to rename dest in the last instruction:
|
||||
+ it will be renamed anyway. */
|
||||
+ if (insn == last_insn)
|
||||
+ continue;
|
||||
machine_mode mode = GET_MODE (x);
|
||||
rtx tmp = gen_reg_rtx (mode);
|
||||
if (!validate_replace_rtx_part (x, tmp, &SET_DEST (sset), insn))
|
||||
--
|
||||
2.33.0
|
||||
|
||||
25
0180-add-optimization-level-requirement-to-the-gate.patch
Normal file
25
0180-add-optimization-level-requirement-to-the-gate.patch
Normal file
@ -0,0 +1,25 @@
|
||||
From aa66bcf2b684655d0fbcc6b4543ffef1b2e37288 Mon Sep 17 00:00:00 2001
|
||||
From: vchernon <chernonog.vyacheslav@huawei.com>
|
||||
Date: Thu, 28 Dec 2023 10:44:35 +0800
|
||||
Subject: [PATCH] add optimization level requirement to the gate.
|
||||
|
||||
---
|
||||
gcc/crypto-accel.c | 2 +-
|
||||
1 file changed, 1 insertion(+), 1 deletion(-)
|
||||
|
||||
diff --git a/gcc/crypto-accel.c b/gcc/crypto-accel.c
|
||||
index f4e810a6b..e7766a585 100644
|
||||
--- a/gcc/crypto-accel.c
|
||||
+++ b/gcc/crypto-accel.c
|
||||
@@ -2391,7 +2391,7 @@ public:
|
||||
/* opt_pass methods: */
|
||||
virtual bool gate (function *)
|
||||
{
|
||||
- if (flag_crypto_accel_aes <= 0)
|
||||
+ if (flag_crypto_accel_aes <= 0 || optimize < 1)
|
||||
return false;
|
||||
return targetm.get_v16qi_mode
|
||||
&& targetm.gen_rev32v16qi
|
||||
--
|
||||
2.33.0
|
||||
|
||||
115
0181-Fix-issue-I8QD9H.patch
Normal file
115
0181-Fix-issue-I8QD9H.patch
Normal file
@ -0,0 +1,115 @@
|
||||
From 25f1ebeb88a4eae247f58488cac9da878f188d9f Mon Sep 17 00:00:00 2001
|
||||
From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
|
||||
Date: Sat, 23 Dec 2023 10:05:10 +0800
|
||||
Subject: [PATCH 4/4] Fix issue I8QD9H
|
||||
|
||||
---
|
||||
gcc/ipa-prefetch.c | 64 +++++++++++++++++++++++++++-------------------
|
||||
1 file changed, 37 insertions(+), 27 deletions(-)
|
||||
|
||||
diff --git a/gcc/ipa-prefetch.c b/gcc/ipa-prefetch.c
|
||||
index f91ac3edc..a471b118e 100644
|
||||
--- a/gcc/ipa-prefetch.c
|
||||
+++ b/gcc/ipa-prefetch.c
|
||||
@@ -1467,6 +1467,31 @@ gimple_copy_and_remap (gimple *stmt)
|
||||
return copy;
|
||||
}
|
||||
|
||||
+/* Copy and remap stmts listed in MR in reverse order to last_idx, skipping
|
||||
+ processed ones. Insert new stmts to the sequence. */
|
||||
+
|
||||
+static gimple *
|
||||
+gimple_copy_and_remap_memref_stmts (memref_t *mr, gimple_seq &stmts,
|
||||
+ int last_idx, stmt_set &processed)
|
||||
+{
|
||||
+ gimple *last_stmt = NULL;
|
||||
+ for (int i = mr->stmts.length () - 1; i >= last_idx ; i--)
|
||||
+ {
|
||||
+ if (processed.count (mr->stmts[i]))
|
||||
+ continue;
|
||||
+ processed.insert (mr->stmts[i]);
|
||||
+ if (dump_file)
|
||||
+ {
|
||||
+ fprintf (dump_file, "Copy stmt %d from used MR (%d):\n",
|
||||
+ i, mr->mr_id);
|
||||
+ print_gimple_stmt (dump_file, mr->stmts[i], 0);
|
||||
+ }
|
||||
+ last_stmt = gimple_copy_and_remap (mr->stmts[i]);
|
||||
+ gimple_seq_add_stmt (&stmts, last_stmt);
|
||||
+ }
|
||||
+ return last_stmt;
|
||||
+}
|
||||
+
|
||||
static void
|
||||
create_cgraph_edge (cgraph_node *n, gimple *stmt)
|
||||
{
|
||||
@@ -1606,7 +1631,16 @@ optimize_function (cgraph_node *n, function *fn)
|
||||
decl_map = new tree_map;
|
||||
gcc_assert (comp_mr->stmts[0] && gimple_assign_single_p (comp_mr->stmts[0]));
|
||||
tree inc_var = gimple_assign_lhs (comp_mr->stmts[0]);
|
||||
+ /* If old_var definition dominates the current use, just use it, otherwise
|
||||
+ evaluate it just before new inc var evaluation. */
|
||||
gimple_seq stmts = NULL;
|
||||
+ stmt_set processed_stmts;
|
||||
+ if (!dominated_by_p (CDI_DOMINATORS, dom_bb, gimple_bb (comp_mr->stmts[0])))
|
||||
+ {
|
||||
+ gimple *tmp = gimple_copy_and_remap_memref_stmts (comp_mr, stmts, 0,
|
||||
+ processed_stmts);
|
||||
+ inc_var = gimple_assign_lhs (tmp);
|
||||
+ }
|
||||
tree var_type = TREE_TYPE (inc_var);
|
||||
enum tree_code inc_code;
|
||||
if (TREE_CODE (var_type) == POINTER_TYPE)
|
||||
@@ -1627,30 +1661,14 @@ optimize_function (cgraph_node *n, function *fn)
|
||||
}
|
||||
|
||||
/* Create other new vars. Insert new stmts. */
|
||||
- stmt_set processed_stmts;
|
||||
for (memref_set::const_iterator it = used_mrs.begin ();
|
||||
it != used_mrs.end (); it++)
|
||||
{
|
||||
memref_t *mr = *it;
|
||||
- gimple *last_stmt = NULL;
|
||||
if (mr == comp_mr)
|
||||
continue;
|
||||
- for (int i = mr->stmts.length () - 1; i >= 0 ; i--)
|
||||
- {
|
||||
- if (processed_stmts.count (mr->stmts[i]))
|
||||
- continue;
|
||||
- processed_stmts.insert (mr->stmts[i]);
|
||||
- if (dump_file)
|
||||
- {
|
||||
- fprintf (dump_file, "Copy stmt %d from used MR (%d):\n",
|
||||
- i, mr->mr_id);
|
||||
- print_gimple_stmt (dump_file, mr->stmts[i], 0);
|
||||
- }
|
||||
- /* Create a new copy of STMT and duplicate STMT's virtual
|
||||
- operands. */
|
||||
- last_stmt = gimple_copy_and_remap (mr->stmts[i]);
|
||||
- gimple_seq_add_stmt (&stmts, last_stmt);
|
||||
- }
|
||||
+ gimple *last_stmt = gimple_copy_and_remap_memref_stmts (mr, stmts, 0,
|
||||
+ processed_stmts);
|
||||
gcc_assert (last_stmt);
|
||||
if (dump_file)
|
||||
{
|
||||
@@ -1687,15 +1705,7 @@ optimize_function (cgraph_node *n, function *fn)
|
||||
memref_t *mr = vmrs[j];
|
||||
/* Don't need to copy the last stmt, since we insert prefetch insn
|
||||
instead of it. */
|
||||
- for (int i = mr->stmts.length () - 1; i >= 1 ; i--)
|
||||
- {
|
||||
- if (processed_stmts.count (mr->stmts[i]))
|
||||
- continue;
|
||||
- processed_stmts.insert (mr->stmts[i]);
|
||||
-
|
||||
- gimple *copy = gimple_copy_and_remap (mr->stmts[i]);
|
||||
- gimple_seq_add_stmt (&stmts, copy);
|
||||
- }
|
||||
+ gimple_copy_and_remap_memref_stmts (mr, stmts, 1, processed_stmts);
|
||||
gimple *last_stmt = mr->stmts[0];
|
||||
gcc_assert (last_stmt);
|
||||
tree write_p = mr->is_store ? integer_one_node : integer_zero_node;
|
||||
--
|
||||
2.33.0
|
||||
|
||||
47
0182-Fix-bugs-in-ICP-src-openEuler-gcc-I8RKFJ.patch
Normal file
47
0182-Fix-bugs-in-ICP-src-openEuler-gcc-I8RKFJ.patch
Normal file
@ -0,0 +1,47 @@
|
||||
From 1724319692f3c5443802e0cef44a81667cfcc0ae Mon Sep 17 00:00:00 2001
|
||||
From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
|
||||
Date: Wed, 27 Dec 2023 07:29:26 +0800
|
||||
Subject: [PATCH 1/4] Fix bugs in ICP (src-openEuler/gcc: I8RKFJ)
|
||||
|
||||
---
|
||||
gcc/ipa-devirt.c | 13 ++++++++++---
|
||||
1 file changed, 10 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/gcc/ipa-devirt.c b/gcc/ipa-devirt.c
|
||||
index a18cbe36a..987f15a15 100644
|
||||
--- a/gcc/ipa-devirt.c
|
||||
+++ b/gcc/ipa-devirt.c
|
||||
@@ -4669,12 +4669,19 @@ maybe_register_aliases (tree type1, tree type2)
|
||||
if (register_ailas_type (type1, type2, ta_map))
|
||||
analyze_pointees (type1, type2);
|
||||
}
|
||||
+ unsigned type1_uid = TYPE_UID (type1);
|
||||
+ unsigned type2_uid = TYPE_UID (type2);
|
||||
+ if (type_uid_map->count (type1_uid) == 0)
|
||||
+ (*type_uid_map)[type1_uid] = type1;
|
||||
+ if (type_uid_map->count (type2_uid) == 0)
|
||||
+ (*type_uid_map)[type2_uid] = type2;
|
||||
+
|
||||
/* If function and non-function type pointers alias,
|
||||
the function type is unsafe. */
|
||||
if (FUNCTION_POINTER_TYPE_P (type1) && !FUNCTION_POINTER_TYPE_P (type2))
|
||||
- unsafe_types->insert (TYPE_UID (type1));
|
||||
+ unsafe_types->insert (type1_uid);
|
||||
if (FUNCTION_POINTER_TYPE_P (type2) && !FUNCTION_POINTER_TYPE_P (type1))
|
||||
- unsafe_types->insert (TYPE_UID (type2));
|
||||
+ unsafe_types->insert (type2_uid);
|
||||
|
||||
/* Try to figure out with pointers to incomplete types. */
|
||||
if (POINTER_TYPE_P (type1) && POINTER_TYPE_P (type2))
|
||||
@@ -4972,7 +4979,7 @@ analyze_assign_stmt (gimple *stmt)
|
||||
rhs = TREE_OPERAND (rhs, 0);
|
||||
if (VAR_OR_FUNCTION_DECL_P (rhs) || TREE_CODE (rhs) == STRING_CST
|
||||
|| TREE_CODE (rhs) == ARRAY_REF || TREE_CODE (rhs) == PARM_DECL
|
||||
- || TREE_CODE (rhs) == LABEL_DECL)
|
||||
+ || TREE_CODE (rhs) == LABEL_DECL || TREE_CODE (rhs) == CONST_DECL)
|
||||
rhs_type = build_pointer_type (TREE_TYPE (rhs));
|
||||
else if (TREE_CODE (rhs) == COMPONENT_REF)
|
||||
{
|
||||
--
|
||||
2.33.0
|
||||
|
||||
26
0183-Fix-fail-in-ICP-src-openEuler-gcc-I8RP4H.patch
Normal file
26
0183-Fix-fail-in-ICP-src-openEuler-gcc-I8RP4H.patch
Normal file
@ -0,0 +1,26 @@
|
||||
From 351d049f09b1e96e48c3038ab3a6a9c1d6a13f8d Mon Sep 17 00:00:00 2001
|
||||
From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
|
||||
Date: Thu, 28 Dec 2023 09:51:34 +0800
|
||||
Subject: [PATCH 2/4] Fix fail in ICP (src-openEuler/gcc: I8RP4H)
|
||||
|
||||
---
|
||||
gcc/ipa-devirt.c | 3 ++-
|
||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/gcc/ipa-devirt.c b/gcc/ipa-devirt.c
|
||||
index 987f15a15..ed19a57a4 100644
|
||||
--- a/gcc/ipa-devirt.c
|
||||
+++ b/gcc/ipa-devirt.c
|
||||
@@ -4979,7 +4979,8 @@ analyze_assign_stmt (gimple *stmt)
|
||||
rhs = TREE_OPERAND (rhs, 0);
|
||||
if (VAR_OR_FUNCTION_DECL_P (rhs) || TREE_CODE (rhs) == STRING_CST
|
||||
|| TREE_CODE (rhs) == ARRAY_REF || TREE_CODE (rhs) == PARM_DECL
|
||||
- || TREE_CODE (rhs) == LABEL_DECL || TREE_CODE (rhs) == CONST_DECL)
|
||||
+ || TREE_CODE (rhs) == LABEL_DECL || TREE_CODE (rhs) == CONST_DECL
|
||||
+ || TREE_CODE (rhs) == RESULT_DECL)
|
||||
rhs_type = build_pointer_type (TREE_TYPE (rhs));
|
||||
else if (TREE_CODE (rhs) == COMPONENT_REF)
|
||||
{
|
||||
--
|
||||
2.33.0
|
||||
|
||||
45
0184-Fix-fail-in-IPA-prefetch-src-openEuler-gcc-I8RURA.patch
Normal file
45
0184-Fix-fail-in-IPA-prefetch-src-openEuler-gcc-I8RURA.patch
Normal file
@ -0,0 +1,45 @@
|
||||
From 2a5c250262ec0497a5efbbd1d0d67e7147696074 Mon Sep 17 00:00:00 2001
|
||||
From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
|
||||
Date: Thu, 28 Dec 2023 20:20:16 +0800
|
||||
Subject: [PATCH 1/2] Fix fail in IPA prefetch (src-openEuler/gcc: I8RURA)
|
||||
|
||||
---
|
||||
gcc/ipa-devirt.c | 9 ++++++---
|
||||
gcc/ipa-prefetch.c | 1 +
|
||||
2 files changed, 7 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/gcc/ipa-devirt.c b/gcc/ipa-devirt.c
|
||||
index ed19a57a4..9863084e4 100644
|
||||
--- a/gcc/ipa-devirt.c
|
||||
+++ b/gcc/ipa-devirt.c
|
||||
@@ -4995,9 +4995,12 @@ analyze_assign_stmt (gimple *stmt)
|
||||
}
|
||||
else
|
||||
{
|
||||
- fprintf (dump_file, "\nUnsupported rhs type %s in assign stmt: ",
|
||||
- get_tree_code_name (TREE_CODE (rhs)));
|
||||
- print_gimple_stmt (dump_file, stmt, 0);
|
||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
+ {
|
||||
+ fprintf (dump_file, "\nUnsupported rhs type %s in assign stmt: ",
|
||||
+ get_tree_code_name (TREE_CODE (rhs)));
|
||||
+ print_gimple_stmt (dump_file, stmt, 0);
|
||||
+ }
|
||||
gcc_unreachable ();
|
||||
}
|
||||
}
|
||||
diff --git a/gcc/ipa-prefetch.c b/gcc/ipa-prefetch.c
|
||||
index a471b118e..24cb4424a 100644
|
||||
--- a/gcc/ipa-prefetch.c
|
||||
+++ b/gcc/ipa-prefetch.c
|
||||
@@ -1730,6 +1730,7 @@ optimize_function (cgraph_node *n, function *fn)
|
||||
for (unsigned i = 0; i < pcalls.length (); i++)
|
||||
create_cgraph_edge (n, pcalls[i]);
|
||||
ipa_update_overall_fn_summary (n);
|
||||
+ renumber_gimple_stmt_uids (DECL_STRUCT_FUNCTION (n->decl));
|
||||
|
||||
return 1;
|
||||
}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
26
0185-Fix-fail-in-IPA-prefetch-src-openEuler-gcc-I8RV7T.patch
Normal file
26
0185-Fix-fail-in-IPA-prefetch-src-openEuler-gcc-I8RV7T.patch
Normal file
@ -0,0 +1,26 @@
|
||||
From 4014d651825c3e03e0ad2eabeddcfb94f5f00e68 Mon Sep 17 00:00:00 2001
|
||||
From: Diachkov Ilia <diachkov.ilia1@huawei-partners.com>
|
||||
Date: Fri, 29 Dec 2023 05:59:00 +0800
|
||||
Subject: [PATCH 2/2] Fix fail in IPA prefetch (src-openEuler/gcc: I8RV7T)
|
||||
|
||||
---
|
||||
gcc/ipa-prefetch.c | 3 +++
|
||||
1 file changed, 3 insertions(+)
|
||||
|
||||
diff --git a/gcc/ipa-prefetch.c b/gcc/ipa-prefetch.c
|
||||
index 24cb4424a..d9456519c 100644
|
||||
--- a/gcc/ipa-prefetch.c
|
||||
+++ b/gcc/ipa-prefetch.c
|
||||
@@ -943,6 +943,9 @@ compare_memrefs (memref_t* mr, memref_t* mr2)
|
||||
(*mr_candidate_map)[mr] = mr2;
|
||||
return;
|
||||
}
|
||||
+ /* Probably we shouldn't leave nulls in the map. */
|
||||
+ if ((*mr_candidate_map)[mr] == NULL)
|
||||
+ return;
|
||||
/* TODO: support analysis with incrementation of different fields. */
|
||||
if ((*mr_candidate_map)[mr]->offset != mr2->offset)
|
||||
{
|
||||
--
|
||||
2.33.0
|
||||
|
||||
@ -0,0 +1,26 @@
|
||||
From 92dc99425b2566e8cc9cba7cec8774911db0c654 Mon Sep 17 00:00:00 2001
|
||||
From: XingYuShuai <1150775134@qq.com>
|
||||
Date: Fri, 2 Feb 2024 15:55:07 +0800
|
||||
Subject: [PATCH 1/3] [Loop CRC] Solving the problem of insufficient CRC table.
|
||||
|
||||
---
|
||||
gcc/tree-ssa-loop-crc.c | 3 ++-
|
||||
1 file changed, 2 insertions(+), 1 deletion(-)
|
||||
|
||||
diff --git a/gcc/tree-ssa-loop-crc.c b/gcc/tree-ssa-loop-crc.c
|
||||
index 2dd9e1e3b..26f8e64d1 100644
|
||||
--- a/gcc/tree-ssa-loop-crc.c
|
||||
+++ b/gcc/tree-ssa-loop-crc.c
|
||||
@@ -421,7 +421,8 @@ match_crc_table (tree crc_table)
|
||||
tree low_bound = array_ref_low_bound (crc_table);
|
||||
tree up_bound = array_ref_up_bound (crc_table);
|
||||
tree element_size = array_ref_element_size (crc_table);
|
||||
- if (low_bound == NULL || up_bound == NULL || element_size == NULL)
|
||||
+ if (!tree_fits_uhwi_p(low_bound) || !tree_fits_uhwi_p(up_bound) ||
|
||||
+ !tree_fits_uhwi_p(element_size))
|
||||
return false;
|
||||
unsigned HOST_WIDE_INT lb = tree_to_uhwi (low_bound);
|
||||
unsigned HOST_WIDE_INT ub = tree_to_uhwi (up_bound);
|
||||
--
|
||||
2.33.0
|
||||
|
||||
1862
0187-Add-IPA-prefetch-test.patch
Normal file
1862
0187-Add-IPA-prefetch-test.patch
Normal file
File diff suppressed because it is too large
Load Diff
@ -0,0 +1,32 @@
|
||||
From e7f50fc07c76b60b272cb97151b228d96b67938a Mon Sep 17 00:00:00 2001
|
||||
From: Diachkov Ilia WX1215920 <diachkov.ilia1@huawei-partners.com>
|
||||
Date: Mon, 19 Feb 2024 11:06:37 +0300
|
||||
Subject: [PATCH 3/3] Fix fails in ICP (for src-openEuler/gcc: I90P7M, I91CZ8)
|
||||
|
||||
---
|
||||
gcc/ipa-devirt.c | 8 +++++---
|
||||
1 file changed, 5 insertions(+), 3 deletions(-)
|
||||
|
||||
diff --git a/gcc/ipa-devirt.c b/gcc/ipa-devirt.c
|
||||
index 9863084e4..194ad3f21 100644
|
||||
--- a/gcc/ipa-devirt.c
|
||||
+++ b/gcc/ipa-devirt.c
|
||||
@@ -4805,10 +4805,12 @@ compare_block_and_init_type (tree block, tree t1)
|
||||
static void
|
||||
analyze_global_var (varpool_node *var)
|
||||
{
|
||||
- var->get_constructor();
|
||||
tree decl = var->decl;
|
||||
- if (TREE_CODE (decl) == SSA_NAME || !DECL_INITIAL (decl)
|
||||
- || integer_zerop (DECL_INITIAL (decl)))
|
||||
+ if (!decl || !DECL_INITIAL (decl))
|
||||
+ return;
|
||||
+ var->get_constructor ();
|
||||
+ if (TREE_CODE (decl) == SSA_NAME || integer_zerop (DECL_INITIAL (decl))
|
||||
+ || TREE_CODE (DECL_INITIAL (decl)) == ERROR_MARK)
|
||||
return;
|
||||
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
--
|
||||
2.33.0
|
||||
|
||||
739
0189-Add-hip11-CPU-pipeline-scheduling.patch
Normal file
739
0189-Add-hip11-CPU-pipeline-scheduling.patch
Normal file
@ -0,0 +1,739 @@
|
||||
From 431f80e6d3a323e3382f73a80bf7fc7ee7a73f02 Mon Sep 17 00:00:00 2001
|
||||
From: XingYuShuai <1150775134@qq.com>
|
||||
Date: Mon, 26 Feb 2024 20:34:06 +0800
|
||||
Subject: [PATCH] Add hip11 CPU pipeline scheduling
|
||||
|
||||
This patch adds an mcpu: hip11. It has been tested on aarch64
|
||||
and shows no regressions.
|
||||
---
|
||||
gcc/config/aarch64/aarch64-cores.def | 3 +-
|
||||
gcc/config/aarch64/aarch64-cost-tables.h | 104 ++++++
|
||||
gcc/config/aarch64/aarch64-tune.md | 2 +-
|
||||
gcc/config/aarch64/aarch64.c | 83 +++++
|
||||
gcc/config/aarch64/aarch64.md | 1 +
|
||||
gcc/config/aarch64/hip11.md | 418 +++++++++++++++++++++++
|
||||
gcc/doc/invoke.texi | 2 +-
|
||||
7 files changed, 610 insertions(+), 3 deletions(-)
|
||||
create mode 100644 gcc/config/aarch64/hip11.md
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
|
||||
index 9c2902924..53125f6bd 100644
|
||||
--- a/gcc/config/aarch64/aarch64-cores.def
|
||||
+++ b/gcc/config/aarch64/aarch64-cores.def
|
||||
@@ -123,7 +123,7 @@ AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, 8_2A, AARCH64_FL_FOR_
|
||||
AARCH64_CORE("a64fx", a64fx, a64fx, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_F16 | AARCH64_FL_SVE, a64fx, 0x46, 0x001, -1)
|
||||
|
||||
/* HiSilicon ('H') cores. */
|
||||
-AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
|
||||
+AARCH64_CORE("tsv110", tsv110, tsv110, 8_2A, AARCH64_FL_FOR_ARCH8_2 | AARCH64_FL_CRYPTO | AARCH64_FL_F16 | AARCH64_FL_AES | AARCH64_FL_SHA2, tsv110, 0x48, 0xd01, -1)
|
||||
|
||||
/* ARMv8.3-A Architecture Processors. */
|
||||
|
||||
@@ -141,6 +141,7 @@ AARCH64_CORE("saphira", saphira, saphira, 8_4A, AARCH64_FL_FOR_ARCH8_
|
||||
|
||||
/* Armv8.5-A Architecture Processors. */
|
||||
AARCH64_CORE("neoverse-n2", neoversen2, cortexa57, 8_5A, AARCH64_FL_FOR_ARCH8_5 | AARCH64_FL_I8MM | AARCH64_FL_BF16 | AARCH64_FL_F16 | AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_SVE2_BITPERM | AARCH64_FL_RNG | AARCH64_FL_MEMTAG, neoversen2, 0x41, 0xd49, -1)
|
||||
+AARCH64_CORE("hip11", hip11, hip11, 8_5A, AARCH64_FL_FOR_ARCH8_5| AARCH64_FL_SVE | AARCH64_FL_SVE2 | AARCH64_FL_F16, hip11, 0x48, 0xd22, -1)
|
||||
|
||||
/* ARMv8-A big.LITTLE implementations. */
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h
|
||||
index c6805717f..377650be0 100644
|
||||
--- a/gcc/config/aarch64/aarch64-cost-tables.h
|
||||
+++ b/gcc/config/aarch64/aarch64-cost-tables.h
|
||||
@@ -541,6 +541,110 @@ const struct cpu_cost_table tsv110_extra_costs =
|
||||
}
|
||||
};
|
||||
|
||||
+const struct cpu_cost_table hip11_extra_costs =
|
||||
+{
|
||||
+ /* ALU */
|
||||
+ {
|
||||
+ 0, /* arith. */
|
||||
+ 0, /* logical. */
|
||||
+ 0, /* shift. */
|
||||
+ 0, /* shift_reg. */
|
||||
+ COSTS_N_INSNS (1), /* arith_shift. */
|
||||
+ COSTS_N_INSNS (1), /* arith_shift_reg. */
|
||||
+ COSTS_N_INSNS (1), /* log_shift. */
|
||||
+ COSTS_N_INSNS (1), /* log_shift_reg. */
|
||||
+ 0, /* extend. */
|
||||
+ COSTS_N_INSNS (1), /* extend_arith. */
|
||||
+ 0, /* bfi. */
|
||||
+ 0, /* bfx. */
|
||||
+ 0, /* clz. */
|
||||
+ 0, /* rev. */
|
||||
+ 0, /* non_exec. */
|
||||
+ true /* non_exec_costs_exec. */
|
||||
+ },
|
||||
+
|
||||
+ {
|
||||
+ /* MULT SImode */
|
||||
+ {
|
||||
+ COSTS_N_INSNS (2), /* simple. */
|
||||
+ COSTS_N_INSNS (2), /* flag_setting. */
|
||||
+ COSTS_N_INSNS (2), /* extend. */
|
||||
+ COSTS_N_INSNS (2), /* add. */
|
||||
+ COSTS_N_INSNS (2), /* extend_add. */
|
||||
+ COSTS_N_INSNS (11) /* idiv. */
|
||||
+ },
|
||||
+ /* MULT DImode */
|
||||
+ {
|
||||
+ COSTS_N_INSNS (3), /* simple. */
|
||||
+ 0, /* flag_setting (N/A). */
|
||||
+ COSTS_N_INSNS (3), /* extend. */
|
||||
+ COSTS_N_INSNS (3), /* add. */
|
||||
+ COSTS_N_INSNS (3), /* extend_add. */
|
||||
+ COSTS_N_INSNS (19) /* idiv. */
|
||||
+ }
|
||||
+ },
|
||||
+ /* LD/ST */
|
||||
+ {
|
||||
+ COSTS_N_INSNS (3), /* load. */
|
||||
+ COSTS_N_INSNS (4), /* load_sign_extend. */
|
||||
+ COSTS_N_INSNS (3), /* ldrd. */
|
||||
+ COSTS_N_INSNS (3), /* ldm_1st. */
|
||||
+ 1, /* ldm_regs_per_insn_1st. */
|
||||
+ 2, /* ldm_regs_per_insn_subsequent. */
|
||||
+ COSTS_N_INSNS (4), /* loadf. */
|
||||
+ COSTS_N_INSNS (4), /* loadd. */
|
||||
+ COSTS_N_INSNS (4), /* load_unaligned. */
|
||||
+ 0, /* store. */
|
||||
+ 0, /* strd. */
|
||||
+ 0, /* stm_1st. */
|
||||
+ 1, /* stm_regs_per_insn_1st. */
|
||||
+ 2, /* stm_regs_per_insn_subsequent. */
|
||||
+ 0, /* storef. */
|
||||
+ 0, /* stored. */
|
||||
+ COSTS_N_INSNS (1), /* store_unaligned. */
|
||||
+ COSTS_N_INSNS (4), /* loadv. */
|
||||
+ COSTS_N_INSNS (4) /* storev. */
|
||||
+ },
|
||||
+ {
|
||||
+ /* FP SFmode */
|
||||
+ {
|
||||
+ COSTS_N_INSNS (10), /* div. */
|
||||
+ COSTS_N_INSNS (4), /* mult. */
|
||||
+ COSTS_N_INSNS (4), /* mult_addsub. */
|
||||
+ COSTS_N_INSNS (4), /* fma. */
|
||||
+ COSTS_N_INSNS (4), /* addsub. */
|
||||
+ COSTS_N_INSNS (1), /* fpconst. */
|
||||
+ COSTS_N_INSNS (1), /* neg. */
|
||||
+ COSTS_N_INSNS (1), /* compare. */
|
||||
+ COSTS_N_INSNS (2), /* widen. */
|
||||
+ COSTS_N_INSNS (2), /* narrow. */
|
||||
+ COSTS_N_INSNS (2), /* toint. */
|
||||
+ COSTS_N_INSNS (1), /* fromint. */
|
||||
+ COSTS_N_INSNS (2) /* roundint. */
|
||||
+ },
|
||||
+ /* FP DFmode */
|
||||
+ {
|
||||
+ COSTS_N_INSNS (17), /* div. */
|
||||
+ COSTS_N_INSNS (4), /* mult. */
|
||||
+ COSTS_N_INSNS (6), /* mult_addsub. */
|
||||
+ COSTS_N_INSNS (6), /* fma. */
|
||||
+ COSTS_N_INSNS (3), /* addsub. */
|
||||
+ COSTS_N_INSNS (1), /* fpconst. */
|
||||
+ COSTS_N_INSNS (1), /* neg. */
|
||||
+ COSTS_N_INSNS (1), /* compare. */
|
||||
+ COSTS_N_INSNS (2), /* widen. */
|
||||
+ COSTS_N_INSNS (2), /* narrow. */
|
||||
+ COSTS_N_INSNS (2), /* toint. */
|
||||
+ COSTS_N_INSNS (1), /* fromint. */
|
||||
+ COSTS_N_INSNS (2) /* roundint. */
|
||||
+ }
|
||||
+ },
|
||||
+ /* Vector */
|
||||
+ {
|
||||
+ COSTS_N_INSNS (1) /* alu. */
|
||||
+ }
|
||||
+};
|
||||
+
|
||||
const struct cpu_cost_table a64fx_extra_costs =
|
||||
{
|
||||
/* ALU */
|
||||
diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
|
||||
index 7fda2294b..f33a3330d 100644
|
||||
--- a/gcc/config/aarch64/aarch64-tune.md
|
||||
+++ b/gcc/config/aarch64/aarch64-tune.md
|
||||
@@ -1,5 +1,5 @@
|
||||
;; -*- buffer-read-only: t -*-
|
||||
;; Generated automatically by gentune.sh from aarch64-cores.def
|
||||
(define_attr "tune"
|
||||
- "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,saphira,neoversen2,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
|
||||
+ "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88p1,thunderxt88,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa65,cortexa65ae,ares,neoversen1,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,zeus,neoversev1,saphira,neoversen2,hip11,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55"
|
||||
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
|
||||
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
|
||||
index da4983236..938948f29 100644
|
||||
--- a/gcc/config/aarch64/aarch64.c
|
||||
+++ b/gcc/config/aarch64/aarch64.c
|
||||
@@ -448,6 +448,22 @@ static const struct cpu_addrcost_table tsv110_addrcost_table =
|
||||
0, /* imm_offset */
|
||||
};
|
||||
|
||||
+static const struct cpu_addrcost_table hip11_addrcost_table =
|
||||
+{
|
||||
+ {
|
||||
+ 1, /* hi */
|
||||
+ 0, /* si */
|
||||
+ 0, /* di */
|
||||
+ 1, /* ti */
|
||||
+ },
|
||||
+ 0, /* pre_modify */
|
||||
+ 0, /* post_modify */
|
||||
+ 0, /* register_offset */
|
||||
+ 1, /* register_sextend */
|
||||
+ 1, /* register_zextend */
|
||||
+ 0, /* imm_offset */
|
||||
+};
|
||||
+
|
||||
static const struct cpu_addrcost_table qdf24xx_addrcost_table =
|
||||
{
|
||||
{
|
||||
@@ -575,6 +591,16 @@ static const struct cpu_regmove_cost tsv110_regmove_cost =
|
||||
2 /* FP2FP */
|
||||
};
|
||||
|
||||
+static const struct cpu_regmove_cost hip11_regmove_cost =
|
||||
+{
|
||||
+ 1, /* GP2GP */
|
||||
+ /* NOTE(review): meant to keep slow int<->fp moves out of spilling, but the
|
||||
+ costs below (2, 3) are lower than memmov_cost (4) in hip11_tunings. */
|
||||
+ 2, /* GP2FP */
|
||||
+ 3, /* FP2GP */
|
||||
+ 2 /* FP2FP */
|
||||
+};
|
||||
+
|
||||
static const struct cpu_regmove_cost a64fx_regmove_cost =
|
||||
{
|
||||
1, /* GP2GP */
|
||||
@@ -664,6 +690,25 @@ static const struct cpu_vector_cost tsv110_vector_cost =
|
||||
1 /* cond_not_taken_branch_cost */
|
||||
};
|
||||
|
||||
+static const struct cpu_vector_cost hip11_vector_cost =
|
||||
+{
|
||||
+ 1, /* scalar_int_stmt_cost */
|
||||
+ 1, /* scalar_fp_stmt_cost */
|
||||
+ 5, /* scalar_load_cost */
|
||||
+ 1, /* scalar_store_cost */
|
||||
+ 2, /* vec_int_stmt_cost */
|
||||
+ 2, /* vec_fp_stmt_cost */
|
||||
+ 2, /* vec_permute_cost */
|
||||
+ 5, /* vec_to_scalar_cost */
|
||||
+ 5, /* scalar_to_vec_cost */
|
||||
+ 5, /* vec_align_load_cost */
|
||||
+ 5, /* vec_unalign_load_cost */
|
||||
+ 1, /* vec_unalign_store_cost */
|
||||
+ 1, /* vec_store_cost */
|
||||
+ 1, /* cond_taken_branch_cost */
|
||||
+ 1 /* cond_not_taken_branch_cost */
|
||||
+};
|
||||
+
|
||||
/* Generic costs for vector insn classes. */
|
||||
static const struct cpu_vector_cost cortexa57_vector_cost =
|
||||
{
|
||||
@@ -902,6 +947,17 @@ static const cpu_prefetch_tune tsv110_prefetch_tune =
|
||||
-1 /* default_opt_level */
|
||||
};
|
||||
|
||||
+static const cpu_prefetch_tune hip11_prefetch_tune =
|
||||
+{
|
||||
+ 0, /* num_slots */
|
||||
+ 64, /* l1_cache_size */
|
||||
+ 64, /* l1_cache_line_size */
|
||||
+ 512, /* l2_cache_size */
|
||||
+ true, /* prefetch_dynamic_strides */
|
||||
+ -1, /* minimum_stride */
|
||||
+ -1 /* default_opt_level */
|
||||
+};
|
||||
+
|
||||
static const cpu_prefetch_tune xgene1_prefetch_tune =
|
||||
{
|
||||
8, /* num_slots */
|
||||
@@ -1196,6 +1252,33 @@ static const struct tune_params tsv110_tunings =
|
||||
&tsv110_prefetch_tune
|
||||
};
|
||||
|
||||
+static const struct tune_params hip11_tunings =
|
||||
+{
|
||||
+ &hip11_extra_costs,
|
||||
+ &hip11_addrcost_table,
|
||||
+ &hip11_regmove_cost,
|
||||
+ &hip11_vector_cost,
|
||||
+ &generic_branch_cost,
|
||||
+ &generic_approx_modes,
|
||||
+ SVE_512, /* sve_width */
|
||||
+ 4, /* memmov_cost */
|
||||
+ 4, /* issue_rate */
|
||||
+ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_ALU_BRANCH
|
||||
+ | AARCH64_FUSE_ALU_CBZ), /* fusible_ops */
|
||||
+ "16", /* function_align. */
|
||||
+ "4", /* jump_align. */
|
||||
+ "8", /* loop_align. */
|
||||
+ 2, /* int_reassoc_width. */
|
||||
+ 4, /* fp_reassoc_width. */
|
||||
+ 1, /* vec_reassoc_width. */
|
||||
+ 2, /* min_div_recip_mul_sf. */
|
||||
+ 2, /* min_div_recip_mul_df. */
|
||||
+ 0, /* max_case_values. */
|
||||
+ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
|
||||
+ (AARCH64_EXTRA_TUNE_PREFER_ADVSIMD_AUTOVEC), /* tune_flags. */
|
||||
+ &hip11_prefetch_tune
|
||||
+};
|
||||
+
|
||||
static const struct tune_params xgene1_tunings =
|
||||
{
|
||||
&xgene1_extra_costs,
|
||||
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
|
||||
index 7c2562f49..38af8d000 100644
|
||||
--- a/gcc/config/aarch64/aarch64.md
|
||||
+++ b/gcc/config/aarch64/aarch64.md
|
||||
@@ -452,6 +452,7 @@
|
||||
(include "../arm/cortex-a57.md")
|
||||
(include "../arm/exynos-m1.md")
|
||||
(include "falkor.md")
|
||||
+(include "hip11.md")
|
||||
(include "saphira.md")
|
||||
(include "thunderx.md")
|
||||
(include "../arm/xgene1.md")
|
||||
diff --git a/gcc/config/aarch64/hip11.md b/gcc/config/aarch64/hip11.md
|
||||
new file mode 100644
|
||||
index 000000000..57944fbc2
|
||||
--- /dev/null
|
||||
+++ b/gcc/config/aarch64/hip11.md
|
||||
@@ -0,0 +1,418 @@
|
||||
+;; hip11 pipeline description
|
||||
+;; Copyright (C) 2018-2024 Free Software Foundation, Inc.
|
||||
+;;
|
||||
+;; This file is part of GCC.
|
||||
+;;
|
||||
+;; GCC is free software; you can redistribute it and/or modify it
|
||||
+;; under the terms of the GNU General Public License as published by
|
||||
+;; the Free Software Foundation; either version 3, or (at your option)
|
||||
+;; any later version.
|
||||
+;;
|
||||
+;; GCC is distributed in the hope that it will be useful, but
|
||||
+;; WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
+;; General Public License for more details.
|
||||
+;;
|
||||
+;; You should have received a copy of the GNU General Public License
|
||||
+;; along with GCC; see the file COPYING3. If not see
|
||||
+;; <http://www.gnu.org/licenses/>.
|
||||
+
|
||||
+(define_automaton "hip11")
|
||||
+
|
||||
+;; The hip11 core is modelled as an issue pipeline that has
|
||||
+;; the following functional units.
|
||||
+;; 1. Three pipelines for integer operations: ALU1, ALU2, ALU3
|
||||
+
|
||||
+(define_cpu_unit "hip11_alu1_issue" "hip11")
|
||||
+(define_reservation "hip11_alu1" "hip11_alu1_issue")
|
||||
+
|
||||
+(define_cpu_unit "hip11_alu2_issue" "hip11")
|
||||
+(define_reservation "hip11_alu2" "hip11_alu2_issue")
|
||||
+
|
||||
+(define_cpu_unit "hip11_alu3_issue" "hip11")
|
||||
+(define_reservation "hip11_alu3" "hip11_alu3_issue")
|
||||
+
|
||||
+(define_reservation "hip11alu" "hip11_alu1|hip11_alu2|hip11_alu3")
|
||||
+
|
||||
+;; 2. One pipeline for complex integer operations: MDU
|
||||
+
|
||||
+(define_cpu_unit "hip11_mdu_issue" "hip11")
|
||||
+(define_reservation "hip11_mdu" "hip11_mdu_issue")
|
||||
+
|
||||
+;; 3. Two asymmetric pipelines for Asimd and FP operations: FSU1, FSU2
|
||||
+(define_automaton "hip11_fsu")
|
||||
+
|
||||
+(define_cpu_unit "hip11_fsu1_issue"
|
||||
+ "hip11_fsu")
|
||||
+(define_cpu_unit "hip11_fsu2_issue"
|
||||
+ "hip11_fsu")
|
||||
+
|
||||
+(define_reservation "hip11_fsu1" "hip11_fsu1_issue")
|
||||
+(define_reservation "hip11_fsu2" "hip11_fsu2_issue")
|
||||
+(define_reservation "hip11_fsu_pipe" "hip11_fsu1|hip11_fsu2")
|
||||
+
|
||||
+;; 4. Two pipelines for branch operations, shared with alu2 and alu3: BRU1, BRU2
|
||||
+
|
||||
+;; 5. Two pipelines for load and store operations: LS1, LS2.
|
||||
+
|
||||
+(define_cpu_unit "hip11_ls1_issue" "hip11")
|
||||
+(define_cpu_unit "hip11_ls2_issue" "hip11")
|
||||
+(define_reservation "hip11_ls1" "hip11_ls1_issue")
|
||||
+(define_reservation "hip11_ls2" "hip11_ls2_issue")
|
||||
+
|
||||
+;; Block all issue queues.
|
||||
+
|
||||
+(define_reservation "hip11_block" "hip11_fsu1_issue + hip11_fsu2_issue
|
||||
+ + hip11_mdu_issue + hip11_alu1_issue
|
||||
+ + hip11_alu2_issue + hip11_alu3_issue + hip11_ls1_issue + hip11_ls2_issue")
|
||||
+
|
||||
+;; Branch execution Unit
|
||||
+;;
|
||||
+(define_insn_reservation "hip11_branch" 1
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "branch"))
|
||||
+ "hip11_alu2|hip11_alu3")
|
||||
+
|
||||
+(define_insn_reservation "hip11_return_from_subroutine" 6
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "branch")
|
||||
+ (eq_attr "sls_length" "retbr"))
|
||||
+ "hip11_mdu,(hip11_alu2|hip11_alu3)")
|
||||
+
|
||||
+;; Simple Execution Unit:
|
||||
+;;
|
||||
+;; Simple ALU without shift
|
||||
+(define_insn_reservation "hip11_alu" 1
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "alu_imm,logic_imm,\
|
||||
+ alu_sreg,logic_reg,\
|
||||
+ adc_imm,adc_reg,\
|
||||
+ adr,bfm,clz,rbit,rev,\
|
||||
+ shift_imm,shift_reg,\
|
||||
+ mov_imm,mov_reg,\
|
||||
+ mvn_imm,mvn_reg,\
|
||||
+ mrs,multiple,csel,\
|
||||
+ rotate_imm"))
|
||||
+ "hip11_alu1|hip11_alu2|hip11_alu3")
|
||||
+
|
||||
+(define_insn_reservation "hip11_alus" 1
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "alus_imm,logics_imm,\
|
||||
+ alus_sreg,logics_reg,\
|
||||
+ adcs_imm,adcs_reg"))
|
||||
+ "hip11_alu2|hip11_alu3")
|
||||
+
|
||||
+;; ALU ops with shift
|
||||
+(define_insn_reservation "hip11_alu_shift" 2
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "extend,\
|
||||
+ alu_shift_imm,alu_shift_reg,\
|
||||
+ crc,logic_shift_imm,logic_shift_reg,\
|
||||
+ mov_shift,mvn_shift,\
|
||||
+ mov_shift_reg,mvn_shift_reg"))
|
||||
+ "hip11_mdu")
|
||||
+
|
||||
+(define_insn_reservation "hip11_alus_shift" 2
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "alus_shift_imm,alus_shift_reg,\
|
||||
+ logics_shift_imm,logics_shift_reg"))
|
||||
+ "hip11_alu2|hip11_alu3")
|
||||
+
|
||||
+;; Multiply instructions
|
||||
+(define_insn_reservation "hip11_mult" 3
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (ior (eq_attr "mul32" "yes")
|
||||
+ (eq_attr "widen_mul64" "yes")))
|
||||
+ "hip11_mdu")
|
||||
+
|
||||
+;; Integer divide
|
||||
+(define_insn_reservation "hip11_div" 10
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "udiv,sdiv"))
|
||||
+ "hip11_mdu")
|
||||
+
|
||||
+(define_insn_reservation "hip11_mla" 4
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "mla,smlal,umlal,smull,umull"))
|
||||
+ "hip11_mdu")
|
||||
+
|
||||
+;; Block all issue pipes for a cycle
|
||||
+(define_insn_reservation "hip11_block" 1
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "block"))
|
||||
+ "hip11_block")
|
||||
+
|
||||
+;; Load-store execution Unit
|
||||
+;;
|
||||
+(define_insn_reservation "hip11_load1" 4
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "load_4,load_8,load_16"))
|
||||
+ "hip11_ls1|hip11_ls2")
|
||||
+
|
||||
+(define_insn_reservation "hip11_fp_load" 5
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "f_loads,f_loadd"))
|
||||
+ "hip11_ls1|hip11_ls2")
|
||||
+
|
||||
+(define_insn_reservation "hip11_neon_ld1_single" 7
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "neon_load1_one_lane,neon_load1_one_lane_q,\
|
||||
+ neon_load1_all_lanes,neon_load1_all_lanes_q"))
|
||||
+ "(hip11_ls1|hip11_ls2)+hip11_fsu1")
|
||||
+
|
||||
+(define_insn_reservation "hip11_neon_ld1_1reg" 5
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "neon_load1_1reg,neon_load1_1reg_q"))
|
||||
+ "hip11_ls1|hip11_ls2")
|
||||
+
|
||||
+(define_insn_reservation "hip11_neon_ld1_2reg" 6
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "neon_load1_2reg,neon_load1_2reg_q"))
|
||||
+ "hip11_ls1|hip11_ls2")
|
||||
+
|
||||
+(define_insn_reservation "hip11_neon_ld1_3reg" 7
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "neon_load1_3reg,neon_load1_3reg_q"))
|
||||
+ "hip11_ls1|hip11_ls2")
|
||||
+
|
||||
+(define_insn_reservation "hip11_neon_ld1_4reg" 8
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "neon_load1_4reg,neon_load1_4reg_q"))
|
||||
+ "hip11_ls1|hip11_ls2")
|
||||
+
|
||||
+(define_insn_reservation "hip11_neon_ld2" 8
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "neon_load2_one_lane,neon_load2_one_lane_q,\
|
||||
+ neon_load2_all_lanes,neon_load2_all_lanes_q,\
|
||||
+ neon_load2_2reg,neon_load2_2reg_q,\
|
||||
+ neon_load2_4reg,neon_load2_4reg_q"))
|
||||
+ "(hip11_ls1|hip11_ls2)+hip11_fsu1")
|
||||
+
|
||||
+(define_insn_reservation "hip11_neon_ld3_single" 9
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "neon_load3_one_lane,neon_load3_one_lane_q,\
|
||||
+ neon_load3_all_lanes,neon_load3_all_lanes_q"))
|
||||
+ "(hip11_ls1|hip11_ls2)+hip11_fsu1")
|
||||
+
|
||||
+(define_insn_reservation "hip11_neon_ld3_multiple" 13
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "neon_load3_3reg,neon_load3_3reg_q"))
|
||||
+ "(hip11_ls1|hip11_ls2)+hip11_fsu1")
|
||||
+
|
||||
+(define_insn_reservation "hip11_neon_ld4_single" 10
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "neon_load4_one_lane,neon_load4_one_lane_q,\
|
||||
+ neon_load4_all_lanes,neon_load4_all_lanes_q"))
|
||||
+ "(hip11_ls1|hip11_ls2)+hip11_fsu1")
|
||||
+
|
||||
+(define_insn_reservation "hip11_neon_ld4_multiple" 11
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "neon_load4_4reg,neon_load4_4reg_q"))
|
||||
+ "(hip11_ls1|hip11_ls2)+hip11_fsu1")
|
||||
+
|
||||
+;; Stores of up to two words.
|
||||
+(define_insn_reservation "hip11_store1" 1
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "store_4,store_8,store_16,\
|
||||
+ f_stored,f_stores"))
|
||||
+ "hip11_ls1|hip11_ls2")
|
||||
+
|
||||
+;; Floating-Point Operations.
|
||||
+(define_insn_reservation "hip11_fp_arith" 2
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "ffariths,ffarithd,f_minmaxs,\
|
||||
+ f_minmaxd,fadds,faddd,neon_fcadd"))
|
||||
+ "hip11_fsu_pipe")
|
||||
+
|
||||
+(define_insn_reservation "hip11_fp_mul" 3
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "neon_fp_mul_d,neon_fp_mul_d_q,\
|
||||
+ neon_fp_mul_s_scalar,neon_fp_mul_s_scalar_q,\
|
||||
+ neon_fp_mul_d_scalar_q,fmuld,fmuls"))
|
||||
+ "hip11_fsu_pipe")
|
||||
+
|
||||
+(define_insn_reservation "hip11_fp_cmp" 2
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "fccmpd,fccmps"))
|
||||
+ "hip11alu,hip11_fsu_pipe")
|
||||
+
|
||||
+(define_insn_reservation "hip11_fp_csel" 2
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "fcsel"))
|
||||
+ "hip11alu,hip11_fsu1")
|
||||
+
|
||||
+(define_insn_reservation "hip11_fp_fcmp" 1
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "fcmpd,fcmps"))
|
||||
+ "hip11_fsu_pipe")
|
||||
+
|
||||
+(define_insn_reservation "hip11_fp_divs" 7
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "fdivs"))
|
||||
+ "hip11_fsu1")
|
||||
+
|
||||
+(define_insn_reservation "hip11_fp_divd" 10
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "fdivd"))
|
||||
+ "hip11_fsu1")
|
||||
+
|
||||
+(define_insn_reservation "hip11_fp_sqrts" 9
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "fsqrts"))
|
||||
+ "hip11_fsu1")
|
||||
+
|
||||
+(define_insn_reservation "hip11_fp_sqrtd" 15
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "fsqrtd"))
|
||||
+ "hip11_fsu1")
|
||||
+
|
||||
+(define_insn_reservation "hip11_fp_mac" 4
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "fmacs,ffmas,fmacd,ffmad"))
|
||||
+ "hip11_fsu_pipe")
|
||||
+
|
||||
+(define_insn_reservation "hip11_fp_mov" 1
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "fmov,neon_dup,neon_dup_q,\
|
||||
+ neon_from_gp,neon_from_gp_q,\
|
||||
+ neon_ins,neon_ins_q,\
|
||||
+ neon_to_gp,neon_to_gp_q,\
|
||||
+ neon_move,neon_move_q,\
|
||||
+ neon_rev,neon_rev_q,\
|
||||
+ neon_permute,neon_permute_q,\
|
||||
+ neon_shift_imm_narrow_q,\
|
||||
+ neon_ext,neon_ext_q,\
|
||||
+ neon_rbit,\
|
||||
+ crypto_sha3,neon_tbl1,neon_tbl1_q,\
|
||||
+ neon_tbl2_q,f_mcr,neon_tst,neon_tst_q,\
|
||||
+ neon_move_narrow_q"))
|
||||
+ "hip11_fsu1")
|
||||
+
|
||||
+;; ASIMD instructions
|
||||
+(define_insn_reservation "hip11_asimd_simple_arithmetic" 2
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "neon_abs,neon_abs_q,neon_neg,neon_neg_q,\
|
||||
+ neon_abd,neon_abd_q,\
|
||||
+ neon_add_long,neon_sub_long,neon_sub_widen,neon_add_widen,\
|
||||
+ neon_add_halve_narrow_q,neon_sub_halve_narrow_q,\
|
||||
+ neon_arith_acc,neon_arith_acc_q,\
|
||||
+ neon_compare,neon_compare_q,\
|
||||
+ neon_compare_zero,neon_compare_zero_q,\
|
||||
+ neon_minmax,neon_minmax_q,\
|
||||
+ neon_logic,neon_logic_q,\
|
||||
+ neon_reduc_add,neon_reduc_add_q,\
|
||||
+ neon_reduc_minmax,neon_reduc_minmax_q,\
|
||||
+ neon_fp_to_int_s,neon_fp_to_int_s_q,\
|
||||
+ neon_fp_to_int_d,neon_fp_to_int_d_q,\
|
||||
+ neon_fp_cvt_widen_s,\
|
||||
+ neon_fp_cvt_narrow_d_q,\
|
||||
+ neon_cls,neon_cls_q,\
|
||||
+ neon_cnt,neon_cnt_q,\
|
||||
+ f_rints,f_rintd,f_cvtf2i,f_cvt,\
|
||||
+ neon_tbl3,neon_fp_round_s,neon_fp_round_s_q,\
|
||||
+ neon_fp_round_d,neon_fp_round_d_q,\
|
||||
+ neon_int_to_fp_s,neon_fp_recpe_s,neon_fp_recpe_s_q,\
|
||||
+ neon_fp_recpe_d,neon_fp_recpe_d_q,\
|
||||
+ neon_fp_cvt_narrow_s_q,\
|
||||
+ crypto_aese,crypto_aesmc,\
|
||||
+ crypto_sha1_fast,crypto_sha1_xor,\
|
||||
+ crypto_sha1_slow,\
|
||||
+ crypto_sha256_fast,\
|
||||
+ crypto_sha512,crypto_sm3,\
|
||||
+ neon_qabs,neon_qabs_q,\
|
||||
+ neon_qneg,neon_qneg_q,\
|
||||
+ neon_qadd,neon_qadd_q,\
|
||||
+ neon_qsub,neon_qsub_q,\
|
||||
+ neon_add_halve,neon_add_halve_q,\
|
||||
+ neon_sub_halve,neon_sub_halve_q,\
|
||||
+ neon_fp_reduc_minmax_s,neon_fp_reduc_minmax_s_q,\
|
||||
+ neon_fp_reduc_minmax_d,neon_fp_reduc_minmax_d_q,\
|
||||
+ neon_fp_rsqrte_s,neon_fp_rsqrte_s_q,\
|
||||
+ neon_fp_rsqrte_d,neon_fp_rsqrte_d_q"))
|
||||
+ "hip11_fsu1")
|
||||
+
|
||||
+(define_insn_reservation "hip11_asimd_complex_arithmetic" 4
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "neon_mul_b,neon_mul_b_q,\
|
||||
+ neon_mul_h,neon_mul_h_q,\
|
||||
+ neon_mul_s,neon_mul_s_q,\
|
||||
+ neon_mla_b,neon_mla_b_q,\
|
||||
+ neon_mla_h,neon_mla_h_q,\
|
||||
+ neon_mla_s,\
|
||||
+ neon_mla_h_scalar,neon_mla_h_scalar_q,\
|
||||
+ neon_mla_s_scalar,neon_mla_s_scalar_q,\
|
||||
+ neon_sat_mul_h_scalar,neon_sat_mul_h_scalar_q,\
|
||||
+ neon_sat_mul_s_scalar,neon_sat_mul_s_scalar_q,\
|
||||
+ neon_sat_mul_b,neon_sat_mul_b_q,\
|
||||
+ neon_sat_mul_h,neon_sat_mul_h_q,\
|
||||
+ neon_sat_mul_s,neon_sat_mul_s_q,\
|
||||
+ neon_mla_b_long,neon_mla_h_long,neon_mla_s_long,\
|
||||
+ neon_mul_b_long,neon_mul_h_long,neon_mul_s_long,\
|
||||
+ neon_sat_mla_b_long,neon_sat_mla_h_long,neon_sat_mla_s_long,\
|
||||
+ neon_sat_mla_h_scalar_long,neon_sat_mla_s_scalar_long,\
|
||||
+ neon_sat_mul_b_long,neon_sat_mul_h_long,neon_sat_mul_s_long,\
|
||||
+ neon_sat_mul_h_scalar_long,neon_sat_mul_s_scalar_long,\
|
||||
+ crypto_pmull,\
|
||||
+ neon_sat_shift_reg,neon_sat_shift_reg_q,\
|
||||
+ neon_shift_reg,neon_shift_reg_q,\
|
||||
+ neon_shift_imm,neon_shift_imm_q,\
|
||||
+ neon_shift_imm_long,\
|
||||
+ neon_sat_shift_imm,neon_sat_shift_imm_q,\
|
||||
+ neon_sat_shift_imm_narrow_q,\
|
||||
+ neon_shift_acc,neon_shift_acc_q,\
|
||||
+ crypto_sha256_slow"))
|
||||
+ "hip11_fsu1")
|
||||
+
|
||||
+(define_insn_reservation "hip11_asimd_fp_compare" 2
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "neon_fp_abs_s,neon_fp_abs_s_q,\
|
||||
+ neon_fp_abs_d,neon_fp_abs_d_q,\
|
||||
+ neon_fp_neg_s,neon_fp_neg_s_q,\
|
||||
+ neon_fp_neg_d,neon_fp_neg_d_q,\
|
||||
+ neon_fp_compare_s,neon_fp_compare_s_q,\
|
||||
+ neon_fp_compare_d,neon_fp_compare_d_q,\
|
||||
+ neon_fp_minmax_s,neon_fp_minmax_s_q,\
|
||||
+ neon_fp_minmax_d,neon_fp_minmax_d_q,\
|
||||
+ neon_fp_addsub_s,neon_fp_addsub_s_q,\
|
||||
+ neon_fp_addsub_d,neon_fp_addsub_d_q,\
|
||||
+ neon_fp_reduc_add_s,neon_fp_reduc_add_s_q,\
|
||||
+ neon_fp_reduc_add_d,neon_fp_reduc_add_d_q,\
|
||||
+ neon_fp_abd_s,neon_fp_abd_s_q,\
|
||||
+ neon_fp_abd_d,neon_fp_abd_d_q"))
|
||||
+ "hip11_fsu_pipe")
|
||||
+
|
||||
+(define_insn_reservation "hip11_asimd_fdiv" 10
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "neon_fp_div_s,neon_fp_div_s_q,\
|
||||
+ neon_fp_div_d,neon_fp_div_d_q"))
|
||||
+ "hip11_fsu1")
|
||||
+
|
||||
+(define_insn_reservation "hip11_asimd_fsqrt" 15
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "neon_fp_sqrt_s,neon_fp_sqrt_s_q,\
|
||||
+ neon_fp_sqrt_d,neon_fp_sqrt_d_q"))
|
||||
+ "hip11_fsu1")
|
||||
+
|
||||
+(define_insn_reservation "hip11_asimd_fp_multiply_add" 4
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "neon_fp_mla_s,neon_fp_mla_s_q,\
|
||||
+ neon_fp_mla_d,neon_fp_mla_d_q,\
|
||||
+ neon_fp_mla_s_scalar,neon_fp_mla_s_scalar_q,\
|
||||
+ neon_fp_mul_s,neon_fp_mul_s_q,neon_fcmla,\
|
||||
+ neon_fp_recps_s,neon_fp_recps_s_q,\
|
||||
+ neon_fp_recps_d,neon_fp_recps_d_q,\
|
||||
+ neon_fp_rsqrts_s,neon_fp_rsqrts_s_q,\
|
||||
+ neon_fp_rsqrts_d,neon_fp_rsqrts_d_q"))
|
||||
+ "hip11_fsu_pipe")
|
||||
+
|
||||
+(define_insn_reservation "hip11_asimd_frecpx" 3
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "neon_fp_recpx_s,neon_fp_recpx_s_q,\
|
||||
+ neon_fp_recpx_d,neon_fp_recpx_d_q,neon_tbl4,\
|
||||
+ neon_dot,neon_dot_q"))
|
||||
+ "hip11_fsu1")
|
||||
+
|
||||
+(define_insn_reservation "hip11_asimd_mmla" 6
|
||||
+ (and (eq_attr "tune" "hip11")
|
||||
+ (eq_attr "type" "neon_mla_s_q"))
|
||||
+ "hip11_fsu1")
|
||||
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
|
||||
index a0a84c20b..ce0dd2ff6 100644
|
||||
--- a/gcc/doc/invoke.texi
|
||||
+++ b/gcc/doc/invoke.texi
|
||||
@@ -17049,7 +17049,7 @@ performance of the code. Permissible values for this option are:
|
||||
@samp{octeontx2}, @samp{octeontx2t98}, @samp{octeontx2t96}
|
||||
@samp{octeontx2t93}, @samp{octeontx2f95}, @samp{octeontx2f95n},
|
||||
@samp{octeontx2f95mm},
|
||||
-@samp{a64fx},
|
||||
+@samp{a64fx}, @samp{hip11},
|
||||
@samp{thunderx}, @samp{thunderxt88},
|
||||
@samp{thunderxt88p1}, @samp{thunderxt81}, @samp{tsv110},
|
||||
@samp{thunderxt83}, @samp{thunderx2t99}, @samp{thunderx3t110}, @samp{zeus},
|
||||
--
|
||||
2.33.0
|
||||
|
||||
170
gcc.spec
170
gcc.spec
@ -46,7 +46,7 @@
|
||||
%else
|
||||
%global build_libitm 0
|
||||
%endif
|
||||
%global build_isl 1
|
||||
%global build_isl 0
|
||||
%global build_libstdcxx_docs 0
|
||||
%ifarch %{ix86} x86_64 ppc ppc64 ppc64le ppc64p7 s390 s390x %{arm} aarch64 %{mips}
|
||||
%global attr_ifunc 1
|
||||
@ -61,7 +61,7 @@
|
||||
Summary: Various compilers (C, C++, Objective-C, ...)
|
||||
Name: gcc
|
||||
Version: %{gcc_version}
|
||||
Release: 42
|
||||
Release: 53
|
||||
License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD
|
||||
URL: https://gcc.gnu.org
|
||||
|
||||
@ -261,6 +261,42 @@ Patch150: 0150-Implement-propagation-of-permutations-in-fwprop.patch
|
||||
Patch151: 0151-Fix-bugs-and-add-tests-for-RTL-ifcvt.patch
|
||||
Patch152: 0152-Add-LLC-Allocation-Pass.patch
|
||||
Patch153: 0153-LLC-add-extending-outer-loop.patch
|
||||
Patch154: 0154-Loop-CRC32-Judge-null-on-pointers-and-solving-coding.patch
|
||||
Patch155: 0155-Add-maxmin-and-uzp1-uzp2-combining.patch
|
||||
Patch156: 0156-add-icp-optimization.patch
|
||||
Patch157: 0157-Add-split-complex-instructions-pass.patch
|
||||
Patch158: 0158-Implement-IPA-prefetch-optimization.patch
|
||||
Patch159: 0159-Implement-AES-pattern-matching.patch
|
||||
Patch160: 0160-AES-Add-lost-files.patch
|
||||
Patch161: 0161-Fix-lost-ftree-fold-phiopt-option-in-tests.patch
|
||||
Patch162: 0162-rtl-ifcvt-free-dominance-info-before-cleanup_cfg.patch
|
||||
Patch163: 0163-Loop-CRC-Solving-the-problem-of-insufficient-CRC-tab.patch
|
||||
Patch164: 0164-LLC-Allocation-Fix-some-bugs-and-remove-variable-pre.patch
|
||||
Patch165: 0165-rtl-ifcvt-BugFix-change-def-selection-logic-in-noce_.patch
|
||||
Patch166: 0166-perm-propagation-Bugfix-Check-that-the-arithmetic-op.patch
|
||||
Patch167: 0167-perm-propagation-Bugfix-Fix-shll-shll2-patterns-for-.patch
|
||||
Patch168: 0168-LLC-Allocation-Bugfix-Terminate-kernel-filtering-for.patch
|
||||
Patch169: 0169-Struct-Reorg-Fix-several-bugs.patch
|
||||
Patch170: 0170-DFE-Add-escape-check.patch
|
||||
Patch171: 0171-phiopt-testsuite-Add-ftree-fold-phiopt-option-to-5-t.patch
|
||||
Patch172: 0172-minmax-Move-minmax-pattern-to-gimple.patch
|
||||
Patch173: 0173-IPA-Fix-test-completion-1.c.patch
|
||||
Patch174: 0174-IPA-Fix-fails-on-checked-build-and-comments-from-rev.patch
|
||||
Patch175: 0175-split-ldp-stp-Extending-and-refactoring-of-pass_spli.patch
|
||||
Patch176: 0176-Fix-bugs-in-ICP-src-openEuler-gcc-I8PYBF-I8PYLL.patch
|
||||
Patch177: 0177-Fix-sqlite-build.patch
|
||||
Patch178: 0178-Fix-freetype-build.patch
|
||||
Patch179: 0179-rtl-ifcvt-refuse-to-rename-def-in-the-last-instructi.patch
|
||||
Patch180: 0180-add-optimization-level-requirement-to-the-gate.patch
|
||||
Patch181: 0181-Fix-issue-I8QD9H.patch
|
||||
Patch182: 0182-Fix-bugs-in-ICP-src-openEuler-gcc-I8RKFJ.patch
|
||||
Patch183: 0183-Fix-fail-in-ICP-src-openEuler-gcc-I8RP4H.patch
|
||||
Patch184: 0184-Fix-fail-in-IPA-prefetch-src-openEuler-gcc-I8RURA.patch
|
||||
Patch185: 0185-Fix-fail-in-IPA-prefetch-src-openEuler-gcc-I8RV7T.patch
|
||||
Patch186: 0186-Loop-CRC-Solving-the-problem-of-insufficient-CRC-tab.patch
|
||||
Patch187: 0187-Add-IPA-prefetch-test.patch
|
||||
Patch188: 0188-Fix-fails-in-ICP-for-src-openEuler-gcc-I90P7M-I91CZ8.patch
|
||||
Patch189: 0189-Add-hip11-CPU-pipeline-scheduling.patch
|
||||
|
||||
%global gcc_target_platform %{_arch}-linux-gnu
|
||||
|
||||
@ -867,6 +903,42 @@ not stable, so plugins must be rebuilt any time GCC is updated.
|
||||
%patch151 -p1
|
||||
%patch152 -p1
|
||||
%patch153 -p1
|
||||
%patch154 -p1
|
||||
%patch155 -p1
|
||||
%patch156 -p1
|
||||
%patch157 -p1
|
||||
%patch158 -p1
|
||||
%patch159 -p1
|
||||
%patch160 -p1
|
||||
%patch161 -p1
|
||||
%patch162 -p1
|
||||
%patch163 -p1
|
||||
%patch164 -p1
|
||||
%patch165 -p1
|
||||
%patch166 -p1
|
||||
%patch167 -p1
|
||||
%patch168 -p1
|
||||
%patch169 -p1
|
||||
%patch170 -p1
|
||||
%patch171 -p1
|
||||
%patch172 -p1
|
||||
%patch173 -p1
|
||||
%patch174 -p1
|
||||
%patch175 -p1
|
||||
%patch176 -p1
|
||||
%patch177 -p1
|
||||
%patch178 -p1
|
||||
%patch179 -p1
|
||||
%patch180 -p1
|
||||
%patch181 -p1
|
||||
%patch182 -p1
|
||||
%patch183 -p1
|
||||
%patch184 -p1
|
||||
%patch185 -p1
|
||||
%patch186 -p1
|
||||
%patch187 -p1
|
||||
%patch188 -p1
|
||||
%patch189 -p1
|
||||
|
||||
%build
|
||||
|
||||
@ -932,10 +1004,15 @@ CC="$CC" CFLAGS="$OPT_FLAGS" \
|
||||
--with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions \
|
||||
--enable-gnu-unique-object --enable-linker-build-id --with-linker-hash-style=gnu \
|
||||
--enable-languages=c,c++,fortran${enablelobjc}${enablelada}${enablelgo}${enableld},lto --enable-plugin \
|
||||
--enable-initfini-array --disable-libgcj --with-isl --without-cloog \
|
||||
--enable-initfini-array --disable-libgcj --without-cloog \
|
||||
--enable-gnu-indirect-function --build=%{gcc_target_platform} \
|
||||
--with-stage1-ldflags="$OPT_LDFLAGS" \
|
||||
--with-boot-ldflags="$OPT_LDFLAGS" --disable-bootstrap \
|
||||
%if %{build_isl}
|
||||
--with-isl \
|
||||
%else
|
||||
--without-isl \
|
||||
%endif
|
||||
%ifarch x86_64
|
||||
--with-tune=generic \
|
||||
--with-arch_32=x86-64 \
|
||||
@ -2891,23 +2968,90 @@ end
|
||||
%doc rpm.doc/changelogs/libcc1/ChangeLog*
|
||||
|
||||
%changelog
|
||||
* Wed May 29 2024 zhengchenhui <zhengchenhui1@huawei.com> - 10.3.1-42
|
||||
- Type:Spec
|
||||
- ID:NA
|
||||
- SUG:NA
|
||||
- DESC: Revert last two commits about isl and ppc64le, and Sync patch from openeuler/gcc
|
||||
|
||||
* Mon Apr 15 2024 huyubiao <huyubiao@huawei.com> - 10.3.1-41
|
||||
* Mon Apr 15 2024 huyubiao <huyubiao@huawei.com> - 10.3.1-53
|
||||
- Type:SPEC
|
||||
- ID:NA
|
||||
- SUG:NA
|
||||
- DESC:disable isl
|
||||
|
||||
* Thu Mar 14 2024 chenyuanfeng <yuanfeng.chen@shingroup.cn> - 10.3.1-40
|
||||
- Type: Spec
|
||||
* Mon Apr 8 2024 Chenhui Zheng <zhengchenhui1@huawei.com> - 10.3.1-52
|
||||
- Type:Sync
|
||||
- ID:NA
|
||||
- SUG:NA
|
||||
- DESC: Set default configuration for the ppc64le
|
||||
- DESC: Sync patch from openeuler/gcc
|
||||
|
||||
* Fri Feb 23 2024 Chenhui Zheng <zhengchenhui1@huawei.com> - 10.3.1-51
|
||||
- Type:Sync
|
||||
- ID:NA
|
||||
- SUG:NA
|
||||
- DESC: Sync patch from openeuler/gcc
|
||||
|
||||
* Thu Jan 4 2024 Chenhui Zheng <zhengchenhui1@huawei.com> - 10.3.1-50
|
||||
- Type:Sync
|
||||
- ID:NA
|
||||
- SUG:NA
|
||||
- DESC: Sync patch from openeuler/gcc
|
||||
|
||||
* Thu Dec 28 2023 Xiong Zhou <xiongzhou4@huawei.com> - 10.3.1-49
|
||||
- Type:Revert & sync
|
||||
- ID:NA
|
||||
- SUG:NA
|
||||
- DESC: Revert ICP and IPA prefetch related patches.
|
||||
Sync patch from openeuler/gcc.
|
||||
|
||||
* Sat Dec 23 2023 Chenhui Zheng <zhengchenhui1@huawei.com> - 10.3.1-48
|
||||
- Type:Sync
|
||||
- ID:NA
|
||||
- SUG:NA
|
||||
- DESC: Sync patch from openeuler/gcc
|
||||
|
||||
* Fri Dec 22 2023 Feiyang Liu <liufeiyang6@huawei.com> - 10.3.1-47
|
||||
- Type:Sync
|
||||
- ID:NA
|
||||
- SUG:NA
|
||||
- DESC: Sync patch from openeuler/gcc
|
||||
|
||||
* Thu Dec 21 2023 Xiong Zhou <xiongzhou4@huawei.com> - 10.3.1-46
|
||||
- Type:Sync
|
||||
- ID:NA
|
||||
- SUG:NA
|
||||
- DESC: Sync patch from openeuler/gcc
|
||||
|
||||
* Mon Dec 18 2023 Xiong Zhou <xiongzhou4@huawei.com> - 10.3.1-45
|
||||
- Type:Sync
|
||||
- ID:NA
|
||||
- SUG:NA
|
||||
- DESC: Sync patches from openeuler/gcc
|
||||
|
||||
* Fri Dec 15 2023 Xiong Zhou <xiongzhou4@huawei.com> - 10.3.1-44
|
||||
- Type:Sync
|
||||
- ID:NA
|
||||
- SUG:NA
|
||||
- DESC: Sync patches from openeuler/gcc
|
||||
|
||||
* Tue Dec 12 2023 Xiong Zhou <xiongzhou4@huawei.com> - 10.3.1-43
|
||||
- Type:Spec
|
||||
- ID:NA
|
||||
- SUG:NA
|
||||
- DESC: Sync patches from openeuler/gcc
|
||||
|
||||
* Tue Dec 12 2023 Shujian Zhao <zhaoshujian@huawei.com> - 10.3.1-42
|
||||
- Type:Spec
|
||||
- ID:NA
|
||||
- SUG:NA
|
||||
- DESC: Sync patch from openeuler/gcc, add LLC extending outer loop.
|
||||
|
||||
* Mon Dec 11 2023 Feiyang Liu <liufeiyang6@huawei.com> - 10.3.1-41
|
||||
- Type:Spec
|
||||
- ID:NA
|
||||
- SUG:NA
|
||||
- DESC: Sync patch from openeuler/gcc
|
||||
|
||||
* Wed Dec 6 2023 Wang Ding <wangding16@huawei.com> - 10.3.1-40
|
||||
- Type:Spec
|
||||
- ID:NA
|
||||
- SUG:NA
|
||||
- DESC: Sync patch from openeuler/gcc
|
||||
|
||||
* Wed Nov 29 2023 Mingchuan Wu <wumingchuan1992@foxmail.com> - 10.3.1-39
|
||||
- Type:Spec
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user