!429 Revert last two commits and sync patch from openeuler/gcc
From: @lesleyzheng1103 Reviewed-by: @huang-xiaoquan Signed-off-by: @huang-xiaoquan
This commit is contained in:
commit
d0fd3414e4
2332
0142-crc-loop-optimization-initial.patch
Normal file
2332
0142-crc-loop-optimization-initial.patch
Normal file
File diff suppressed because it is too large
Load Diff
109
0143-Perform-early-if-conversion-of-simple-arithmetic.patch
Normal file
109
0143-Perform-early-if-conversion-of-simple-arithmetic.patch
Normal file
@ -0,0 +1,109 @@
|
||||
From 7acb88ae27eb3e1af0da866d433968143c7754bd Mon Sep 17 00:00:00 2001
|
||||
From: Pronin Alexander 00812787 <pronin.alexander@huawei.com>
|
||||
Date: Thu, 12 Jan 2023 14:52:49 +0300
|
||||
Subject: [PATCH 20/33] Perform early if-conversion of simple arithmetic
|
||||
|
||||
---
|
||||
gcc/common.opt | 4 ++++
|
||||
gcc/match.pd | 25 +++++++++++++++++++
|
||||
gcc/testsuite/gcc.dg/ifcvt-gimple.c | 37 +++++++++++++++++++++++++++++
|
||||
3 files changed, 66 insertions(+)
|
||||
create mode 100644 gcc/testsuite/gcc.dg/ifcvt-gimple.c
|
||||
|
||||
diff --git a/gcc/common.opt b/gcc/common.opt
|
||||
index 6f0ed7cea..6950756fd 100644
|
||||
--- a/gcc/common.opt
|
||||
+++ b/gcc/common.opt
|
||||
@@ -1721,6 +1721,10 @@ fif-conversion2
|
||||
Common Report Var(flag_if_conversion2) Optimization
|
||||
Perform conversion of conditional jumps to conditional execution.
|
||||
|
||||
+fif-conversion-gimple
|
||||
+Common Report Var(flag_if_conversion_gimple) Optimization
|
||||
+Perform conversion of conditional jumps to branchless equivalents during gimple transformations.
|
||||
+
|
||||
fstack-reuse=
|
||||
Common Joined RejectNegative Enum(stack_reuse_level) Var(flag_stack_reuse) Init(SR_ALL) Optimization
|
||||
-fstack-reuse=[all|named_vars|none] Set stack reuse level for local variables.
|
||||
diff --git a/gcc/match.pd b/gcc/match.pd
|
||||
index 01f81b063..e98cd02e0 100644
|
||||
--- a/gcc/match.pd
|
||||
+++ b/gcc/match.pd
|
||||
@@ -3402,6 +3402,31 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||
)
|
||||
)
|
||||
)
|
||||
+
|
||||
+(if (flag_if_conversion_gimple)
|
||||
+ (for simple_op (plus minus bit_and bit_ior bit_xor)
|
||||
+ (simplify
|
||||
+ (cond @0 (simple_op @1 INTEGER_CST@2) @1)
|
||||
+ (switch
|
||||
+ /* a = cond ? a + 1 : a -> a = a + ((int) cond) */
|
||||
+ (if (integer_onep (@2))
|
||||
+ (simple_op @1 (convert (convert:boolean_type_node @0))))
|
||||
+ /* a = cond ? a + powerof2cst : a ->
|
||||
+ a = a + (((int) cond) << log2 (powerof2cst)) */
|
||||
+ (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@2))
|
||||
+ (with
|
||||
+ {
|
||||
+ tree shift = build_int_cst (integer_type_node, tree_log2 (@2));
|
||||
+ }
|
||||
+ (simple_op @1 (lshift (convert (convert:boolean_type_node @0))
|
||||
+ { shift; })
|
||||
+ )
|
||||
+ )
|
||||
+ )
|
||||
+ )
|
||||
+ )
|
||||
+ )
|
||||
+)
|
||||
#endif
|
||||
|
||||
#if GIMPLE
|
||||
diff --git a/gcc/testsuite/gcc.dg/ifcvt-gimple.c b/gcc/testsuite/gcc.dg/ifcvt-gimple.c
|
||||
new file mode 100644
|
||||
index 000000000..0f7c87e5c
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/ifcvt-gimple.c
|
||||
@@ -0,0 +1,37 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fif-conversion-gimple -fdump-tree-optimized" } */
|
||||
+
|
||||
+int test_int (int optimizable_int) {
|
||||
+ if (optimizable_int > 5)
|
||||
+ ++optimizable_int;
|
||||
+ return optimizable_int;
|
||||
+}
|
||||
+
|
||||
+int test_int_pow2 (int optimizable_int_pow2) {
|
||||
+ if (optimizable_int_pow2 <= 4)
|
||||
+ optimizable_int_pow2 += 1024;
|
||||
+ return optimizable_int_pow2;
|
||||
+}
|
||||
+
|
||||
+int test_int_non_pow2 (int not_optimizable_int_non_pow2) {
|
||||
+ if (not_optimizable_int_non_pow2 == 1)
|
||||
+ not_optimizable_int_non_pow2 += 513;
|
||||
+ return not_optimizable_int_non_pow2;
|
||||
+}
|
||||
+
|
||||
+float test_float (float not_optimizable_float) {
|
||||
+ if (not_optimizable_float > 5)
|
||||
+ not_optimizable_float += 1;
|
||||
+ return not_optimizable_float;
|
||||
+}
|
||||
+
|
||||
+/* Expecting if-else block in test_float and test_int_non_pow2 only. */
|
||||
+/* { dg-final { scan-tree-dump-not "if \\(optimizable" "optimized" } } */
|
||||
+/* { dg-final { scan-tree-dump "if \\(not_optimizable_int_non_pow2" "optimized" } } */
|
||||
+/* { dg-final { scan-tree-dump "if \\(not_optimizable_float" "optimized" } } */
|
||||
+/* { dg-final { scan-tree-dump-times "if " 2 "optimized" } } */
|
||||
+/* { dg-final { scan-tree-dump-times "else" 2 "optimized" } } */
|
||||
+
|
||||
+/* Expecting shifted result only for optimizable_int_pow2. */
|
||||
+/* { dg-final { scan-tree-dump-times " << " 1 "optimized" } } */
|
||||
+/* { dg-final { scan-tree-dump " << 10;" "optimized" } } */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
236
0144-Add-option-to-allow-matching-uaddsub-overflow-for-wi.patch
Normal file
236
0144-Add-option-to-allow-matching-uaddsub-overflow-for-wi.patch
Normal file
@ -0,0 +1,236 @@
|
||||
From f788555b23b0b676729bb695af96954fe083e354 Mon Sep 17 00:00:00 2001
|
||||
From: Pronin Alexander 00812787 <pronin.alexander@huawei.com>
|
||||
Date: Tue, 24 Jan 2023 16:43:40 +0300
|
||||
Subject: [PATCH 21/33] Add option to allow matching uaddsub overflow for widen
|
||||
ops too.
|
||||
|
||||
---
|
||||
gcc/common.opt | 5 ++
|
||||
gcc/testsuite/gcc.dg/uaddsub.c | 143 +++++++++++++++++++++++++++++++++
|
||||
gcc/tree-ssa-math-opts.c | 35 +++++++-
|
||||
3 files changed, 179 insertions(+), 4 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.dg/uaddsub.c
|
||||
|
||||
diff --git a/gcc/common.opt b/gcc/common.opt
|
||||
index 6950756fd..c2f01bbc0 100644
|
||||
--- a/gcc/common.opt
|
||||
+++ b/gcc/common.opt
|
||||
@@ -2989,6 +2989,11 @@ freciprocal-math
|
||||
Common Report Var(flag_reciprocal_math) SetByCombined Optimization
|
||||
Same as -fassociative-math for expressions which include division.
|
||||
|
||||
+fuaddsub-overflow-match-all
|
||||
+Common Report Var(flag_uaddsub_overflow_match_all)
|
||||
+Match unsigned add/sub overflow even if the target does not support
|
||||
+the corresponding instruction.
|
||||
+
|
||||
; Nonzero means that unsafe floating-point math optimizations are allowed
|
||||
; for the sake of speed. IEEE compliance is not guaranteed, and operations
|
||||
; are allowed to assume that their arguments and results are "normal"
|
||||
diff --git a/gcc/testsuite/gcc.dg/uaddsub.c b/gcc/testsuite/gcc.dg/uaddsub.c
|
||||
new file mode 100644
|
||||
index 000000000..96c26d308
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/uaddsub.c
|
||||
@@ -0,0 +1,143 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -fuaddsub-overflow-match-all -fdump-tree-optimized" } */
|
||||
+#include <stdint.h>
|
||||
+
|
||||
+typedef unsigned __int128 uint128_t;
|
||||
+typedef struct uint256_t
|
||||
+{
|
||||
+ uint128_t lo;
|
||||
+ uint128_t hi;
|
||||
+} uint256_t;
|
||||
+
|
||||
+uint16_t add16 (uint8_t a, uint8_t b)
|
||||
+{
|
||||
+ uint8_t tmp = a + b;
|
||||
+ uint8_t overflow = 0;
|
||||
+ if (tmp < a)
|
||||
+ overflow = 1;
|
||||
+
|
||||
+ uint16_t res = overflow;
|
||||
+ res <<= 8;
|
||||
+ res += tmp;
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+uint32_t add32 (uint16_t a, uint16_t b)
|
||||
+{
|
||||
+ uint16_t tmp = a + b;
|
||||
+ uint16_t overflow = 0;
|
||||
+ if (tmp < a)
|
||||
+ overflow = 1;
|
||||
+
|
||||
+ uint32_t res = overflow;
|
||||
+ res <<= 16;
|
||||
+ res += tmp;
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+uint64_t add64 (uint32_t a, uint32_t b)
|
||||
+{
|
||||
+ uint32_t tmp = a + b;
|
||||
+ uint32_t overflow = 0;
|
||||
+ if (tmp < a)
|
||||
+ overflow = 1;
|
||||
+
|
||||
+ uint64_t res = overflow;
|
||||
+ res <<= 32;
|
||||
+ res += tmp;
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+uint128_t add128 (uint64_t a, uint64_t b)
|
||||
+{
|
||||
+ uint64_t tmp = a + b;
|
||||
+ uint64_t overflow = 0;
|
||||
+ if (tmp < a)
|
||||
+ overflow = 1;
|
||||
+
|
||||
+ uint128_t res = overflow;
|
||||
+ res <<= 64;
|
||||
+ res += tmp;
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+uint256_t add256 (uint128_t a, uint128_t b)
|
||||
+{
|
||||
+ uint128_t tmp = a + b;
|
||||
+ uint128_t overflow = 0;
|
||||
+ if (tmp < a)
|
||||
+ overflow = 1;
|
||||
+
|
||||
+ uint256_t res;
|
||||
+ res.hi = overflow;
|
||||
+ res.lo = tmp;
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+uint16_t sub16 (uint8_t a, uint8_t b)
|
||||
+{
|
||||
+ uint8_t tmp = a - b;
|
||||
+ uint8_t overflow = 0;
|
||||
+ if (tmp > a)
|
||||
+ overflow = -1;
|
||||
+
|
||||
+ uint16_t res = overflow;
|
||||
+ res <<= 8;
|
||||
+ res += tmp;
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+uint32_t sub32 (uint16_t a, uint16_t b)
|
||||
+{
|
||||
+ uint16_t tmp = a - b;
|
||||
+ uint16_t overflow = 0;
|
||||
+ if (tmp > a)
|
||||
+ overflow = -1;
|
||||
+
|
||||
+ uint32_t res = overflow;
|
||||
+ res <<= 16;
|
||||
+ res += tmp;
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+uint64_t sub64 (uint32_t a, uint32_t b)
|
||||
+{
|
||||
+ uint32_t tmp = a - b;
|
||||
+ uint32_t overflow = 0;
|
||||
+ if (tmp > a)
|
||||
+ overflow = -1;
|
||||
+
|
||||
+ uint64_t res = overflow;
|
||||
+ res <<= 32;
|
||||
+ res += tmp;
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+uint128_t sub128 (uint64_t a, uint64_t b)
|
||||
+{
|
||||
+ uint64_t tmp = a - b;
|
||||
+ uint64_t overflow = 0;
|
||||
+ if (tmp > a)
|
||||
+ overflow = -1;
|
||||
+
|
||||
+ uint128_t res = overflow;
|
||||
+ res <<= 64;
|
||||
+ res += tmp;
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+uint256_t sub256 (uint128_t a, uint128_t b)
|
||||
+{
|
||||
+ uint128_t tmp = a - b;
|
||||
+ uint128_t overflow = 0;
|
||||
+ if (tmp > a)
|
||||
+ overflow = -1;
|
||||
+
|
||||
+ uint256_t res;
|
||||
+ res.hi = overflow;
|
||||
+ res.lo = tmp;
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump-times "= .ADD_OVERFLOW \\(a_\[0-9\]+\\(D\\), b_\[0-9\]+\\(D\\)\\)" 5 "optimized" } } */
|
||||
+/* { dg-final { scan-tree-dump-times "= .SUB_OVERFLOW \\(a_\[0-9\]+\\(D\\), b_\[0-9\]+\\(D\\)\\)" 5 "optimized" } } */
|
||||
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
|
||||
index 4c89fddcf..716bf9e35 100644
|
||||
--- a/gcc/tree-ssa-math-opts.c
|
||||
+++ b/gcc/tree-ssa-math-opts.c
|
||||
@@ -3290,6 +3290,27 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
|
||||
}
|
||||
}
|
||||
|
||||
+/* Check if the corresponding operation has a wider equivalent on the target. */
|
||||
+
|
||||
+static bool
|
||||
+wider_optab_check_p (optab op, machine_mode mode, int unsignedp)
|
||||
+{
|
||||
+ machine_mode wider_mode;
|
||||
+ FOR_EACH_WIDER_MODE (wider_mode, mode)
|
||||
+ {
|
||||
+ machine_mode next_mode;
|
||||
+ if (optab_handler (op, wider_mode) != CODE_FOR_nothing
|
||||
+ || (op == smul_optab
|
||||
+ && GET_MODE_WIDER_MODE (wider_mode).exists (&next_mode)
|
||||
+ && (find_widening_optab_handler ((unsignedp
|
||||
+ ? umul_widen_optab
|
||||
+ : smul_widen_optab),
|
||||
+ next_mode, mode))))
|
||||
+ return true;
|
||||
+ }
|
||||
+
|
||||
+ return false;
|
||||
+}
|
||||
|
||||
/* Helper function of match_uaddsub_overflow. Return 1
|
||||
if USE_STMT is unsigned overflow check ovf != 0 for
|
||||
@@ -3390,12 +3411,18 @@ match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
|
||||
gimple *use_stmt;
|
||||
|
||||
gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR);
|
||||
+ optab op = code == PLUS_EXPR ? uaddv4_optab : usubv4_optab;
|
||||
+ machine_mode mode = TYPE_MODE (type);
|
||||
+ int unsignedp = TYPE_UNSIGNED (type);
|
||||
if (!INTEGRAL_TYPE_P (type)
|
||||
- || !TYPE_UNSIGNED (type)
|
||||
+ || !unsignedp
|
||||
|| has_zero_uses (lhs)
|
||||
- || has_single_use (lhs)
|
||||
- || optab_handler (code == PLUS_EXPR ? uaddv4_optab : usubv4_optab,
|
||||
- TYPE_MODE (type)) == CODE_FOR_nothing)
|
||||
+ || has_single_use (lhs))
|
||||
+ return false;
|
||||
+
|
||||
+ if (optab_handler (op, mode) == CODE_FOR_nothing
|
||||
+ && (!flag_uaddsub_overflow_match_all
|
||||
+ || !wider_optab_check_p (op, mode, unsignedp)))
|
||||
return false;
|
||||
|
||||
FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
|
||||
--
|
||||
2.33.0
|
||||
|
||||
488
0145-Match-double-sized-mul-pattern.patch
Normal file
488
0145-Match-double-sized-mul-pattern.patch
Normal file
@ -0,0 +1,488 @@
|
||||
From 3be7a26a08772d014f54f7b1a0555ccca91115d6 Mon Sep 17 00:00:00 2001
|
||||
From: Pronin Alexander 00812787 <pronin.alexander@huawei.com>
|
||||
Date: Wed, 25 Jan 2023 15:04:07 +0300
|
||||
Subject: [PATCH 22/33] Match double sized mul pattern
|
||||
|
||||
---
|
||||
gcc/match.pd | 136 +++++++++++++++++++++
|
||||
gcc/testsuite/gcc.dg/double_sized_mul-1.c | 141 ++++++++++++++++++++++
|
||||
gcc/testsuite/gcc.dg/double_sized_mul-2.c | 62 ++++++++++
|
||||
gcc/tree-ssa-math-opts.c | 80 ++++++++++++
|
||||
4 files changed, 419 insertions(+)
|
||||
create mode 100644 gcc/testsuite/gcc.dg/double_sized_mul-1.c
|
||||
create mode 100644 gcc/testsuite/gcc.dg/double_sized_mul-2.c
|
||||
|
||||
diff --git a/gcc/match.pd b/gcc/match.pd
|
||||
index e98cd02e0..74f8ab999 100644
|
||||
--- a/gcc/match.pd
|
||||
+++ b/gcc/match.pd
|
||||
@@ -6390,3 +6390,139 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|
||||
to the number of trailing zeroes. */
|
||||
(match (ctz_table_index @1 @2 @3)
|
||||
(rshift (mult (bit_and:c (negate @1) @1) INTEGER_CST@2) INTEGER_CST@3))
|
||||
+
|
||||
+/* Match multiplication with double sized result.
|
||||
+
|
||||
+ Consider the following calculations:
|
||||
+ arg0 * arg1 = (2^(bit_size/2) * arg0_hi + arg0_lo)
|
||||
+ * (2^(bit_size/2) * arg1_hi + arg1_lo)
|
||||
+ arg0 * arg1 = 2^bit_size * arg0_hi * arg1_hi
|
||||
+ + 2^(bit_size/2) * (arg0_hi * arg1_lo + arg0_lo * arg1_hi)
|
||||
+ + arg0_lo * arg1_lo
|
||||
+
|
||||
+ The products of high and low parts fit in bit_size values, thus they are
|
||||
+ placed in high and low parts of result respectively.
|
||||
+
|
||||
+ The sum of the mixed products may overflow, so we need a detection for that.
|
||||
+ Also it has a bit_size/2 offset, thus it intersects with both high and low
|
||||
+ parts of result. Overflow detection constant is bit_size/2 due to this.
|
||||
+
|
||||
+ With this info:
|
||||
+ arg0 * arg1 = 2^bit_size * arg0_hi * arg1_hi
|
||||
+ + 2^(bit_size/2) * middle
|
||||
+ + 2^bit_size * possible_middle_overflow
|
||||
+ + arg0_lo * arg1_lo
|
||||
+ arg0 * arg1 = 2^bit_size * (arg0_hi * arg1_hi + possible_middle_overflow)
|
||||
+ + 2^(bit_size/2) * (2^(bit_size/2) * middle_hi + middle_lo)
|
||||
+ + arg0_lo * arg1_lo
|
||||
+ arg0 * arg1 = 2^bit_size * (arg0_hi * arg1_hi + middle_hi
|
||||
+ + possible_middle_overflow)
|
||||
+ + 2^(bit_size/2) * middle_lo
|
||||
+ + arg0_lo * arg1_lo
|
||||
+
|
||||
+ The last sum can produce overflow for the high result part. With this:
|
||||
+ arg0 * arg1 = 2^bit_size * (arg0_hi * arg1_hi + possible_middle_overflow
|
||||
+ + possible_res_lo_overflow + middle_hi)
|
||||
+ + res_lo
|
||||
+ = res_hi + res_lo
|
||||
+
|
||||
+ This formula is quite big to fit into one match pattern with all of the
|
||||
+ combinations of terms inside it. There are many helpers for better code
|
||||
+ readability.
|
||||
+
|
||||
+ The simplification is anchored on res_hi, on the assumption that needing
|
||||
+ only res_lo is not a realistic use case for such calculations.
|
||||
+
|
||||
+ Overflow handling is done via matching complex calculations:
|
||||
+ the realpart and imagpart are quite handy here. */
|
||||
+/* Match low and high parts of the argument. */
|
||||
+(match (double_size_mul_arg_lo @0 @1)
|
||||
+ (bit_and @0 INTEGER_CST@1)
|
||||
+ (if (wi::to_wide (@1)
|
||||
+ == wi::mask (TYPE_PRECISION (type) / 2, false, TYPE_PRECISION (type)))))
|
||||
+(match (double_size_mul_arg_hi @0 @1)
|
||||
+ (rshift @0 INTEGER_CST@1)
|
||||
+ (if (wi::to_wide (@1) == TYPE_PRECISION (type) / 2)))
|
||||
+
|
||||
+/* Match various argument parts products. */
|
||||
+(match (double_size_mul_lolo @0 @1)
|
||||
+ (mult@4 (double_size_mul_arg_lo @0 @2) (double_size_mul_arg_lo @1 @3))
|
||||
+ (if (single_use (@4))))
|
||||
+(match (double_size_mul_hihi @0 @1)
|
||||
+ (mult@4 (double_size_mul_arg_hi @0 @2) (double_size_mul_arg_hi @1 @3))
|
||||
+ (if (single_use (@4))))
|
||||
+(match (double_size_mul_lohi @0 @1)
|
||||
+ (mult:c@4 (double_size_mul_arg_lo @0 @2) (double_size_mul_arg_hi @1 @3))
|
||||
+ (if (single_use (@4))))
|
||||
+
|
||||
+/* Match complex middle sum. */
|
||||
+(match (double_size_mul_middle_complex @0 @1)
|
||||
+ (IFN_ADD_OVERFLOW@2 (double_size_mul_lohi @0 @1) (double_size_mul_lohi @1 @0))
|
||||
+ (if (num_imm_uses (@2) == 2)))
|
||||
+
|
||||
+/* Match real middle results. */
|
||||
+(match (double_size_mul_middle @0 @1)
|
||||
+ (realpart@2 (double_size_mul_middle_complex @0 @1))
|
||||
+ (if (num_imm_uses (@2) == 2)))
|
||||
+(match (double_size_mul_middleres_lo @0 @1)
|
||||
+ (lshift@3 (double_size_mul_middle @0 @1) INTEGER_CST@2)
|
||||
+ (if (wi::to_wide (@2) == TYPE_PRECISION (type) / 2
|
||||
+ && single_use (@3))))
|
||||
+(match (double_size_mul_middleres_hi @0 @1)
|
||||
+ (rshift@3 (double_size_mul_middle @0 @1) INTEGER_CST@2)
|
||||
+ (if (wi::to_wide (@2) == TYPE_PRECISION (type) / 2
|
||||
+ && single_use (@3))))
|
||||
+
|
||||
+/* Match low result part. */
|
||||
+/* Number of uses may be < 2 in case when we are interested in
|
||||
+ high part only. */
|
||||
+(match (double_size_mul_res_lo_complex @0 @1)
|
||||
+ (IFN_ADD_OVERFLOW:c@2
|
||||
+ (double_size_mul_lolo:c @0 @1) (double_size_mul_middleres_lo @0 @1))
|
||||
+ (if (num_imm_uses (@2) <= 2)))
|
||||
+(match (double_size_mul_res_lo @0 @1)
|
||||
+ (realpart (double_size_mul_res_lo_complex @0 @1)))
|
||||
+
|
||||
+/* Match overflow terms. */
|
||||
+(match (double_size_mul_overflow_check_lo @0 @1 @5)
|
||||
+ (convert@4 (ne@3
|
||||
+ (imagpart@2 (double_size_mul_res_lo_complex@5 @0 @1)) integer_zerop))
|
||||
+ (if (single_use (@2) && single_use (@3) && single_use (@4))))
|
||||
+(match (double_size_mul_overflow_check_hi @0 @1)
|
||||
+ (lshift@6 (convert@5 (ne@4
|
||||
+ (imagpart@3 (double_size_mul_middle_complex @0 @1)) integer_zerop))
|
||||
+ INTEGER_CST@2)
|
||||
+ (if (wi::to_wide (@2) == TYPE_PRECISION (type) / 2
|
||||
+ && single_use (@3) && single_use (@4) && single_use (@5)
|
||||
+ && single_use (@6))))
|
||||
+
|
||||
+/* Match all possible permutations for high result part calculations. */
|
||||
+(for op1 (double_size_mul_hihi
|
||||
+ double_size_mul_overflow_check_hi
|
||||
+ double_size_mul_middleres_hi)
|
||||
+ op2 (double_size_mul_overflow_check_hi
|
||||
+ double_size_mul_middleres_hi
|
||||
+ double_size_mul_hihi)
|
||||
+ op3 (double_size_mul_middleres_hi
|
||||
+ double_size_mul_hihi
|
||||
+ double_size_mul_overflow_check_hi)
|
||||
+ (match (double_size_mul_candidate @0 @1 @2 @3)
|
||||
+ (plus:c@2
|
||||
+ (plus:c@4 (double_size_mul_overflow_check_lo @0 @1 @3) (op1:c @0 @1))
|
||||
+ (plus:c@5 (op2:c @0 @1) (op3:c @0 @1)))
|
||||
+ (if (single_use (@4) && single_use (@5))))
|
||||
+ (match (double_size_mul_candidate @0 @1 @2 @3)
|
||||
+ (plus:c@2 (double_size_mul_overflow_check_lo @0 @1 @3)
|
||||
+ (plus:c@4 (op1:c @0 @1)
|
||||
+ (plus:c@5 (op2:c @0 @1) (op3:c @0 @1))))
|
||||
+ (if (single_use (@4) && single_use (@5))))
|
||||
+ (match (double_size_mul_candidate @0 @1 @2 @3)
|
||||
+ (plus:c@2 (op1:c @0 @1)
|
||||
+ (plus:c@4 (double_size_mul_overflow_check_lo @0 @1 @3)
|
||||
+ (plus:c@5 (op2:c @0 @1) (op3:c @0 @1))))
|
||||
+ (if (single_use (@4) && single_use (@5))))
|
||||
+ (match (double_size_mul_candidate @0 @1 @2 @3)
|
||||
+ (plus:c@2 (op1:c @0 @1)
|
||||
+ (plus:c@4 (op2:c @0 @1)
|
||||
+ (plus:c@5 (double_size_mul_overflow_check_lo @0 @1 @3) (op3:c @0 @1))))
|
||||
+ (if (single_use (@4) && single_use (@5)))))
|
||||
diff --git a/gcc/testsuite/gcc.dg/double_sized_mul-1.c b/gcc/testsuite/gcc.dg/double_sized_mul-1.c
|
||||
new file mode 100644
|
||||
index 000000000..4d475cc8a
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/double_sized_mul-1.c
|
||||
@@ -0,0 +1,141 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* fif-conversion-gimple and fuaddsub-overflow-match-all are required for
|
||||
+ proper overflow detection in some cases. */
|
||||
+/* { dg-options "-O2 -fif-conversion-gimple -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */
|
||||
+#include <stdint.h>
|
||||
+
|
||||
+typedef unsigned __int128 uint128_t;
|
||||
+
|
||||
+uint16_t mul16 (uint8_t a, uint8_t b)
|
||||
+{
|
||||
+ uint8_t a_lo = a & 0xF;
|
||||
+ uint8_t b_lo = b & 0xF;
|
||||
+ uint8_t a_hi = a >> 4;
|
||||
+ uint8_t b_hi = b >> 4;
|
||||
+ uint8_t lolo = a_lo * b_lo;
|
||||
+ uint8_t lohi = a_lo * b_hi;
|
||||
+ uint8_t hilo = a_hi * b_lo;
|
||||
+ uint8_t hihi = a_hi * b_hi;
|
||||
+ uint8_t middle = hilo + lohi;
|
||||
+ uint8_t middle_hi = middle >> 4;
|
||||
+ uint8_t middle_lo = middle << 4;
|
||||
+ uint8_t res_lo = lolo + middle_lo;
|
||||
+ uint8_t res_hi = hihi + middle_hi;
|
||||
+ res_hi += (res_lo < middle_lo ? 1 : 0);
|
||||
+ res_hi += (middle < hilo ? 0x10 : 0);
|
||||
+ uint16_t res = ((uint16_t) res_hi) << 8;
|
||||
+ res += res_lo;
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+uint32_t mul32 (uint16_t a, uint16_t b)
|
||||
+{
|
||||
+ uint16_t a_lo = a & 0xFF;
|
||||
+ uint16_t b_lo = b & 0xFF;
|
||||
+ uint16_t a_hi = a >> 8;
|
||||
+ uint16_t b_hi = b >> 8;
|
||||
+ uint16_t lolo = a_lo * b_lo;
|
||||
+ uint16_t lohi = a_lo * b_hi;
|
||||
+ uint16_t hilo = a_hi * b_lo;
|
||||
+ uint16_t hihi = a_hi * b_hi;
|
||||
+ uint16_t middle = hilo + lohi;
|
||||
+ uint16_t middle_hi = middle >> 8;
|
||||
+ uint16_t middle_lo = middle << 8;
|
||||
+ uint16_t res_lo = lolo + middle_lo;
|
||||
+ uint16_t res_hi = hihi + middle_hi;
|
||||
+ res_hi += (res_lo < middle_lo ? 1 : 0);
|
||||
+ res_hi += (middle < hilo ? 0x100 : 0);
|
||||
+ uint32_t res = ((uint32_t) res_hi) << 16;
|
||||
+ res += res_lo;
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+uint64_t mul64 (uint32_t a, uint32_t b)
|
||||
+{
|
||||
+ uint32_t a_lo = a & 0xFFFF;
|
||||
+ uint32_t b_lo = b & 0xFFFF;
|
||||
+ uint32_t a_hi = a >> 16;
|
||||
+ uint32_t b_hi = b >> 16;
|
||||
+ uint32_t lolo = a_lo * b_lo;
|
||||
+ uint32_t lohi = a_lo * b_hi;
|
||||
+ uint32_t hilo = a_hi * b_lo;
|
||||
+ uint32_t hihi = a_hi * b_hi;
|
||||
+ uint32_t middle = hilo + lohi;
|
||||
+ uint32_t middle_hi = middle >> 16;
|
||||
+ uint32_t middle_lo = middle << 16;
|
||||
+ uint32_t res_lo = lolo + middle_lo;
|
||||
+ uint32_t res_hi = hihi + middle_hi;
|
||||
+ res_hi += (res_lo < middle_lo ? 1 : 0);
|
||||
+ res_hi += (middle < hilo ? 0x10000 : 0);
|
||||
+ uint64_t res = ((uint64_t) res_hi) << 32;
|
||||
+ res += res_lo;
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+uint128_t mul128 (uint64_t a, uint64_t b)
|
||||
+{
|
||||
+ uint64_t a_lo = a & 0xFFFFFFFF;
|
||||
+ uint64_t b_lo = b & 0xFFFFFFFF;
|
||||
+ uint64_t a_hi = a >> 32;
|
||||
+ uint64_t b_hi = b >> 32;
|
||||
+ uint64_t lolo = a_lo * b_lo;
|
||||
+ uint64_t lohi = a_lo * b_hi;
|
||||
+ uint64_t hilo = a_hi * b_lo;
|
||||
+ uint64_t hihi = a_hi * b_hi;
|
||||
+ uint64_t middle = hilo + lohi;
|
||||
+ uint64_t middle_hi = middle >> 32;
|
||||
+ uint64_t middle_lo = middle << 32;
|
||||
+ uint64_t res_lo = lolo + middle_lo;
|
||||
+ uint64_t res_hi = hihi + middle_hi;
|
||||
+ res_hi += (res_lo < middle_lo ? 1 : 0);
|
||||
+ res_hi += (middle < hilo ? 0x100000000 : 0);
|
||||
+ uint128_t res = ((uint128_t) res_hi) << 64;
|
||||
+ res += res_lo;
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+uint64_t mul64_perm (uint32_t a, uint32_t b)
|
||||
+{
|
||||
+ uint32_t a_lo = a & 0xFFFF;
|
||||
+ uint32_t b_lo = b & 0xFFFF;
|
||||
+ uint32_t a_hi = a >> 16;
|
||||
+ uint32_t b_hi = b >> 16;
|
||||
+ uint32_t lolo = a_lo * b_lo;
|
||||
+ uint32_t lohi = a_lo * b_hi;
|
||||
+ uint32_t hilo = a_hi * b_lo;
|
||||
+ uint32_t hihi = a_hi * b_hi;
|
||||
+ uint32_t middle = hilo + lohi;
|
||||
+ uint32_t middle_hi = middle >> 16;
|
||||
+ uint32_t middle_lo = middle << 16;
|
||||
+ uint32_t res_lo = lolo + middle_lo;
|
||||
+ uint32_t res_hi = hihi + middle_hi;
|
||||
+ res_hi = res_lo < middle_lo ? res_hi + 1 : res_hi;
|
||||
+ res_hi = middle < hilo ? res_hi + 0x10000 : res_hi;
|
||||
+ uint64_t res = ((uint64_t) res_hi) << 32;
|
||||
+ res += res_lo;
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+uint128_t mul128_perm (uint64_t a, uint64_t b)
|
||||
+{
|
||||
+ uint64_t a_lo = a & 0xFFFFFFFF;
|
||||
+ uint64_t b_lo = b & 0xFFFFFFFF;
|
||||
+ uint64_t a_hi = a >> 32;
|
||||
+ uint64_t b_hi = b >> 32;
|
||||
+ uint64_t lolo = a_lo * b_lo;
|
||||
+ uint64_t lohi = a_lo * b_hi;
|
||||
+ uint64_t hilo = a_hi * b_lo;
|
||||
+ uint64_t hihi = a_hi * b_hi;
|
||||
+ uint64_t middle = hilo + lohi;
|
||||
+ uint64_t middle_hi = middle >> 32;
|
||||
+ uint64_t middle_lo = middle << 32;
|
||||
+ uint64_t res_lo = lolo + middle_lo;
|
||||
+ uint64_t res_hi = hihi + middle_hi;
|
||||
+ res_hi = res_lo < middle_lo ? res_hi + 1 : res_hi;
|
||||
+ res_hi = middle < hilo ? res_hi + 0x100000000 : res_hi;
|
||||
+ uint128_t res = ((uint128_t) res_hi) << 64;
|
||||
+ res += res_lo;
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump-times "double sized mul optimized: 1" 6 "widening_mul" } } */
|
||||
diff --git a/gcc/testsuite/gcc.dg/double_sized_mul-2.c b/gcc/testsuite/gcc.dg/double_sized_mul-2.c
|
||||
new file mode 100644
|
||||
index 000000000..cc6e5af25
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/double_sized_mul-2.c
|
||||
@@ -0,0 +1,62 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* fif-conversion-gimple is required for proper overflow detection
|
||||
+ in some cases. */
|
||||
+/* { dg-options "-O2 -fif-conversion-gimple -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */
|
||||
+#include <stdint.h>
|
||||
+
|
||||
+typedef unsigned __int128 uint128_t;
|
||||
+typedef struct uint256_t
|
||||
+{
|
||||
+ uint128_t lo;
|
||||
+ uint128_t hi;
|
||||
+} uint256_t;
|
||||
+
|
||||
+uint64_t mul64_double_use (uint32_t a, uint32_t b)
|
||||
+{
|
||||
+ uint32_t a_lo = a & 0xFFFF;
|
||||
+ uint32_t b_lo = b & 0xFFFF;
|
||||
+ uint32_t a_hi = a >> 16;
|
||||
+ uint32_t b_hi = b >> 16;
|
||||
+ uint32_t lolo = a_lo * b_lo;
|
||||
+ uint32_t lohi = a_lo * b_hi;
|
||||
+ uint32_t hilo = a_hi * b_lo;
|
||||
+ uint32_t hihi = a_hi * b_hi;
|
||||
+ uint32_t middle = hilo + lohi;
|
||||
+ uint32_t middle_hi = middle >> 16;
|
||||
+ uint32_t middle_lo = middle << 16;
|
||||
+ uint32_t res_lo = lolo + middle_lo;
|
||||
+ uint32_t res_hi = hihi + middle_hi;
|
||||
+ res_hi += (res_lo < middle_lo ? 1 : 0);
|
||||
+ res_hi += (middle < hilo ? 0x10000 : 0);
|
||||
+ uint64_t res = ((uint64_t) res_hi) << 32;
|
||||
+ res += res_lo;
|
||||
+ return res + lolo;
|
||||
+}
|
||||
+
|
||||
+uint256_t mul256 (uint128_t a, uint128_t b)
|
||||
+{
|
||||
+ uint128_t a_lo = a & 0xFFFFFFFFFFFFFFFF;
|
||||
+ uint128_t b_lo = b & 0xFFFFFFFFFFFFFFFF;
|
||||
+ uint128_t a_hi = a >> 64;
|
||||
+ uint128_t b_hi = b >> 64;
|
||||
+ uint128_t lolo = a_lo * b_lo;
|
||||
+ uint128_t lohi = a_lo * b_hi;
|
||||
+ uint128_t hilo = a_hi * b_lo;
|
||||
+ uint128_t hihi = a_hi * b_hi;
|
||||
+ uint128_t middle = hilo + lohi;
|
||||
+ uint128_t middle_hi = middle >> 64;
|
||||
+ uint128_t middle_lo = middle << 64;
|
||||
+ uint128_t res_lo = lolo + middle_lo;
|
||||
+ uint128_t res_hi = hihi + middle_hi;
|
||||
+ res_hi += (res_lo < middle_lo ? 1 : 0);
|
||||
+ /* Workaround for the "constant is too big" warning. */
|
||||
+ uint128_t overflow_tmp = (middle < hilo ? 1 : 0);
|
||||
+ overflow_tmp <<= 64;
|
||||
+ res_hi += overflow_tmp;
|
||||
+ uint256_t res;
|
||||
+ res.lo = res_lo;
|
||||
+ res.hi = res_hi;
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-tree-dump-not "double sized mul optimized" "widening_mul" } } */
|
||||
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
|
||||
index 716bf9e35..a81d7501c 100644
|
||||
--- a/gcc/tree-ssa-math-opts.c
|
||||
+++ b/gcc/tree-ssa-math-opts.c
|
||||
@@ -182,6 +182,9 @@ static struct
|
||||
|
||||
/* Number of divmod calls inserted. */
|
||||
int divmod_calls_inserted;
|
||||
+
|
||||
+ /* Number of optimized double sized multiplications. */
|
||||
+ int double_sized_mul_optimized;
|
||||
} widen_mul_stats;
|
||||
|
||||
/* The instance of "struct occurrence" representing the highest
|
||||
@@ -3708,6 +3711,78 @@ convert_to_divmod (gassign *stmt)
|
||||
return true;
|
||||
}
|
||||
|
||||
+/* Pattern matcher for double sized multiplication defined in match.pd. */
|
||||
+extern bool gimple_double_size_mul_candidate (tree, tree*, tree (*)(tree));
|
||||
+
|
||||
+static bool
|
||||
+convert_double_size_mul (gimple_stmt_iterator *gsi, gimple *stmt)
|
||||
+{
|
||||
+ gimple *use_stmt, *complex_res_lo;
|
||||
+ gimple_stmt_iterator insert_before;
|
||||
+ imm_use_iterator use_iter;
|
||||
+ tree match[4]; // arg0, arg1, res_hi, complex_res_lo
|
||||
+ tree arg0, arg1, widen_mult, new_type, tmp;
|
||||
+ tree lhs = gimple_assign_lhs (stmt);
|
||||
+ location_t loc = UNKNOWN_LOCATION;
|
||||
+ machine_mode mode;
|
||||
+
|
||||
+ if (!gimple_double_size_mul_candidate (lhs, match, NULL))
|
||||
+ return false;
|
||||
+
|
||||
+ new_type = build_nonstandard_integer_type (
|
||||
+ TYPE_PRECISION (TREE_TYPE (match[0])) * 2, 1);
|
||||
+ mode = TYPE_MODE (new_type);
|
||||
+
|
||||
+ /* Early return if the wide multiplication is not available on the target. */
|
||||
+ if (optab_handler (smul_optab, mode) == CODE_FOR_nothing
|
||||
+ && !wider_optab_check_p (smul_optab, mode, 1))
|
||||
+ return false;
|
||||
+
|
||||
+ /* Determine the point where the wide multiplication
|
||||
+ should be inserted. Complex low res is OK since it is required
|
||||
+ by both high and low part getters, thus it dominates both of them. */
|
||||
+ complex_res_lo = SSA_NAME_DEF_STMT (match[3]);
|
||||
+ insert_before = gsi_for_stmt (complex_res_lo);
|
||||
+ gsi_next (&insert_before);
|
||||
+
|
||||
+ /* Create the widen multiplication. */
|
||||
+ arg0 = build_and_insert_cast (&insert_before, loc, new_type, match[0]);
|
||||
+ arg1 = build_and_insert_cast (&insert_before, loc, new_type, match[1]);
|
||||
+ widen_mult = build_and_insert_binop (&insert_before, loc, "widen_mult",
|
||||
+ MULT_EXPR, arg0, arg1);
|
||||
+
|
||||
+ /* Find the mult low part getter. */
|
||||
+ FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, match[3])
|
||||
+ if (gimple_assign_rhs_code (use_stmt) == REALPART_EXPR)
|
||||
+ BREAK_FROM_IMM_USE_STMT (use_iter);
|
||||
+
|
||||
+ /* Create high and low (if needed) parts extractors. */
|
||||
+ /* Low part. */
|
||||
+ if (use_stmt)
|
||||
+ {
|
||||
+ loc = gimple_location (use_stmt);
|
||||
+ tmp = build_and_insert_cast (&insert_before, loc,
|
||||
+ TREE_TYPE (gimple_get_lhs (use_stmt)),
|
||||
+ widen_mult);
|
||||
+ gassign *new_stmt = gimple_build_assign (gimple_get_lhs (use_stmt),
|
||||
+ NOP_EXPR, tmp);
|
||||
+ gsi_replace (&insert_before, new_stmt, true);
|
||||
+ }
|
||||
+
|
||||
+ /* High part. */
|
||||
+ loc = gimple_location (stmt);
|
||||
+ tmp = build_and_insert_binop (gsi, loc, "widen_mult_hi",
|
||||
+ RSHIFT_EXPR, widen_mult,
|
||||
+ build_int_cst (new_type,
|
||||
+ TYPE_PRECISION (new_type) / 2));
|
||||
+ tmp = build_and_insert_cast (gsi, loc, TREE_TYPE (lhs), tmp);
|
||||
+ gassign *new_stmt = gimple_build_assign (lhs, NOP_EXPR, tmp);
|
||||
+ gsi_replace (gsi, new_stmt, true);
|
||||
+
|
||||
+ widen_mul_stats.double_sized_mul_optimized++;
|
||||
+ return true;
|
||||
+}
|
||||
+
|
||||
/* Find integer multiplications where the operands are extended from
|
||||
smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR
|
||||
where appropriate. */
|
||||
@@ -3801,6 +3876,9 @@ math_opts_dom_walker::after_dom_children (basic_block bb)
|
||||
break;
|
||||
|
||||
case PLUS_EXPR:
|
||||
+ if (convert_double_size_mul (&gsi, stmt))
|
||||
+ break;
|
||||
+ __attribute__ ((fallthrough));
|
||||
case MINUS_EXPR:
|
||||
if (!convert_plusminus_to_widen (&gsi, stmt, code))
|
||||
match_uaddsub_overflow (&gsi, stmt, code);
|
||||
@@ -3892,6 +3970,8 @@ pass_optimize_widening_mul::execute (function *fun)
|
||||
widen_mul_stats.fmas_inserted);
|
||||
statistics_counter_event (fun, "divmod calls inserted",
|
||||
widen_mul_stats.divmod_calls_inserted);
|
||||
+ statistics_counter_event (fun, "double sized mul optimized",
|
||||
+ widen_mul_stats.double_sized_mul_optimized);
|
||||
|
||||
return cfg_changed ? TODO_cleanup_cfg : 0;
|
||||
}
|
||||
--
|
||||
2.33.0
|
||||
|
||||
2354
0146-LOOP-CRC32-Add-Crc32-Optimization-in-Gzip-For-crc32-.patch
Normal file
2354
0146-LOOP-CRC32-Add-Crc32-Optimization-in-Gzip-For-crc32-.patch
Normal file
File diff suppressed because it is too large
Load Diff
194
0147-add-insn-defs-and-correct-costs-for-cmlt-generation.patch
Normal file
194
0147-add-insn-defs-and-correct-costs-for-cmlt-generation.patch
Normal file
@ -0,0 +1,194 @@
|
||||
From 80b7de670da46d8921118799904cba4a0753bb72 Mon Sep 17 00:00:00 2001
|
||||
From: Diachkov Ilia WX1215920 <diachkov.ilia1@huawei-partners.com>
|
||||
Date: Wed, 23 Aug 2023 15:03:00 +0300
|
||||
Subject: [PATCH 09/13] add insn defs and correct costs for cmlt generation
|
||||
|
||||
---
|
||||
gcc/config/aarch64/aarch64-simd.md | 48 +++++++++++++++++++++++++++++
|
||||
gcc/config/aarch64/aarch64.c | 15 +++++++++
|
||||
gcc/config/aarch64/aarch64.opt | 4 +++
|
||||
gcc/config/aarch64/iterators.md | 3 +-
|
||||
gcc/config/aarch64/predicates.md | 25 +++++++++++++++
|
||||
gcc/testsuite/gcc.dg/combine-cmlt.c | 20 ++++++++++++
|
||||
6 files changed, 114 insertions(+), 1 deletion(-)
|
||||
create mode 100755 gcc/testsuite/gcc.dg/combine-cmlt.c
|
||||
|
||||
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
|
||||
index 6049adc3f..f4213fd62 100644
|
||||
--- a/gcc/config/aarch64/aarch64-simd.md
|
||||
+++ b/gcc/config/aarch64/aarch64-simd.md
|
||||
@@ -4719,6 +4719,54 @@
|
||||
[(set_attr "type" "neon_compare<q>, neon_compare_zero<q>")]
|
||||
)
|
||||
|
||||
+;; Use cmlt to replace vector arithmetic operations like this (SImode example):
|
||||
+;; B = (((A >> 15) & 0x00010001) << 16) - ((A >> 15) & 0x00010001)
|
||||
+;; TODO: maybe extend to scalar operations or other cm** instructions.
|
||||
+
|
||||
+(define_insn "*aarch64_cmlt_as_arith<mode>"
|
||||
+ [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
|
||||
+ (minus:<V_INT_EQUIV>
|
||||
+ (ashift:<V_INT_EQUIV>
|
||||
+ (and:<V_INT_EQUIV>
|
||||
+ (lshiftrt:<V_INT_EQUIV>
|
||||
+ (match_operand:VDQHSD 1 "register_operand" "w")
|
||||
+ (match_operand:VDQHSD 2 "half_size_minus_one_operand"))
|
||||
+ (match_operand:VDQHSD 3 "cmlt_arith_mask_operand"))
|
||||
+ (match_operand:VDQHSD 4 "half_size_operand"))
|
||||
+ (and:<V_INT_EQUIV>
|
||||
+ (lshiftrt:<V_INT_EQUIV>
|
||||
+ (match_dup 1)
|
||||
+ (match_dup 2))
|
||||
+ (match_dup 3))))]
|
||||
+ "TARGET_SIMD && flag_cmlt_arith"
|
||||
+ "cmlt\t%<v>0.<V2ntype>, %<v>1.<V2ntype>, #0"
|
||||
+ [(set_attr "type" "neon_compare_zero")]
|
||||
+)
|
||||
+
|
||||
+;; The helper definition that allows combiner to use the previous pattern.
|
||||
+
|
||||
+(define_insn_and_split "*aarch64_cmlt_tmp<mode>"
|
||||
+ [(set (match_operand:<V_INT_EQUIV> 0 "register_operand" "=w")
|
||||
+ (and:<V_INT_EQUIV>
|
||||
+ (lshiftrt:<V_INT_EQUIV>
|
||||
+ (match_operand:VDQHSD 1 "register_operand" "w")
|
||||
+ (match_operand:VDQHSD 2 "half_size_minus_one_operand"))
|
||||
+ (match_operand:VDQHSD 3 "cmlt_arith_mask_operand")))]
|
||||
+ "TARGET_SIMD && flag_cmlt_arith"
|
||||
+ "#"
|
||||
+ "&& reload_completed"
|
||||
+ [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
|
||||
+ (lshiftrt:<V_INT_EQUIV>
|
||||
+ (match_operand:VDQHSD 1 "register_operand")
|
||||
+ (match_operand:VDQHSD 2 "half_size_minus_one_operand")))
|
||||
+ (set (match_dup 0)
|
||||
+ (and:<V_INT_EQUIV>
|
||||
+ (match_dup 0)
|
||||
+ (match_operand:VDQHSD 3 "cmlt_arith_mask_operand")))]
|
||||
+ ""
|
||||
+ [(set_attr "type" "neon_compare_zero")]
|
||||
+)
|
||||
+
|
||||
(define_insn_and_split "aarch64_cm<optab>di"
|
||||
[(set (match_operand:DI 0 "register_operand" "=w,w,r")
|
||||
(neg:DI
|
||||
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
|
||||
index cbdde11b0..7a00a0817 100644
|
||||
--- a/gcc/config/aarch64/aarch64.c
|
||||
+++ b/gcc/config/aarch64/aarch64.c
|
||||
@@ -12659,6 +12659,21 @@ cost_minus:
|
||||
return true;
|
||||
}
|
||||
|
||||
+ /* Detect aarch64_cmlt_as_arith instruction. Now only this pattern
|
||||
+ matches the condition. The costs of cmlt and sub instructions
|
||||
+ are comparable, so we are not increasing the cost here. */
|
||||
+ if (flag_cmlt_arith && GET_CODE (op0) == ASHIFT
|
||||
+ && GET_CODE (op1) == AND)
|
||||
+ {
|
||||
+ rtx op0_subop0 = XEXP (op0, 0);
|
||||
+ if (rtx_equal_p (op0_subop0, op1))
|
||||
+ {
|
||||
+ rtx lshrt_op = XEXP (op0_subop0, 0);
|
||||
+ if (GET_CODE (lshrt_op) == LSHIFTRT)
|
||||
+ return true;
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
/* Look for SUB (extended register). */
|
||||
if (is_a <scalar_int_mode> (mode, &int_mode)
|
||||
&& aarch64_rtx_arith_op_extract_p (op1, int_mode))
|
||||
diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt
|
||||
index bb888461a..c42494036 100644
|
||||
--- a/gcc/config/aarch64/aarch64.opt
|
||||
+++ b/gcc/config/aarch64/aarch64.opt
|
||||
@@ -273,6 +273,10 @@ Use an immediate to offset from the stack protector guard register, sp_el0.
|
||||
This option is for use with fstack-protector-strong and not for use in
|
||||
user-land code.
|
||||
|
||||
+mcmlt-arith
|
||||
+Target Report Var(flag_cmlt_arith) Optimization Init(0)
|
||||
+Use SIMD cmlt instruction to perform some arithmetic/logic calculations.
|
||||
+
|
||||
TargetVariable
|
||||
long aarch64_stack_protector_guard_offset = 0
|
||||
|
||||
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
|
||||
index 0a7145281..d3be06c6f 100644
|
||||
--- a/gcc/config/aarch64/iterators.md
|
||||
+++ b/gcc/config/aarch64/iterators.md
|
||||
@@ -1228,7 +1228,8 @@
|
||||
(V2DI "2s")])
|
||||
|
||||
;; Register suffix narrowed modes for VQN.
|
||||
-(define_mode_attr V2ntype [(V8HI "16b") (V4SI "8h")
|
||||
+(define_mode_attr V2ntype [(V4HI "8b") (V2SI "4h")
|
||||
+ (V8HI "16b") (V4SI "8h")
|
||||
(V2DI "4s")])
|
||||
|
||||
;; Widened modes of vector modes.
|
||||
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
|
||||
index 1754b1eff..de58562a7 100644
|
||||
--- a/gcc/config/aarch64/predicates.md
|
||||
+++ b/gcc/config/aarch64/predicates.md
|
||||
@@ -47,6 +47,31 @@
|
||||
return CONST_INT_P (op) && IN_RANGE (INTVAL (op), 1, 3);
|
||||
})
|
||||
|
||||
+(define_predicate "half_size_minus_one_operand"
|
||||
+ (match_code "const_vector")
|
||||
+{
|
||||
+ op = unwrap_const_vec_duplicate (op);
|
||||
+ unsigned int size = GET_MODE_UNIT_BITSIZE (mode) / 2;
|
||||
+ return CONST_INT_P (op) && (UINTVAL (op) == size - 1);
|
||||
+})
|
||||
+
|
||||
+(define_predicate "half_size_operand"
|
||||
+ (match_code "const_vector")
|
||||
+{
|
||||
+ op = unwrap_const_vec_duplicate (op);
|
||||
+ unsigned int size = GET_MODE_UNIT_BITSIZE (mode) / 2;
|
||||
+ return CONST_INT_P (op) && (UINTVAL (op) == size);
|
||||
+})
|
||||
+
|
||||
+(define_predicate "cmlt_arith_mask_operand"
|
||||
+ (match_code "const_vector")
|
||||
+{
|
||||
+ op = unwrap_const_vec_duplicate (op);
|
||||
+ unsigned int size = GET_MODE_UNIT_BITSIZE (mode) / 2;
|
||||
+ unsigned long long mask = ((unsigned long long) 1 << size) | 1;
|
||||
+ return CONST_INT_P (op) && (UINTVAL (op) == mask);
|
||||
+})
|
||||
+
|
||||
(define_predicate "subreg_lowpart_operator"
|
||||
(ior (match_code "truncate")
|
||||
(and (match_code "subreg")
|
||||
diff --git a/gcc/testsuite/gcc.dg/combine-cmlt.c b/gcc/testsuite/gcc.dg/combine-cmlt.c
|
||||
new file mode 100755
|
||||
index 000000000..b4c9a37ff
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/combine-cmlt.c
|
||||
@@ -0,0 +1,20 @@
|
||||
+/* { dg-do compile { target aarch64-*-* } } */
|
||||
+/* { dg-options "-O3 -mcmlt-arith" } */
|
||||
+
|
||||
+/* The test checks usage of cmlt insns for arithmetic/logic calculations
|
||||
+ * in foo (). It's inspired by sources of x264 codec. */
|
||||
+
|
||||
+typedef unsigned short int uint16_t;
|
||||
+typedef unsigned int uint32_t;
|
||||
+
|
||||
+void foo( uint32_t *a, uint32_t *b)
|
||||
+{
|
||||
+ for (unsigned i = 0; i < 4; i++)
|
||||
+ {
|
||||
+ uint32_t s = ((a[i]>>((8 * sizeof(uint16_t))-1))
|
||||
+ &(((uint32_t)1<<(8 * sizeof(uint16_t)))+1))*((uint16_t)-1);
|
||||
+ b[i] = (a[i]+s)^s;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler-times {cmlt\t} 1 } } */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
502
0148-Introduce-RTL-ifcvt-enhancements.patch
Normal file
502
0148-Introduce-RTL-ifcvt-enhancements.patch
Normal file
@ -0,0 +1,502 @@
|
||||
From df68d120a049049671e44f6cda51e96a9a82c613 Mon Sep 17 00:00:00 2001
|
||||
From: Chernonog Vyacheslav 00812786 <chernonog.vyacheslav@huawei.com>
|
||||
Date: Mon, 28 Nov 2022 14:16:48 +0300
|
||||
Subject: [PATCH 10/13] Introduce RTL ifcvt enhancements
|
||||
|
||||
It is controlled by --param=ifcvt-allow-complicated-cmps=[0,1], allowing
|
||||
ifcvt to deal with complicated cmps like
|
||||
if (cmp)
|
||||
X = reg1
|
||||
else
|
||||
X = reg2 + reg3
|
||||
and
|
||||
if (cmp)
|
||||
X = reg1 + reg3
|
||||
Y = reg2 + reg4
|
||||
Z = reg3
|
||||
|
||||
Parameter --param=ifcvt-allow-register-renaming=[0,1,2] allows ifcvt to
|
||||
aggressively rename registers in basic blocks.
|
||||
* 0: does not allow ifcvt to rename registers
|
||||
* 1: allows ifcvt to rename registers in then and else bb
|
||||
* 2: allows ifcvt to rename registers in the condition and in the else/then bb
|
||||
---
|
||||
gcc/ifcvt.c | 298 ++++++++++++++++++++++++++++++++++++++-----------
|
||||
gcc/params.opt | 8 ++
|
||||
2 files changed, 240 insertions(+), 66 deletions(-)
|
||||
|
||||
diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
|
||||
index 2452f231c..50a73a7ca 100644
|
||||
--- a/gcc/ifcvt.c
|
||||
+++ b/gcc/ifcvt.c
|
||||
@@ -1,5 +1,5 @@
|
||||
/* If-conversion support.
|
||||
- Copyright (C) 2000-2020 Free Software Foundation, Inc.
|
||||
+ Copyright (C) 2000-2022 Free Software Foundation, Inc.
|
||||
|
||||
This file is part of GCC.
|
||||
|
||||
@@ -876,7 +876,9 @@ noce_emit_store_flag (struct noce_if_info *if_info, rtx x, int reversep,
|
||||
}
|
||||
|
||||
/* Don't even try if the comparison operands or the mode of X are weird. */
|
||||
- if (cond_complex || !SCALAR_INT_MODE_P (GET_MODE (x)))
|
||||
+ if (!param_ifcvt_allow_complicated_cmps
|
||||
+ && (cond_complex
|
||||
+ || !SCALAR_INT_MODE_P (GET_MODE (x))))
|
||||
return NULL_RTX;
|
||||
|
||||
return emit_store_flag (x, code, XEXP (cond, 0),
|
||||
@@ -1743,8 +1745,9 @@ noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code,
|
||||
|
||||
/* Don't even try if the comparison operands are weird
|
||||
except that the target supports cbranchcc4. */
|
||||
- if (! general_operand (cmp_a, GET_MODE (cmp_a))
|
||||
- || ! general_operand (cmp_b, GET_MODE (cmp_b)))
|
||||
+ if (! param_ifcvt_allow_complicated_cmps
|
||||
+ && (! general_operand (cmp_a, GET_MODE (cmp_a))
|
||||
+ || ! general_operand (cmp_b, GET_MODE (cmp_b))))
|
||||
{
|
||||
if (!have_cbranchcc4
|
||||
|| GET_MODE_CLASS (GET_MODE (cmp_a)) != MODE_CC
|
||||
@@ -1915,19 +1918,6 @@ noce_try_cmove (struct noce_if_info *if_info)
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
-/* Return true if X contains a conditional code mode rtx. */
|
||||
-
|
||||
-static bool
|
||||
-contains_ccmode_rtx_p (rtx x)
|
||||
-{
|
||||
- subrtx_iterator::array_type array;
|
||||
- FOR_EACH_SUBRTX (iter, array, x, ALL)
|
||||
- if (GET_MODE_CLASS (GET_MODE (*iter)) == MODE_CC)
|
||||
- return true;
|
||||
-
|
||||
- return false;
|
||||
-}
|
||||
-
|
||||
/* Helper for bb_valid_for_noce_process_p. Validate that
|
||||
the rtx insn INSN is a single set that does not set
|
||||
the conditional register CC and is in general valid for
|
||||
@@ -1946,7 +1936,6 @@ insn_valid_noce_process_p (rtx_insn *insn, rtx cc)
|
||||
/* Currently support only simple single sets in test_bb. */
|
||||
if (!sset
|
||||
|| !noce_operand_ok (SET_DEST (sset))
|
||||
- || contains_ccmode_rtx_p (SET_DEST (sset))
|
||||
|| !noce_operand_ok (SET_SRC (sset)))
|
||||
return false;
|
||||
|
||||
@@ -1960,13 +1949,17 @@ insn_valid_noce_process_p (rtx_insn *insn, rtx cc)
|
||||
in this function. */
|
||||
|
||||
static bool
|
||||
-bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename)
|
||||
+bbs_ok_for_cmove_arith (basic_block bb_a,
|
||||
+ basic_block bb_b,
|
||||
+ rtx to_rename,
|
||||
+ bitmap conflict_regs)
|
||||
{
|
||||
rtx_insn *a_insn;
|
||||
bitmap bba_sets = BITMAP_ALLOC (&reg_obstack);
|
||||
-
|
||||
+ bitmap intersections = BITMAP_ALLOC (&reg_obstack);
|
||||
df_ref def;
|
||||
df_ref use;
|
||||
+ rtx_insn *last_a = last_active_insn (bb_a, FALSE);
|
||||
|
||||
FOR_BB_INSNS (bb_a, a_insn)
|
||||
{
|
||||
@@ -1976,30 +1969,25 @@ bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename)
|
||||
rtx sset_a = single_set (a_insn);
|
||||
|
||||
if (!sset_a)
|
||||
- {
|
||||
- BITMAP_FREE (bba_sets);
|
||||
- return false;
|
||||
- }
|
||||
+ goto end_cmove_arith_check_and_fail;
|
||||
+ if (a_insn == last_a)
|
||||
+ continue;
|
||||
/* Record all registers that BB_A sets. */
|
||||
FOR_EACH_INSN_DEF (def, a_insn)
|
||||
if (!(to_rename && DF_REF_REG (def) == to_rename))
|
||||
bitmap_set_bit (bba_sets, DF_REF_REGNO (def));
|
||||
}
|
||||
|
||||
+ bitmap_and (intersections, df_get_live_in (bb_b), bba_sets);
|
||||
rtx_insn *b_insn;
|
||||
-
|
||||
FOR_BB_INSNS (bb_b, b_insn)
|
||||
{
|
||||
if (!active_insn_p (b_insn))
|
||||
continue;
|
||||
-
|
||||
rtx sset_b = single_set (b_insn);
|
||||
|
||||
if (!sset_b)
|
||||
- {
|
||||
- BITMAP_FREE (bba_sets);
|
||||
- return false;
|
||||
- }
|
||||
+ goto end_cmove_arith_check_and_fail;
|
||||
|
||||
/* Make sure this is a REG and not some instance
|
||||
of ZERO_EXTRACT or SUBREG or other dangerous stuff.
|
||||
@@ -2011,25 +1999,34 @@ bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename)
|
||||
if (MEM_P (SET_DEST (sset_b)))
|
||||
gcc_assert (rtx_equal_p (SET_DEST (sset_b), to_rename));
|
||||
else if (!REG_P (SET_DEST (sset_b)))
|
||||
- {
|
||||
- BITMAP_FREE (bba_sets);
|
||||
- return false;
|
||||
- }
|
||||
+ goto end_cmove_arith_check_and_fail;
|
||||
|
||||
- /* If the insn uses a reg set in BB_A return false. */
|
||||
+ /* If the insn uses a reg set in BB_A return false
|
||||
+ or try to collect register list for renaming. */
|
||||
FOR_EACH_INSN_USE (use, b_insn)
|
||||
{
|
||||
- if (bitmap_bit_p (bba_sets, DF_REF_REGNO (use)))
|
||||
+ if (bitmap_bit_p (intersections, DF_REF_REGNO (use)))
|
||||
{
|
||||
- BITMAP_FREE (bba_sets);
|
||||
- return false;
|
||||
+ if (param_ifcvt_allow_register_renaming < 1)
|
||||
+ goto end_cmove_arith_check_and_fail;
|
||||
+
|
||||
+ /* Those regs should be renamed. We can't rename CC reg, but
|
||||
+ possibly we can provide combined comparison in the future. */
|
||||
+ if (GET_MODE_CLASS (GET_MODE (DF_REF_REG (use))) == MODE_CC)
|
||||
+ goto end_cmove_arith_check_and_fail;
|
||||
+ bitmap_set_bit (conflict_regs, DF_REF_REGNO (use));
|
||||
}
|
||||
}
|
||||
-
|
||||
}
|
||||
|
||||
BITMAP_FREE (bba_sets);
|
||||
+ BITMAP_FREE (intersections);
|
||||
return true;
|
||||
+
|
||||
+end_cmove_arith_check_and_fail:
|
||||
+ BITMAP_FREE (bba_sets);
|
||||
+ BITMAP_FREE (intersections);
|
||||
+ return false;
|
||||
}
|
||||
|
||||
/* Emit copies of all the active instructions in BB except the last.
|
||||
@@ -2084,6 +2081,134 @@ noce_emit_bb (rtx last_insn, basic_block bb, bool simple)
|
||||
return true;
|
||||
}
|
||||
|
||||
+/* This function tries to rename regs that intersect with considered bb. */
|
||||
+
|
||||
+static bool
|
||||
+noce_rename_regs_in_cond (struct noce_if_info *if_info, bitmap cond_rename_regs)
|
||||
+{
|
||||
+ bool success = true;
|
||||
+ if (bitmap_empty_p (cond_rename_regs))
|
||||
+ return true;
|
||||
+ if (param_ifcvt_allow_register_renaming < 2)
|
||||
+ return false;
|
||||
+ df_ref use;
|
||||
+ rtx_insn* cmp_insn = if_info->cond_earliest;
|
||||
+ /* A jump instruction as the condition is currently unsupported. */
|
||||
+ if (JUMP_P (cmp_insn))
|
||||
+ return false;
|
||||
+ rtx_insn* before_cmp = PREV_INSN (cmp_insn);
|
||||
+ start_sequence ();
|
||||
+ rtx_insn *copy_of_cmp = as_a <rtx_insn *> (copy_rtx (cmp_insn));
|
||||
+ basic_block cmp_block = BLOCK_FOR_INSN (cmp_insn);
|
||||
+ FOR_EACH_INSN_USE (use, cmp_insn)
|
||||
+ {
|
||||
+ if (bitmap_bit_p (cond_rename_regs, DF_REF_REGNO (use)))
|
||||
+ {
|
||||
+ rtx use_reg = DF_REF_REG (use);
|
||||
+ rtx tmp = gen_reg_rtx (GET_MODE (use_reg));
|
||||
+ if (!validate_replace_rtx (use_reg, tmp, copy_of_cmp))
|
||||
+ {
|
||||
+ end_sequence ();
|
||||
+ return false;
|
||||
+ }
|
||||
+ noce_emit_move_insn (tmp, use_reg);
|
||||
+ }
|
||||
+ }
|
||||
+
|
||||
+ emit_insn (PATTERN (copy_of_cmp));
|
||||
+ rtx_insn *seq = get_insns ();
|
||||
+ unshare_all_rtl_in_chain (seq);
|
||||
+ end_sequence ();
|
||||
+
|
||||
+ emit_insn_after_setloc (seq, before_cmp, INSN_LOCATION (cmp_insn));
|
||||
+ delete_insn_and_edges (cmp_insn);
|
||||
+ rtx_insn* insn;
|
||||
+ FOR_BB_INSNS (cmp_block, insn)
|
||||
+ df_insn_rescan (insn);
|
||||
+
|
||||
+ if_info->cond = noce_get_condition (if_info->jump,
|
||||
+ &copy_of_cmp,
|
||||
+ if_info->then_else_reversed);
|
||||
+ if_info->cond_earliest = copy_of_cmp;
|
||||
+ if_info->rev_cond = NULL_RTX;
|
||||
+
|
||||
+ return success;
|
||||
+}
|
||||
+
|
||||
+/* This function tries to rename regs that intersect with considered bb. */
|
||||
+static bool
|
||||
+noce_rename_regs_in_bb (basic_block test_bb, bitmap rename_regs)
|
||||
+{
|
||||
+ if (bitmap_empty_p (rename_regs))
|
||||
+ return true;
|
||||
+ rtx_insn* insn;
|
||||
+ rtx_insn *last_insn = last_active_insn (test_bb, FALSE);
|
||||
+ bool res = true;
|
||||
+ start_sequence ();
|
||||
+ FOR_BB_INSNS (test_bb, insn)
|
||||
+ {
|
||||
+ if (!active_insn_p (insn))
|
||||
+ continue;
|
||||
+ /* Only ssets are supported for now. */
|
||||
+ rtx sset = single_set (insn);
|
||||
+ gcc_assert (sset);
|
||||
+ rtx x = SET_DEST (sset);
|
||||
+ if (!REG_P (x) || bitmap_bit_p (rename_regs, REGNO (x)))
|
||||
+ continue;
|
||||
+
|
||||
+ machine_mode mode = GET_MODE (x);
|
||||
+ rtx tmp = gen_reg_rtx (mode);
|
||||
+ if (!validate_replace_rtx_part (x, tmp, &SET_DEST (sset), insn))
|
||||
+ {
|
||||
+ gcc_assert (insn != last_insn);
|
||||
+ /* We can generate additional move for such case,
|
||||
+ but it will increase register pressure.
|
||||
+ For now just stop transformation. */
|
||||
+ rtx result_rtx = SET_DEST (single_set (last_insn));
|
||||
+ if (REG_P (result_rtx) && (x != result_rtx))
|
||||
+ {
|
||||
+ res = false;
|
||||
+ break;
|
||||
+ }
|
||||
+ if (!validate_replace_rtx (x, tmp, insn))
|
||||
+ gcc_unreachable ();
|
||||
+ noce_emit_move_insn (tmp,x);
|
||||
+ }
|
||||
+ set_used_flags (insn);
|
||||
+ rtx_insn* rename_candidate;
|
||||
+ for (rename_candidate = NEXT_INSN (insn);
|
||||
+ rename_candidate && rename_candidate!= NEXT_INSN (BB_END (test_bb));
|
||||
+ rename_candidate = NEXT_INSN (rename_candidate))
|
||||
+ {
|
||||
+ if (!reg_overlap_mentioned_p (x, rename_candidate))
|
||||
+ continue;
|
||||
+
|
||||
+ int replace_res = TRUE;
|
||||
+ if (rename_candidate == last_insn)
|
||||
+ {
|
||||
+ validate_replace_src_group (x, tmp, rename_candidate);
|
||||
+ replace_res = apply_change_group ();
|
||||
+ }
|
||||
+ else
|
||||
+ replace_res = validate_replace_rtx (x, tmp, rename_candidate);
|
||||
+ gcc_assert (replace_res);
|
||||
+ set_used_flags (rename_candidate);
|
||||
+
|
||||
+ }
|
||||
+ set_used_flags (x);
|
||||
+ set_used_flags (tmp);
|
||||
+
|
||||
+ }
|
||||
+ rtx_insn *seq = get_insns ();
|
||||
+ unshare_all_rtl_in_chain (seq);
|
||||
+ end_sequence ();
|
||||
+ emit_insn_before_setloc (seq, first_active_insn (test_bb),
|
||||
+ INSN_LOCATION (first_active_insn (test_bb)));
|
||||
+ FOR_BB_INSNS (test_bb, insn)
|
||||
+ df_insn_rescan (insn);
|
||||
+ return res;
|
||||
+}
|
||||
+
|
||||
/* Try more complex cases involving conditional_move. */
|
||||
|
||||
static int
|
||||
@@ -2166,11 +2291,29 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
|
||||
std::swap (then_bb, else_bb);
|
||||
}
|
||||
}
|
||||
-
|
||||
+ bitmap else_bb_rename_regs = BITMAP_ALLOC (&reg_obstack);
|
||||
+ bitmap then_bb_rename_regs = BITMAP_ALLOC (&reg_obstack);
|
||||
if (then_bb && else_bb
|
||||
- && (!bbs_ok_for_cmove_arith (then_bb, else_bb, if_info->orig_x)
|
||||
- || !bbs_ok_for_cmove_arith (else_bb, then_bb, if_info->orig_x)))
|
||||
- return FALSE;
|
||||
+ && (!bbs_ok_for_cmove_arith (then_bb, else_bb,
|
||||
+ if_info->orig_x,
|
||||
+ then_bb_rename_regs)
|
||||
+ || !bbs_ok_for_cmove_arith (else_bb, then_bb,
|
||||
+ if_info->orig_x,
|
||||
+ else_bb_rename_regs)))
|
||||
+ {
|
||||
+ BITMAP_FREE (then_bb_rename_regs);
|
||||
+ BITMAP_FREE (else_bb_rename_regs);
|
||||
+ return FALSE;
|
||||
+ }
|
||||
+ bool prepass_renaming = true;
|
||||
+ prepass_renaming |= noce_rename_regs_in_bb (then_bb, then_bb_rename_regs);
|
||||
+ prepass_renaming |= noce_rename_regs_in_bb (else_bb, else_bb_rename_regs);
|
||||
+
|
||||
+ BITMAP_FREE (then_bb_rename_regs);
|
||||
+ BITMAP_FREE (else_bb_rename_regs);
|
||||
+
|
||||
+ if (!prepass_renaming)
|
||||
+ return FALSE;
|
||||
|
||||
start_sequence ();
|
||||
|
||||
@@ -2178,7 +2321,6 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
|
||||
came from the test block. The non-empty complex block that we will
|
||||
emit might clobber the register used by B or A, so move it to a pseudo
|
||||
first. */
|
||||
-
|
||||
rtx tmp_a = NULL_RTX;
|
||||
rtx tmp_b = NULL_RTX;
|
||||
|
||||
@@ -3052,7 +3194,8 @@ noce_operand_ok (const_rtx op)
|
||||
|
||||
static bool
|
||||
bb_valid_for_noce_process_p (basic_block test_bb, rtx cond,
|
||||
- unsigned int *cost, bool *simple_p)
|
||||
+ unsigned int *cost, bool *simple_p,
|
||||
+ bitmap cond_rename_regs)
|
||||
{
|
||||
if (!test_bb)
|
||||
return false;
|
||||
@@ -3086,10 +3229,10 @@ bb_valid_for_noce_process_p (basic_block test_bb, rtx cond,
|
||||
rtx_insn *prev_last_insn = PREV_INSN (last_insn);
|
||||
gcc_assert (prev_last_insn);
|
||||
|
||||
- /* For now, disallow setting x multiple times in test_bb. */
|
||||
- if (REG_P (x) && reg_set_between_p (x, first_insn, prev_last_insn))
|
||||
+ if (REG_P (x)
|
||||
+ && reg_set_between_p (x, first_insn, prev_last_insn)
|
||||
+ && param_ifcvt_allow_register_renaming < 1)
|
||||
return false;
|
||||
-
|
||||
bitmap test_bb_temps = BITMAP_ALLOC (&reg_obstack);
|
||||
|
||||
/* The regs that are live out of test_bb. */
|
||||
@@ -3099,25 +3242,35 @@ bb_valid_for_noce_process_p (basic_block test_bb, rtx cond,
|
||||
rtx_insn *insn;
|
||||
FOR_BB_INSNS (test_bb, insn)
|
||||
{
|
||||
- if (insn != last_insn)
|
||||
- {
|
||||
- if (!active_insn_p (insn))
|
||||
- continue;
|
||||
+ if (insn == last_insn)
|
||||
+ continue;
|
||||
+ if (!active_insn_p (insn))
|
||||
+ continue;
|
||||
|
||||
- if (!insn_valid_noce_process_p (insn, cc))
|
||||
- goto free_bitmap_and_fail;
|
||||
+ if (!insn_valid_noce_process_p (insn, cc))
|
||||
+ goto free_bitmap_and_fail;
|
||||
|
||||
- rtx sset = single_set (insn);
|
||||
- gcc_assert (sset);
|
||||
+ rtx sset = single_set (insn);
|
||||
+ gcc_assert (sset);
|
||||
|
||||
- if (contains_mem_rtx_p (SET_SRC (sset))
|
||||
- || !REG_P (SET_DEST (sset))
|
||||
- || reg_overlap_mentioned_p (SET_DEST (sset), cond))
|
||||
- goto free_bitmap_and_fail;
|
||||
+ if (contains_mem_rtx_p (SET_SRC (sset))
|
||||
+ || !REG_P (SET_DEST (sset)))
|
||||
+ goto free_bitmap_and_fail;
|
||||
|
||||
- potential_cost += pattern_cost (sset, speed_p);
|
||||
- bitmap_set_bit (test_bb_temps, REGNO (SET_DEST (sset)));
|
||||
+ if (reg_overlap_mentioned_p (SET_DEST (sset), cond))
|
||||
+ {
|
||||
+ if (param_ifcvt_allow_register_renaming < 1)
|
||||
+ goto free_bitmap_and_fail;
|
||||
+ rtx sset_dest = SET_DEST (sset);
|
||||
+ if (REG_P (sset_dest)
|
||||
+ && (GET_MODE_CLASS (GET_MODE (sset_dest)) != MODE_CC))
|
||||
+ bitmap_set_bit (cond_rename_regs, REGNO (sset_dest));
|
||||
+ else
|
||||
+ goto free_bitmap_and_fail;
|
||||
}
|
||||
+ potential_cost += pattern_cost (sset, speed_p);
|
||||
+ if (SET_DEST (sset) != SET_DEST (last_set))
|
||||
+ bitmap_set_bit (test_bb_temps, REGNO (SET_DEST (sset)));
|
||||
}
|
||||
|
||||
/* If any of the intermediate results in test_bb are live after test_bb
|
||||
@@ -3475,14 +3628,27 @@ noce_process_if_block (struct noce_if_info *if_info)
|
||||
|
||||
bool speed_p = optimize_bb_for_speed_p (test_bb);
|
||||
unsigned int then_cost = 0, else_cost = 0;
|
||||
+ bitmap cond_rename_regs = BITMAP_ALLOC (&reg_obstack);
|
||||
if (!bb_valid_for_noce_process_p (then_bb, cond, &then_cost,
|
||||
- &if_info->then_simple))
|
||||
- return false;
|
||||
+ &if_info->then_simple, cond_rename_regs))
|
||||
+ {
|
||||
+ BITMAP_FREE (cond_rename_regs);
|
||||
+ return false;
|
||||
+ }
|
||||
|
||||
if (else_bb
|
||||
&& !bb_valid_for_noce_process_p (else_bb, cond, &else_cost,
|
||||
- &if_info->else_simple))
|
||||
+ &if_info->else_simple, cond_rename_regs))
|
||||
+ {
|
||||
+ BITMAP_FREE (cond_rename_regs);
|
||||
+ return false;
|
||||
+ }
|
||||
+
|
||||
+ if (!noce_rename_regs_in_cond (if_info, cond_rename_regs))
|
||||
return false;
|
||||
+ cond = if_info->cond;
|
||||
+
|
||||
+ BITMAP_FREE (cond_rename_regs);
|
||||
|
||||
if (speed_p)
|
||||
if_info->original_cost += average_cost (then_cost, else_cost,
|
||||
@@ -5426,7 +5592,7 @@ if_convert (bool after_combine)
|
||||
{
|
||||
basic_block bb;
|
||||
int pass;
|
||||
-
|
||||
+ cleanup_cfg (CLEANUP_EXPENSIVE);
|
||||
if (optimize == 1)
|
||||
{
|
||||
df_live_add_problem ();
|
||||
diff --git a/gcc/params.opt b/gcc/params.opt
|
||||
index 83fd705ee..345f9b3ff 100644
|
||||
--- a/gcc/params.opt
|
||||
+++ b/gcc/params.opt
|
||||
@@ -574,6 +574,14 @@ Maximum permissible cost for the sequence that would be generated by the RTL if-
|
||||
Common Joined UInteger Var(param_max_rtl_if_conversion_unpredictable_cost) Init(40) IntegerRange(0, 200) Param Optimization
|
||||
Maximum permissible cost for the sequence that would be generated by the RTL if-conversion pass for a branch that is considered unpredictable.
|
||||
|
||||
+-param=ifcvt-allow-complicated-cmps=
|
||||
+Common Joined UInteger Var(param_ifcvt_allow_complicated_cmps) IntegerRange(0, 1) Param Optimization
|
||||
+Allow RTL if-conversion pass to deal with complicated cmps (can increase compilation time).
|
||||
+
|
||||
+-param=ifcvt-allow-register-renaming=
|
||||
+Common Joined UInteger Var(param_ifcvt_allow_register_renaming) IntegerRange(0, 2) Param Optimization
|
||||
+Allow RTL if-conversion pass to aggressively rename registers in basic blocks. Sometimes additional moves will be created.
|
||||
+
|
||||
-param=max-sched-extend-regions-iters=
|
||||
Common Joined UInteger Var(param_max_sched_extend_regions_iters) Param Optimization
|
||||
The maximum number of iterations through CFG to extend regions.
|
||||
--
|
||||
2.33.0
|
||||
|
||||
239
0149-Add-more-flexible-check-for-pointer-aliasing-during-.patch
Normal file
239
0149-Add-more-flexible-check-for-pointer-aliasing-during-.patch
Normal file
@ -0,0 +1,239 @@
|
||||
From f43bdfbdcfdeb425a0bd303f4787a13323fd2934 Mon Sep 17 00:00:00 2001
|
||||
From: vchernon <chernonog.vyacheslav@huawei.com>
|
||||
Date: Wed, 27 Sep 2023 11:07:29 +0800
|
||||
Subject: [PATCH 11/13] Add more flexible check for pointer aliasing during
|
||||
vectorization
|
||||
|
||||
It takes the minimum of the number of iterations and the segment length, and helps to
|
||||
speed up loops with a small number of iterations when only the tail can be vectorized.
|
||||
---
|
||||
gcc/params.opt | 5 ++
|
||||
.../sve/var_stride_flexible_segment_len_1.c | 23 +++++++
|
||||
gcc/tree-data-ref.c | 68 +++++++++++++------
|
||||
gcc/tree-data-ref.h | 11 ++-
|
||||
gcc/tree-vect-data-refs.c | 14 +++-
|
||||
5 files changed, 95 insertions(+), 26 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/var_stride_flexible_segment_len_1.c
|
||||
|
||||
diff --git a/gcc/params.opt b/gcc/params.opt
|
||||
index 83fd705ee..7f335a94b 100644
|
||||
--- a/gcc/params.opt
|
||||
+++ b/gcc/params.opt
|
||||
@@ -964,6 +964,11 @@ Maximum number of loop peels to enhance alignment of data references in a loop.
|
||||
Common Joined UInteger Var(param_vect_max_version_for_alias_checks) Init(10) Param Optimization
|
||||
Bound on number of runtime checks inserted by the vectorizer's loop versioning for alias check.
|
||||
|
||||
+-param=vect-alias-flexible-segment-len=
|
||||
+Common Joined UInteger Var(param_flexible_seg_len) Init(0) IntegerRange(0, 1) Param Optimization
|
||||
+Use a minimum length of different segments. Currently the minimum between
|
||||
+iteration number and vectorization length is chosen by this param.
|
||||
+
|
||||
-param=vect-max-version-for-alignment-checks=
|
||||
Common Joined UInteger Var(param_vect_max_version_for_alignment_checks) Init(6) Param Optimization
|
||||
Bound on number of runtime checks inserted by the vectorizer's loop versioning for alignment check.
|
||||
diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_flexible_segment_len_1.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_flexible_segment_len_1.c
|
||||
new file mode 100644
|
||||
index 000000000..894f075f3
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_flexible_segment_len_1.c
|
||||
@@ -0,0 +1,23 @@
|
||||
+/* { dg-do compile } */
|
||||
+/* { dg-options "-O2 -ftree-vectorize --param=vect-alias-flexible-segment-len=1" } */
|
||||
+
|
||||
+#define TYPE int
|
||||
+#define SIZE 257
|
||||
+
|
||||
+void __attribute__ ((weak))
|
||||
+f (TYPE *x, TYPE *y, unsigned short n, long m __attribute__((unused)))
|
||||
+{
|
||||
+ for (int i = 0; i < SIZE; ++i)
|
||||
+ x[i * n] += y[i * n];
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler {\tld1w\tz[0-9]+} } } */
|
||||
+/* { dg-final { scan-assembler {\tst1w\tz[0-9]+} } } */
|
||||
+/* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */
|
||||
+/* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */
|
||||
+/* Should use a WAR check that multiplies by (VF-2)*4 rather than
|
||||
+ an overlap check that multiplies by (257-1)*4. */
|
||||
+/* { dg-final { scan-assembler {\tcntb\t(x[0-9]+)\n.*\tsub\tx[0-9]+, \1, #8\n.*\tmul\tx[0-9]+,[^\n]*\1} } } */
|
||||
+/* One range check and a check for n being zero. */
|
||||
+/* { dg-final { scan-assembler-times {\t(?:cmp|tst)\t} 2 } } */
|
||||
+/* { dg-final { scan-assembler-times {\tccmp\t} 1 } } */
|
||||
diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
|
||||
index 2cb54def8..8c5f1048c 100644
|
||||
--- a/gcc/tree-data-ref.c
|
||||
+++ b/gcc/tree-data-ref.c
|
||||
@@ -2071,31 +2071,14 @@ create_intersect_range_checks_index (class loop *loop, tree *cond_expr,
|
||||
same arguments. Try to optimize cases in which the second access
|
||||
is a write and in which some overlap is valid. */
|
||||
|
||||
-static bool
|
||||
-create_waw_or_war_checks (tree *cond_expr,
|
||||
+static void
|
||||
+create_waw_or_war_checks2 (tree *cond_expr, tree seg_len_a,
|
||||
const dr_with_seg_len_pair_t &alias_pair)
|
||||
{
|
||||
const dr_with_seg_len& dr_a = alias_pair.first;
|
||||
const dr_with_seg_len& dr_b = alias_pair.second;
|
||||
|
||||
- /* Check for cases in which:
|
||||
-
|
||||
- (a) DR_B is always a write;
|
||||
- (b) the accesses are well-ordered in both the original and new code
|
||||
- (see the comment above the DR_ALIAS_* flags for details); and
|
||||
- (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
|
||||
- if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
|
||||
- return false;
|
||||
-
|
||||
- /* Check for equal (but possibly variable) steps. */
|
||||
tree step = DR_STEP (dr_a.dr);
|
||||
- if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
|
||||
- return false;
|
||||
-
|
||||
- /* Make sure that we can operate on sizetype without loss of precision. */
|
||||
- tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
|
||||
- if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
|
||||
- return false;
|
||||
|
||||
/* All addresses involved are known to have a common alignment ALIGN.
|
||||
We can therefore subtract ALIGN from an exclusive endpoint to get
|
||||
@@ -2112,9 +2095,6 @@ create_waw_or_war_checks (tree *cond_expr,
|
||||
fold_convert (ssizetype, indicator),
|
||||
ssize_int (0));
|
||||
|
||||
- /* Get lengths in sizetype. */
|
||||
- tree seg_len_a
|
||||
- = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len));
|
||||
step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step));
|
||||
|
||||
/* Each access has the following pattern:
|
||||
@@ -2221,6 +2201,50 @@ create_waw_or_war_checks (tree *cond_expr,
|
||||
*cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit);
|
||||
if (dump_enabled_p ())
|
||||
dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n");
|
||||
+}
|
||||
+
|
||||
+/* This is a wrapper function for create_waw_or_war_checks2. */
|
||||
+static bool
|
||||
+create_waw_or_war_checks (tree *cond_expr,
|
||||
+ const dr_with_seg_len_pair_t &alias_pair)
|
||||
+{
|
||||
+ const dr_with_seg_len& dr_a = alias_pair.first;
|
||||
+ const dr_with_seg_len& dr_b = alias_pair.second;
|
||||
+
|
||||
+ /* Check for cases in which:
|
||||
+
|
||||
+ (a) DR_B is always a write;
|
||||
+ (b) the accesses are well-ordered in both the original and new code
|
||||
+ (see the comment above the DR_ALIAS_* flags for details); and
|
||||
+ (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */
|
||||
+ if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW))
|
||||
+ return false;
|
||||
+
|
||||
+ /* Check for equal (but possibly variable) steps. */
|
||||
+ tree step = DR_STEP (dr_a.dr);
|
||||
+ if (!operand_equal_p (step, DR_STEP (dr_b.dr)))
|
||||
+ return false;
|
||||
+
|
||||
+ /* Make sure that we can operate on sizetype without loss of precision. */
|
||||
+ tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr));
|
||||
+ if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype))
|
||||
+ return false;
|
||||
+
|
||||
+ /* Get lengths in sizetype. */
|
||||
+ tree seg_len_a
|
||||
+ = fold_convert (sizetype,
|
||||
+ rewrite_to_non_trapping_overflow (dr_a.seg_len));
|
||||
+ create_waw_or_war_checks2 (cond_expr, seg_len_a, alias_pair);
|
||||
+ if (param_flexible_seg_len && dr_a.seg_len != dr_a.seg_len2)
|
||||
+ {
|
||||
+ tree seg_len2_a
|
||||
+ = fold_convert (sizetype,
|
||||
+ rewrite_to_non_trapping_overflow (dr_a.seg_len2));
|
||||
+ tree cond_expr2;
|
||||
+ create_waw_or_war_checks2 (&cond_expr2, seg_len2_a, alias_pair);
|
||||
+ *cond_expr = fold_build2 (TRUTH_OR_EXPR, boolean_type_node,
|
||||
+ *cond_expr, cond_expr2);
|
||||
+ }
|
||||
return true;
|
||||
}
|
||||
|
||||
diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h
|
||||
index 771d20fbb..5903ce66a 100644
|
||||
--- a/gcc/tree-data-ref.h
|
||||
+++ b/gcc/tree-data-ref.h
|
||||
@@ -208,12 +208,19 @@ class dr_with_seg_len
|
||||
public:
|
||||
dr_with_seg_len (data_reference_p d, tree len, unsigned HOST_WIDE_INT size,
|
||||
unsigned int a)
|
||||
- : dr (d), seg_len (len), access_size (size), align (a) {}
|
||||
-
|
||||
+ : dr (d), seg_len (len), seg_len2 (len), access_size (size), align (a)
|
||||
+ {}
|
||||
+ dr_with_seg_len (data_reference_p d, tree len, tree len2,
|
||||
+ unsigned HOST_WIDE_INT size, unsigned int a)
|
||||
+ : dr (d), seg_len (len), seg_len2 (len2), access_size (size), align (a)
|
||||
+ {}
|
||||
data_reference_p dr;
|
||||
/* The offset of the last access that needs to be checked minus
|
||||
the offset of the first. */
|
||||
tree seg_len;
|
||||
+ /* The second version of segment length. Currently this is used to
|
||||
+ soften checks for a small number of iterations. */
|
||||
+ tree seg_len2;
|
||||
/* A value that, when added to abs (SEG_LEN), gives the total number of
|
||||
bytes in the segment. */
|
||||
poly_uint64 access_size;
|
||||
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
|
||||
index e4466a4f3..1b8a03c9c 100644
|
||||
--- a/gcc/tree-vect-data-refs.c
|
||||
+++ b/gcc/tree-vect-data-refs.c
|
||||
@@ -3498,6 +3498,7 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
|
||||
{
|
||||
poly_uint64 lower_bound;
|
||||
tree segment_length_a, segment_length_b;
|
||||
+ tree segment_length2_a, segment_length2_b;
|
||||
unsigned HOST_WIDE_INT access_size_a, access_size_b;
|
||||
unsigned int align_a, align_b;
|
||||
|
||||
@@ -3598,6 +3599,8 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
|
||||
{
|
||||
segment_length_a = size_zero_node;
|
||||
segment_length_b = size_zero_node;
|
||||
+ segment_length2_a = size_zero_node;
|
||||
+ segment_length2_b = size_zero_node;
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -3606,8 +3609,15 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
|
||||
length_factor = scalar_loop_iters;
|
||||
else
|
||||
length_factor = size_int (vect_factor);
|
||||
+ /* In any case we should remember scalar_loop_iters;
|
||||
+ this helps to create flexible aliasing check
|
||||
+ for small number of iterations. */
|
||||
segment_length_a = vect_vfa_segment_size (dr_info_a, length_factor);
|
||||
segment_length_b = vect_vfa_segment_size (dr_info_b, length_factor);
|
||||
+ segment_length2_a
|
||||
+ = vect_vfa_segment_size (dr_info_a, scalar_loop_iters);
|
||||
+ segment_length2_b
|
||||
+ = vect_vfa_segment_size (dr_info_b, scalar_loop_iters);
|
||||
}
|
||||
access_size_a = vect_vfa_access_size (dr_info_a);
|
||||
access_size_b = vect_vfa_access_size (dr_info_b);
|
||||
@@ -3652,9 +3662,9 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
|
||||
}
|
||||
|
||||
dr_with_seg_len dr_a (dr_info_a->dr, segment_length_a,
|
||||
- access_size_a, align_a);
|
||||
+ segment_length2_a, access_size_a, align_a);
|
||||
dr_with_seg_len dr_b (dr_info_b->dr, segment_length_b,
|
||||
- access_size_b, align_b);
|
||||
+ segment_length2_b, access_size_b, align_b);
|
||||
/* Canonicalize the order to be the one that's needed for accurate
|
||||
RAW, WAR and WAW flags, in cases where the data references are
|
||||
well-ordered. The order doesn't really matter otherwise,
|
||||
--
|
||||
2.33.0
|
||||
|
||||
1050
0150-Implement-propagation-of-permutations-in-fwprop.patch
Normal file
1050
0150-Implement-propagation-of-permutations-in-fwprop.patch
Normal file
File diff suppressed because it is too large
Load Diff
381
0151-Fix-bugs-and-add-tests-for-RTL-ifcvt.patch
Normal file
381
0151-Fix-bugs-and-add-tests-for-RTL-ifcvt.patch
Normal file
@ -0,0 +1,381 @@
|
||||
From 4bcb19923cdcb042d66057766d661ef68bf70e92 Mon Sep 17 00:00:00 2001
|
||||
From: Chernonog Vyacheslav 00812786 <chernonog.vyacheslav@huawei.com>
|
||||
Date: Wed, 29 Mar 2023 05:22:17 +0300
|
||||
Subject: [PATCH 13/13] Fix bugs and add tests for RTL ifcvt
|
||||
|
||||
1. Fix bug in RTL ifcvt that ran the pass despite renaming failure.
|
||||
2. Fix bug that prevented the final set register from being renamed.
|
||||
3. Clean up dominance info before running cleanup_cfg to avoid fixing up
|
||||
invalid dominance info.
|
||||
4. Remove duplicated cleanup_cfg.
|
||||
5. Add tests.
|
||||
---
|
||||
gcc/common.opt | 4 +
|
||||
gcc/ifcvt.c | 88 ++++++++++++-------
|
||||
gcc/params.opt | 4 -
|
||||
.../gcc.c-torture/execute/ifcvt-renaming-1.c | 38 ++++++++
|
||||
gcc/testsuite/gcc.dg/ifcvt-6.c | 29 ++++++
|
||||
5 files changed, 128 insertions(+), 35 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.c-torture/execute/ifcvt-renaming-1.c
|
||||
create mode 100644 gcc/testsuite/gcc.dg/ifcvt-6.c
|
||||
|
||||
diff --git a/gcc/common.opt b/gcc/common.opt
|
||||
index 6f0ed7cea..92d3a1986 100644
|
||||
--- a/gcc/common.opt
|
||||
+++ b/gcc/common.opt
|
||||
@@ -3534,4 +3534,8 @@ fipa-ra
|
||||
Common Report Var(flag_ipa_ra) Optimization
|
||||
Use caller save register across calls if possible.
|
||||
|
||||
+fifcvt-allow-complicated-cmps
|
||||
+Common Report Var(flag_ifcvt_allow_complicated_cmps) Optimization
|
||||
+Allow RTL if-conversion pass to deal with complicated cmps (can increase compilation time).
|
||||
+
|
||||
; This comment is to ensure we retain the blank line above.
|
||||
diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
|
||||
index 50a73a7ca..209987ebc 100644
|
||||
--- a/gcc/ifcvt.c
|
||||
+++ b/gcc/ifcvt.c
|
||||
@@ -876,7 +876,7 @@ noce_emit_store_flag (struct noce_if_info *if_info, rtx x, int reversep,
|
||||
}
|
||||
|
||||
/* Don't even try if the comparison operands or the mode of X are weird. */
|
||||
- if (!param_ifcvt_allow_complicated_cmps
|
||||
+ if (!flag_ifcvt_allow_complicated_cmps
|
||||
&& (cond_complex
|
||||
|| !SCALAR_INT_MODE_P (GET_MODE (x))))
|
||||
return NULL_RTX;
|
||||
@@ -1745,7 +1745,7 @@ noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code,
|
||||
|
||||
/* Don't even try if the comparison operands are weird
|
||||
except that the target supports cbranchcc4. */
|
||||
- if (! param_ifcvt_allow_complicated_cmps
|
||||
+ if (! flag_ifcvt_allow_complicated_cmps
|
||||
&& (! general_operand (cmp_a, GET_MODE (cmp_a))
|
||||
|| ! general_operand (cmp_b, GET_MODE (cmp_b))))
|
||||
{
|
||||
@@ -1918,6 +1918,19 @@ noce_try_cmove (struct noce_if_info *if_info)
|
||||
return FALSE;
|
||||
}
|
||||
|
||||
+/* Return true if X contains a conditional code mode rtx. */
|
||||
+
|
||||
+static bool
|
||||
+contains_ccmode_rtx_p (rtx x)
|
||||
+{
|
||||
+ subrtx_iterator::array_type array;
|
||||
+ FOR_EACH_SUBRTX (iter, array, x, ALL)
|
||||
+ if (GET_MODE_CLASS (GET_MODE (*iter)) == MODE_CC)
|
||||
+ return true;
|
||||
+
|
||||
+ return false;
|
||||
+}
|
||||
+
|
||||
/* Helper for bb_valid_for_noce_process_p. Validate that
|
||||
the rtx insn INSN is a single set that does not set
|
||||
the conditional register CC and is in general valid for
|
||||
@@ -1936,6 +1949,8 @@ insn_valid_noce_process_p (rtx_insn *insn, rtx cc)
|
||||
/* Currently support only simple single sets in test_bb. */
|
||||
if (!sset
|
||||
|| !noce_operand_ok (SET_DEST (sset))
|
||||
+ || (!flag_ifcvt_allow_complicated_cmps
|
||||
+ && contains_ccmode_rtx_p (SET_DEST (sset)))
|
||||
|| !noce_operand_ok (SET_SRC (sset)))
|
||||
return false;
|
||||
|
||||
@@ -1974,8 +1989,7 @@ bbs_ok_for_cmove_arith (basic_block bb_a,
|
||||
continue;
|
||||
/* Record all registers that BB_A sets. */
|
||||
FOR_EACH_INSN_DEF (def, a_insn)
|
||||
- if (!(to_rename && DF_REF_REG (def) == to_rename))
|
||||
- bitmap_set_bit (bba_sets, DF_REF_REGNO (def));
|
||||
+ bitmap_set_bit (bba_sets, DF_REF_REGNO (def));
|
||||
}
|
||||
|
||||
bitmap_and (intersections, df_get_live_in (bb_b), bba_sets);
|
||||
@@ -1984,6 +1998,7 @@ bbs_ok_for_cmove_arith (basic_block bb_a,
|
||||
{
|
||||
if (!active_insn_p (b_insn))
|
||||
continue;
|
||||
+
|
||||
rtx sset_b = single_set (b_insn);
|
||||
|
||||
if (!sset_b)
|
||||
@@ -2081,7 +2096,12 @@ noce_emit_bb (rtx last_insn, basic_block bb, bool simple)
|
||||
return true;
|
||||
}
|
||||
|
||||
-/* This function tries to rename regs that intersect with considered bb. */
|
||||
+/* This function tries to rename regs that intersect with considered bb
|
||||
+ inside condition expression. Condition expression will be moved down
|
||||
+ if the optimization will be applied, so it is essential to be sure that
|
||||
+ all intersected registers will be renamed otherwise transformation
|
||||
+ can't be applied. Function returns true if renaming was successful
|
||||
+ and optimization can proceed further. */
|
||||
|
||||
static bool
|
||||
noce_rename_regs_in_cond (struct noce_if_info *if_info, bitmap cond_rename_regs)
|
||||
@@ -2092,11 +2112,11 @@ noce_rename_regs_in_cond (struct noce_if_info *if_info, bitmap cond_rename_regs)
|
||||
if (param_ifcvt_allow_register_renaming < 2)
|
||||
return false;
|
||||
df_ref use;
|
||||
- rtx_insn* cmp_insn = if_info->cond_earliest;
|
||||
+ rtx_insn *cmp_insn = if_info->cond_earliest;
|
||||
/* Jump instruction as a condion currently unsupported. */
|
||||
if (JUMP_P (cmp_insn))
|
||||
return false;
|
||||
- rtx_insn* before_cmp = PREV_INSN (cmp_insn);
|
||||
+ rtx_insn *before_cmp = PREV_INSN (cmp_insn);
|
||||
start_sequence ();
|
||||
rtx_insn *copy_of_cmp = as_a <rtx_insn *> (copy_rtx (cmp_insn));
|
||||
basic_block cmp_block = BLOCK_FOR_INSN (cmp_insn);
|
||||
@@ -2122,7 +2142,7 @@ noce_rename_regs_in_cond (struct noce_if_info *if_info, bitmap cond_rename_regs)
|
||||
|
||||
emit_insn_after_setloc (seq, before_cmp, INSN_LOCATION (cmp_insn));
|
||||
delete_insn_and_edges (cmp_insn);
|
||||
- rtx_insn* insn;
|
||||
+ rtx_insn *insn;
|
||||
FOR_BB_INSNS (cmp_block, insn)
|
||||
df_insn_rescan (insn);
|
||||
|
||||
@@ -2135,13 +2155,15 @@ noce_rename_regs_in_cond (struct noce_if_info *if_info, bitmap cond_rename_regs)
|
||||
return success;
|
||||
}
|
||||
|
||||
-/* This function tries to rename regs that intersect with considered bb. */
|
||||
+/* This function tries to rename regs that intersect with considered bb.
|
||||
+ Return true if the renaming was successful and optimization can
|
||||
+ proceed further, false otherwise. */
|
||||
static bool
|
||||
noce_rename_regs_in_bb (basic_block test_bb, bitmap rename_regs)
|
||||
{
|
||||
if (bitmap_empty_p (rename_regs))
|
||||
return true;
|
||||
- rtx_insn* insn;
|
||||
+ rtx_insn *insn;
|
||||
rtx_insn *last_insn = last_active_insn (test_bb, FALSE);
|
||||
bool res = true;
|
||||
start_sequence ();
|
||||
@@ -2153,7 +2175,7 @@ noce_rename_regs_in_bb (basic_block test_bb, bitmap rename_regs)
|
||||
rtx sset = single_set (insn);
|
||||
gcc_assert (sset);
|
||||
rtx x = SET_DEST (sset);
|
||||
- if (!REG_P (x) || bitmap_bit_p (rename_regs, REGNO (x)))
|
||||
+ if (!REG_P (x) || !bitmap_bit_p (rename_regs, REGNO (x)))
|
||||
continue;
|
||||
|
||||
machine_mode mode = GET_MODE (x);
|
||||
@@ -2175,7 +2197,7 @@ noce_rename_regs_in_bb (basic_block test_bb, bitmap rename_regs)
|
||||
noce_emit_move_insn (tmp,x);
|
||||
}
|
||||
set_used_flags (insn);
|
||||
- rtx_insn* rename_candidate;
|
||||
+ rtx_insn *rename_candidate;
|
||||
for (rename_candidate = NEXT_INSN (insn);
|
||||
rename_candidate && rename_candidate!= NEXT_INSN (BB_END (test_bb));
|
||||
rename_candidate = NEXT_INSN (rename_candidate))
|
||||
@@ -2193,17 +2215,16 @@ noce_rename_regs_in_bb (basic_block test_bb, bitmap rename_regs)
|
||||
replace_res = validate_replace_rtx (x, tmp, rename_candidate);
|
||||
gcc_assert (replace_res);
|
||||
set_used_flags (rename_candidate);
|
||||
-
|
||||
}
|
||||
set_used_flags (x);
|
||||
set_used_flags (tmp);
|
||||
-
|
||||
}
|
||||
- rtx_insn *seq = get_insns ();
|
||||
- unshare_all_rtl_in_chain (seq);
|
||||
- end_sequence ();
|
||||
- emit_insn_before_setloc (seq, first_active_insn (test_bb),
|
||||
- INSN_LOCATION (first_active_insn (test_bb)));
|
||||
+
|
||||
+ rtx_insn *seq = get_insns ();
|
||||
+ unshare_all_rtl_in_chain (seq);
|
||||
+ end_sequence ();
|
||||
+ emit_insn_before_setloc (seq, first_active_insn (test_bb),
|
||||
+ INSN_LOCATION (first_active_insn (test_bb)));
|
||||
FOR_BB_INSNS (test_bb, insn)
|
||||
df_insn_rescan (insn);
|
||||
return res;
|
||||
@@ -2305,9 +2326,10 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
|
||||
BITMAP_FREE (else_bb_rename_regs);
|
||||
return FALSE;
|
||||
}
|
||||
- bool prepass_renaming = true;
|
||||
- prepass_renaming |= noce_rename_regs_in_bb (then_bb, then_bb_rename_regs);
|
||||
- prepass_renaming |= noce_rename_regs_in_bb (else_bb, else_bb_rename_regs);
|
||||
+ bool prepass_renaming = noce_rename_regs_in_bb (then_bb,
|
||||
+ then_bb_rename_regs)
|
||||
+ && noce_rename_regs_in_bb (else_bb,
|
||||
+ else_bb_rename_regs);
|
||||
|
||||
BITMAP_FREE (then_bb_rename_regs);
|
||||
BITMAP_FREE (else_bb_rename_regs);
|
||||
@@ -2321,6 +2343,7 @@ noce_try_cmove_arith (struct noce_if_info *if_info)
|
||||
came from the test block. The non-empty complex block that we will
|
||||
emit might clobber the register used by B or A, so move it to a pseudo
|
||||
first. */
|
||||
+
|
||||
rtx tmp_a = NULL_RTX;
|
||||
rtx tmp_b = NULL_RTX;
|
||||
|
||||
@@ -3233,6 +3256,7 @@ bb_valid_for_noce_process_p (basic_block test_bb, rtx cond,
|
||||
&& reg_set_between_p (x, first_insn, prev_last_insn)
|
||||
&& param_ifcvt_allow_register_renaming < 1)
|
||||
return false;
|
||||
+
|
||||
 bitmap test_bb_temps = BITMAP_ALLOC (&reg_obstack);
|
||||
|
||||
/* The regs that are live out of test_bb. */
|
||||
@@ -3268,9 +3292,10 @@ bb_valid_for_noce_process_p (basic_block test_bb, rtx cond,
|
||||
else
|
||||
goto free_bitmap_and_fail;
|
||||
}
|
||||
- potential_cost += pattern_cost (sset, speed_p);
|
||||
- if (SET_DEST (sset) != SET_DEST (last_set))
|
||||
- bitmap_set_bit (test_bb_temps, REGNO (SET_DEST (sset)));
|
||||
+
|
||||
+ potential_cost += pattern_cost (sset, speed_p);
|
||||
+ if (SET_DEST (sset) != SET_DEST (last_set))
|
||||
+ bitmap_set_bit (test_bb_temps, REGNO (SET_DEST (sset)));
|
||||
}
|
||||
|
||||
/* If any of the intermediate results in test_bb are live after test_bb
|
||||
@@ -3645,11 +3670,12 @@ noce_process_if_block (struct noce_if_info *if_info)
|
||||
}
|
||||
|
||||
if (!noce_rename_regs_in_cond (if_info, cond_rename_regs))
|
||||
- return false;
|
||||
- cond = if_info->cond;
|
||||
-
|
||||
+ {
|
||||
+ BITMAP_FREE (cond_rename_regs);
|
||||
+ return false;
|
||||
+ }
|
||||
BITMAP_FREE (cond_rename_regs);
|
||||
-
|
||||
+ cond = if_info->cond;
|
||||
if (speed_p)
|
||||
if_info->original_cost += average_cost (then_cost, else_cost,
|
||||
find_edge (test_bb, then_bb));
|
||||
@@ -5592,12 +5618,13 @@ if_convert (bool after_combine)
|
||||
{
|
||||
basic_block bb;
|
||||
int pass;
|
||||
- cleanup_cfg (CLEANUP_EXPENSIVE);
|
||||
+
|
||||
if (optimize == 1)
|
||||
{
|
||||
df_live_add_problem ();
|
||||
df_live_set_all_dirty ();
|
||||
}
|
||||
+ cleanup_cfg (CLEANUP_EXPENSIVE);
|
||||
|
||||
/* Record whether we are after combine pass. */
|
||||
ifcvt_after_combine = after_combine;
|
||||
@@ -5702,7 +5729,6 @@ rest_of_handle_if_conversion (void)
|
||||
dump_reg_info (dump_file);
|
||||
dump_flow_info (dump_file, dump_flags);
|
||||
}
|
||||
- cleanup_cfg (CLEANUP_EXPENSIVE);
|
||||
if_convert (false);
|
||||
if (num_updated_if_blocks)
|
||||
/* Get rid of any dead CC-related instructions. */
|
||||
diff --git a/gcc/params.opt b/gcc/params.opt
|
||||
index 345f9b3ff..272a0eb2b 100644
|
||||
--- a/gcc/params.opt
|
||||
+++ b/gcc/params.opt
|
||||
@@ -574,10 +574,6 @@ Maximum permissible cost for the sequence that would be generated by the RTL if-
|
||||
Common Joined UInteger Var(param_max_rtl_if_conversion_unpredictable_cost) Init(40) IntegerRange(0, 200) Param Optimization
|
||||
Maximum permissible cost for the sequence that would be generated by the RTL if-conversion pass for a branch that is considered unpredictable.
|
||||
|
||||
--param=ifcvt-allow-complicated-cmps=
|
||||
-Common Joined UInteger Var(param_ifcvt_allow_complicated_cmps) IntegerRange(0, 1) Param Optimization
|
||||
-Allow RTL if-conversion pass to deal with complicated cmps (can increase compilation time).
|
||||
-
|
||||
-param=ifcvt-allow-register-renaming=
|
||||
Common Joined UInteger Var(param_ifcvt_allow_register_renaming) IntegerRange(0, 2) Param Optimization
|
||||
Allow RTL if-conversion pass to aggressively rename registers in basic blocks. Sometimes additional moves will be created.
|
||||
diff --git a/gcc/testsuite/gcc.c-torture/execute/ifcvt-renaming-1.c b/gcc/testsuite/gcc.c-torture/execute/ifcvt-renaming-1.c
|
||||
new file mode 100644
|
||||
index 000000000..761c8ab7e
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.c-torture/execute/ifcvt-renaming-1.c
|
||||
@@ -0,0 +1,38 @@
|
||||
+
|
||||
+extern void abort(void);
|
||||
+
|
||||
+__attribute__ ((noinline))
|
||||
+int foo (int x, int y, int z, int a, int b)
|
||||
+{
|
||||
+ if (a < 2)
|
||||
+ {
|
||||
+ if (a == 0)
|
||||
+ {
|
||||
+ if (x - y < 0)
|
||||
+ x = x - y + z;
|
||||
+ else
|
||||
+ x = x - y;
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ if (x + y >= z)
|
||||
+ x = x + y - z;
|
||||
+ else
|
||||
+ x = x + y;
|
||||
+ }
|
||||
+ }
|
||||
+ return x;
|
||||
+}
|
||||
+
|
||||
+int main(void)
|
||||
+{
|
||||
+ if (foo (5,10,7,0,1) != 2) // x - y + z = -5 + 7 = 2
|
||||
+ abort ();
|
||||
+ if (foo (50,10,7,0,1) != 40) // x - y = 40
|
||||
+ abort ();
|
||||
+ if (foo (5,10,7,1,1) != 8) // x + y - z = 5 + 10 - 7 = 8
|
||||
+ abort ();
|
||||
+ if (foo (5,10,70,1,1) != 15) // x + y = 15
|
||||
+ abort ();
|
||||
+ return 0;
|
||||
+}
|
||||
diff --git a/gcc/testsuite/gcc.dg/ifcvt-6.c b/gcc/testsuite/gcc.dg/ifcvt-6.c
|
||||
new file mode 100644
|
||||
index 000000000..7d2a8d58b
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/ifcvt-6.c
|
||||
@@ -0,0 +1,29 @@
|
||||
+/* { dg-do compile { target { aarch64*-*-* } } } */
|
||||
+/* { dg-options "-fdump-rtl-ce1 -O2 -fifcvt-allow-complicated-cmps --param max-rtl-if-conversion-unpredictable-cost=100 --param max-rtl-if-conversion-predictable-cost=100 --param=ifcvt-allow-register-renaming=2 " } */
|
||||
+
|
||||
+typedef unsigned int uint16_t;
|
||||
+
|
||||
+uint16_t
|
||||
+foo (uint16_t x, uint16_t y, uint16_t z, uint16_t a,
|
||||
+ uint16_t b, uint16_t c, uint16_t d)
|
||||
+{
|
||||
+ int i = 1;
|
||||
+ int j = 1;
|
||||
+ if (a > b)
|
||||
+ {
|
||||
+ j = x;
|
||||
+ if (b > c)
|
||||
+ i = y;
|
||||
+ else
|
||||
+ i = z;
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ j = y;
|
||||
+ if (c > d)
|
||||
+ i = z;
|
||||
+ }
|
||||
+ return i * j;
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-rtl-dump "7 true changes made" "ce1" } } */
|
||||
--
|
||||
2.33.0
|
||||
|
||||
4905
0152-Add-LLC-Allocation-Pass.patch
Normal file
4905
0152-Add-LLC-Allocation-Pass.patch
Normal file
File diff suppressed because it is too large
Load Diff
1285
0153-LLC-add-extending-outer-loop.patch
Normal file
1285
0153-LLC-add-extending-outer-loop.patch
Normal file
File diff suppressed because it is too large
Load Diff
46
gcc.spec
46
gcc.spec
@ -46,7 +46,7 @@
|
||||
%else
|
||||
%global build_libitm 0
|
||||
%endif
|
||||
%global build_isl 0
|
||||
%global build_isl 1
|
||||
%global build_libstdcxx_docs 0
|
||||
%ifarch %{ix86} x86_64 ppc ppc64 ppc64le ppc64p7 s390 s390x %{arm} aarch64 %{mips}
|
||||
%global attr_ifunc 1
|
||||
@ -61,7 +61,7 @@
|
||||
Summary: Various compilers (C, C++, Objective-C, ...)
|
||||
Name: gcc
|
||||
Version: %{gcc_version}
|
||||
Release: 41
|
||||
Release: 42
|
||||
License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD
|
||||
URL: https://gcc.gnu.org
|
||||
|
||||
@ -249,6 +249,18 @@ Patch138: 0138-Fix-ICE-bugs-in-transpose-test-cases-with-vector-ind.patch
|
||||
Patch139: 0139-Fix-errors-on-testsuite-c-c-tests-and-505.mcf_r.patch
|
||||
Patch140: 0140-Fix-an-error-in-memory-allocation-deallocation.patch
|
||||
Patch141: 0141-Fix-warnings-and-errors-with-debug-prints.patch
|
||||
Patch142: 0142-crc-loop-optimization-initial.patch
|
||||
Patch143: 0143-Perform-early-if-conversion-of-simple-arithmetic.patch
|
||||
Patch144: 0144-Add-option-to-allow-matching-uaddsub-overflow-for-wi.patch
|
||||
Patch145: 0145-Match-double-sized-mul-pattern.patch
|
||||
Patch146: 0146-LOOP-CRC32-Add-Crc32-Optimization-in-Gzip-For-crc32-.patch
|
||||
Patch147: 0147-add-insn-defs-and-correct-costs-for-cmlt-generation.patch
|
||||
Patch148: 0148-Introduce-RTL-ifcvt-enhancements.patch
|
||||
Patch149: 0149-Add-more-flexible-check-for-pointer-aliasing-during-.patch
|
||||
Patch150: 0150-Implement-propagation-of-permutations-in-fwprop.patch
|
||||
Patch151: 0151-Fix-bugs-and-add-tests-for-RTL-ifcvt.patch
|
||||
Patch152: 0152-Add-LLC-Allocation-Pass.patch
|
||||
Patch153: 0153-LLC-add-extending-outer-loop.patch
|
||||
|
||||
%global gcc_target_platform %{_arch}-linux-gnu
|
||||
|
||||
@ -843,6 +855,18 @@ not stable, so plugins must be rebuilt any time GCC is updated.
|
||||
%patch139 -p1
|
||||
%patch140 -p1
|
||||
%patch141 -p1
|
||||
%patch142 -p1
|
||||
%patch143 -p1
|
||||
%patch144 -p1
|
||||
%patch145 -p1
|
||||
%patch146 -p1
|
||||
%patch147 -p1
|
||||
%patch148 -p1
|
||||
%patch149 -p1
|
||||
%patch150 -p1
|
||||
%patch151 -p1
|
||||
%patch152 -p1
|
||||
%patch153 -p1
|
||||
|
||||
%build
|
||||
|
||||
@ -908,15 +932,10 @@ CC="$CC" CFLAGS="$OPT_FLAGS" \
|
||||
--with-system-zlib --enable-__cxa_atexit --disable-libunwind-exceptions \
|
||||
--enable-gnu-unique-object --enable-linker-build-id --with-linker-hash-style=gnu \
|
||||
--enable-languages=c,c++,fortran${enablelobjc}${enablelada}${enablelgo}${enableld},lto --enable-plugin \
|
||||
--enable-initfini-array --disable-libgcj --without-cloog \
|
||||
--enable-initfini-array --disable-libgcj --with-isl --without-cloog \
|
||||
--enable-gnu-indirect-function --build=%{gcc_target_platform} \
|
||||
--with-stage1-ldflags="$OPT_LDFLAGS" \
|
||||
--with-boot-ldflags="$OPT_LDFLAGS" --disable-bootstrap \
|
||||
%if %{build_isl}
|
||||
--with-isl \
|
||||
%else
|
||||
--without-isl \
|
||||
%endif
|
||||
%ifarch x86_64
|
||||
--with-tune=generic \
|
||||
--with-arch_32=x86-64 \
|
||||
@ -930,11 +949,6 @@ CC="$CC" CFLAGS="$OPT_FLAGS" \
|
||||
--with-arch=rv64g --with-abi=lp64d \
|
||||
--disable-libquadmath --disable-multilib
|
||||
%endif
|
||||
%ifarch ppc64le
|
||||
--disable-multilib \
|
||||
--enable-targets=powerpcle-linux \
|
||||
--with-cpu-32=power8 --with-tune-32=power8 --with-cpu-64=power8 --with-tune-64=power8 \
|
||||
%endif
|
||||
|
||||
%ifarch sparc sparcv9 sparc64
|
||||
make %{?_smp_mflags} BOOT_CFLAGS="$OPT_FLAGS" bootstrap
|
||||
@ -2877,6 +2891,12 @@ end
|
||||
%doc rpm.doc/changelogs/libcc1/ChangeLog*
|
||||
|
||||
%changelog
|
||||
* Wed May 29 2024 zhengchenhui <zhengchenhui1@huawei.com> - 10.3.1-42
|
||||
- Type:Spec
|
||||
- ID:NA
|
||||
- SUG:NA
|
||||
- DESC: Revert last two commits about isl and ppc64le, and sync patches from openeuler/gcc
|
||||
|
||||
* Mon Apr 15 2024 huyubiao <huyubiao@huawei.com> - 10.3.1-41
|
||||
- Type:SPEC
|
||||
- ID:NA
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user