From f788555b23b0b676729bb695af96954fe083e354 Mon Sep 17 00:00:00 2001
From: Pronin Alexander 00812787 <pronin.alexander@huawei.com>
Date: Tue, 24 Jan 2023 16:43:40 +0300
Subject: [PATCH 21/33] Add option to allow matching uaddsub overflow for widen
 ops too.

---
 gcc/common.opt                 |   5 ++
 gcc/testsuite/gcc.dg/uaddsub.c | 143 +++++++++++++++++++++++++++++++++
 gcc/tree-ssa-math-opts.c       |  35 +++++++-
 3 files changed, 179 insertions(+), 4 deletions(-)
 create mode 100644 gcc/testsuite/gcc.dg/uaddsub.c

diff --git a/gcc/common.opt b/gcc/common.opt
index 6950756fd..c2f01bbc0 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -2989,6 +2989,11 @@ freciprocal-math
 Common Report Var(flag_reciprocal_math) SetByCombined Optimization
 Same as -fassociative-math for expressions which include division.
 
+fuaddsub-overflow-match-all
+Common Report Var(flag_uaddsub_overflow_match_all)
+Match unsigned add/sub overflow even if the target does not support
+the corresponding instruction.
+
 ; Nonzero means that unsafe floating-point math optimizations are allowed
 ; for the sake of speed. IEEE compliance is not guaranteed, and operations
 ; are allowed to assume that their arguments and results are "normal"
diff --git a/gcc/testsuite/gcc.dg/uaddsub.c b/gcc/testsuite/gcc.dg/uaddsub.c
new file mode 100644
index 000000000..96c26d308
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/uaddsub.c
@@ -0,0 +1,143 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -fuaddsub-overflow-match-all -fdump-tree-optimized" } */
+#include <stdint.h>
+
+typedef unsigned __int128 uint128_t;
+typedef struct uint256_t
+{
+  uint128_t lo;
+  uint128_t hi;
+} uint256_t;
+
+uint16_t add16 (uint8_t a, uint8_t b)
+{
+  uint8_t tmp = a + b;
+  uint8_t overflow = 0;
+  if (tmp < a)
+    overflow = 1;
+
+  uint16_t res = overflow;
+  res <<= 8;
+  res += tmp;
+  return res;
+}
+
+uint32_t add32 (uint16_t a, uint16_t b)
+{
+  uint16_t tmp = a + b;
+  uint16_t overflow = 0;
+  if (tmp < a)
+    overflow = 1;
+
+  uint32_t res = overflow;
+  res <<= 16;
+  res += tmp;
+  return res;
+}
+
+uint64_t add64 (uint32_t a, uint32_t b)
+{
+  uint32_t tmp = a + b;
+  uint32_t overflow = 0;
+  if (tmp < a)
+    overflow = 1;
+
+  uint64_t res = overflow;
+  res <<= 32;
+  res += tmp;
+  return res;
+}
+
+uint128_t add128 (uint64_t a, uint64_t b)
+{
+  uint64_t tmp = a + b;
+  uint64_t overflow = 0;
+  if (tmp < a)
+    overflow = 1;
+
+  uint128_t res = overflow;
+  res <<= 64;
+  res += tmp;
+  return res;
+}
+
+uint256_t add256 (uint128_t a, uint128_t b)
+{
+  uint128_t tmp = a + b;
+  uint128_t overflow = 0;
+  if (tmp < a)
+    overflow = 1;
+
+  uint256_t res;
+  res.hi = overflow;
+  res.lo = tmp;
+  return res;
+}
+
+uint16_t sub16 (uint8_t a, uint8_t b)
+{
+  uint8_t tmp = a - b;
+  uint8_t overflow = 0;
+  if (tmp > a)
+    overflow = -1;
+
+  uint16_t res = overflow;
+  res <<= 8;
+  res += tmp;
+  return res;
+}
+
+uint32_t sub32 (uint16_t a, uint16_t b)
+{
+  uint16_t tmp = a - b;
+  uint16_t overflow = 0;
+  if (tmp > a)
+    overflow = -1;
+
+  uint32_t res = overflow;
+  res <<= 16;
+  res += tmp;
+  return res;
+}
+
+uint64_t sub64 (uint32_t a, uint32_t b)
+{
+  uint32_t tmp = a - b;
+  uint32_t overflow = 0;
+  if (tmp > a)
+    overflow = -1;
+
+  uint64_t res = overflow;
+  res <<= 32;
+  res += tmp;
+  return res;
+}
+
+uint128_t sub128 (uint64_t a, uint64_t b)
+{
+  uint64_t tmp = a - b;
+  uint64_t overflow = 0;
+  if (tmp > a)
+    overflow = -1;
+
+  uint128_t res = overflow;
+  res <<= 64;
+  res += tmp;
+  return res;
+}
+
+uint256_t sub256 (uint128_t a, uint128_t b)
+{
+  uint128_t tmp = a - b;
+  uint128_t overflow = 0;
+  if (tmp > a)
+    overflow = -1;
+
+  uint256_t res;
+  res.hi = overflow;
+  res.lo = tmp;
+  return res;
+}
+
+/* { dg-final { scan-tree-dump-times "= .ADD_OVERFLOW \\(a_\[0-9\]+\\(D\\), b_\[0-9\]+\\(D\\)\\)" 5 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "= .SUB_OVERFLOW \\(a_\[0-9\]+\\(D\\), b_\[0-9\]+\\(D\\)\\)" 5 "optimized" } } */
diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c
index 4c89fddcf..716bf9e35 100644
--- a/gcc/tree-ssa-math-opts.c
+++ b/gcc/tree-ssa-math-opts.c
@@ -3290,6 +3290,27 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2,
     }
 }
 
+/* Check if the corresponding operation has a wider equivalent on the target.  */
+
+static bool
+wider_optab_check_p (optab op, machine_mode mode, int unsignedp)
+{
+  machine_mode wider_mode;
+  FOR_EACH_WIDER_MODE (wider_mode, mode)
+    {
+      machine_mode next_mode;
+      if (optab_handler (op, wider_mode) != CODE_FOR_nothing
+          || (op == smul_optab
+              && GET_MODE_WIDER_MODE (wider_mode).exists (&next_mode)
+              && (find_widening_optab_handler ((unsignedp
+                                                ? umul_widen_optab
+                                                : smul_widen_optab),
+                                               next_mode, mode))))
+        return true;
+    }
+
+  return false;
+}
 
 /* Helper function of match_uaddsub_overflow.  Return 1
    if USE_STMT is unsigned overflow check ovf != 0 for
@@ -3390,12 +3411,18 @@ match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt,
   gimple *use_stmt;
 
   gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR);
+  optab op = code == PLUS_EXPR ? uaddv4_optab : usubv4_optab;
+  machine_mode mode = TYPE_MODE (type);
+  int unsignedp = TYPE_UNSIGNED (type);
   if (!INTEGRAL_TYPE_P (type)
-      || !TYPE_UNSIGNED (type)
+      || !unsignedp
       || has_zero_uses (lhs)
-      || has_single_use (lhs)
-      || optab_handler (code == PLUS_EXPR ? uaddv4_optab : usubv4_optab,
-                        TYPE_MODE (type)) == CODE_FOR_nothing)
+      || has_single_use (lhs))
+    return false;
+
+  if (optab_handler (op, mode) == CODE_FOR_nothing
+      && (!flag_uaddsub_overflow_match_all
+          || !wider_optab_check_p (op, mode, unsignedp)))
     return false;
 
   FOR_EACH_IMM_USE_FAST (use_p, iter, lhs)
--
2.33.0
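
Illustration of the pattern the new option targets (a minimal sketch, not part of the patch; function names add_carry and add_builtin are illustrative). The hand-written carry check in add_carry has the shape match_uaddsub_overflow recognizes; with -O2 -fuaddsub-overflow-match-all it is expected to be rewritten into the same .ADD_OVERFLOW internal call that the builtin in add_builtin lowers to, which is what the scan-tree-dump checks in gcc/testsuite/gcc.dg/uaddsub.c look for in the optimized dump.

/* Illustrative sketch only, not part of the patch.  */
#include <stdint.h>

/* Manual carry computation: the form match_uaddsub_overflow matches.  */
uint32_t add_carry (uint16_t a, uint16_t b)
{
  uint16_t sum = a + b;
  uint16_t carry = 0;
  if (sum < a)          /* unsigned overflow (carry) check */
    carry = 1;
  return ((uint32_t) carry << 16) + sum;
}

/* Equivalent using the builtin, which lowers to .ADD_OVERFLOW directly.  */
uint32_t add_builtin (uint16_t a, uint16_t b)
{
  uint16_t sum;
  uint16_t carry = __builtin_add_overflow (a, b, &sum);
  return ((uint32_t) carry << 16) + sum;
}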