From 8f8eb202662d4f87440285389b5ee758d469f4be Mon Sep 17 00:00:00 2001 From: wangding16 Date: Wed, 6 Dec 2023 11:51:13 +0800 Subject: [PATCH] [Sync] Sync patch from openeuler/gcc 0146-LOOP-CRC32-Add-Crc32-Optimization-in-Gzip-For-crc32-.patch --- ...rc32-Optimization-in-Gzip-For-crc32-.patch | 2354 +++++++++++++++++ 1 file changed, 2354 insertions(+) create mode 100644 0146-LOOP-CRC32-Add-Crc32-Optimization-in-Gzip-For-crc32-.patch diff --git a/0146-LOOP-CRC32-Add-Crc32-Optimization-in-Gzip-For-crc32-.patch b/0146-LOOP-CRC32-Add-Crc32-Optimization-in-Gzip-For-crc32-.patch new file mode 100644 index 0000000..a9a8e94 --- /dev/null +++ b/0146-LOOP-CRC32-Add-Crc32-Optimization-in-Gzip-For-crc32-.patch @@ -0,0 +1,2354 @@ +From 179412c66d0cdd6a48ef1c29acae90908102a1c9 Mon Sep 17 00:00:00 2001 +From: xingyushuai +Date: Mon, 24 Apr 2023 09:34:35 +0800 +Subject: [PATCH 08/13] [LOOP CRC32]Add Crc32 Optimization in Gzip For crc32 + algorithm in APBC int_gzip. Match crc32 lookup table algorithm. An example + for crc32 lookup table alg: ```c do { c = crc_32_tab[((int)c ^ (*s++)) & + 0xff] ^ (c >> 8); } while (--n); + +Usage: `gcc -O3 -march=armv8.1-a -floop-crc yourfile.c` +Note: The cpu you use needs to support the crc32 instructions +--- + gcc/config/aarch64/aarch64-builtins.c | 29 + + gcc/config/aarch64/aarch64-protos.h | 1 + + gcc/config/aarch64/aarch64.c | 12 + + gcc/doc/tm.texi | 9 + + gcc/doc/tm.texi.in | 2 + + gcc/match.pd | 146 +-- + gcc/passes.def | 2 +- + gcc/target.def | 14 + + gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c | 85 -- + .../tree-ssa/loop-crc-1.c.042t.loop_crc | 90 -- + gcc/testsuite/gcc.dg/tree-ssa/loop-crc-2.c | 88 -- + .../tree-ssa/loop-crc-4.c.042t.loop_crc | 0 + .../loop-crc-calculation-check-fail.c | 156 --- + ...crc-calculation-check-fail.c.042t.loop_crc | 64 -- + .../loop-crc-calculation-check-fail.s | 329 ------- + ...crc-3.c => loop-crc-loop-condition-fail.c} | 6 +- + ...op-crc-4.c => loop-crc-loop-form-fail-2.c} | 7 +- + 
.../gcc.dg/tree-ssa/loop-crc-loop-form-fail.c | 3 +- + .../gcc.dg/tree-ssa/loop-crc-sucess.c | 7 +- + .../tree-ssa/loop-crc-table-check-fail.c | 3 +- + gcc/tree-ssa-loop-crc.c | 903 +++++++++++++++--- + 21 files changed, 873 insertions(+), 1083 deletions(-) + delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c + delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c.042t.loop_crc + delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-2.c + delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-4.c.042t.loop_crc + delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.c + delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.c.042t.loop_crc + delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.s + rename gcc/testsuite/gcc.dg/tree-ssa/{loop-crc-3.c => loop-crc-loop-condition-fail.c} (97%) + rename gcc/testsuite/gcc.dg/tree-ssa/{loop-crc-4.c => loop-crc-loop-form-fail-2.c} (95%) + +diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c +index d92157dff..1e8b046da 100644 +--- a/gcc/config/aarch64/aarch64-builtins.c ++++ b/gcc/config/aarch64/aarch64-builtins.c +@@ -441,6 +441,12 @@ typedef struct + #define VAR1(T, N, MAP, A) \ + AARCH64_SIMD_BUILTIN_##T##_##N##A, + ++enum aarch64_crc_builtins{ ++ AARCH64_BUILTIN_CRC32B, ++ AARCH64_BUILTIN_CRC32H, ++ AARCH64_BUILTIN_CRC32W, ++}; ++ + enum aarch64_builtins + { + AARCH64_BUILTIN_MIN, +@@ -1321,6 +1327,29 @@ aarch64_general_builtin_decl (unsigned code, bool) + + return aarch64_builtin_decls[code]; + } ++/* Implement TARGET_GET_CRC_BUILTIN_CODE */ ++unsigned ++get_crc_builtin_code(unsigned code, bool) ++{ ++ if (code > AARCH64_BUILTIN_CRC32W) ++ return AARCH64_BUILTIN_MIN; ++ ++ unsigned res = AARCH64_BUILTIN_MIN; ++ switch (code) { ++ case AARCH64_BUILTIN_CRC32B: ++ res = AARCH64_BUILTIN_crc32b; ++ break; ++ case AARCH64_BUILTIN_CRC32H: ++ res = AARCH64_BUILTIN_crc32h; ++ break; ++ 
case AARCH64_BUILTIN_CRC32W: ++ res = AARCH64_BUILTIN_crc32w; ++ break; ++ default: ++ break; ++ } ++ return res; ++} + + typedef enum + { +diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h +index 9b6d309a7..a0ca662bc 100644 +--- a/gcc/config/aarch64/aarch64-protos.h ++++ b/gcc/config/aarch64/aarch64-protos.h +@@ -723,6 +723,7 @@ tree aarch64_general_fold_builtin (unsigned int, tree, unsigned int, tree *); + gimple *aarch64_general_gimple_fold_builtin (unsigned int, gcall *); + rtx aarch64_general_expand_builtin (unsigned int, tree, rtx, int); + tree aarch64_general_builtin_decl (unsigned, bool); ++unsigned get_crc_builtin_code(unsigned , bool); + tree aarch64_general_builtin_rsqrt (unsigned int); + tree aarch64_builtin_vectorized_function (unsigned int, tree, tree); + +diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c +index cbdde11b0..b8407c612 100644 +--- a/gcc/config/aarch64/aarch64.c ++++ b/gcc/config/aarch64/aarch64.c +@@ -13735,6 +13735,15 @@ aarch64_builtin_decl (unsigned int code, bool initialize_p) + gcc_unreachable (); + } + ++/* Implement TARGET_GET_CRC_BUILTIN_CODE. */ ++static unsigned ++aarch64_get_crc_builtin_code(unsigned code, bool initialize_p) ++{ ++ unsigned subcode = get_crc_builtin_code(code,initialize_p); ++ unsigned res = subcode << AARCH64_BUILTIN_SHIFT; ++ return res; ++} ++ + /* Return true if it is safe and beneficial to use the approximate rsqrt optabs + to optimize 1.0/sqrt. 
*/ + +@@ -23911,6 +23920,9 @@ aarch64_run_selftests (void) + #undef TARGET_BUILTIN_DECL + #define TARGET_BUILTIN_DECL aarch64_builtin_decl + ++#undef TARGET_GET_CRC_BUILTIN_CODE ++#define TARGET_GET_CRC_BUILTIN_CODE aarch64_get_crc_builtin_code ++ + #undef TARGET_BUILTIN_RECIPROCAL + #define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal + +diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi +index 0508fce57..b46418d0b 100644 +--- a/gcc/doc/tm.texi ++++ b/gcc/doc/tm.texi +@@ -11610,6 +11610,15 @@ If @var{code} is out of range the function should return + @code{error_mark_node}. + @end deftypefn + ++@deftypefn {Target Hook} unsigned TARGET_GET_CRC_BUILTIN_CODE (unsigned @var{code}, bool @var{initialize_p}) ++Define this hook to get crc32 builtin code. It should be a function that ++returns the crc32 builtin function code @var{code}. ++If there is no such builtin and it cannot be initialized at this time ++if @var{initialize_p} is true the function should return @code{NULL_TREE}. ++If @var{code} is out of range the function should return ++@code{error_mark_node}. ++@end deftypefn ++ + @deftypefn {Target Hook} rtx TARGET_EXPAND_BUILTIN (tree @var{exp}, rtx @var{target}, rtx @var{subtarget}, machine_mode @var{mode}, int @var{ignore}) + + Expand a call to a machine specific built-in function that was set up by +diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in +index 3b70ea484..2663547c7 100644 +--- a/gcc/doc/tm.texi.in ++++ b/gcc/doc/tm.texi.in +@@ -7941,6 +7941,8 @@ to by @var{ce_info}. 
+ + @hook TARGET_BUILTIN_DECL + ++@hook TARGET_GET_CRC_BUILTIN_CODE ++ + @hook TARGET_EXPAND_BUILTIN + + @hook TARGET_RESOLVE_OVERLOADED_BUILTIN +diff --git a/gcc/match.pd b/gcc/match.pd +index 87b316953..0f92003f7 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -3487,160 +3487,17 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + ) + #endif + +- +-#if GIMPLE +-(if (canonicalize_math_p ()) +-/* These patterns are mostly used by PHIOPT to move some operations outside of +- the if statements. They should be done late because it gives jump threading +- and few other passes to reduce what is going on. */ +-/* a ? x op POW2 : x -> x op (a ? POW2 : 0). */ +- (for op (plus minus bit_ior bit_xor lshift rshift lrotate rrotate) +- (simplify +- (cond @0 (op:s @1 INTEGER_CST@2) @1) +- /* powerof2cst */ +- (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@2)) +- (with { +- tree shift = build_int_cst (integer_type_node, tree_log2 (@2)); +- } +- (op @1 (lshift (convert (convert:boolean_type_node @0)) { shift; }))) +- ) +- ) +- ) +-) +-#endif +- +-#if GIMPLE +-/* These patterns are mostly used by FORWPROP to move some operations outside of +- the if statements. They should be done late because it gives jump threading +- and few other passes to reduce what is going on. 
*/ +-/* Mul64 is defined as a multiplication algorithm which compute two 64-bit integers to one 128-bit integer +- (i64 ResLo, i64 ResHi) = Mul64(i64 In0, i64 In1) { +- In0Lo = In0(D) & 4294967295; +- In0Hi = In0(D) >> 32; +- In1Lo = In1(D) & 4294967295; +- In1Hi = In1(D) >> 32; +- Mull_01 = In0Hi * In1Lo; +- Addc = In0Lo * In1Hi + Mull_01; +- addc32 = Addc << 32; +- ResLo = In0Lo * In1Lo + addc32; +- ResHi = ((long unsigned int) (addc32 > ResLo)) + +- (((long unsigned int) (Mull_01 > Addc)) << 32) + (Addc >> 32) + In0Hi * In1Hi; +- } */ +- (simplify +- (plus +- (plus +- (convert +- (gt @10 +- (plus +- (mult @4 @6) +- (lshift@10 @9 @3)))) +- (lshift +- (convert +- (gt @8 @9)) @3)) +- (plus@11 +- (rshift +- (plus@9 +- (mult (bit_and@4 SSA_NAME@0 @2) @7) +- (mult@8 @5 (bit_and@6 SSA_NAME@1 INTEGER_CST@2))) @3) +- (mult (rshift@5 SSA_NAME@0 @3) +- (rshift@7 SSA_NAME@1 INTEGER_CST@3)))) +- (if (INTEGRAL_TYPE_P (type) && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1) && +- TYPE_PRECISION (type) == 64) +- (with { +- tree i128_type = build_nonstandard_integer_type (128, TYPE_UNSIGNED (type)); +- tree shift = build_int_cst (integer_type_node, 64); +- //direct_internal_fn_supported_p (UMULH, type, OPTIMIZE_FOR_BOTH) +- } +- (convert:type (rshift +- (mult (convert:i128_type @0) (convert:i128_type @1)) { shift; }))) +- ) +- ) +- +- /* (i64 ResLo, i64 ResHi) = Mul64(i64 In0, i64 In1) { +- In0Lo = In0(D) & 4294967295; +- In0Hi = In0(D) >> 32; +- In1Lo = In1(D) & 4294967295; +- In1Hi = In1(D) >> 32; +- Mull_01 = In0Hi * In1Lo; +- Addc = In0Lo * In1Hi + Mull_01; +- addc32 = Addc << 32; +- ResLo = In0(D) * In1(D); +- ResHi = ((long unsigned int) (addc32 > ResLo)) + +- (((long unsigned int) (Mull_01 > Addc)) << 32) + (Addc >> 32) + In0Hi * In1Hi; +- } */ +- (simplify +- (plus +- (plus +- (convert +- (gt (lshift@10 @9 @3) +- (mult @0 @1))) +- (lshift +- (convert +- (gt @8 @9)) @3)) +- (plus@11 +- (rshift +- (plus@9 +- (mult (bit_and@4 SSA_NAME@0 @2) @7) +- (mult@8 @5 
(bit_and@6 SSA_NAME@1 INTEGER_CST@2))) @3) +- (mult (rshift@5 SSA_NAME@0 @3) +- (rshift@7 SSA_NAME@1 INTEGER_CST@3)))) +- (if (INTEGRAL_TYPE_P (type) && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1) && +- TYPE_PRECISION (type) == 64) +- (with { +- tree i128_type = build_nonstandard_integer_type (128, TYPE_UNSIGNED (type)); +- tree shift = build_int_cst (integer_type_node, 64); +- //direct_internal_fn_supported_p (UMULH, type, OPTIMIZE_FOR_BOTH) +- } +- (convert:type (rshift +- (mult (convert:i128_type @0) (convert:i128_type @1)) { shift; }))) +- ) +- ) +-#endif +- +-#if GIMPLE +-/* These patterns are mostly used by FORWPROP to move some operations outside of +- the if statements. They should be done late because it gives jump threading +- and few other passes to reduce what is going on. */ +- /* +- In0Lo = In0(D) & 4294967295; +- In0Hi = In0(D) >> 32; +- In1Lo = In1(D) & 4294967295; +- In1Hi = In1(D) >> 32; +- Addc = In0Lo * In1Hi + In0Hi * In1Lo; +- addc32 = Addc << 32; +- ResLo = In0Lo * In1Lo + addc32 +- */ +- (simplify +- (plus (mult @4 @5) +- (lshift +- (plus +- (mult (bit_and@4 SSA_NAME@0 @2) (rshift SSA_NAME@1 @3)) +- (mult (rshift SSA_NAME@0 @3) (bit_and@5 SSA_NAME@1 INTEGER_CST@2))) INTEGER_CST@3)) +- (if (INTEGRAL_TYPE_P (type) && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1) && +- TYPE_PRECISION (type) == 64) +- (with { +- tree i128_type = build_nonstandard_integer_type (128, TYPE_UNSIGNED (type)); +- tree shift = build_int_cst (integer_type_node, 64); +- //direct_internal_fn_supported_p (UMULH, type, OPTIMIZE_FOR_BOTH) +- } +- (mult (convert:type @0) (convert:type @1))) +- ) +- ) +-#endif +- +- + #if GIMPLE + /* Try to match */ + /* + _4 = (int) _3; //NOP_EXPR (SSA_NAME @2) + _5 = _4 ^ c_10; //BIT_XOR_EXPR (SSA_NAME@1, SSA_NAME) +-_6 = _5 & 255; //BIT_XOR_EXPR (SSA_NAME, INTEGER_CST@3) ++_6 = _5 & 255; //BIT_AND_EXPR (SSA_NAME, INTEGER_CST@3) + */ + (match (crc_match_index @1 @2 @3) + (bit_and (bit_xor (nop SSA_NAME@2) SSA_NAME@1) 
INTEGER_CST@3) + (if (INTEGRAL_TYPE_P (type) && tree_to_uhwi(@3) == 255)) + ) +- + #endif + + #if GIMPLE +@@ -3653,7 +3510,6 @@ c_19 = _7 ^ _8; // BIT_XOR_EXPR (SSA_NAME@3, SSA_NAME) + (bit_xor SSA_NAME@3 (rshift SSA_NAME@1 INTEGER_CST@2)) + (if (INTEGRAL_TYPE_P (type) && tree_to_uhwi(@2) == 8)) + ) +- + #endif + + /* Simplification moved from fold_cond_expr_with_comparison. It may also +diff --git a/gcc/passes.def b/gcc/passes.def +index 7abd946ce..df7d65733 100644 +--- a/gcc/passes.def ++++ b/gcc/passes.def +@@ -92,7 +92,7 @@ along with GCC; see the file COPYING3. If not see + NEXT_PASS (pass_cd_dce); + NEXT_PASS (pass_phiopt, true /* early_p */); + NEXT_PASS (pass_array_widen_compare); +- NEXT_PASS (pass_loop_crc); ++ NEXT_PASS (pass_loop_crc); + NEXT_PASS (pass_tail_recursion); + NEXT_PASS (pass_convert_switch); + NEXT_PASS (pass_cleanup_eh); +diff --git a/gcc/target.def b/gcc/target.def +index 202056411..34d3561bd 100644 +--- a/gcc/target.def ++++ b/gcc/target.def +@@ -2421,6 +2421,20 @@ If @var{code} is out of range the function should return\n\ + @code{error_mark_node}.", + tree, (unsigned code, bool initialize_p), NULL) + ++/* Initialize (if INITIALIZE_P is true) and return the real code of ++ target-specific built-in function . ++ Return NULL if that is not possible. Return error_mark_node if CODE ++ is outside of the range of valid crc32 codes. */ ++DEFHOOK ++(get_crc_builtin_code, ++ "Define this hook to get crc32 builtin code. It should be a function that\n\ ++returns the crc32 builtin function code @var{code}.\n\ ++If there is no such builtin and it cannot be initialized at this time\n\ ++if @var{initialize_p} is true the function should return @code{NULL_TREE}.\n\ ++If @var{code} is out of range the function should return\n\ ++@code{error_mark_node}.", ++ unsigned , (unsigned code, bool initialize_p), NULL) ++ + /* Expand a target-specific builtin. 
*/ + DEFHOOK + (expand_builtin, +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c +deleted file mode 100644 +index 07f9e01ec..000000000 +--- a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c ++++ /dev/null +@@ -1,85 +0,0 @@ +-/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ +-/* { dg-options "-O3 -mabi=lp64 -floop-crc -fdump-tree-loop_crc-details" } */ +- +-#include +-#include +-typedef unsigned long ulg; +-typedef unsigned char uch; +- +-static const ulg crc_32_tab[] = { +- 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, +- 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, +- 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, +- 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, +- 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, +- 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, +- 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, +- 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, +- 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, +- 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, +- 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, +- 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, +- 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, +- 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, +- 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, +- 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, +- 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, +- 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, +- 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, +- 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, +- 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, +- 0x346ed9fcL, 0xad678846L, 
0xda60b8d0L, 0x44042d73L, 0x33031de5L, +- 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, +- 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, +- 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, +- 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, +- 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, +- 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, +- 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, +- 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, +- 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, +- 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, +- 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, +- 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, +- 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, +- 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, +- 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, +- 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, +- 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, +- 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, +- 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, +- 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, +- 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, +- 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, +- 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, +- 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, +- 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, +- 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, +- 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, +- 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, +- 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 
0x5a05df1bL, +- 0x2d02ef8dL +-}; +- +-ulg updcrc(s, n) +- uch *s; /* pointer to bytes to pump through */ +- unsigned n; /* number of bytes in s[] */ +-{ +- register ulg c; /* temporary variable */ +- +- static ulg crc = (ulg)0xffffffffL; /* shift register contents */ +- +- if (s == NULL) { +- c = 0xffffffffL; +- } else { +- c = crc; +- if (n) +- do { +- c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8); +- } while (--n); +- } +- crc = c; +- return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ +-} +-/* { dg-final { scan-tree-dump-times "Processing loop" 1 "loop_crc"} } */ +-/* { dg-final { scan-tree-dump-times "the loop can be optimized" 1 "loop_crc"} } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c.042t.loop_crc b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c.042t.loop_crc +deleted file mode 100644 +index c726059f3..000000000 +--- a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c.042t.loop_crc ++++ /dev/null +@@ -1,90 +0,0 @@ +- +-;; Function updcrc (updcrc, funcdef_no=0, decl_uid=3687, cgraph_uid=1, symbol_order=1) +- +-;; 2 loops found +-;; +-;; Loop 0 +-;; header 0, latch 1 +-;; depth 0, outer -1 +-;; nodes: 0 1 2 3 6 4 7 5 +-;; +-;; Loop 1 +-;; header 4, latch 7 +-;; depth 1, outer 0 +-;; nodes: 4 7 +-;; 2 succs { 5 3 } +-;; 3 succs { 6 5 } +-;; 6 succs { 4 } +-;; 4 succs { 7 5 } +-;; 7 succs { 4 } +-;; 5 succs { 1 } +- +-Starting the loop_crc pass +-====================================== +-Processing loop 1: +-====================================== +-;; +-;; Loop 1 +-;; header 4, latch 7 +-;; depth 1, outer 0 +-;; nodes: 4 7 +- +- +-The 1th loop form is success matched,and the loop can be optimized. 
+-updcrc (uch * s, unsigned int n) +-{ +- static ulg crc = 4294967295; +- register ulg c; +- unsigned char _2; +- long unsigned int _3; +- long unsigned int _4; +- long unsigned int _5; +- long unsigned int _6; +- long unsigned int _7; +- ulg _21; +- +- : +- if (s_12(D) == 0B) +- goto ; [INV] +- else +- goto ; [INV] +- +- : +- c_14 = crc; +- if (n_15(D) != 0) +- goto ; [INV] +- else +- goto ; [INV] +- +- : +- +- : +- # s_8 = PHI +- # n_9 = PHI +- # c_10 = PHI +- s_16 = s_8 + 1; +- _2 = *s_8; +- _3 = (long unsigned int) _2; +- _4 = _3 ^ c_10; +- _5 = _4 & 255; +- _6 = crc_32_tab[_5]; +- _7 = c_10 >> 8; +- c_17 = _6 ^ _7; +- n_18 = n_9 + 4294967295; +- if (n_18 != 0) +- goto ; [INV] +- else +- goto ; [INV] +- +- : +- goto ; [100.00%] +- +- : +- # c_11 = PHI <4294967295(2), c_14(3), c_17(4)> +- crc = c_11; +- _21 = c_11 ^ 4294967295; +- return _21; +- +-} +- +- +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-2.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-2.c +deleted file mode 100644 +index f73c4d550..000000000 +--- a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-2.c ++++ /dev/null +@@ -1,88 +0,0 @@ +-/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ +-/* { dg-options "-O3 -mabi=lp64 -floop-crc -fdump-tree-loop_crc-details" } */ +- +-#include +-#include +-typedef unsigned long ulg; +-typedef unsigned char uch; +- +-static const ulg crc_32_tab[] = { +- 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, +- 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, +- 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, +- 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, +- 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, +- 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, +- 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, +- 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, +- 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, +- 
0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, +- 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, +- 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, +- 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, +- 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, +- 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, +- 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, +- 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, +- 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, +- 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, +- 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, +- 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, +- 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, +- 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, +- 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, +- 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, +- 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, +- 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, +- 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, +- 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, +- 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, +- 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, +- 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, +- 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, +- 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, +- 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, +- 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, +- 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, +- 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, +- 0x2cd99e8bL, 0x5bdeae1dL, 
0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, +- 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, +- 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, +- 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, +- 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, +- 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, +- 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, +- 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, +- 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, +- 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, +- 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, +- 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, +- 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, +- 0x2d02ef8dL +-}; +- +-ulg updcrc(s, n) +- uch *s; /* pointer to bytes to pump through */ +- unsigned n; /* number of bytes in s[] */ +-{ +- register ulg c; /* temporary variable */ +- +- static ulg crc = (ulg)0xffffffffL; /* shift register contents */ +- +- if (s == NULL) { +- c = 0xffffffffL; +- } else { +- c = crc; +- if (n) +- do { +- c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8); +- for (int i = 0; i < 5; i++) { +- c++; +- } +- +- } while (--n); +- } +- crc = c; +- return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ +-} +-/* { dg-final { scan-tree-dump-times "Wrong loop form for crc matching." 
1 "loop_crc"} } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-4.c.042t.loop_crc b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-4.c.042t.loop_crc +deleted file mode 100644 +index e69de29bb..000000000 +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.c +deleted file mode 100644 +index 71b25f537..000000000 +--- a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.c ++++ /dev/null +@@ -1,156 +0,0 @@ +-/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ +-/* { dg-options "-O3 -mabi=lp64 -floop-crc -fdump-tree-loop_crc-details" } */ +- +-#include +-#include +-typedef unsigned long ulg; +-typedef unsigned char uch; +- +-static const ulg crc_32_tab[] = { +- 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, +- 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, +- 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, +- 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, +- 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, +- 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, +- 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, +- 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, +- 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, +- 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, +- 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, +- 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, +- 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, +- 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, +- 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, +- 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, +- 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, +- 0x1b01a57bL, 0x8208f4c1L, 
0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, +- 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, +- 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, +- 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, +- 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, +- 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, +- 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, +- 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, +- 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, +- 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, +- 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, +- 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, +- 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, +- 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, +- 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, +- 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, +- 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, +- 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, +- 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, +- 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, +- 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, +- 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, +- 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, +- 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, +- 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, +- 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, +- 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, +- 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, +- 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, +- 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 
0x40df0b66L, +- 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, +- 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, +- 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, +- 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, +- 0x2d02ef8dL +-}; +- +-int test[5] = {0}; +- +-ulg updcrc(s, n) +- uch *s; +- unsigned n; +-{ +- register ulg c; +- +- static ulg crc = (ulg)0xffffffffL; +- int a = 0; +- if (s == NULL) { +- c = 0xffffffffL; +- } else { +- c = crc; +- if (n) +- do { +- a++; +- c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8) ; +- } while (--n) ; +- } +- crc = c; +- return c ^ 0xffffffffL*a; +-} +- +-ulg updcrc1(s, n) +- uch *s; +- unsigned n; +-{ +- register ulg c; +- +- static ulg crc = (ulg)0xffffffffL; +- if (s == NULL) { +- c = 0xffffffffL; +- } else { +- c = crc; +- unsigned n_back = n; +- if (n) +- do { +- c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8) ; +- n = n - 2; +- } while (n != 0) ; +- } +- +- crc = c; +- return c ^ 0xffffffffL; +-} +- +-ulg updcrc2(s, n) +- uch *s; +- unsigned n; +-{ +- register ulg c; +- +- static ulg crc = (ulg)0xffffffffL; +- if (s == NULL) { +- c = 0xffffffffL; +- } else { +- c = crc; +- unsigned n_back = n; +- if (n) +- do { +- c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8) + 1; +- } while (--n) ; +- } +- +- crc = c; +- return c ^ 0xffffffffL; +-} +-/* +-ulg updcrc3(s, n) +- uch *s; +- int n; +-{ +- register ulg c; +- +- static ulg crc = (ulg)0xffffffffL; +- if (s == NULL) { +- c = 0xffffffffL; +- } else { +- c = crc; +- do { +- c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8); +- --n; +- } while (n ) ; +- } +- +- crc = c; +- return c ^ 0xffffffffL; +-}*/ +-/* { dg-final { scan-tree-dump-times "num of phi noeds check failed." 1 "loop_crc"} } */ +-/* { dg-final { scan-tree-dump-times "evolution pattern check failed." 1 "loop_crc"} } */ +-/* { dg-final { scan-tree-dump-times "calculation pattern check failed." 
1 "loop_crc"} } */ +- +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.c.042t.loop_crc b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.c.042t.loop_crc +deleted file mode 100644 +index 6d52a8684..000000000 +--- a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.c.042t.loop_crc ++++ /dev/null +@@ -1,64 +0,0 @@ +- +-;; Function updcrc3 (updcrc3, funcdef_no=0, decl_uid=3687, cgraph_uid=1, symbol_order=1) +- +-;; 2 loops found +-;; +-;; Loop 0 +-;; header 0, latch 1 +-;; depth 0, outer -1 +-;; nodes: 0 1 2 3 4 5 +-;; +-;; Loop 1 +-;; header 4, latch 4 +-;; depth 1, outer 0 +-;; nodes: 4 +-;; 2 succs { 5 3 } +-;; 3 succs { 4 5 } +-;; 4 succs { 4 } +-;; 5 succs { 1 } +- +-Starting the loop_crc pass +-====================================== +-Processing loop 1: +-====================================== +-;; +-;; Loop 1 +-;; header 4, latch 4 +-;; depth 1, outer 0 +-;; nodes: 4 +- +- +- +-Wrong loop form for crc matching. +-updcrc3 (uch * s, unsigned int n) +-{ +- unsigned int n_back; +- static ulg crc = 4294967295; +- register ulg c; +- ulg _22; +- +- : +- if (s_12(D) == 0B) +- goto ; [INV] +- else +- goto ; [INV] +- +- : +- c_14 = crc; +- if (n_15(D) != 0) +- goto ; [INV] +- else +- goto ; [INV] +- +- : +- goto ; [100.00%] +- +- : +- # c_11 = PHI <4294967295(2), c_14(3)> +- crc = c_11; +- _22 = c_11 ^ 4294967295; +- return _22; +- +-} +- +- +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.s b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.s +deleted file mode 100644 +index cae934bfe..000000000 +--- a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.s ++++ /dev/null +@@ -1,329 +0,0 @@ +- .arch armv8-a +- .file "loop-crc-calculation-check-fail.c" +- .text +- .section .rodata +- .align 3 +- .type crc_32_tab, %object +- .size crc_32_tab, 2048 +-crc_32_tab: +- .xword 0 +- .xword 1996959894 +- .xword 3993919788 +- .xword 2567524794 +- .xword 124634137 +- .xword 
1886057615 +- .xword 3915621685 +- .xword 2657392035 +- .xword 249268274 +- .xword 2044508324 +- .xword 3772115230 +- .xword 2547177864 +- .xword 162941995 +- .xword 2125561021 +- .xword 3887607047 +- .xword 2428444049 +- .xword 498536548 +- .xword 1789927666 +- .xword 4089016648 +- .xword 2227061214 +- .xword 450548861 +- .xword 1843258603 +- .xword 4107580753 +- .xword 2211677639 +- .xword 325883990 +- .xword 1684777152 +- .xword 4251122042 +- .xword 2321926636 +- .xword 335633487 +- .xword 1661365465 +- .xword 4195302755 +- .xword 2366115317 +- .xword 997073096 +- .xword 1281953886 +- .xword 3579855332 +- .xword 2724688242 +- .xword 1006888145 +- .xword 1258607687 +- .xword 3524101629 +- .xword 2768942443 +- .xword 901097722 +- .xword 1119000684 +- .xword 3686517206 +- .xword 2898065728 +- .xword 853044451 +- .xword 1172266101 +- .xword 3705015759 +- .xword 2882616665 +- .xword 651767980 +- .xword 1373503546 +- .xword 3369554304 +- .xword 3218104598 +- .xword 565507253 +- .xword 1454621731 +- .xword 3485111705 +- .xword 3099436303 +- .xword 671266974 +- .xword 1594198024 +- .xword 3322730930 +- .xword 2970347812 +- .xword 795835527 +- .xword 1483230225 +- .xword 3244367275 +- .xword 3060149565 +- .xword 1994146192 +- .xword 31158534 +- .xword 2563907772 +- .xword 4023717930 +- .xword 1907459465 +- .xword 112637215 +- .xword 2680153253 +- .xword 3904427059 +- .xword 2013776290 +- .xword 251722036 +- .xword 2517215374 +- .xword 3775830040 +- .xword 2137656763 +- .xword 141376813 +- .xword 2439277719 +- .xword 3865271297 +- .xword 1802195444 +- .xword 476864866 +- .xword 2238001368 +- .xword 4066508878 +- .xword 1812370925 +- .xword 453092731 +- .xword 2181625025 +- .xword 4111451223 +- .xword 1706088902 +- .xword 314042704 +- .xword 2344532202 +- .xword 4240017532 +- .xword 1658658271 +- .xword 366619977 +- .xword 2362670323 +- .xword 4224994405 +- .xword 1303535960 +- .xword 984961486 +- .xword 2747007092 +- .xword 3569037538 +- .xword 1256170817 +- .xword 
1037604311 +- .xword 2765210733 +- .xword 3554079995 +- .xword 1131014506 +- .xword 879679996 +- .xword 2909243462 +- .xword 3663771856 +- .xword 1141124467 +- .xword 855842277 +- .xword 2852801631 +- .xword 3708648649 +- .xword 1342533948 +- .xword 654459306 +- .xword 3188396048 +- .xword 3373015174 +- .xword 1466479909 +- .xword 544179635 +- .xword 3110523913 +- .xword 3462522015 +- .xword 1591671054 +- .xword 702138776 +- .xword 2966460450 +- .xword 3352799412 +- .xword 1504918807 +- .xword 783551873 +- .xword 3082640443 +- .xword 3233442989 +- .xword 3988292384 +- .xword 2596254646 +- .xword 62317068 +- .xword 1957810842 +- .xword 3939845945 +- .xword 2647816111 +- .xword 81470997 +- .xword 1943803523 +- .xword 3814918930 +- .xword 2489596804 +- .xword 225274430 +- .xword 2053790376 +- .xword 3826175755 +- .xword 2466906013 +- .xword 167816743 +- .xword 2097651377 +- .xword 4027552580 +- .xword 2265490386 +- .xword 503444072 +- .xword 1762050814 +- .xword 4150417245 +- .xword 2154129355 +- .xword 426522225 +- .xword 1852507879 +- .xword 4275313526 +- .xword 2312317920 +- .xword 282753626 +- .xword 1742555852 +- .xword 4189708143 +- .xword 2394877945 +- .xword 397917763 +- .xword 1622183637 +- .xword 3604390888 +- .xword 2714866558 +- .xword 953729732 +- .xword 1340076626 +- .xword 3518719985 +- .xword 2797360999 +- .xword 1068828381 +- .xword 1219638859 +- .xword 3624741850 +- .xword 2936675148 +- .xword 906185462 +- .xword 1090812512 +- .xword 3747672003 +- .xword 2825379669 +- .xword 829329135 +- .xword 1181335161 +- .xword 3412177804 +- .xword 3160834842 +- .xword 628085408 +- .xword 1382605366 +- .xword 3423369109 +- .xword 3138078467 +- .xword 570562233 +- .xword 1426400815 +- .xword 3317316542 +- .xword 2998733608 +- .xword 733239954 +- .xword 1555261956 +- .xword 3268935591 +- .xword 3050360625 +- .xword 752459403 +- .xword 1541320221 +- .xword 2607071920 +- .xword 3965973030 +- .xword 1969922972 +- .xword 40735498 +- .xword 2617837225 +- .xword 
3943577151 +- .xword 1913087877 +- .xword 83908371 +- .xword 2512341634 +- .xword 3803740692 +- .xword 2075208622 +- .xword 213261112 +- .xword 2463272603 +- .xword 3855990285 +- .xword 2094854071 +- .xword 198958881 +- .xword 2262029012 +- .xword 4057260610 +- .xword 1759359992 +- .xword 534414190 +- .xword 2176718541 +- .xword 4139329115 +- .xword 1873836001 +- .xword 414664567 +- .xword 2282248934 +- .xword 4279200368 +- .xword 1711684554 +- .xword 285281116 +- .xword 2405801727 +- .xword 4167216745 +- .xword 1634467795 +- .xword 376229701 +- .xword 2685067896 +- .xword 3608007406 +- .xword 1308918612 +- .xword 956543938 +- .xword 2808555105 +- .xword 3495958263 +- .xword 1231636301 +- .xword 1047427035 +- .xword 2932959818 +- .xword 3654703836 +- .xword 1088359270 +- .xword 936918000 +- .xword 2847714899 +- .xword 3736837829 +- .xword 1202900863 +- .xword 817233897 +- .xword 3183342108 +- .xword 3401237130 +- .xword 1404277552 +- .xword 615818150 +- .xword 3134207493 +- .xword 3453421203 +- .xword 1423857449 +- .xword 601450431 +- .xword 3009837614 +- .xword 3294710456 +- .xword 1567103746 +- .xword 711928724 +- .xword 3020668471 +- .xword 3272380065 +- .xword 1510334235 +- .xword 755167117 +- .text +- .align 2 +- .global updcrc3 +- .type updcrc3, %function +-updcrc3: +-.LFB0: +- .cfi_startproc +- str x19, [sp, -48]! 
+- .cfi_def_cfa_offset 48 +- .cfi_offset 19, -48 +- str x0, [sp, 24] +- str w1, [sp, 20] +- ldr x0, [sp, 24] +- cmp x0, 0 +- bne .L2 +- mov x19, 4294967295 +- b .L3 +-.L2: +- adrp x0, crc.0 +- add x0, x0, :lo12:crc.0 +- ldr x19, [x0] +- ldr w0, [sp, 20] +- str w0, [sp, 44] +- ldr w0, [sp, 20] +- cmp w0, 0 +- beq .L3 +-.L4: +- ldr x0, [sp, 24] +- add x1, x0, 1 +- str x1, [sp, 24] +- ldrb w0, [x0] +- and x0, x0, 255 +- eor x0, x19, x0 +- and x1, x0, 255 +- adrp x0, crc_32_tab +- add x0, x0, :lo12:crc_32_tab +- ldr x1, [x0, x1, lsl 3] +- lsr x0, x19, 8 +- eor x19, x1, x0 +- ldr w0, [sp, 20] +- sub w0, w0, #1 +- str w0, [sp, 20] +- ldr w0, [sp, 20] +- cmp w0, 999 +- bls .L4 +-.L3: +- adrp x0, crc.0 +- add x0, x0, :lo12:crc.0 +- str x19, [x0] +- eor x0, x19, 4294967295 +- ldr x19, [sp], 48 +- .cfi_restore 19 +- .cfi_def_cfa_offset 0 +- ret +- .cfi_endproc +-.LFE0: +- .size updcrc3, .-updcrc3 +- .data +- .align 3 +- .type crc.0, %object +- .size crc.0, 8 +-crc.0: +- .xword 4294967295 +- .ident "GCC: (Kunpeng gcc 10.3.1-2.3.0.b006) 10.3.1" +- .section .note.GNU-stack,"",@progbits +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-3.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-condition-fail.c +similarity index 97% +rename from gcc/testsuite/gcc.dg/tree-ssa/loop-crc-3.c +rename to gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-condition-fail.c +index 70eb1b814..fefa949f9 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-3.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-condition-fail.c +@@ -74,12 +74,12 @@ ulg updcrc(s, n) + } else { + c = crc; + if (n) +- do { +- c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8); ++ if (n) do { ++ c = crc_32_tab[((int)c ^ (*s++)) & 0xff] ^ (c >> 8); + } while (--n || c != 0) ; + } + crc = c; + exit1: + return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ + } +-/* { dg-final { scan-tree-dump-times "Wrong loop form for crc matching." 
1 "loop_crc"} } */ +\ No newline at end of file ++/* { dg-final { scan-tree-dump-times "Wrong loop form for crc matching." 1 "loop_crc"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-4.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail-2.c +similarity index 95% +rename from gcc/testsuite/gcc.dg/tree-ssa/loop-crc-4.c +rename to gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail-2.c +index 1d7e0a319..b37446ec5 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-4.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail-2.c +@@ -75,8 +75,8 @@ ulg updcrc(s, n) + } else { + c = crc; + if (n) +- do { +- c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8) * test[c%5]; ++ if (n) do { ++ c = crc_32_tab[((int)c ^ (*s++)) & 0xff] ^ (c >> 8); + } while (--n) ; + } + do { +@@ -86,4 +86,5 @@ ulg updcrc(s, n) + crc = c; + return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ + } +-/* { dg-final { scan-tree-dump-times "Table check fail. not only single array is read." 2 "loop_crc"} } */ +\ No newline at end of file ++/* { dg-final { scan-tree-dump-times "Table check fail. not only single array is read." 2 "loop_crc"} } */ ++/* { dg-final { scan-tree-dump-times "Wrong crc table for crc matching." 1 "loop_crc"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail.c +index b59704e31..3dc500a46 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail.c +@@ -108,4 +108,5 @@ ulg updcrc1(s, n) + crc = c; + return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ + } +-/* { dg-final { scan-tree-dump-times "Wrong loop form for crc matching." 2 "loop_crc"} } */ +\ No newline at end of file ++/* { dg-final { scan-tree-dump-times "Table check fail. not only single array is read." 1 "loop_crc"} } */ ++/* { dg-final { scan-tree-dump-times "Wrong crc table for crc matching." 
2 "loop_crc"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-sucess.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-sucess.c +index e1e16eaf2..8b556efc8 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-sucess.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-sucess.c +@@ -73,12 +73,11 @@ ulg updcrc(s, n) + c = 0xffffffffL; + } else { + c = crc; +- if (n) +- do { +- c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8); ++ if (n) do { ++ c = crc_32_tab[((int)c ^ (*s++)) & 0xff] ^ (c >> 8); + } while (--n); + } + crc = c; + return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ + } +-/* { dg-final { scan-tree-dump-times "the loop can be optimized" 1 "loop_crc"} } */ +\ No newline at end of file ++/* { dg-final { scan-tree-dump-times "The 1th loop form is successmatched,and the loop can be optimized." 1 "loop_crc"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-table-check-fail.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-table-check-fail.c +index f03a4fa82..de21f4553 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-table-check-fail.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-table-check-fail.c +@@ -110,4 +110,5 @@ ulg updcrc1(s, n) + return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ + } + /* { dg-final { scan-tree-dump-times "Table check fail. not only single array is read." 2 "loop_crc"} } */ +-/* { dg-final { scan-tree-dump-times "Table check fail. Table not matching." 1 "loop_crc"} } */ +\ No newline at end of file ++/* { dg-final { scan-tree-dump-times "Wrong crc table for crc matching." 3 "loop_crc"} } */ ++/* { dg-final { scan-tree-dump-times "Table check fail. Table not matching." 1 "loop_crc"} } */ +diff --git a/gcc/tree-ssa-loop-crc.c b/gcc/tree-ssa-loop-crc.c +index 4982384c6..8225c2fa5 100644 +--- a/gcc/tree-ssa-loop-crc.c ++++ b/gcc/tree-ssa-loop-crc.c +@@ -1,5 +1,5 @@ +-/* Array widen compare. +- Copyright (C) 2022-2022 Free Software Foundation, Inc. ++/* loop crc. 
++ Copyright (C) 2023-2023 Free Software Foundation, Inc. + + This file is part of GCC. + +@@ -42,13 +42,235 @@ along with GCC; see the file COPYING3. If not see + #include "print-tree.h" + #include "cfghooks.h" + #include "gimple-fold.h" ++#include "diagnostic-core.h" ++ ++/* This pass handles scenarios similar to the following: ++ulg updcrc(s, n) ++ uch *s; ++ unsigned n; ++{ ++ register ulg c; ++ ++ static ulg crc = (ulg)0xffffffffL; ++ ++ if (s == NULL) { ++ c = 0xffffffffL; ++ } else { ++ c = crc; ++ if (n) do { ++ c = crc_32_tab[((int)c ^ (*s++)) & 0xff] ^ (c >> 8); ++ } while (--n); ++ } ++ crc = c; ++ return c ^ 0xffffffffL; ++} ++ ++If the hardware supports the crc instruction, then the pass completes the ++conversion of the above scenario into: ++ ++#define SIZE_U32 sizeof(uint32_t) ++unsigned long updcrc(s, n) ++ unsigned char *s; ++ unsigned n; ++{ ++ register unsigned long c; ++ ++ static unsigned long crc = (unsigned long)0xffffffffL; ++ ++ if (s == NULL) { ++ c = 0xffffffffL; ++ } else { ++ c = crc; ++ if (n) ++ { ++ uint32_t nn = n/SIZE_U32; ++ do{ ++ c = __crc32w(c,*((uint32_t *)s)); ++ s += SIZE_U32; ++ }while(--nn); ++ if (n & sizeof(uint16_t)) { ++ c = __crc32h(c, *((uint16_t *)s)); ++ s += sizeof(uint16_t); ++ } ++ if (n & sizeof(uint8_t)) ++ c = __crc32b(c, *s); ++ } ++ } ++ crc = c; ++ return c ^ 0xffffffffL; ++} ++ ++This pass is to complete the conversion of such scenarios from the internal ++perspective of the compiler: ++1)match_crc_loop:The function completes the screening of such scenarios; ++2)convert_to_new_loop:The function completes the conversion of ++ origin_loop to new loops, and removes origin_loop; ++3)origin_loop_info: The structure is used to record important information ++ of origin_loop: such as loop exit, initial value of induction ++ variable, etc; ++4) create_new_loops: The function is used as the key content of the pass ++ to complete the creation of new loops. */ + +-/* Match.pd function to match the ctz expression. 
*/ + extern bool gimple_crc_match_index (tree, tree *, tree (*)(tree)); + extern bool gimple_crc_match_res (tree, tree *, tree (*)(tree)); + + static gimple *crc_table_read_stmt = NULL; + ++static gphi* phi_s = NULL; ++static gphi* phi_c = NULL; ++static tree nn_tree = NULL; ++ ++enum aarch64_crc_builtins ++{ ++ AARCH64_BUILTIN_CRC32B, ++ AARCH64_BUILTIN_CRC32H, ++ AARCH64_BUILTIN_CRC32W, ++}; ++ ++/* The useful information of origin loop. */ ++struct origin_loop_info ++{ ++ tree limit; /* The limit index of the array in the old loop. */ ++ tree base_n; /* The initial value of the old loop. */ ++ tree base_s; /* The initial value of the old loop. */ ++ tree base_c; /* The initial value of the old loop. */ ++ edge entry_edge; /* The edge into the old loop. */ ++ edge exit_edge; /* The edge outto the old loop. */ ++ basic_block exit_bb; ++}; ++ ++typedef struct origin_loop_info origin_loop_info; ++ ++static origin_loop_info origin_loop; ++hash_map n_map; ++hash_map nn_map; ++hash_map s_map; ++hash_map c_map; ++hash_map crc_map; ++ ++/* Initialize the origin_loop structure. */ ++static void ++init_origin_loop_structure () ++{ ++ origin_loop.entry_edge = NULL; ++ origin_loop.exit_edge = NULL; ++ origin_loop.exit_bb = NULL; ++ origin_loop.limit = NULL; ++ origin_loop.base_n = NULL; ++ origin_loop.base_s = NULL; ++ origin_loop.base_c = NULL; ++} ++ ++/* Get the edge that first entered the loop. */ ++static edge ++get_loop_preheader_edge (class loop *loop) ++{ ++ edge e; ++ edge_iterator ei; ++ ++ FOR_EACH_EDGE (e, ei, loop->header->preds) ++ if (e->src != loop->latch) ++ break; ++ ++ return e; ++} ++ ++/* Returns true if t is SSA_NAME and user variable exists. */ ++ ++static bool ++ssa_name_var_p (tree t) ++{ ++ if (!t || TREE_CODE (t) != SSA_NAME) ++ return false; ++ if (SSA_NAME_VAR (t)) ++ return true; ++ return false; ++} ++ ++/* Returns true if t1 and t2 are SSA_NAME and belong to the same variable. 
*/ ++ ++static bool ++same_ssa_name_var_p (tree t1, tree t2) ++{ ++ if (!ssa_name_var_p (t1) || !ssa_name_var_p (t2)) ++ return false; ++ if (SSA_NAME_VAR (t1) == SSA_NAME_VAR (t2)) ++ return true; ++ return false; ++} ++ ++/* Get origin loop induction variable upper bound. */ ++ ++static bool ++get_iv_upper_bound (gimple *stmt) ++{ ++ if (origin_loop.limit != NULL || origin_loop.base_n != NULL) ++ return false; ++ ++ tree lhs = gimple_cond_lhs (stmt); ++ tree rhs = gimple_cond_rhs (stmt); ++ ++ if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE ++ || TREE_CODE (TREE_TYPE (rhs)) != INTEGER_TYPE) ++ return false; ++ ++ /* TODO: Currently, the input restrictions on lhs and rhs are implemented ++ through PARM_DECL. We may consider relax the restrictions later, and ++ we need to consider the overall adaptation scenario and adding test ++ cases. */ ++ if (ssa_name_var_p (lhs) && TREE_CODE (SSA_NAME_VAR (lhs)) == PARM_DECL) ++ { ++ origin_loop.limit = rhs; ++ origin_loop.base_n = lhs; ++ } ++ else ++ return false; ++ ++ if (origin_loop.limit != NULL && origin_loop.base_n != NULL) ++ return true; ++ ++ return false; ++} ++ ++/* Get origin loop info. 
*/ ++static bool ++get_origin_loop_info(class loop *loop) ++{ ++ vec edges; ++ edges = get_loop_exit_edges (loop); ++ origin_loop.exit_edge = edges[0]; ++ origin_loop.exit_bb = origin_loop.exit_edge->dest; ++ origin_loop.entry_edge = get_loop_preheader_edge(loop); ++ origin_loop.base_s = PHI_ARG_DEF_FROM_EDGE(phi_s,origin_loop.entry_edge); ++ origin_loop.base_c = PHI_ARG_DEF_FROM_EDGE(phi_c,origin_loop.entry_edge); ++ ++ basic_block preheader_bb; ++ preheader_bb = origin_loop.entry_edge->src; ++ ++ if(preheader_bb->preds->length() != 1) ++ return false; ++ ++ edge entry_pre_bb_edge; ++ entry_pre_bb_edge = EDGE_PRED (preheader_bb, 0); ++ ++ basic_block pre_preheader_bb; ++ pre_preheader_bb = entry_pre_bb_edge->src; ++ ++ gimple_stmt_iterator gsi; ++ gimple *stmt; ++ bool get_upper_bound = false; ++ for (gsi = gsi_start_bb (pre_preheader_bb); !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ stmt = gsi_stmt (gsi); ++ if (stmt && gimple_code (stmt) == GIMPLE_COND ++ && get_iv_upper_bound (stmt)) { ++ get_upper_bound = true; ++ break; ++ } ++ } ++ ++ return get_upper_bound; ++} + + /* The loop form check will check the entire loop control flow + It should be a loop that: +@@ -102,7 +324,8 @@ only_one_array_read (class loop *loop, tree &crc_table) + if (gimple_code (stmt) == GIMPLE_ASSIGN && + TREE_CODE(gimple_assign_rhs1 (stmt)) == ARRAY_REF) + { +- if (crc_table == NULL) ++ if (crc_table == NULL && ++ gimple_assign_rhs1 (stmt)->base.readonly_flag) + { + crc_table = gimple_assign_rhs1 (stmt); + crc_table_read_stmt = stmt; +@@ -174,15 +397,18 @@ static const unsigned HOST_WIDE_INT crc_32_tab[] = { + static bool + match_crc_table (tree crc_table) + { ++ const unsigned LOW_BOUND = 0; ++ const unsigned UP_BOUND = 255; ++ const unsigned ELEMENT_SIZE = 8; + unsigned HOST_WIDE_INT lb = tree_to_uhwi (array_ref_low_bound (crc_table)); + unsigned HOST_WIDE_INT ub = tree_to_uhwi (array_ref_up_bound (crc_table)); + unsigned HOST_WIDE_INT es = tree_to_uhwi (array_ref_element_size 
(crc_table)); +- if (lb != 0 || ub != 255 || es != 8) ++ if (lb != LOW_BOUND || ub != UP_BOUND || es != ELEMENT_SIZE) + return false; + + tree decl = TREE_OPERAND (crc_table, 0); + tree ctor = ctor_for_folding(decl); +- for (int i = 0; i < 255; i++) { ++ for (int i = lb; i <= ub; i++) { + unsigned HOST_WIDE_INT val = tree_to_uhwi (CONSTRUCTOR_ELT (ctor,i)->value); + if (crc_32_tab[i] != val) + return false; +@@ -273,6 +499,7 @@ check_evolution_pattern (class loop* loop, gphi *capture[]) + if (s != NULL) + return false; + s = capture[i]; ++ phi_s = s; + } + else if (evolution_pattern_plus_with_p(loop, capture[i], 4294967295)) + { +@@ -285,6 +512,7 @@ check_evolution_pattern (class loop* loop, gphi *capture[]) + if (c != NULL) + return false; + c = capture[i]; ++ phi_c = c; + } + } + +@@ -314,14 +542,19 @@ check_calculation_pattern (class loop* loop, gphi *capture[]) + _5 = _4 ^ c_10; //BIT_XOR_EXPR (SSA_NAME, PHI @1) + _6 = _5 & 255; //BIT_XOR_EXPR (SSA_NAME, INTEGER_CST@3) + */ +- + if (!gimple_crc_match_index(index, res_ops, NULL)) + return false; +- gimple *s_res_stmt = SSA_NAME_DEF_STMT(res_ops[1]); +- tree s_res = TREE_OPERAND(gimple_assign_rhs1(s_res_stmt),0); +- if (res_ops[0] != gimple_phi_result (c) || +- s_res != gimple_phi_result (s)) ++ gimple *s_res_stmt = SSA_NAME_DEF_STMT (res_ops[0]); ++ if (!s_res_stmt) ++ return false; ++ gimple *s_def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (s_res_stmt)); ++ if (!s_def_stmt) + return false; ++ tree s_res = TREE_OPERAND (gimple_assign_rhs1 (s_def_stmt), 0); ++ if (res_ops[1] != gimple_phi_result (c) || s_res != gimple_phi_result (s)) ++ { ++ return false; ++ } + + /* Try to match + _8 = c_12 >> 8; // RSHIFT_EXPR (SSA_NAME @1, INTEGER_CST @2) +@@ -333,7 +566,11 @@ check_calculation_pattern (class loop* loop, gphi *capture[]) + return false; + if (res_ops[0] != gimple_phi_result (c) + || res_ops[2] != gimple_assign_lhs(crc_table_read_stmt)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf 
(dump_file, "\n gimple_crc_match_res pattern check failed.\n"); + return false; ++ } + + return true; + } +@@ -419,101 +656,91 @@ crc_loop_body_check (class loop *loop) + return false; + } + return true; +-/* gphi *phi; +- gphi_iterator gsi; +- int num_of_phi = 0; +- //s, n, c; +- //only 3 phi nodes are there, every one of the phi nodes comming from 2 edge only, one from preheader, one from latch +- // s increase by 1 every itoration +- // n decrease by 1 every itoration +- // The final one is c, which is the result, should be used for the start of the later pattern matching +- for (gsi = gsi_start_phis(loop->header); !gsi_end_p(gsi); gsi_next(&gsi)) +- { +- phi = gsi.phi(); ++} + +- if (phi) num_of_phi++; +- if (num_of_phi > 3) return false; // more then 3 phi node +- if (gimple_phi_num_args(phi) > 2) // more than 2 edges other then one backedge and one preheader edge +- return false; +- //capture[num_of_phi - 1] = gimple_phi_result(phi); +- capture[num_of_phi - 1] = phi; +- } +- if (num_of_phi != 3) return false; // phi node should be 3 */ +- // Find the envolution pattern for s and n, try to match the identity of these variable +-/* gphi *s=NULL; +- gphi *n=NULL; +- gphi *c=NULL; ++/* Check the prev_bb of prev_bb of loop header. 
The prev_bb we are trying to match is + +- for (int i = 0; i < 3; i++) +- { +- if (evolution_pattern_plus_with_p(loop, capture[i], 1)) +- { +- if(s != NULL) +- return false; +- s = capture[i]; +- } +- else if (evolution_pattern_plus_with_p(loop, capture[i], 4294967295)) +- { +- if(n != NULL) +- return false; +- n = capture[i]; +- } +- else +- { +- if(c != NULL) +- return false; +- c = capture[i]; +- } +- } ++c_15 = crc; ++if (n_16(D) != 0) ++ goto ; [INV] ++else ++ goto ; [INV] + +- // some envolution pattern cannot find +- if (!n || !s || !c) +- return false; +- gphi *s=capture[0]; +- gphi *n=capture[1]; +- gphi *c=capture[2]; +- tree res_ops[3]; +- tree index = TREE_OPERAND (gimple_assign_rhs1 (crc_table_read_stmt), 1); ++ In this case , we must be sure that the n is not zero. ++ so the match condition is ++ 1、the n is not zero. + +- /* Try to match +- _1 = (int) c_12; //NOP_EXPR (SSA_NAME @1) +- _4 = (int) _3; //NOP_EXPR (SSA_NAME @2) +- _5 = _1 ^ _4; //BIT_XOR_EXPR (SSA_NAME, SSA_NAME) +- _6 = _5 & 255; //BIT_XOR_EXPR (SSA_NAME, INTEGER_CST@3) ++ : ++if (s_13(D) == 0B) ++ goto ; [INV] ++else ++ goto ; [INV] + +- +- if (!gimple_crc_match_index(index, res_ops, NULL)) ++ In this case, we must be sure the s is not NULL. ++ so the match condition is ++ 1、the s is not NULL. 
++*/ ++static bool ++crc_prev_bb_of_loop_header_check(class loop *loop) ++{ ++ basic_block header = loop->header; ++ basic_block prev_header_bb = header->prev_bb; ++ if(NULL == prev_header_bb) ++ { + return false; +- gimple *s_res_stmt = SSA_NAME_DEF_STMT(res_ops[1]); +- tree s_res = TREE_OPERAND(gimple_assign_rhs1(s_res_stmt),0); +- if (res_ops[0] != gimple_phi_result (c) || +- s_res != gimple_phi_result (s)) ++ } ++ ++ basic_block prev_prev_header_bb = prev_header_bb->prev_bb; ++ if(NULL == prev_prev_header_bb) ++ { + return false; ++ } ++ ++ gimple_stmt_iterator gsi; ++ gimple *stmt; ++ bool res = false; ++ for (gsi = gsi_start_bb (prev_prev_header_bb); !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ stmt = gsi_stmt (gsi); ++ if (stmt == NULL) ++ return false; + +- /* +-_8 = c_12 >> 8; // RSHIFT_EXPR (SSA_NAME @1, INTEGER_CST @2) +-c_19 = _7 ^ _8; // BIT_XOR_EXPR (SSA_NAME@3, SSA_NAME) ++ if (gimple_code (stmt) == GIMPLE_COND && ++ gimple_cond_code(stmt) == NE_EXPR && ++ TREE_CODE(gimple_cond_rhs (stmt)) == INTEGER_CST && ++ tree_int_cst_sgn(gimple_cond_rhs (stmt)) == 0 ) ++ { ++ res = true; ++ break; ++ } ++ } + +- edge backedge = find_edge(loop->latch, loop->header); +- tree updated_c = PHI_ARG_DEF_FROM_EDGE (c, backedge); +- if (!gimple_crc_match_res(updated_c, res_ops, NULL)) +- return false; +- if (res_ops[0] != gimple_phi_result (c) +- || res_ops[2] != gimple_assign_lhs(crc_table_read_stmt)) ++ if(!res) ++ { + return false; ++ } + +- // try match n as the induction variable +- // The proceed condition for back edge is n != 0 +- gimple *cond_stmt = gsi_stmt (gsi_last_bb (loop->header)); +- if (!cond_stmt || gimple_code (cond_stmt) != GIMPLE_COND || gimple_cond_code (cond_stmt) != NE_EXPR +- || gimple_cond_lhs (cond_stmt) != PHI_ARG_DEF_FROM_EDGE (n, backedge) +- || tree_to_uhwi(gimple_cond_rhs (cond_stmt)) != 0) ++ basic_block first_bb = prev_prev_header_bb->prev_bb; ++ if(NULL == first_bb) + return false; +- +- return true; +- */ +-} + ++ for (gsi = 
gsi_start_bb (first_bb); !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ stmt = gsi_stmt (gsi); ++ if (stmt == NULL) ++ return false; ++ ++ if (gimple_code (stmt) == GIMPLE_COND && ++ gimple_cond_code(stmt) == EQ_EXPR && ++ TREE_CODE(gimple_cond_rhs (stmt)) == INTEGER_CST && ++ tree_int_cst_sgn(gimple_cond_rhs (stmt)) == 0 ) ++ { ++ return true; ++ } ++ } ++ ++ return false; ++} + + static bool + match_crc_loop (class loop *loop) +@@ -536,13 +763,463 @@ match_crc_loop (class loop *loop) + fprintf (dump_file, "\nWrong loop body for crc matching.\n"); + return false; + } ++ if(!crc_prev_bb_of_loop_header_check(loop)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "\nWrong prev basic_blocks of loop header for crc matching.\n"); ++ return false; ++ } ++ ++ init_origin_loop_structure(); ++ if(!get_origin_loop_info(loop)) ++ return false; ++ + return true; + } + ++static void ++create_new_bb (basic_block &new_bb, basic_block after_bb, ++ basic_block dominator_bb, class loop *outer) ++{ ++ new_bb = create_empty_bb (after_bb); ++ add_bb_to_loop (new_bb, outer); ++ set_immediate_dominator (CDI_DOMINATORS, new_bb, dominator_bb); ++} ++ ++static void ++change_preheader_bb(edge entry_edge) ++{ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gimple* g; ++ tree lhs1; ++ ++ lhs1 = create_tmp_var(TREE_TYPE(origin_loop.base_n),"nn"); ++ lhs1 = make_ssa_name(lhs1); ++ gsi = gsi_last_bb (entry_edge->src); ++ g = gimple_build_assign(lhs1,RSHIFT_EXPR,origin_loop.base_n, ++ build_int_cst (TREE_TYPE (origin_loop.base_n), 2)); ++ gimple_seq_add_stmt(&stmts,g); ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++ nn_tree = lhs1; ++ set_current_def(nn_tree, lhs1); ++ nn_map.put (entry_edge->src, lhs1); ++} ++ ++static gphi* ++create_phi_node_for_bb(tree old_name, basic_block bb) ++{ ++ gphi *phi = create_phi_node(NULL_TREE, bb); ++ create_new_def_for(old_name, phi, gimple_phi_result_ptr(phi)); ++ return phi; ++} ++ ++static gimple* 
++call_builtin_fun(int code,tree& lhs, tree arg1, tree arg2) ++{ ++ unsigned int builtin_code = targetm.get_crc_builtin_code(code, true);// 根据code获取到正确的builtin_fun_code ++ tree fn = targetm.builtin_decl(builtin_code,true); // get the decl of __builtin_aarch64_crc32w ++ if (!fn || fn == error_mark_node) ++ fatal_error (input_location, ++ "target specific builtin not available"); ++ gimple* call_builtin = gimple_build_call(fn, 2, arg1, arg2); // _40 = __builtin_aarch64_crc32* (_1, _2); ++ lhs = make_ssa_name (unsigned_type_node); ++ gimple_call_set_lhs(call_builtin,lhs); ++ ++ return call_builtin; ++} ++ ++/* Create loop_header and loop_latch for new loop ++ : ++ # s_14 = PHI ++ # c_16 = PHI ++ # nn_19 = PHI ++ _1 = (unsigned int) c_16; ++ _2 = MEM[(uint32_t *)s_14]; ++ _40 = __builtin_aarch64_crc32w (_1, _2); ++ c_29 = (long unsigned int) _40; ++ s_30 = s_14 + 4; ++ nn_31 = nn_19 + 4294967295; ++ if (nn_31 != 0) ++ The IR of bb is as above. */ ++static void ++create_loop_bb(basic_block& loop_bb, basic_block after_bb, ++ basic_block dominator_bb, class loop *outer, edge entry_edge) ++{ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gimple* g; ++ gphi* phi_s_loop; ++ gphi* phi_c_loop; ++ gphi* phi_nn_loop; ++ ++ create_new_bb(loop_bb, after_bb, dominator_bb, outer); ++ redirect_edge_and_branch(entry_edge, loop_bb); ++ gsi = gsi_last_bb(loop_bb); ++ tree entry_nn = get_current_def(nn_tree); ++ phi_s_loop = create_phi_node_for_bb(origin_loop.base_s, loop_bb); ++ phi_c_loop = create_phi_node_for_bb(origin_loop.base_c, loop_bb); ++ phi_nn_loop = create_phi_node_for_bb(entry_nn, loop_bb); ++ ++ tree res_s = gimple_phi_result(phi_s_loop); ++ tree res_nn = gimple_phi_result(phi_nn_loop); ++ tree lhs1 = gimple_build(&stmts, NOP_EXPR, unsigned_type_node, ++ gimple_phi_result(phi_c_loop)); ++ g = gimple_build_assign(make_ssa_name(unsigned_type_node), ++ fold_build2(MEM_REF,unsigned_type_node,res_s, ++ build_int_cst (build_pointer_type (unsigned_type_node), 0))); ++ 
gimple_seq_add_stmt(&stmts, g); ++ tree lhs2 = gimple_assign_lhs(g); // _2 = MEM[(uint32_t *)s_14]; ++ unsigned int code = AARCH64_BUILTIN_CRC32W; ++ tree lhs3; ++ gimple* build_crc32w = call_builtin_fun(code,lhs3, lhs1, lhs2); ++ crc_map.put(loop_bb, lhs3); ++ gimple_seq_add_stmt(&stmts,build_crc32w); ++ ++ tree lhs4 = copy_ssa_name(origin_loop.base_c); ++ g = gimple_build_assign(lhs4, NOP_EXPR, lhs3); ++ gimple_seq_add_stmt(&stmts, g); ++ c_map.put(loop_bb, lhs4); ++ ++ tree lhs5 = copy_ssa_name(origin_loop.base_s); ++ g = gimple_build_assign(lhs5, POINTER_PLUS_EXPR, res_s, ++ build_int_cst (sizetype, 4)); ++ gimple_seq_add_stmt(&stmts, g); ++ s_map.put(loop_bb, lhs5); ++ ++ tree lhs6 = copy_ssa_name(nn_tree); ++ g = gimple_build_assign(lhs6, PLUS_EXPR, res_nn, ++ build_int_cst (TREE_TYPE (res_nn), 4294967295)); ++ gimple_seq_add_stmt(&stmts,g); ++ nn_map.put(loop_bb, lhs6); ++ ++ gcond* cond_stmt = gimple_build_cond (NE_EXPR, lhs6, origin_loop.limit, ++ NULL_TREE, NULL_TREE); ++ gimple_seq_add_stmt (&stmts, cond_stmt); ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++} ++ ++/* : ++ # c_6 = PHI ++ # s_46 = PHI ++ _44 = n_26(D) & 2; ++ if (_44 != 0) ++ The IR of bb is as above. 
*/
++static void
++create_cond_bb(basic_block& cond_bb, basic_block after_bb,
++               basic_block dominator_bb, class loop *outer){
++  gimple_seq stmts = NULL;
++  gimple_stmt_iterator gsi;
++  gphi* phi_s_loop;
++  gphi* phi_c_loop;
++
++  create_new_bb(cond_bb, after_bb, dominator_bb, outer);
++  gsi = gsi_last_bb(cond_bb);
++  /* Create the phi nodes for the data pointer (base_s) and the crc value
++     (base_c), and record their results as the current definitions and in
++     s_map/c_map for this block.  The phi arguments are supplied separately
++     (see update_phi_nodes).  */
++  phi_s_loop = create_phi_node_for_bb(origin_loop.base_s, cond_bb);
++  phi_c_loop = create_phi_node_for_bb(origin_loop.base_c, cond_bb);
++  tree res_s = gimple_phi_result(phi_s_loop);
++  set_current_def(origin_loop.base_s, res_s);
++  s_map.put(cond_bb, res_s);
++  tree res_c = gimple_phi_result(phi_c_loop);
++  set_current_def(origin_loop.base_c, res_c);
++  c_map.put(cond_bb, res_c);
++
++  /* Terminate the block with: if ((base_n & 2) != origin_loop.limit).  */
++  tree lhs1 = gimple_build(&stmts, BIT_AND_EXPR, TREE_TYPE(origin_loop.base_n),
++        origin_loop.base_n, build_int_cst (TREE_TYPE (origin_loop.base_n), 2));
++  gcond* cond_stmt = gimple_build_cond (NE_EXPR, lhs1, origin_loop.limit,
++                                        NULL_TREE, NULL_TREE);
++  gimple_seq_add_stmt (&stmts, cond_stmt);
++  gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
++}
++
++/* Emit the block that folds one 16-bit unit into the crc.  The statements
++   built below have this shape:
++     _7 = MEM[(uint16_t *)s_46];
++     _41 = __builtin_aarch64_crc32h (_8, _7);
++     c_33 = (long unsigned int) _41;
++     s_34 = s_30 + 2;
++   The new crc, c and s values are recorded in crc_map, c_map and s_map
++   under COND_TRUE_BB.  */
++static void
++create_cond_true_bb(basic_block& cond_true_bb, basic_block after_bb,
++                    basic_block dominator_bb, class loop *outer){
++  gimple_seq stmts = NULL;
++  gimple* g;
++  gimple_stmt_iterator gsi;
++
++  create_new_bb(cond_true_bb, after_bb, dominator_bb, outer);
++  gsi = gsi_last_bb(cond_true_bb);
++  /* NOTE(review): assumes s_map already holds an entry for AFTER_BB; the
++     pointer returned by get () is dereferenced without a NULL check.  */
++  tree s_46 = *(s_map.get(after_bb));
++  g = gimple_build_assign(make_ssa_name(short_unsigned_type_node),
++        fold_build2(MEM_REF,short_unsigned_type_node,s_46,
++          build_int_cst (build_pointer_type (short_unsigned_type_node), 0)));
++  gimple_seq_add_stmt(&stmts,g);
++  tree lhs1 = gimple_assign_lhs(g); // _7 = MEM[(uint16_t *)s_46];
++  unsigned int code = AARCH64_BUILTIN_CRC32H;
++  tree lhs2;
++  /* NOTE(review): the incoming crc is looked up two blocks back along the
++     prev_bb chain -- presumably the loop block; confirm the block layout.
++     The lookup result is dereferenced without a NULL check.  */
++  gimple* call_builtin = call_builtin_fun(code, lhs2,
++        *(crc_map.get(cond_true_bb->prev_bb->prev_bb)),lhs1);
++  crc_map.put(cond_true_bb,lhs2);
++  gimple_seq_add_stmt(&stmts, call_builtin);
++
++  tree lhs3 = copy_ssa_name(origin_loop.base_c);
++  g = gimple_build_assign(lhs3, NOP_EXPR, lhs2);
++  gimple_seq_add_stmt(&stmts, g);
++  c_map.put(cond_true_bb, lhs3);
++
++  /* Advance the data pointer by the two bytes just consumed.  */
++  tree lhs5 = copy_ssa_name(s_46);
++  g = gimple_build_assign(lhs5, POINTER_PLUS_EXPR, s_46,
++                          build_int_cst (sizetype, 2)); // s_30 + 2;
++  gimple_seq_add_stmt(&stmts, g);
++  s_map.put(cond_true_bb, lhs5);
++
++  gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
++}
++
++/* Emit the block that tests bit 0 of origin_loop.base_n.  AFTER_BB is made
++   to fall through into the new block, which has this shape:
++     # s_15 = PHI (data pointer values from the predecessors)
++     # c_17 = PHI (crc values from the predecessors)
++     _3 = n_26(D) & 1;
++     if (_3 != 0)
++   The phi arguments are supplied separately (see update_phi_nodes).  */
++static void
++create_cond_false_bb(basic_block& cond_false_bb, basic_block after_bb,
++                     basic_block dominator_bb, class loop *outer)
++{
++  gimple_seq stmts = NULL;
++  gimple_stmt_iterator gsi;
++  gphi* phi_s_cond_false_bb;
++  gphi* phi_c_cond_false_bb;
++
++  create_new_bb(cond_false_bb, after_bb, dominator_bb, outer);
++  make_single_succ_edge(after_bb, cond_false_bb, EDGE_FALLTHRU);
++
++  /* The phis are created from the current definitions of base_s and base_c,
++     and their results then become the new current definitions.  */
++  tree entry_s = get_current_def(origin_loop.base_s);
++  phi_s_cond_false_bb = create_phi_node_for_bb(entry_s, cond_false_bb);
++  tree entry_c = get_current_def(origin_loop.base_c);
++  phi_c_cond_false_bb = create_phi_node_for_bb(entry_c, cond_false_bb);
++  tree res_s = gimple_phi_result(phi_s_cond_false_bb);
++  set_current_def(origin_loop.base_s, res_s);
++  s_map.put(cond_false_bb, res_s);
++  tree res_c = gimple_phi_result(phi_c_cond_false_bb);
++  set_current_def(origin_loop.base_c, res_c);
++  c_map.put(cond_false_bb, res_c);
++
++  gsi = gsi_last_bb(cond_false_bb);
++  /* Terminate the block with: if ((base_n & 1) != origin_loop.limit).  */
++  tree lhs1 = gimple_build(&stmts, BIT_AND_EXPR, TREE_TYPE(origin_loop.base_n),
++        origin_loop.base_n, build_int_cst (TREE_TYPE (origin_loop.base_n), 1));
++  gcond* cond_stmt = gimple_build_cond (NE_EXPR, lhs1, origin_loop.limit,
++                                        NULL_TREE, NULL_TREE);
++  gimple_seq_add_stmt (&stmts, cond_stmt);
++  gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
++}
++
++/* Emit the block that folds a single byte into the crc.  The statements
++   built below have this shape:
++     _11 = (unsigned int) c_17;
++     _12 = *s_15;
++     _42 = __builtin_aarch64_crc32b (_11, _12);
++     c_36 = (long unsigned int) _42;
++   The new crc and c values are recorded in crc_map and c_map under
++   NEW_BB.  */
++static void
++create_lastcond_true_bb(basic_block& new_bb, basic_block after_bb,
++                        basic_block dominator_bb, class loop *outer){
++  gimple_seq stmts = NULL;
++  gimple_stmt_iterator gsi;
++  gimple* g;
++
++  create_new_bb(new_bb, after_bb, dominator_bb, outer);
++  gsi = gsi_last_bb(new_bb);
++
++  /* Both inputs come from the current definitions of base_c and base_s.  */
++  tree lhs1 = gimple_build(&stmts, NOP_EXPR, unsigned_type_node,
++                           get_current_def(origin_loop.base_c));
++  tree lhs2;
++  tree s_15 = get_current_def(origin_loop.base_s);
++  g = gimple_build_assign (make_ssa_name (unsigned_char_type_node),
++        fold_build2 (MEM_REF, unsigned_char_type_node, s_15,
++                     build_int_cst (TREE_TYPE(s_15), 0)));
++  gimple_seq_add_stmt (&stmts, g);
++  lhs2 = gimple_assign_lhs (g);
++
++  unsigned int code = AARCH64_BUILTIN_CRC32B;
++  tree lhs3;
++  gimple* call_builtin = call_builtin_fun(code, lhs3, lhs1, lhs2);
++  crc_map.put(new_bb,lhs3);
++  gimple_seq_add_stmt(&stmts,call_builtin);
++
++  tree lhs4 = copy_ssa_name(origin_loop.base_c);
++  g = gimple_build_assign(lhs4, NOP_EXPR, lhs3);
++  gimple_seq_add_stmt(&stmts, g);
++  c_map.put(new_bb, lhs4);
++
++  gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT);
++}
++
++/* If same_ssa_name_var_p accepts PHI_ARG and PHI_RES, add PHI_ARG as the
++   argument of PHI on edge E and return true; otherwise leave PHI untouched
++   and return false.  Virtual operands get UNKNOWN_LOCATION, other arguments
++   the location of their defining statement.  */
++static bool
++optional_add_phi_arg(gphi * phi, tree phi_res, tree phi_arg, edge e)
++{
++  location_t loc;
++  if (same_ssa_name_var_p (phi_arg, phi_res))
++    {
++      if (virtual_operand_p (phi_arg))
++        loc = UNKNOWN_LOCATION;
++      else
++        loc = gimple_location (SSA_NAME_DEF_STMT (phi_arg));
++      add_phi_arg (phi, phi_arg, e, loc);
++
++      return true;
++    }
++
++  return false;
++}
++
++/* Add phi_arg for bb with phi node.
*/
++static void
++update_phi_nodes (basic_block bb)
++{
++  edge e;
++  edge_iterator ei;
++  gphi *phi;
++  gphi_iterator gsi;
++  tree res;
++
++  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
++    {
++      phi = gsi.phi ();
++      res = gimple_phi_result (phi);
++
++      FOR_EACH_EDGE (e, ei, bb->preds)
++        {
++          /* Skip edges whose argument has already been set.  */
++          if (PHI_ARG_DEF_FROM_EDGE (phi, e))
++            continue;
++
++          /* Try, in order, the c, nn and s value recorded for the edge's
++             source block, falling back to the original base value when the
++             map has no entry.  The first candidate that
++             optional_add_phi_arg accepts becomes the argument.  */
++          tree* ptr_var_c = c_map.get (e->src);
++          tree var_c = (ptr_var_c == NULL) ? origin_loop.base_c : *ptr_var_c;
++          if (optional_add_phi_arg (phi, res, var_c, e))
++            continue;
++
++          tree* ptr_var_nn = nn_map.get (e->src);
++          tree var_nn = (ptr_var_nn == NULL) ? nn_tree : *ptr_var_nn;
++          if (optional_add_phi_arg (phi, res, var_nn, e))
++            continue;
++
++          tree* ptr_var_s = s_map.get (e->src);
++          tree var_s = (ptr_var_s == NULL) ? origin_loop.base_s : *ptr_var_s;
++          optional_add_phi_arg (phi, res, var_s, e);
++        }
++    }
++}
++
++/* Build the replacement CFG after ENTRY_EDGE's source block and remove the
++   original loop's exit edge.  Edges created (here or in the helpers called):
++     loop_bb -> loop_bb (true), loop_bb -> cond_bb (false),
++     cond_bb -> cond_true_bb (true), cond_bb -> cond_false_bb (false),
++     cond_true_bb -> cond_false_bb (fallthru),
++     cond_false_bb -> lastcond_true_bb (true),
++     cond_false_bb -> origin_loop.exit_bb (false),
++     lastcond_true_bb -> origin_loop.exit_bb (fallthru).
++   loop_bb is registered as a new loop (header == latch == loop_bb) nested
++   in the loop containing ENTRY_EDGE's source block.  */
++static void
++create_new_loops(edge entry_edge)
++{
++  class loop* new_loop = NULL;
++  basic_block loop_bb, cond_bb, cond_true_bb, cond_false_bb, lastcond_true_bb;
++  class loop *outer = entry_edge->src->loop_father;
++  change_preheader_bb(entry_edge);
++
++  create_loop_bb(loop_bb, entry_edge->src, entry_edge->src, outer, entry_edge);
++  create_cond_bb(cond_bb, loop_bb, loop_bb, outer);
++  make_edge(loop_bb, loop_bb, EDGE_TRUE_VALUE);
++  make_edge(loop_bb, cond_bb, EDGE_FALSE_VALUE);
++  update_phi_nodes(loop_bb);
++
++  new_loop = alloc_loop ();
++  new_loop->header = loop_bb;
++  new_loop->latch = loop_bb;
++  add_loop (new_loop, outer);
++  if (dump_file && (dump_flags & TDF_DETAILS))
++    {
++      fprintf (dump_file, "\nPrint byte new loop %d:\n", new_loop->num);
++      flow_loop_dump (new_loop, dump_file, NULL, 1);
++      fprintf (dump_file, "\n\n");
++    }
++
++  create_cond_true_bb(cond_true_bb, cond_bb, cond_bb, outer);
++  make_edge(cond_bb, cond_true_bb, EDGE_TRUE_VALUE);
++  create_cond_false_bb(cond_false_bb, cond_true_bb, cond_bb, outer);
++  make_edge(cond_bb, cond_false_bb, EDGE_FALSE_VALUE);
++  /* The phis can only be completed once every incoming edge exists.  */
++  update_phi_nodes(cond_bb);
++  update_phi_nodes(cond_false_bb);
++  create_lastcond_true_bb(lastcond_true_bb, cond_false_bb, cond_false_bb, outer);
++  make_edge(cond_false_bb, lastcond_true_bb, EDGE_TRUE_VALUE);
++  make_edge(cond_false_bb, origin_loop.exit_bb, EDGE_FALSE_VALUE);
++  make_single_succ_edge(lastcond_true_bb, origin_loop.exit_bb, EDGE_FALLTHRU);
++
++  update_phi_nodes(origin_loop.exit_bb);
++  remove_edge(origin_loop.exit_edge);
++}
++
++/* Delete every basic block of LOOP, release the block array obtained from
++   get_loop_body_in_dom_order, and then delete LOOP itself.  */
++static void
++remove_origin_loop(class loop* loop)
++{
++  basic_block* body = get_loop_body_in_dom_order(loop);
++  /* Read the block count once, before any block is deleted.  */
++  unsigned n = loop->num_nodes;
++  for (unsigned i = 0; i < n; ++i)
++    {
++      delete_basic_block(body[i]);
++    }
++  free(body);
++  delete_loop(loop);
++}
++
++/* Make sure that the dominance relationship of the newly inserted cfg
++   is not missing: recompute, in direction DIR, the immediate dominator of
++   every block that has none recorded and of origin_loop.exit_bb.
++   Dominance info for DIR must already be available.  */
++static void
++update_loop_dominator(cdi_direction dir)
++{
++  gcc_assert (dom_info_available_p (dir));
++
++  basic_block bb;
++  FOR_EACH_BB_FN (bb, cfun)
++    {
++      basic_block imm_bb = get_immediate_dominator (dir, bb);
++      if (!imm_bb || bb == origin_loop.exit_bb)
++        {
++          /* Use DIR for the update as well, so the query and the update
++             always refer to the same dominance direction.  */
++          set_immediate_dominator (dir, bb,
++                                   recompute_dominator (dir, bb));
++        }
++    }
++}
++
++/* Perform the conversion of origin_loop to new_loop: build the new blocks,
++   delete the original LOOP, then repair dominators and SSA form.  */
++static void
++convert_to_new_loop (class loop *loop)
++{
++  create_new_loops (origin_loop.entry_edge);
++  remove_origin_loop (loop);
++  update_loop_dominator (CDI_DOMINATORS);
++  update_ssa (TODO_update_ssa);
++}
++
+ /* The main entry of loop crc optimizes.
*/ + static unsigned int + tree_ssa_loop_crc () + { ++ if(TARGET_CRC32 == false){ ++ warning (OPT____,"The loop-crc optimization is not working."\ ++ "You should make sure that the specified architecture supports"\ ++ " crc:-march=armv8.1-a"); ++ return 0; ++ } + unsigned int todo = 0; + class loop *loop; + +@@ -553,28 +1230,28 @@ tree_ssa_loop_crc () + } + + FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "======================================\n"); ++ fprintf (dump_file, "Processing loop %d:\n", loop->num); ++ fprintf (dump_file, "======================================\n"); ++ flow_loop_dump (loop, dump_file, NULL, 1); ++ fprintf (dump_file, "\n\n"); ++ } ++ ++ if (match_crc_loop (loop)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) +- { +- fprintf (dump_file, "======================================\n"); +- fprintf (dump_file, "Processing loop %d:\n", loop->num); +- fprintf (dump_file, "======================================\n"); +- flow_loop_dump (loop, dump_file, NULL, 1); +- fprintf (dump_file, "\n\n"); +- } +- +- if (match_crc_loop (loop)) +- { +- if (dump_file && (dump_flags & TDF_DETAILS)) +- { +- fprintf (dump_file, "The %dth loop form is success matched," +- "and the loop can be optimized.\n", +- loop->num); +- } +- +- convert_to_new_loop (loop); +- } ++ { ++ fprintf (dump_file, "The %dth loop form is success matched," ++ "and the loop can be optimized.\n", ++ loop->num); ++ } ++ ++ convert_to_new_loop (loop); + } ++ } + + todo |= (TODO_update_ssa); + return todo; +@@ -641,4 +1318,4 @@ gimple_opt_pass * + make_pass_loop_crc (gcc::context *ctxt) + { + return new pass_loop_crc (ctxt); +-} +\ No newline at end of file ++} +-- +2.33.0 +