From 40d16c6c9462a45727d8e31ab62eef4ee40bfbdc Mon Sep 17 00:00:00 2001 From: wangding16 Date: Wed, 6 Dec 2023 11:49:42 +0800 Subject: [PATCH 1/7] [Sync] Sync patch from openeuler/gcc 0142-crc-loop-optimization-initial.patch --- 0142-crc-loop-optimization-initial.patch | 2332 ++++++++++++++++++++++ 1 file changed, 2332 insertions(+) create mode 100644 0142-crc-loop-optimization-initial.patch diff --git a/0142-crc-loop-optimization-initial.patch b/0142-crc-loop-optimization-initial.patch new file mode 100644 index 0000000..61d5ae5 --- /dev/null +++ b/0142-crc-loop-optimization-initial.patch @@ -0,0 +1,2332 @@ +From 2716abb1a4de2a4edf06d2f1877d9b76a88e5807 Mon Sep 17 00:00:00 2001 +From: bule +Date: Thu, 15 Dec 2022 14:34:16 +0800 +Subject: [PATCH 05/33] crc loop optimization initial + +--- + gcc/Makefile.in | 1 + + gcc/common.opt | 4 + + gcc/doc/invoke.texi | 6 +- + gcc/match.pd | 169 +++++ + gcc/passes.def | 1 + + gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c | 85 +++ + .../tree-ssa/loop-crc-1.c.042t.loop_crc | 90 +++ + gcc/testsuite/gcc.dg/tree-ssa/loop-crc-2.c | 88 +++ + gcc/testsuite/gcc.dg/tree-ssa/loop-crc-3.c | 85 +++ + gcc/testsuite/gcc.dg/tree-ssa/loop-crc-4.c | 89 +++ + .../tree-ssa/loop-crc-4.c.042t.loop_crc | 0 + .../loop-crc-calculation-check-fail.c | 156 +++++ + ...crc-calculation-check-fail.c.042t.loop_crc | 64 ++ + .../loop-crc-calculation-check-fail.s | 329 +++++++++ + .../gcc.dg/tree-ssa/loop-crc-loop-form-fail.c | 111 +++ + .../gcc.dg/tree-ssa/loop-crc-sucess.c | 84 +++ + .../tree-ssa/loop-crc-table-check-fail.c | 113 +++ + gcc/timevar.def | 1 + + gcc/tree-pass.h | 1 + + gcc/tree-ssa-loop-crc.c | 644 ++++++++++++++++++ + 20 files changed, 2120 insertions(+), 1 deletion(-) + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c.042t.loop_crc + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-2.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-3.c + create mode 
100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-4.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-4.c.042t.loop_crc + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.c.042t.loop_crc + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.s + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-sucess.c + create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-table-check-fail.c + create mode 100644 gcc/tree-ssa-loop-crc.c + +diff --git a/gcc/Makefile.in b/gcc/Makefile.in +index 3f06b8907..2a59acfbe 100644 +--- a/gcc/Makefile.in ++++ b/gcc/Makefile.in +@@ -1592,6 +1592,7 @@ OBJS = \ + tree-ssa-loop-manip.o \ + tree-ssa-loop-niter.o \ + tree-ssa-loop-array-widen-compare.o \ ++ tree-ssa-loop-crc.o \ + tree-ssa-loop-prefetch.o \ + tree-ssa-loop-split.o \ + tree-ssa-loop-unswitch.o \ +diff --git a/gcc/common.opt b/gcc/common.opt +index 6f0ed7cea..a286a2628 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1065,6 +1065,10 @@ Common Report Var(flag_array_widen_compare) Optimization + Extends types for pointers to arrays to improve array comparsion performance. + In some extreme situations this may result in unsafe behavior. + ++floop-crc ++Common Report Var(flag_loop_crc) Optimization ++do the loop crc conversion. ++ + fauto-inc-dec + Common Report Var(flag_auto_inc_dec) Init(1) Optimization + Generate auto-inc/dec instructions. +diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi +index 7498758b0..52018617a 100644 +--- a/gcc/doc/invoke.texi ++++ b/gcc/doc/invoke.texi +@@ -460,7 +460,7 @@ Objective-C and Objective-C++ Dialects}. 
+ -fno-allocation-dce -fallow-store-data-races @gol + -fassociative-math -fauto-profile -fauto-profile[=@var{path}] @gol + -farray-widen-compare -fauto-inc-dec -fbranch-probabilities @gol +--fcaller-saves @gol ++-fcaller-saves -floop-crc @gol + -fcombine-stack-adjustments -fconserve-stack @gol + -fcompare-elim -fcprop-registers -fcrossjumping @gol + -fcse-follow-jumps -fcse-skip-blocks -fcx-fortran-rules @gol +@@ -9722,6 +9722,10 @@ extreme situations this may result in unsafe behavior. + This option may generate better or worse code; results are highly dependent + on the structure of loops within the source code. + ++@item -floop-crc ++@opindex floop-crc ++Do the loop crc conversion ++ + @item -fdce + @opindex fdce + Perform dead code elimination (DCE) on RTL@. +diff --git a/gcc/match.pd b/gcc/match.pd +index 01f81b063..87b316953 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -3487,6 +3487,175 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + ) + #endif + ++ ++#if GIMPLE ++(if (canonicalize_math_p ()) ++/* These patterns are mostly used by PHIOPT to move some operations outside of ++ the if statements. They should be done late because it gives jump threading ++ and few other passes to reduce what is going on. */ ++/* a ? x op POW2 : x -> x op (a ? POW2 : 0). */ ++ (for op (plus minus bit_ior bit_xor lshift rshift lrotate rrotate) ++ (simplify ++ (cond @0 (op:s @1 INTEGER_CST@2) @1) ++ /* powerof2cst */ ++ (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@2)) ++ (with { ++ tree shift = build_int_cst (integer_type_node, tree_log2 (@2)); ++ } ++ (op @1 (lshift (convert (convert:boolean_type_node @0)) { shift; }))) ++ ) ++ ) ++ ) ++) ++#endif ++ ++#if GIMPLE ++/* These patterns are mostly used by FORWPROP to move some operations outside of ++ the if statements. They should be done late because it gives jump threading ++ and few other passes to reduce what is going on. 
*/ ++/* Mul64 is defined as a multiplication algorithm which multiplies two 64-bit integers into one 128-bit integer ++ (i64 ResLo, i64 ResHi) = Mul64(i64 In0, i64 In1) { ++ In0Lo = In0(D) & 4294967295; ++ In0Hi = In0(D) >> 32; ++ In1Lo = In1(D) & 4294967295; ++ In1Hi = In1(D) >> 32; ++ Mull_01 = In0Hi * In1Lo; ++ Addc = In0Lo * In1Hi + Mull_01; ++ addc32 = Addc << 32; ++ ResLo = In0Lo * In1Lo + addc32; ++ ResHi = ((long unsigned int) (addc32 > ResLo)) + ++ (((long unsigned int) (Mull_01 > Addc)) << 32) + (Addc >> 32) + In0Hi * In1Hi; ++ } */ ++ (simplify ++ (plus ++ (plus ++ (convert ++ (gt @10 ++ (plus ++ (mult @4 @6) ++ (lshift@10 @9 @3)))) ++ (lshift ++ (convert ++ (gt @8 @9)) @3)) ++ (plus@11 ++ (rshift ++ (plus@9 ++ (mult (bit_and@4 SSA_NAME@0 @2) @7) ++ (mult@8 @5 (bit_and@6 SSA_NAME@1 INTEGER_CST@2))) @3) ++ (mult (rshift@5 SSA_NAME@0 @3) ++ (rshift@7 SSA_NAME@1 INTEGER_CST@3)))) ++ (if (INTEGRAL_TYPE_P (type) && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1) && ++ TYPE_PRECISION (type) == 64) ++ (with { ++ tree i128_type = build_nonstandard_integer_type (128, TYPE_UNSIGNED (type)); ++ tree shift = build_int_cst (integer_type_node, 64); ++ //direct_internal_fn_supported_p (UMULH, type, OPTIMIZE_FOR_BOTH) ++ } ++ (convert:type (rshift ++ (mult (convert:i128_type @0) (convert:i128_type @1)) { shift; }))) ++ ) ++ ) ++ ++ /* (i64 ResLo, i64 ResHi) = Mul64(i64 In0, i64 In1) { ++ In0Lo = In0(D) & 4294967295; ++ In0Hi = In0(D) >> 32; ++ In1Lo = In1(D) & 4294967295; ++ In1Hi = In1(D) >> 32; ++ Mull_01 = In0Hi * In1Lo; ++ Addc = In0Lo * In1Hi + Mull_01; ++ addc32 = Addc << 32; ++ ResLo = In0(D) * In1(D); ++ ResHi = ((long unsigned int) (addc32 > ResLo)) + ++ (((long unsigned int) (Mull_01 > Addc)) << 32) + (Addc >> 32) + In0Hi * In1Hi; ++ } */ ++ (simplify ++ (plus ++ (plus ++ (convert ++ (gt (lshift@10 @9 @3) ++ (mult @0 @1))) ++ (lshift ++ (convert ++ (gt @8 @9)) @3)) ++ (plus@11 ++ (rshift ++ (plus@9 ++ (mult (bit_and@4 SSA_NAME@0 @2) @7) ++ (mult@8 @5
(bit_and@6 SSA_NAME@1 INTEGER_CST@2))) @3) ++ (mult (rshift@5 SSA_NAME@0 @3) ++ (rshift@7 SSA_NAME@1 INTEGER_CST@3)))) ++ (if (INTEGRAL_TYPE_P (type) && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1) && ++ TYPE_PRECISION (type) == 64) ++ (with { ++ tree i128_type = build_nonstandard_integer_type (128, TYPE_UNSIGNED (type)); ++ tree shift = build_int_cst (integer_type_node, 64); ++ //direct_internal_fn_supported_p (UMULH, type, OPTIMIZE_FOR_BOTH) ++ } ++ (convert:type (rshift ++ (mult (convert:i128_type @0) (convert:i128_type @1)) { shift; }))) ++ ) ++ ) ++#endif ++ ++#if GIMPLE ++/* These patterns are mostly used by FORWPROP to move some operations outside of ++ the if statements. They should be done late because it gives jump threading ++ and few other passes to reduce what is going on. */ ++ /* ++ In0Lo = In0(D) & 4294967295; ++ In0Hi = In0(D) >> 32; ++ In1Lo = In1(D) & 4294967295; ++ In1Hi = In1(D) >> 32; ++ Addc = In0Lo * In1Hi + In0Hi * In1Lo; ++ addc32 = Addc << 32; ++ ResLo = In0Lo * In1Lo + addc32 ++ */ ++ (simplify ++ (plus (mult @4 @5) ++ (lshift ++ (plus ++ (mult (bit_and@4 SSA_NAME@0 @2) (rshift SSA_NAME@1 @3)) ++ (mult (rshift SSA_NAME@0 @3) (bit_and@5 SSA_NAME@1 INTEGER_CST@2))) INTEGER_CST@3)) ++ (if (INTEGRAL_TYPE_P (type) && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1) && ++ TYPE_PRECISION (type) == 64) ++ (with { ++ tree i128_type = build_nonstandard_integer_type (128, TYPE_UNSIGNED (type)); ++ tree shift = build_int_cst (integer_type_node, 64); ++ //direct_internal_fn_supported_p (UMULH, type, OPTIMIZE_FOR_BOTH) ++ } ++ (mult (convert:type @0) (convert:type @1))) ++ ) ++ ) ++#endif ++ ++ ++#if GIMPLE ++/* Try to match */ ++ /* ++_4 = (int) _3; //NOP_EXPR (SSA_NAME @2) ++_5 = _4 ^ c_10; //BIT_XOR_EXPR (SSA_NAME@1, SSA_NAME) ++_6 = _5 & 255; //BIT_AND_EXPR (SSA_NAME, INTEGER_CST@3) ++ */ ++(match (crc_match_index @1 @2 @3) ++ (bit_and (bit_xor (nop SSA_NAME@2) SSA_NAME@1) INTEGER_CST@3) ++ (if (INTEGRAL_TYPE_P (type) &&
tree_to_uhwi(@3) == 255)) ++) ++ ++#endif ++ ++#if GIMPLE ++/* Try to match */ ++ /* ++_8 = c_12 >> 8; // RSHIFT_EXPR (SSA_NAME @1, INTEGER_CST @2) ++c_19 = _7 ^ _8; // BIT_XOR_EXPR (SSA_NAME@3, SSA_NAME) ++ */ ++(match (crc_match_res @1 @2 @3) ++ (bit_xor SSA_NAME@3 (rshift SSA_NAME@1 INTEGER_CST@2)) ++ (if (INTEGRAL_TYPE_P (type) && tree_to_uhwi(@2) == 8)) ++) ++ ++#endif ++ + /* Simplification moved from fold_cond_expr_with_comparison. It may also + be extended. */ + /* This pattern implements two kinds simplification: +diff --git a/gcc/passes.def b/gcc/passes.def +index ea50db086..7abd946ce 100644 +--- a/gcc/passes.def ++++ b/gcc/passes.def +@@ -92,6 +92,7 @@ along with GCC; see the file COPYING3. If not see + NEXT_PASS (pass_cd_dce); + NEXT_PASS (pass_phiopt, true /* early_p */); + NEXT_PASS (pass_array_widen_compare); ++ NEXT_PASS (pass_loop_crc); + NEXT_PASS (pass_tail_recursion); + NEXT_PASS (pass_convert_switch); + NEXT_PASS (pass_cleanup_eh); +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c +new file mode 100644 +index 000000000..07f9e01ec +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c +@@ -0,0 +1,85 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -floop-crc -fdump-tree-loop_crc-details" } */ ++ ++#include ++#include ++typedef unsigned long ulg; ++typedef unsigned char uch; ++ ++static const ulg crc_32_tab[] = { ++ 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, ++ 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, ++ 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, ++ 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, ++ 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, ++ 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, ++ 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, ++ 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 
0xd20d85fdL, 0xa50ab56bL, ++ 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, ++ 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, ++ 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, ++ 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, ++ 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, ++ 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, ++ 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, ++ 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, ++ 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, ++ 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, ++ 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, ++ 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, ++ 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, ++ 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, ++ 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, ++ 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, ++ 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, ++ 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, ++ 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, ++ 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, ++ 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, ++ 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, ++ 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, ++ 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, ++ 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, ++ 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, ++ 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, ++ 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, ++ 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, 
++ 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, ++ 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, ++ 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, ++ 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, ++ 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, ++ 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, ++ 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, ++ 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, ++ 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, ++ 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, ++ 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, ++ 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, ++ 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, ++ 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, ++ 0x2d02ef8dL ++}; ++ ++ulg updcrc(s, n) ++ uch *s; /* pointer to bytes to pump through */ ++ unsigned n; /* number of bytes in s[] */ ++{ ++ register ulg c; /* temporary variable */ ++ ++ static ulg crc = (ulg)0xffffffffL; /* shift register contents */ ++ ++ if (s == NULL) { ++ c = 0xffffffffL; ++ } else { ++ c = crc; ++ if (n) ++ do { ++ c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8); ++ } while (--n); ++ } ++ crc = c; ++ return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ ++} ++/* { dg-final { scan-tree-dump-times "Processing loop" 1 "loop_crc"} } */ ++/* { dg-final { scan-tree-dump-times "the loop can be optimized" 1 "loop_crc"} } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c.042t.loop_crc b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c.042t.loop_crc +new file mode 100644 +index 000000000..c726059f3 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c.042t.loop_crc +@@ -0,0 +1,90 @@ ++ ++;; Function updcrc (updcrc, funcdef_no=0, decl_uid=3687, cgraph_uid=1, 
symbol_order=1) ++ ++;; 2 loops found ++;; ++;; Loop 0 ++;; header 0, latch 1 ++;; depth 0, outer -1 ++;; nodes: 0 1 2 3 6 4 7 5 ++;; ++;; Loop 1 ++;; header 4, latch 7 ++;; depth 1, outer 0 ++;; nodes: 4 7 ++;; 2 succs { 5 3 } ++;; 3 succs { 6 5 } ++;; 6 succs { 4 } ++;; 4 succs { 7 5 } ++;; 7 succs { 4 } ++;; 5 succs { 1 } ++ ++Starting the loop_crc pass ++====================================== ++Processing loop 1: ++====================================== ++;; ++;; Loop 1 ++;; header 4, latch 7 ++;; depth 1, outer 0 ++;; nodes: 4 7 ++ ++ ++The 1th loop form is success matched,and the loop can be optimized. ++updcrc (uch * s, unsigned int n) ++{ ++ static ulg crc = 4294967295; ++ register ulg c; ++ unsigned char _2; ++ long unsigned int _3; ++ long unsigned int _4; ++ long unsigned int _5; ++ long unsigned int _6; ++ long unsigned int _7; ++ ulg _21; ++ ++ : ++ if (s_12(D) == 0B) ++ goto ; [INV] ++ else ++ goto ; [INV] ++ ++ : ++ c_14 = crc; ++ if (n_15(D) != 0) ++ goto ; [INV] ++ else ++ goto ; [INV] ++ ++ : ++ ++ : ++ # s_8 = PHI ++ # n_9 = PHI ++ # c_10 = PHI ++ s_16 = s_8 + 1; ++ _2 = *s_8; ++ _3 = (long unsigned int) _2; ++ _4 = _3 ^ c_10; ++ _5 = _4 & 255; ++ _6 = crc_32_tab[_5]; ++ _7 = c_10 >> 8; ++ c_17 = _6 ^ _7; ++ n_18 = n_9 + 4294967295; ++ if (n_18 != 0) ++ goto ; [INV] ++ else ++ goto ; [INV] ++ ++ : ++ goto ; [100.00%] ++ ++ : ++ # c_11 = PHI <4294967295(2), c_14(3), c_17(4)> ++ crc = c_11; ++ _21 = c_11 ^ 4294967295; ++ return _21; ++ ++} ++ ++ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-2.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-2.c +new file mode 100644 +index 000000000..f73c4d550 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-2.c +@@ -0,0 +1,88 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -floop-crc -fdump-tree-loop_crc-details" } */ ++ ++#include ++#include ++typedef unsigned long ulg; ++typedef unsigned char uch; ++ ++static const ulg crc_32_tab[] = { ++ 
0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, ++ 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, ++ 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, ++ 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, ++ 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, ++ 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, ++ 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, ++ 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, ++ 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, ++ 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, ++ 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, ++ 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, ++ 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, ++ 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, ++ 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, ++ 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, ++ 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, ++ 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, ++ 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, ++ 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, ++ 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, ++ 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, ++ 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, ++ 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, ++ 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, ++ 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, ++ 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, ++ 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, ++ 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, ++ 0x8708a3d2L, 0x1e01f268L, 
0x6906c2feL, 0xf762575dL, 0x806567cbL, ++ 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, ++ 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, ++ 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, ++ 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, ++ 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, ++ 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, ++ 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, ++ 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, ++ 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, ++ 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, ++ 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, ++ 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, ++ 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, ++ 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, ++ 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, ++ 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, ++ 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, ++ 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, ++ 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, ++ 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, ++ 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, ++ 0x2d02ef8dL ++}; ++ ++ulg updcrc(s, n) ++ uch *s; /* pointer to bytes to pump through */ ++ unsigned n; /* number of bytes in s[] */ ++{ ++ register ulg c; /* temporary variable */ ++ ++ static ulg crc = (ulg)0xffffffffL; /* shift register contents */ ++ ++ if (s == NULL) { ++ c = 0xffffffffL; ++ } else { ++ c = crc; ++ if (n) ++ do { ++ c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8); ++ for (int i = 0; i < 5; i++) { ++ c++; ++ } ++ ++ } while (--n); ++ } ++ crc = c; ++ return c ^ 0xffffffffL; /* (instead of ~c for 64-bit 
machines) */ ++} ++/* { dg-final { scan-tree-dump-times "Wrong loop form for crc matching." 1 "loop_crc"} } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-3.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-3.c +new file mode 100644 +index 000000000..70eb1b814 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-3.c +@@ -0,0 +1,85 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -floop-crc -fdump-tree-loop_crc-details" } */ ++ ++#include ++#include ++typedef unsigned long ulg; ++typedef unsigned char uch; ++ ++static const ulg crc_32_tab[] = { ++ 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, ++ 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, ++ 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, ++ 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, ++ 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, ++ 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, ++ 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, ++ 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, ++ 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, ++ 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, ++ 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, ++ 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, ++ 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, ++ 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, ++ 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, ++ 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, ++ 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, ++ 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, ++ 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, ++ 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 
0xd4bb30e2L, ++ 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, ++ 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, ++ 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, ++ 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, ++ 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, ++ 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, ++ 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, ++ 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, ++ 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, ++ 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, ++ 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, ++ 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, ++ 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, ++ 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, ++ 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, ++ 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, ++ 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, ++ 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, ++ 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, ++ 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, ++ 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, ++ 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, ++ 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, ++ 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, ++ 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, ++ 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, ++ 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, ++ 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, ++ 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, ++ 
0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, ++ 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, ++ 0x2d02ef8dL ++}; ++ ++ulg updcrc(s, n) ++ uch *s; /* pointer to bytes to pump through */ ++ unsigned n; /* number of bytes in s[] */ ++{ ++ register ulg c; /* temporary variable */ ++ ++ static ulg crc = (ulg)0xffffffffL; /* shift register contents */ ++ ++ if (s == NULL) { ++ c = 0xffffffffL; ++ } else { ++ c = crc; ++ if (n) ++ do { ++ c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8); ++ } while (--n || c != 0) ; ++ } ++ crc = c; ++exit1: ++ return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ ++} ++/* { dg-final { scan-tree-dump-times "Wrong loop form for crc matching." 1 "loop_crc"} } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-4.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-4.c +new file mode 100644 +index 000000000..1d7e0a319 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-4.c +@@ -0,0 +1,89 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -floop-crc -fdump-tree-loop_crc-details" } */ ++ ++#include ++#include ++typedef unsigned long ulg; ++typedef unsigned char uch; ++ ++static const ulg crc_32_tab[] = { ++ 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, ++ 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, ++ 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, ++ 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, ++ 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, ++ 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, ++ 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, ++ 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, ++ 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, ++ 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, ++ 0xc8d75180L, 0xbfd06116L, 
0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, ++ 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, ++ 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, ++ 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, ++ 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, ++ 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, ++ 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, ++ 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, ++ 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, ++ 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, ++ 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, ++ 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, ++ 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, ++ 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, ++ 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, ++ 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, ++ 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, ++ 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, ++ 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, ++ 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, ++ 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, ++ 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, ++ 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, ++ 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, ++ 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, ++ 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, ++ 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, ++ 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, ++ 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, ++ 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 
0x05005713L, ++ 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, ++ 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, ++ 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, ++ 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, ++ 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, ++ 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, ++ 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, ++ 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, ++ 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, ++ 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, ++ 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, ++ 0x2d02ef8dL ++}; ++int test[5] = {0}; ++ ++ulg updcrc(s, n) ++ uch *s; /* pointer to bytes to pump through */ ++ unsigned n; /* number of bytes in s[] */ ++{ ++ register ulg c; /* temporary variable */ ++ ++ static ulg crc = (ulg)0xffffffffL; /* shift register contents */ ++ ++ if (s == NULL) { ++ c = 0xffffffffL; ++ } else { ++ c = crc; ++ if (n) ++ do { ++ c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8) * test[c%5]; ++ } while (--n) ; ++ } ++ do { ++ c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8); ++ test[c%5] = c; ++ } while (--n) ; ++ crc = c; ++ return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ ++} ++/* { dg-final { scan-tree-dump-times "Table check fail. not only single array is read." 
2 "loop_crc"} } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-4.c.042t.loop_crc b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-4.c.042t.loop_crc +new file mode 100644 +index 000000000..e69de29bb +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.c +new file mode 100644 +index 000000000..71b25f537 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.c +@@ -0,0 +1,156 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -floop-crc -fdump-tree-loop_crc-details" } */ ++ ++#include ++#include ++typedef unsigned long ulg; ++typedef unsigned char uch; ++ ++static const ulg crc_32_tab[] = { ++ 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, ++ 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, ++ 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, ++ 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, ++ 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, ++ 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, ++ 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, ++ 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, ++ 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, ++ 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, ++ 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, ++ 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, ++ 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, ++ 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, ++ 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, ++ 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, ++ 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, ++ 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 
0x65b0d9c6L, 0x12b7e950L, ++ 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, ++ 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, ++ 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, ++ 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, ++ 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, ++ 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, ++ 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, ++ 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, ++ 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, ++ 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, ++ 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, ++ 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, ++ 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, ++ 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, ++ 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, ++ 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, ++ 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, ++ 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, ++ 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, ++ 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, ++ 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, ++ 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, ++ 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, ++ 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, ++ 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, ++ 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, ++ 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, ++ 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, ++ 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, 
++ 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, ++ 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, ++ 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, ++ 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, ++ 0x2d02ef8dL ++}; ++ ++int test[5] = {0}; ++ ++ulg updcrc(s, n) ++ uch *s; ++ unsigned n; ++{ ++ register ulg c; ++ ++ static ulg crc = (ulg)0xffffffffL; ++ int a = 0; ++ if (s == NULL) { ++ c = 0xffffffffL; ++ } else { ++ c = crc; ++ if (n) ++ do { ++ a++; ++ c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8) ; ++ } while (--n) ; ++ } ++ crc = c; ++ return c ^ 0xffffffffL*a; ++} ++ ++ulg updcrc1(s, n) ++ uch *s; ++ unsigned n; ++{ ++ register ulg c; ++ ++ static ulg crc = (ulg)0xffffffffL; ++ if (s == NULL) { ++ c = 0xffffffffL; ++ } else { ++ c = crc; ++ unsigned n_back = n; ++ if (n) ++ do { ++ c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8) ; ++ n = n - 2; ++ } while (n != 0) ; ++ } ++ ++ crc = c; ++ return c ^ 0xffffffffL; ++} ++ ++ulg updcrc2(s, n) ++ uch *s; ++ unsigned n; ++{ ++ register ulg c; ++ ++ static ulg crc = (ulg)0xffffffffL; ++ if (s == NULL) { ++ c = 0xffffffffL; ++ } else { ++ c = crc; ++ unsigned n_back = n; ++ if (n) ++ do { ++ c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8) + 1; ++ } while (--n) ; ++ } ++ ++ crc = c; ++ return c ^ 0xffffffffL; ++} ++/* ++ulg updcrc3(s, n) ++ uch *s; ++ int n; ++{ ++ register ulg c; ++ ++ static ulg crc = (ulg)0xffffffffL; ++ if (s == NULL) { ++ c = 0xffffffffL; ++ } else { ++ c = crc; ++ do { ++ c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8); ++ --n; ++ } while (n ) ; ++ } ++ ++ crc = c; ++ return c ^ 0xffffffffL; ++}*/ ++/* { dg-final { scan-tree-dump-times "num of phi noeds check failed." 1 "loop_crc"} } */ ++/* { dg-final { scan-tree-dump-times "evolution pattern check failed." 1 "loop_crc"} } */ ++/* { dg-final { scan-tree-dump-times "calculation pattern check failed." 
1 "loop_crc"} } */ ++ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.c.042t.loop_crc b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.c.042t.loop_crc +new file mode 100644 +index 000000000..6d52a8684 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.c.042t.loop_crc +@@ -0,0 +1,64 @@ ++ ++;; Function updcrc3 (updcrc3, funcdef_no=0, decl_uid=3687, cgraph_uid=1, symbol_order=1) ++ ++;; 2 loops found ++;; ++;; Loop 0 ++;; header 0, latch 1 ++;; depth 0, outer -1 ++;; nodes: 0 1 2 3 4 5 ++;; ++;; Loop 1 ++;; header 4, latch 4 ++;; depth 1, outer 0 ++;; nodes: 4 ++;; 2 succs { 5 3 } ++;; 3 succs { 4 5 } ++;; 4 succs { 4 } ++;; 5 succs { 1 } ++ ++Starting the loop_crc pass ++====================================== ++Processing loop 1: ++====================================== ++;; ++;; Loop 1 ++;; header 4, latch 4 ++;; depth 1, outer 0 ++;; nodes: 4 ++ ++ ++ ++Wrong loop form for crc matching. ++updcrc3 (uch * s, unsigned int n) ++{ ++ unsigned int n_back; ++ static ulg crc = 4294967295; ++ register ulg c; ++ ulg _22; ++ ++ : ++ if (s_12(D) == 0B) ++ goto ; [INV] ++ else ++ goto ; [INV] ++ ++ : ++ c_14 = crc; ++ if (n_15(D) != 0) ++ goto ; [INV] ++ else ++ goto ; [INV] ++ ++ : ++ goto ; [100.00%] ++ ++ : ++ # c_11 = PHI <4294967295(2), c_14(3)> ++ crc = c_11; ++ _22 = c_11 ^ 4294967295; ++ return _22; ++ ++} ++ ++ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.s b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.s +new file mode 100644 +index 000000000..cae934bfe +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.s +@@ -0,0 +1,329 @@ ++ .arch armv8-a ++ .file "loop-crc-calculation-check-fail.c" ++ .text ++ .section .rodata ++ .align 3 ++ .type crc_32_tab, %object ++ .size crc_32_tab, 2048 ++crc_32_tab: ++ .xword 0 ++ .xword 1996959894 ++ .xword 3993919788 ++ .xword 2567524794 ++ .xword 124634137 ++ .xword 1886057615 ++ 
.xword 3915621685 ++ .xword 2657392035 ++ .xword 249268274 ++ .xword 2044508324 ++ .xword 3772115230 ++ .xword 2547177864 ++ .xword 162941995 ++ .xword 2125561021 ++ .xword 3887607047 ++ .xword 2428444049 ++ .xword 498536548 ++ .xword 1789927666 ++ .xword 4089016648 ++ .xword 2227061214 ++ .xword 450548861 ++ .xword 1843258603 ++ .xword 4107580753 ++ .xword 2211677639 ++ .xword 325883990 ++ .xword 1684777152 ++ .xword 4251122042 ++ .xword 2321926636 ++ .xword 335633487 ++ .xword 1661365465 ++ .xword 4195302755 ++ .xword 2366115317 ++ .xword 997073096 ++ .xword 1281953886 ++ .xword 3579855332 ++ .xword 2724688242 ++ .xword 1006888145 ++ .xword 1258607687 ++ .xword 3524101629 ++ .xword 2768942443 ++ .xword 901097722 ++ .xword 1119000684 ++ .xword 3686517206 ++ .xword 2898065728 ++ .xword 853044451 ++ .xword 1172266101 ++ .xword 3705015759 ++ .xword 2882616665 ++ .xword 651767980 ++ .xword 1373503546 ++ .xword 3369554304 ++ .xword 3218104598 ++ .xword 565507253 ++ .xword 1454621731 ++ .xword 3485111705 ++ .xword 3099436303 ++ .xword 671266974 ++ .xword 1594198024 ++ .xword 3322730930 ++ .xword 2970347812 ++ .xword 795835527 ++ .xword 1483230225 ++ .xword 3244367275 ++ .xword 3060149565 ++ .xword 1994146192 ++ .xword 31158534 ++ .xword 2563907772 ++ .xword 4023717930 ++ .xword 1907459465 ++ .xword 112637215 ++ .xword 2680153253 ++ .xword 3904427059 ++ .xword 2013776290 ++ .xword 251722036 ++ .xword 2517215374 ++ .xword 3775830040 ++ .xword 2137656763 ++ .xword 141376813 ++ .xword 2439277719 ++ .xword 3865271297 ++ .xword 1802195444 ++ .xword 476864866 ++ .xword 2238001368 ++ .xword 4066508878 ++ .xword 1812370925 ++ .xword 453092731 ++ .xword 2181625025 ++ .xword 4111451223 ++ .xword 1706088902 ++ .xword 314042704 ++ .xword 2344532202 ++ .xword 4240017532 ++ .xword 1658658271 ++ .xword 366619977 ++ .xword 2362670323 ++ .xword 4224994405 ++ .xword 1303535960 ++ .xword 984961486 ++ .xword 2747007092 ++ .xword 3569037538 ++ .xword 1256170817 ++ .xword 1037604311 ++ .xword 
2765210733 ++ .xword 3554079995 ++ .xword 1131014506 ++ .xword 879679996 ++ .xword 2909243462 ++ .xword 3663771856 ++ .xword 1141124467 ++ .xword 855842277 ++ .xword 2852801631 ++ .xword 3708648649 ++ .xword 1342533948 ++ .xword 654459306 ++ .xword 3188396048 ++ .xword 3373015174 ++ .xword 1466479909 ++ .xword 544179635 ++ .xword 3110523913 ++ .xword 3462522015 ++ .xword 1591671054 ++ .xword 702138776 ++ .xword 2966460450 ++ .xword 3352799412 ++ .xword 1504918807 ++ .xword 783551873 ++ .xword 3082640443 ++ .xword 3233442989 ++ .xword 3988292384 ++ .xword 2596254646 ++ .xword 62317068 ++ .xword 1957810842 ++ .xword 3939845945 ++ .xword 2647816111 ++ .xword 81470997 ++ .xword 1943803523 ++ .xword 3814918930 ++ .xword 2489596804 ++ .xword 225274430 ++ .xword 2053790376 ++ .xword 3826175755 ++ .xword 2466906013 ++ .xword 167816743 ++ .xword 2097651377 ++ .xword 4027552580 ++ .xword 2265490386 ++ .xword 503444072 ++ .xword 1762050814 ++ .xword 4150417245 ++ .xword 2154129355 ++ .xword 426522225 ++ .xword 1852507879 ++ .xword 4275313526 ++ .xword 2312317920 ++ .xword 282753626 ++ .xword 1742555852 ++ .xword 4189708143 ++ .xword 2394877945 ++ .xword 397917763 ++ .xword 1622183637 ++ .xword 3604390888 ++ .xword 2714866558 ++ .xword 953729732 ++ .xword 1340076626 ++ .xword 3518719985 ++ .xword 2797360999 ++ .xword 1068828381 ++ .xword 1219638859 ++ .xword 3624741850 ++ .xword 2936675148 ++ .xword 906185462 ++ .xword 1090812512 ++ .xword 3747672003 ++ .xword 2825379669 ++ .xword 829329135 ++ .xword 1181335161 ++ .xword 3412177804 ++ .xword 3160834842 ++ .xword 628085408 ++ .xword 1382605366 ++ .xword 3423369109 ++ .xword 3138078467 ++ .xword 570562233 ++ .xword 1426400815 ++ .xword 3317316542 ++ .xword 2998733608 ++ .xword 733239954 ++ .xword 1555261956 ++ .xword 3268935591 ++ .xword 3050360625 ++ .xword 752459403 ++ .xword 1541320221 ++ .xword 2607071920 ++ .xword 3965973030 ++ .xword 1969922972 ++ .xword 40735498 ++ .xword 2617837225 ++ .xword 3943577151 ++ .xword 
1913087877 ++ .xword 83908371 ++ .xword 2512341634 ++ .xword 3803740692 ++ .xword 2075208622 ++ .xword 213261112 ++ .xword 2463272603 ++ .xword 3855990285 ++ .xword 2094854071 ++ .xword 198958881 ++ .xword 2262029012 ++ .xword 4057260610 ++ .xword 1759359992 ++ .xword 534414190 ++ .xword 2176718541 ++ .xword 4139329115 ++ .xword 1873836001 ++ .xword 414664567 ++ .xword 2282248934 ++ .xword 4279200368 ++ .xword 1711684554 ++ .xword 285281116 ++ .xword 2405801727 ++ .xword 4167216745 ++ .xword 1634467795 ++ .xword 376229701 ++ .xword 2685067896 ++ .xword 3608007406 ++ .xword 1308918612 ++ .xword 956543938 ++ .xword 2808555105 ++ .xword 3495958263 ++ .xword 1231636301 ++ .xword 1047427035 ++ .xword 2932959818 ++ .xword 3654703836 ++ .xword 1088359270 ++ .xword 936918000 ++ .xword 2847714899 ++ .xword 3736837829 ++ .xword 1202900863 ++ .xword 817233897 ++ .xword 3183342108 ++ .xword 3401237130 ++ .xword 1404277552 ++ .xword 615818150 ++ .xword 3134207493 ++ .xword 3453421203 ++ .xword 1423857449 ++ .xword 601450431 ++ .xword 3009837614 ++ .xword 3294710456 ++ .xword 1567103746 ++ .xword 711928724 ++ .xword 3020668471 ++ .xword 3272380065 ++ .xword 1510334235 ++ .xword 755167117 ++ .text ++ .align 2 ++ .global updcrc3 ++ .type updcrc3, %function ++updcrc3: ++.LFB0: ++ .cfi_startproc ++ str x19, [sp, -48]! 
++ .cfi_def_cfa_offset 48 ++ .cfi_offset 19, -48 ++ str x0, [sp, 24] ++ str w1, [sp, 20] ++ ldr x0, [sp, 24] ++ cmp x0, 0 ++ bne .L2 ++ mov x19, 4294967295 ++ b .L3 ++.L2: ++ adrp x0, crc.0 ++ add x0, x0, :lo12:crc.0 ++ ldr x19, [x0] ++ ldr w0, [sp, 20] ++ str w0, [sp, 44] ++ ldr w0, [sp, 20] ++ cmp w0, 0 ++ beq .L3 ++.L4: ++ ldr x0, [sp, 24] ++ add x1, x0, 1 ++ str x1, [sp, 24] ++ ldrb w0, [x0] ++ and x0, x0, 255 ++ eor x0, x19, x0 ++ and x1, x0, 255 ++ adrp x0, crc_32_tab ++ add x0, x0, :lo12:crc_32_tab ++ ldr x1, [x0, x1, lsl 3] ++ lsr x0, x19, 8 ++ eor x19, x1, x0 ++ ldr w0, [sp, 20] ++ sub w0, w0, #1 ++ str w0, [sp, 20] ++ ldr w0, [sp, 20] ++ cmp w0, 999 ++ bls .L4 ++.L3: ++ adrp x0, crc.0 ++ add x0, x0, :lo12:crc.0 ++ str x19, [x0] ++ eor x0, x19, 4294967295 ++ ldr x19, [sp], 48 ++ .cfi_restore 19 ++ .cfi_def_cfa_offset 0 ++ ret ++ .cfi_endproc ++.LFE0: ++ .size updcrc3, .-updcrc3 ++ .data ++ .align 3 ++ .type crc.0, %object ++ .size crc.0, 8 ++crc.0: ++ .xword 4294967295 ++ .ident "GCC: (Kunpeng gcc 10.3.1-2.3.0.b006) 10.3.1" ++ .section .note.GNU-stack,"",@progbits +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail.c +new file mode 100644 +index 000000000..b59704e31 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail.c +@@ -0,0 +1,111 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -floop-crc -fdump-tree-loop_crc-details" } */ ++ ++#include ++#include ++typedef unsigned long ulg; ++typedef unsigned char uch; ++ ++static const ulg crc_32_tab[] = { ++ 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, ++ 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, ++ 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, ++ 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, ++ 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, ++ 0x646ba8c0L, 
0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, ++ 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, ++ 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, ++ 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, ++ 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, ++ 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, ++ 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, ++ 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, ++ 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, ++ 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, ++ 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, ++ 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, ++ 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, ++ 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, ++ 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, ++ 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, ++ 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, ++ 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, ++ 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, ++ 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, ++ 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, ++ 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, ++ 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, ++ 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, ++ 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, ++ 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, ++ 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, ++ 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, ++ 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, ++ 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 
0xa867df55L, 0x316e8eefL, ++ 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, ++ 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, ++ 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, ++ 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, ++ 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, ++ 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, ++ 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, ++ 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, ++ 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, ++ 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, ++ 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, ++ 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, ++ 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, ++ 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, ++ 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, ++ 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, ++ 0x2d02ef8dL ++}; ++ ++/* check when the loop have a innor loop, should fail. */ ++ulg updcrc(s, n) ++ uch *s; /* pointer to bytes to pump through */ ++ unsigned n; /* number of bytes in s[] */ ++{ ++ register ulg c; /* temporary variable */ ++ ++ static ulg crc = (ulg)0xffffffffL; /* shift register contents */ ++ ++ if (s == NULL) { ++ c = 0xffffffffL; ++ } else { ++ c = crc; ++ if (n) ++ do { ++ c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8); ++ for (int i = 0; i < 5; i++) { ++ c++; ++ } ++ ++ } while (--n); ++ } ++ crc = c; ++ return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ ++} ++ ++/* check when the loop have a second backedge, should fail. 
*/ ++ulg updcrc1(s, n) ++ uch *s; /* pointer to bytes to pump through */ ++ unsigned n; /* number of bytes in s[] */ ++{ ++ register ulg c; /* temporary variable */ ++ ++ static ulg crc = (ulg)0xffffffffL; /* shift register contents */ ++ ++ if (s == NULL) { ++ c = 0xffffffffL; ++ } else { ++ c = crc; ++ if (n) ++ do { ++ c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8); ++ } while (--n || c != 0) ; ++ } ++ crc = c; ++ return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ ++} ++/* { dg-final { scan-tree-dump-times "Wrong loop form for crc matching." 2 "loop_crc"} } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-sucess.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-sucess.c +new file mode 100644 +index 000000000..e1e16eaf2 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-sucess.c +@@ -0,0 +1,84 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -floop-crc -fdump-tree-loop_crc-details" } */ ++ ++#include ++#include ++typedef unsigned long ulg; ++typedef unsigned char uch; ++ ++static const ulg crc_32_tab[] = { ++ 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, ++ 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, ++ 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, ++ 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, ++ 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, ++ 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, ++ 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, ++ 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, ++ 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, ++ 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, ++ 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, ++ 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, ++ 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 
0xb6662d3dL, 0x76dc4190L, ++ 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, ++ 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, ++ 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, ++ 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, ++ 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, ++ 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, ++ 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, ++ 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, ++ 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, ++ 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, ++ 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, ++ 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, ++ 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, ++ 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, ++ 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, ++ 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, ++ 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, ++ 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, ++ 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, ++ 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, ++ 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, ++ 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, ++ 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, ++ 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, ++ 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, ++ 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, ++ 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, ++ 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, ++ 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, 
++ 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, ++ 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, ++ 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, ++ 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, ++ 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, ++ 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, ++ 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, ++ 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, ++ 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, ++ 0x2d02ef8dL ++}; ++ ++ulg updcrc(s, n) ++ uch *s; /* pointer to bytes to pump through */ ++ unsigned n; /* number of bytes in s[] */ ++{ ++ register ulg c; /* temporary variable */ ++ ++ static ulg crc = (ulg)0xffffffffL; /* shift register contents */ ++ ++ if (s == NULL) { ++ c = 0xffffffffL; ++ } else { ++ c = crc; ++ if (n) ++ do { ++ c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8); ++ } while (--n); ++ } ++ crc = c; ++ return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ ++} ++/* { dg-final { scan-tree-dump-times "the loop can be optimized" 1 "loop_crc"} } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-table-check-fail.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-table-check-fail.c +new file mode 100644 +index 000000000..f03a4fa82 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-table-check-fail.c +@@ -0,0 +1,113 @@ ++/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ ++/* { dg-options "-O3 -mabi=lp64 -floop-crc -fdump-tree-loop_crc-details" } */ ++ ++#include ++#include ++typedef unsigned long ulg; ++typedef unsigned char uch; ++ ++static const ulg crc_32_tab[] = { ++ 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, ++ 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, ++ 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, ++ 0x90bf1d91L, 
0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, ++ 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, ++ 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, ++ 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, ++ 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, ++ 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, ++ 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, ++ 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, ++ 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, ++ 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, ++ 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, ++ 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, ++ 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, ++ 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, ++ 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, ++ 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, ++ 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, ++ 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, ++ 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, ++ 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, ++ 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, ++ 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, ++ 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, ++ 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, ++ 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, ++ 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, ++ 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, ++ 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, ++ 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, ++ 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 
0x4fdff252L, 0xd1bb67f1L, ++ 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, ++ 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, ++ 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, ++ 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, ++ 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, ++ 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, ++ 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, ++ 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, ++ 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, ++ 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, ++ 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, ++ 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, ++ 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, ++ 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, ++ 0x37d83bf1L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, ++ 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, ++ 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, ++ 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, ++ 0x2d02ef8dL ++}; ++int test[5] = {0}; ++ ++/* check when the loop is doing more then 1 array read or writing an array, both should fail. 
*/ ++ulg updcrc(s, n) ++ uch *s; /* pointer to bytes to pump through */ ++ unsigned n; /* number of bytes in s[] */ ++{ ++ register ulg c; /* temporary variable */ ++ ++ static ulg crc = (ulg)0xffffffffL; /* shift register contents */ ++ ++ if (s == NULL) { ++ c = 0xffffffffL; ++ } else { ++ c = crc; ++ if (n) ++ do { ++ c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8) * test[c%5]; ++ } while (--n) ; ++ } ++ do { ++ c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8); ++ test[c%5] = c; ++ } while (--n) ; ++ crc = c; ++ return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ ++} ++ ++/* check when the loop is not working on a correct crc_table. should fail. */ ++ulg updcrc1(s, n) ++ uch *s; /* pointer to bytes to pump through */ ++ unsigned n; /* number of bytes in s[] */ ++{ ++ register ulg c; /* temporary variable */ ++ ++ static ulg crc = (ulg)0xffffffffL; /* shift register contents */ ++ ++ if (s == NULL) { ++ c = 0xffffffffL; ++ } else { ++ c = crc; ++ if (n) ++ do { ++ c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8); ++ } while (--n) ; ++ } ++ crc = c; ++ return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ ++} ++/* { dg-final { scan-tree-dump-times "Table check fail. not only single array is read." 2 "loop_crc"} } */ ++/* { dg-final { scan-tree-dump-times "Table check fail. Table not matching." 
1 "loop_crc"} } */ +\ No newline at end of file +diff --git a/gcc/timevar.def b/gcc/timevar.def +index 2814b14f2..ba86a1b7b 100644 +--- a/gcc/timevar.def ++++ b/gcc/timevar.def +@@ -215,6 +215,7 @@ DEFTIMEVAR (TV_TREE_COPY_RENAME , "tree rename SSA copies") + DEFTIMEVAR (TV_TREE_SSA_VERIFY , "tree SSA verifier") + DEFTIMEVAR (TV_TREE_STMT_VERIFY , "tree STMT verifier") + DEFTIMEVAR (TV_TREE_ARRAY_WIDEN_COMPARE, "tree array widen compare") ++DEFTIMEVAR (TV_TREE_LOOP_CRC, "tree loop crc") + DEFTIMEVAR (TV_TREE_SWITCH_CONVERSION, "tree switch conversion") + DEFTIMEVAR (TV_TREE_SWITCH_LOWERING, "tree switch lowering") + DEFTIMEVAR (TV_TREE_RECIP , "gimple CSE reciprocals") +diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h +index 3cdc12466..027f8992d 100644 +--- a/gcc/tree-pass.h ++++ b/gcc/tree-pass.h +@@ -437,6 +437,7 @@ extern gimple_opt_pass *make_pass_phiopt (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_forwprop (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_phiprop (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_array_widen_compare (gcc::context *ctxt); ++extern gimple_opt_pass *make_pass_loop_crc (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_tree_ifcombine (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_dse (gcc::context *ctxt); + extern gimple_opt_pass *make_pass_nrv (gcc::context *ctxt); +diff --git a/gcc/tree-ssa-loop-crc.c b/gcc/tree-ssa-loop-crc.c +new file mode 100644 +index 000000000..4982384c6 +--- /dev/null ++++ b/gcc/tree-ssa-loop-crc.c +@@ -0,0 +1,644 @@ ++/* Array widen compare. ++ Copyright (C) 2022-2022 Free Software Foundation, Inc. ++ ++This file is part of GCC. ++ ++GCC is free software; you can redistribute it and/or modify it ++under the terms of the GNU General Public License as published by the ++Free Software Foundation; either version 3, or (at your option) any ++later version. 
++ ++GCC is distributed in the hope that it will be useful, but WITHOUT ++ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++for more details. ++ ++You should have received a copy of the GNU General Public License ++along with GCC; see the file COPYING3. If not see ++<http://www.gnu.org/licenses/>. */ ++ ++#include "config.h" ++#include "system.h" ++#include "coretypes.h" ++#include "backend.h" ++#include "target.h" ++#include "tree.h" ++#include "gimple.h" ++#include "tree-pass.h" ++#include "gimple-ssa.h" ++#include "tree-pretty-print.h" ++#include "fold-const.h" ++#include "gimplify.h" ++#include "gimple-iterator.h" ++#include "tree-ssa-loop-manip.h" ++#include "tree-ssa-loop.h" ++#include "ssa.h" ++#include "tree-into-ssa.h" ++#include "cfganal.h" ++#include "cfgloop.h" ++#include "gimple-pretty-print.h" ++#include "tree-cfg.h" ++#include "cgraph.h" ++#include "print-tree.h" ++#include "cfghooks.h" ++#include "gimple-fold.h" ++ ++/* Match.pd functions to match the crc index and result expressions. */ ++extern bool gimple_crc_match_index (tree, tree *, tree (*)(tree)); ++extern bool gimple_crc_match_res (tree, tree *, tree (*)(tree)); ++ ++static gimple *crc_table_read_stmt = NULL; ++ ++ ++/* The loop form check will check the entire loop control flow ++ It should be a loop that: ++ 1. a do-while loop with header and latch only with no other control flow inside the loop ++ 2. have only one exiting edge ++ 3. 
have only one back edge and one entry edge ++*/ ++static bool ++crc_loop_form_check (class loop *loop) ++{ ++ if (loop->num_nodes > 2 || loop->inner) ++ return false; ++ // should only have 1 exit edge ++ vec<edge> edges = get_loop_exit_edges (loop); ++ unsigned num_exits = edges.length (); ++ edges.release (); /* The returned vec is owned by the caller; free it on every path. */ ++ if (num_exits != 1) ++ return false; ++ // The header should have only 2 incoming edges ++ // One of them is the preheader edge and the other is the backedge from the latch ++ if (EDGE_COUNT (loop->header->preds) != 2) ++ return false; ++ edge e1 = EDGE_PRED (loop->header, 0); ++ edge e2 = EDGE_PRED (loop->header, 1); ++ ++ if ((e1->src == loop->latch && e2->src->loop_father != loop) ++ || (e2->src == loop->latch && e1->src->loop_father != loop)) ++ return true; ++ ++ return false; ++} ++ ++/* Scan the statements of LOOP's header. Fail if an assignment stores to an array element or if more than one assignment reads one. ++ On success store the single ARRAY_REF in CRC_TABLE and its statement in crc_table_read_stmt; a non-NULL CRC_TABLE on entry makes the first array read fail. */ ++static bool ++only_one_array_read (class loop *loop, tree &crc_table) ++{ ++ gimple_stmt_iterator gsi; ++ gimple *stmt; ++ bool res = false; ++ for (gsi = gsi_start_bb (loop->header); !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ stmt = gsi_stmt (gsi); ++ if (stmt == NULL) ++ return false; ++ ++ if (gimple_code (stmt) == GIMPLE_ASSIGN && ++ TREE_CODE(gimple_assign_lhs (stmt)) == ARRAY_REF ) ++ return false; ++ ++ if (gimple_code (stmt) == GIMPLE_ASSIGN && ++ TREE_CODE(gimple_assign_rhs1 (stmt)) == ARRAY_REF) ++ { ++ if (crc_table == NULL) ++ { ++ crc_table = gimple_assign_rhs1 (stmt); ++ crc_table_read_stmt = stmt; ++ res = true; ++ } ++ else ++ return false; ++ } ++ } ++ return res; ++} ++ ++static const unsigned HOST_WIDE_INT crc_32_tab[] = { ++ 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, ++ 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, ++ 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, ++ 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, ++ 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, ++ 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 
0x14015c4fL, 0x63066cd9L, ++ 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, ++ 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, ++ 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, ++ 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, ++ 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, ++ 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, ++ 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, ++ 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, ++ 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, ++ 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, ++ 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, ++ 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, ++ 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, ++ 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, ++ 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, ++ 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, ++ 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, ++ 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, ++ 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, ++ 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, ++ 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, ++ 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, ++ 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, ++ 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, ++ 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, ++ 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, ++ 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, ++ 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, ++ 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, 
++  0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L,
++  0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL,
++  0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L,
++  0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL,
++  0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L,
++  0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL,
++  0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L,
++  0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L,
++  0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL,
++  0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L,
++  0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L,
++  0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L,
++  0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L,
++  0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L,
++  0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L,
++  0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL,
++  0x2d02ef8dL
++};
++
++/* Check that CRC_TABLE is a [0, 255] array of 8-byte elements equal to crc_32_tab.  */
++static bool
++match_crc_table (tree crc_table)
++{
++  tree lb = array_ref_low_bound (crc_table);
++  tree ub = array_ref_up_bound (crc_table);
++  tree es = array_ref_element_size (crc_table);
++  if (!tree_fits_uhwi_p (lb) || !tree_fits_uhwi_p (ub) || !tree_fits_uhwi_p (es)
++      || tree_to_uhwi (lb) != 0 || tree_to_uhwi (ub) != 255 || tree_to_uhwi (es) != 8)
++    return false;
++  tree ctor = ctor_for_folding (TREE_OPERAND (crc_table, 0));
++  if (!ctor || TREE_CODE (ctor) != CONSTRUCTOR || CONSTRUCTOR_NELTS (ctor) != 256)
++    return false;   /* No usable initializer (NULL, error_mark_node, short).  */
++  for (unsigned int i = 0; i < 256; i++) {   /* All 256 entries, last included.  */
++    tree val = CONSTRUCTOR_ELT (ctor, i)->value;   /* Assumes index order.  */
++    if (!tree_fits_uhwi_p (val) || tree_to_uhwi (val) != crc_32_tab[i])
++      return false;
++  }
++  return true;
++}
++
++/* Check the crc table. The loop should have only one data reference.
++   And match the data reference with the predefined array. 
*/
++static bool
++crc_table_check (class loop *loop)
++{
++  tree crc_table = NULL;
++  if (!only_one_array_read (loop, crc_table))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        fprintf (dump_file, "\nTable check fail. not only single array is read.\n");
++      return false;
++    }
++  if (!match_crc_table (crc_table))
++    {
++      if (dump_file && (dump_flags & TDF_DETAILS))
++        fprintf (dump_file, "\nTable check fail. Table not matching.\n");
++      return false;
++    }
++  return true;
++}
++
++/* Return true if the latch value of PHI is SSA_NAME + TARGET (PLUS or POINTER_PLUS).  */
++static bool
++evolution_pattern_plus_with_p (class loop* loop, gphi *phi, unsigned HOST_WIDE_INT target)
++{
++  edge backedge = find_edge (loop->latch, loop->header);
++  if (backedge == NULL)
++    return false;
++  tree evolution_node = PHI_ARG_DEF_FROM_EDGE (phi, backedge);
++  if (TREE_CODE (evolution_node) != SSA_NAME)   /* e.g. a constant argument.  */
++    return false;
++  gimple *evolution_expr = SSA_NAME_DEF_STMT (evolution_node);
++  if (!evolution_expr || !is_gimple_assign (evolution_expr))
++    return false;   /* Defined by a PHI, call, or nothing: not an addition.  */
++  enum tree_code code = gimple_assign_rhs_code (evolution_expr);
++  if (code != PLUS_EXPR && code != POINTER_PLUS_EXPR)
++    return false;
++  tree rhs1 = gimple_assign_rhs1 (evolution_expr);
++  tree rhs2 = gimple_assign_rhs2 (evolution_expr);
++  return (TREE_CODE (rhs1) == SSA_NAME && tree_fits_uhwi_p (rhs2)
++          && tree_to_uhwi (rhs2) == target);   /* Negative steps do not fit.  */
++}
++
++/* Check whether there are only 3 phi nodes in the header block.
++   Return 3 phi nodes in the capture.  */
++static bool
++check_num_of_phi (basic_block header, gphi *capture[])
++{
++  gphi *phi;
++  gphi_iterator gsi;
++  int num_of_phi = 0;
++
++  for (gsi = gsi_start_phis (header); !gsi_end_p (gsi); gsi_next (&gsi))
++    {
++      phi = gsi.phi();
++      if (phi) num_of_phi++;
++      if (num_of_phi > 3)
++        return false;
++      capture[num_of_phi - 1] = phi;
++    }
++  /* phi node should be exactly 3.  */
++  return num_of_phi == 3;
++}
++
++/* Check the evolution pattern of three phi nodes. 
++ Should be one of the node +1 every time (s), one of the node -1 ++ every time (n), and a 3rd one neither (c). Return 3 phi nodes in ++ the capture with the order of s,n,c.*/ ++static bool ++check_evolution_pattern (class loop* loop, gphi *capture[]) ++{ ++ gphi *s=NULL; ++ gphi *n=NULL; ++ gphi *c=NULL; ++ ++ for (int i = 0; i < 3; i++) ++ { ++ if (evolution_pattern_plus_with_p(loop, capture[i], 1)) ++ { ++ if (s != NULL) ++ return false; ++ s = capture[i]; ++ } ++ else if (evolution_pattern_plus_with_p(loop, capture[i], 4294967295)) ++ { ++ if (n != NULL) ++ return false; ++ n = capture[i]; ++ } ++ else ++ { ++ if (c != NULL) ++ return false; ++ c = capture[i]; ++ } ++ } ++ ++ // some envolution pattern cannot find ++ if (!n || !s || !c) ++ return false; ++ ++ capture[0] = s; ++ capture[1] = n; ++ capture[2] = c; ++ return true; ++} ++/* check the calculation pattern before and after the crc_table array read stmt. ++ _7 = crc_32_tab[_6]; ++ The caculation of index _6 should be the result of a sequency of calculation by the s and c ++ The result of the array read _7 should be used to calculate the new c. 
*/
++static bool
++check_calculation_pattern (class loop* loop, gphi *capture[])
++{
++  gphi *s=capture[0];
++  gphi *c=capture[2];
++  tree res_ops[3];
++  tree index = TREE_OPERAND (gimple_assign_rhs1 (crc_table_read_stmt), 1);
++
++  /* Try to match (the value converted in _4 must be loaded through s)
++       _4 = (int) _3;  //NOP_EXPR (SSA_NAME @2)
++       _5 = _4 ^ c_10; //BIT_XOR_EXPR (SSA_NAME, PHI @1)
++       _6 = _5 & 255;  //BIT_AND_EXPR (SSA_NAME, INTEGER_CST@3)  */
++
++  if (!gimple_crc_match_index(index, res_ops, NULL))
++    return false;
++  if (res_ops[0] != gimple_phi_result (c) || TREE_CODE (res_ops[1]) != SSA_NAME)
++    return false;
++  gimple *s_res_stmt = SSA_NAME_DEF_STMT (res_ops[1]);
++  if (!gimple_assign_single_p (s_res_stmt)
++      || TREE_CODE (gimple_assign_rhs1 (s_res_stmt)) != MEM_REF
++      || TREE_OPERAND (gimple_assign_rhs1 (s_res_stmt), 0) != gimple_phi_result (s))
++    return false;
++
++  /* Try to match
++       _8 = c_12 >> 8;  // RSHIFT_EXPR (SSA_NAME @1, INTEGER_CST @2)
++       c_19 = _7 ^ _8;  // BIT_XOR_EXPR (SSA_NAME@3, SSA_NAME)  */
++  edge backedge = find_edge(loop->latch, loop->header);
++  tree updated_c = PHI_ARG_DEF_FROM_EDGE (c, backedge);
++  if (!gimple_crc_match_res(updated_c, res_ops, NULL))
++    return false;
++  if (res_ops[0] != gimple_phi_result (c)
++      || res_ops[2] != gimple_assign_lhs(crc_table_read_stmt))
++    return false;
++
++  return true;
++}
++
++/* Check that the header ends with "if (n_next != 0)", n_next being N's latch value.  */
++static bool
++check_exit_condition (class loop* loop, gphi *n)
++{
++  edge backedge = find_edge(loop->latch, loop->header);
++  gimple *cond_stmt = gsi_stmt (gsi_last_bb (loop->header));
++  if (!cond_stmt || gimple_code (cond_stmt) != GIMPLE_COND || gimple_cond_code (cond_stmt) != NE_EXPR
++      || gimple_cond_lhs (cond_stmt) != PHI_ARG_DEF_FROM_EDGE (n, backedge)
++      || !integer_zerop (gimple_cond_rhs (cond_stmt)))   /* Safe on non-constants.  */
++    return false;
++
++  return true;
++}
++
++/* Check the loop body. 
The loop body we are trying to match is
++
++# s_10 = PHI 
++# n_11 = PHI 
++# c_12 = PHI 
++_1 = (int) c_12;
++s_18 = s_10 + 1;
++_3 = *s_10;
++_4 = (int) _3;
++_5 = _1 ^ _4;
++_6 = _5 & 255;
++_7 = crc_32_tab[_6];
++_8 = c_12 >> 8;
++c_19 = _7 ^ _8;
++n_20 = n_11 + 4294967295;
++if (n_20 != 0)
++ goto ; [INV]
++else
++ goto ; [INV]
++
++which is doing a very simple calculation
++do {
++ c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8);
++} while (--n);
++
++In this case, we don't want this loop to have any other operation inside,
++so the matching conditions are
++1. There are only 3 loop variants during each iteration, namely s, c, n,
++ which is enforced by requiring that the loop has exactly 3 phi nodes.
++2. The 3 loop variants should have an evolution pattern where 1 of the 3 nodes is
++ increased by 1 every iteration, 1 of the 3 nodes is decreased by 1 every iteration
++ and the 3rd one is neither. These three SSA values will be captured for
++ the later arithmetic pattern matching
++3. Pattern matching for the index of crc_table
++4. Pattern matching for the result of the c calculation after the read from crc_table
++5. The exit condition matching. 
++ */ ++static bool ++crc_loop_body_check (class loop *loop) ++{ ++ basic_block header = loop->header; ++ gphi *capture[3]; ++ if (!check_num_of_phi(header, capture)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "\n num of phi noeds check failed.\n"); ++ return false; ++ } ++ if (!check_evolution_pattern(loop, capture)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "\n evolution pattern check failed.\n"); ++ return false; ++ } ++ if (!check_calculation_pattern(loop, capture)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "\n calculation pattern check failed.\n"); ++ return false; ++ } ++ if (!check_exit_condition(loop, capture[1] /* n*/)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "\n exit condition check failed.\n"); ++ return false; ++ } ++ return true; ++/* gphi *phi; ++ gphi_iterator gsi; ++ int num_of_phi = 0; ++ //s, n, c; ++ //only 3 phi nodes are there, every one of the phi nodes comming from 2 edge only, one from preheader, one from latch ++ // s increase by 1 every itoration ++ // n decrease by 1 every itoration ++ // The final one is c, which is the result, should be used for the start of the later pattern matching ++ for (gsi = gsi_start_phis(loop->header); !gsi_end_p(gsi); gsi_next(&gsi)) ++ { ++ phi = gsi.phi(); ++ ++ if (phi) num_of_phi++; ++ if (num_of_phi > 3) return false; // more then 3 phi node ++ if (gimple_phi_num_args(phi) > 2) // more than 2 edges other then one backedge and one preheader edge ++ return false; ++ //capture[num_of_phi - 1] = gimple_phi_result(phi); ++ capture[num_of_phi - 1] = phi; ++ } ++ if (num_of_phi != 3) return false; // phi node should be 3 */ ++ // Find the envolution pattern for s and n, try to match the identity of these variable ++/* gphi *s=NULL; ++ gphi *n=NULL; ++ gphi *c=NULL; ++ ++ for (int i = 0; i < 3; i++) ++ { ++ if (evolution_pattern_plus_with_p(loop, capture[i], 1)) ++ { ++ if(s != 
NULL) ++ return false; ++ s = capture[i]; ++ } ++ else if (evolution_pattern_plus_with_p(loop, capture[i], 4294967295)) ++ { ++ if(n != NULL) ++ return false; ++ n = capture[i]; ++ } ++ else ++ { ++ if(c != NULL) ++ return false; ++ c = capture[i]; ++ } ++ } ++ ++ // some envolution pattern cannot find ++ if (!n || !s || !c) ++ return false; ++ gphi *s=capture[0]; ++ gphi *n=capture[1]; ++ gphi *c=capture[2]; ++ tree res_ops[3]; ++ tree index = TREE_OPERAND (gimple_assign_rhs1 (crc_table_read_stmt), 1); ++ ++ /* Try to match ++ _1 = (int) c_12; //NOP_EXPR (SSA_NAME @1) ++ _4 = (int) _3; //NOP_EXPR (SSA_NAME @2) ++ _5 = _1 ^ _4; //BIT_XOR_EXPR (SSA_NAME, SSA_NAME) ++ _6 = _5 & 255; //BIT_XOR_EXPR (SSA_NAME, INTEGER_CST@3) ++ ++ ++ if (!gimple_crc_match_index(index, res_ops, NULL)) ++ return false; ++ gimple *s_res_stmt = SSA_NAME_DEF_STMT(res_ops[1]); ++ tree s_res = TREE_OPERAND(gimple_assign_rhs1(s_res_stmt),0); ++ if (res_ops[0] != gimple_phi_result (c) || ++ s_res != gimple_phi_result (s)) ++ return false; ++ ++ /* ++_8 = c_12 >> 8; // RSHIFT_EXPR (SSA_NAME @1, INTEGER_CST @2) ++c_19 = _7 ^ _8; // BIT_XOR_EXPR (SSA_NAME@3, SSA_NAME) ++ ++ edge backedge = find_edge(loop->latch, loop->header); ++ tree updated_c = PHI_ARG_DEF_FROM_EDGE (c, backedge); ++ if (!gimple_crc_match_res(updated_c, res_ops, NULL)) ++ return false; ++ if (res_ops[0] != gimple_phi_result (c) ++ || res_ops[2] != gimple_assign_lhs(crc_table_read_stmt)) ++ return false; ++ ++ // try match n as the induction variable ++ // The proceed condition for back edge is n != 0 ++ gimple *cond_stmt = gsi_stmt (gsi_last_bb (loop->header)); ++ if (!cond_stmt || gimple_code (cond_stmt) != GIMPLE_COND || gimple_cond_code (cond_stmt) != NE_EXPR ++ || gimple_cond_lhs (cond_stmt) != PHI_ARG_DEF_FROM_EDGE (n, backedge) ++ || tree_to_uhwi(gimple_cond_rhs (cond_stmt)) != 0) ++ return false; ++ ++ return true; ++ */ ++} ++ ++ ++static bool ++match_crc_loop (class loop *loop) ++{ ++ if (!crc_loop_form_check(loop)) ++ 
{ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "\nWrong loop form for crc matching.\n"); ++ return false; ++ } ++ if (!crc_table_check(loop)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "\nWrong crc table for crc matching.\n"); ++ return false; ++ } ++ if (!crc_loop_body_check(loop)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "\nWrong loop body for crc matching.\n"); ++ return false; ++ } ++ return true; ++} ++ ++/* The main entry of loop crc optimizes. */ ++static unsigned int ++tree_ssa_loop_crc () ++{ ++ unsigned int todo = 0; ++ class loop *loop; ++ ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ flow_loops_dump (dump_file, NULL, 1); ++ fprintf (dump_file, "\nStarting the loop_crc pass\n"); ++ } ++ ++ FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "======================================\n"); ++ fprintf (dump_file, "Processing loop %d:\n", loop->num); ++ fprintf (dump_file, "======================================\n"); ++ flow_loop_dump (loop, dump_file, NULL, 1); ++ fprintf (dump_file, "\n\n"); ++ } ++ ++ if (match_crc_loop (loop)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "The %dth loop form is success matched," ++ "and the loop can be optimized.\n", ++ loop->num); ++ } ++ ++ convert_to_new_loop (loop); ++ } ++ } ++ ++ todo |= (TODO_update_ssa); ++ return todo; ++} ++ ++/* Loop crc. 
*/ ++ ++namespace { ++ ++const pass_data pass_data_tree_loop_crc = ++{ ++ GIMPLE_PASS, ++ "loop_crc", ++ OPTGROUP_LOOP, ++ TV_TREE_LOOP_CRC, ++ (PROP_cfg | PROP_ssa), ++ 0, ++ 0, ++ 0, ++ (TODO_update_ssa | TODO_verify_all) ++}; ++ ++class pass_loop_crc : public gimple_opt_pass ++{ ++public: ++ pass_loop_crc (gcc::context *ctxt) ++ : gimple_opt_pass (pass_data_tree_loop_crc, ctxt) ++ {} ++ ++ /* opt_pass methods: */ ++ virtual bool gate (function *); ++ virtual unsigned int execute (function *); ++ ++}; // class pass_loop_crc ++ ++bool ++pass_loop_crc::gate (function *) ++{ ++ return (flag_loop_crc > 0 && optimize >= 3); ++} ++ ++unsigned int ++pass_loop_crc::execute (function *fun) ++{ ++ if (number_of_loops (fun) <= 1) ++ return 0; ++ ++ /* Only supports LP64 data mode. */ ++ if (TYPE_PRECISION (long_integer_type_node) != 64 ++ || POINTER_SIZE != 64 || TYPE_PRECISION (integer_type_node) != 32) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "The current data mode is not supported," ++ "only the LP64 date mode is supported.\n"); ++ return 0; ++ } ++ ++ return tree_ssa_loop_crc (); ++} ++ ++} // anon namespace ++ ++gimple_opt_pass * ++make_pass_loop_crc (gcc::context *ctxt) ++{ ++ return new pass_loop_crc (ctxt); ++} +\ No newline at end of file +-- +2.33.0 + From b10bad3541fbd288562d6362beab9ff8ddeabfdb Mon Sep 17 00:00:00 2001 From: wangding16 Date: Wed, 6 Dec 2023 11:50:47 +0800 Subject: [PATCH 2/7] [Sync] Sync patch from openeuler/gcc 0143-Perform-early-if-conversion-of-simple-arithmetic.patch 0144-Add-option-to-allow-matching-uaddsub-overflow-for-wi.patch 0145-Match-double-sized-mul-pattern.patch --- ...y-if-conversion-of-simple-arithmetic.patch | 109 ++++ ...low-matching-uaddsub-overflow-for-wi.patch | 236 +++++++++ 0145-Match-double-sized-mul-pattern.patch | 488 ++++++++++++++++++ 3 files changed, 833 insertions(+) create mode 100644 0143-Perform-early-if-conversion-of-simple-arithmetic.patch create mode 100644 
0144-Add-option-to-allow-matching-uaddsub-overflow-for-wi.patch create mode 100644 0145-Match-double-sized-mul-pattern.patch diff --git a/0143-Perform-early-if-conversion-of-simple-arithmetic.patch b/0143-Perform-early-if-conversion-of-simple-arithmetic.patch new file mode 100644 index 0000000..6965a9b --- /dev/null +++ b/0143-Perform-early-if-conversion-of-simple-arithmetic.patch @@ -0,0 +1,109 @@ +From 7acb88ae27eb3e1af0da866d433968143c7754bd Mon Sep 17 00:00:00 2001 +From: Pronin Alexander 00812787 +Date: Thu, 12 Jan 2023 14:52:49 +0300 +Subject: [PATCH 20/33] Perform early if-conversion of simple arithmetic + +--- + gcc/common.opt | 4 ++++ + gcc/match.pd | 25 +++++++++++++++++++ + gcc/testsuite/gcc.dg/ifcvt-gimple.c | 37 +++++++++++++++++++++++++++++ + 3 files changed, 66 insertions(+) + create mode 100644 gcc/testsuite/gcc.dg/ifcvt-gimple.c + +diff --git a/gcc/common.opt b/gcc/common.opt +index 6f0ed7cea..6950756fd 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -1721,6 +1721,10 @@ fif-conversion2 + Common Report Var(flag_if_conversion2) Optimization + Perform conversion of conditional jumps to conditional execution. + ++fif-conversion-gimple ++Common Report Var(flag_if_conversion_gimple) Optimization ++Perform conversion of conditional jumps to branchless equivalents during gimple transformations. ++ + fstack-reuse= + Common Joined RejectNegative Enum(stack_reuse_level) Var(flag_stack_reuse) Init(SR_ALL) Optimization + -fstack-reuse=[all|named_vars|none] Set stack reuse level for local variables. +diff --git a/gcc/match.pd b/gcc/match.pd +index 01f81b063..e98cd02e0 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -3402,6 +3402,31 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + ) + ) + ) ++ ++(if (flag_if_conversion_gimple) ++ (for simple_op (plus minus bit_and bit_ior bit_xor) ++ (simplify ++ (cond @0 (simple_op @1 INTEGER_CST@2) @1) ++ (switch ++ /* a = cond ? 
a + 1 : a -> a = a + ((int) cond) */ ++ (if (integer_onep (@2)) ++ (simple_op @1 (convert (convert:boolean_type_node @0)))) ++ /* a = cond ? a + powerof2cst : a -> ++ a = a + ((int) cond) << log2 (powerof2cst) */ ++ (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@2)) ++ (with ++ { ++ tree shift = build_int_cst (integer_type_node, tree_log2 (@2)); ++ } ++ (simple_op @1 (lshift (convert (convert:boolean_type_node @0)) ++ { shift; }) ++ ) ++ ) ++ ) ++ ) ++ ) ++ ) ++) + #endif + + #if GIMPLE +diff --git a/gcc/testsuite/gcc.dg/ifcvt-gimple.c b/gcc/testsuite/gcc.dg/ifcvt-gimple.c +new file mode 100644 +index 000000000..0f7c87e5c +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/ifcvt-gimple.c +@@ -0,0 +1,37 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fif-conversion-gimple -fdump-tree-optimized" } */ ++ ++int test_int (int optimizable_int) { ++ if (optimizable_int > 5) ++ ++optimizable_int; ++ return optimizable_int; ++} ++ ++int test_int_pow2 (int optimizable_int_pow2) { ++ if (optimizable_int_pow2 <= 4) ++ optimizable_int_pow2 += 1024; ++ return optimizable_int_pow2; ++} ++ ++int test_int_non_pow2 (int not_optimizable_int_non_pow2) { ++ if (not_optimizable_int_non_pow2 == 1) ++ not_optimizable_int_non_pow2 += 513; ++ return not_optimizable_int_non_pow2; ++} ++ ++float test_float (float not_optimizable_float) { ++ if (not_optimizable_float > 5) ++ not_optimizable_float += 1; ++ return not_optimizable_float; ++} ++ ++/* Expecting if-else block in test_float and test_int_non_pow2 only. */ ++/* { dg-final { scan-tree-dump-not "if \\(optimizable" "optimized" } } */ ++/* { dg-final { scan-tree-dump "if \\(not_optimizable_int_non_pow2" "optimized" } } */ ++/* { dg-final { scan-tree-dump "if \\(not_optimizable_float" "optimized" } } */ ++/* { dg-final { scan-tree-dump-times "if " 2 "optimized" } } */ ++/* { dg-final { scan-tree-dump-times "else" 2 "optimized" } } */ ++ ++/* Expecting shifted result only for optimizable_int_pow2. 
*/ ++/* { dg-final { scan-tree-dump-times " << " 1 "optimized" } } */ ++/* { dg-final { scan-tree-dump " << 10;" "optimized" } } */ +-- +2.33.0 + diff --git a/0144-Add-option-to-allow-matching-uaddsub-overflow-for-wi.patch b/0144-Add-option-to-allow-matching-uaddsub-overflow-for-wi.patch new file mode 100644 index 0000000..e5ea737 --- /dev/null +++ b/0144-Add-option-to-allow-matching-uaddsub-overflow-for-wi.patch @@ -0,0 +1,236 @@ +From f788555b23b0b676729bb695af96954fe083e354 Mon Sep 17 00:00:00 2001 +From: Pronin Alexander 00812787 +Date: Tue, 24 Jan 2023 16:43:40 +0300 +Subject: [PATCH 21/33] Add option to allow matching uaddsub overflow for widen + ops too. + +--- + gcc/common.opt | 5 ++ + gcc/testsuite/gcc.dg/uaddsub.c | 143 +++++++++++++++++++++++++++++++++ + gcc/tree-ssa-math-opts.c | 35 +++++++- + 3 files changed, 179 insertions(+), 4 deletions(-) + create mode 100644 gcc/testsuite/gcc.dg/uaddsub.c + +diff --git a/gcc/common.opt b/gcc/common.opt +index 6950756fd..c2f01bbc0 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -2989,6 +2989,11 @@ freciprocal-math + Common Report Var(flag_reciprocal_math) SetByCombined Optimization + Same as -fassociative-math for expressions which include division. + ++fuaddsub-overflow-match-all ++Common Report Var(flag_uaddsub_overflow_match_all) ++Match unsigned add/sub overflow even if the target does not support ++the corresponding instruction. ++ + ; Nonzero means that unsafe floating-point math optimizations are allowed + ; for the sake of speed. 
IEEE compliance is not guaranteed, and operations + ; are allowed to assume that their arguments and results are "normal" +diff --git a/gcc/testsuite/gcc.dg/uaddsub.c b/gcc/testsuite/gcc.dg/uaddsub.c +new file mode 100644 +index 000000000..96c26d308 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/uaddsub.c +@@ -0,0 +1,143 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -fuaddsub-overflow-match-all -fdump-tree-optimized" } */ ++#include ++ ++typedef unsigned __int128 uint128_t; ++typedef struct uint256_t ++{ ++ uint128_t lo; ++ uint128_t hi; ++} uint256_t; ++ ++uint16_t add16 (uint8_t a, uint8_t b) ++{ ++ uint8_t tmp = a + b; ++ uint8_t overflow = 0; ++ if (tmp < a) ++ overflow = 1; ++ ++ uint16_t res = overflow; ++ res <<= 8; ++ res += tmp; ++ return res; ++} ++ ++uint32_t add32 (uint16_t a, uint16_t b) ++{ ++ uint16_t tmp = a + b; ++ uint16_t overflow = 0; ++ if (tmp < a) ++ overflow = 1; ++ ++ uint32_t res = overflow; ++ res <<= 16; ++ res += tmp; ++ return res; ++} ++ ++uint64_t add64 (uint32_t a, uint32_t b) ++{ ++ uint32_t tmp = a + b; ++ uint32_t overflow = 0; ++ if (tmp < a) ++ overflow = 1; ++ ++ uint64_t res = overflow; ++ res <<= 32; ++ res += tmp; ++ return res; ++} ++ ++uint128_t add128 (uint64_t a, uint64_t b) ++{ ++ uint64_t tmp = a + b; ++ uint64_t overflow = 0; ++ if (tmp < a) ++ overflow = 1; ++ ++ uint128_t res = overflow; ++ res <<= 64; ++ res += tmp; ++ return res; ++} ++ ++uint256_t add256 (uint128_t a, uint128_t b) ++{ ++ uint128_t tmp = a + b; ++ uint128_t overflow = 0; ++ if (tmp < a) ++ overflow = 1; ++ ++ uint256_t res; ++ res.hi = overflow; ++ res.lo = tmp; ++ return res; ++} ++ ++uint16_t sub16 (uint8_t a, uint8_t b) ++{ ++ uint8_t tmp = a - b; ++ uint8_t overflow = 0; ++ if (tmp > a) ++ overflow = -1; ++ ++ uint16_t res = overflow; ++ res <<= 8; ++ res += tmp; ++ return res; ++} ++ ++uint32_t sub32 (uint16_t a, uint16_t b) ++{ ++ uint16_t tmp = a - b; ++ uint16_t overflow = 0; ++ if (tmp > a) ++ overflow = -1; ++ ++ uint32_t res = 
overflow; ++ res <<= 16; ++ res += tmp; ++ return res; ++} ++ ++uint64_t sub64 (uint32_t a, uint32_t b) ++{ ++ uint32_t tmp = a - b; ++ uint32_t overflow = 0; ++ if (tmp > a) ++ overflow = -1; ++ ++ uint64_t res = overflow; ++ res <<= 32; ++ res += tmp; ++ return res; ++} ++ ++uint128_t sub128 (uint64_t a, uint64_t b) ++{ ++ uint64_t tmp = a - b; ++ uint64_t overflow = 0; ++ if (tmp > a) ++ overflow = -1; ++ ++ uint128_t res = overflow; ++ res <<= 64; ++ res += tmp; ++ return res; ++} ++ ++uint256_t sub256 (uint128_t a, uint128_t b) ++{ ++ uint128_t tmp = a - b; ++ uint128_t overflow = 0; ++ if (tmp > a) ++ overflow = -1; ++ ++ uint256_t res; ++ res.hi = overflow; ++ res.lo = tmp; ++ return res; ++} ++ ++/* { dg-final { scan-tree-dump-times "= .ADD_OVERFLOW \\(a_\[0-9\]+\\(D\\), b_\[0-9\]+\\(D\\)\\)" 5 "optimized" } } */ ++/* { dg-final { scan-tree-dump-times "= .SUB_OVERFLOW \\(a_\[0-9\]+\\(D\\), b_\[0-9\]+\\(D\\)\\)" 5 "optimized" } } */ +diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c +index 4c89fddcf..716bf9e35 100644 +--- a/gcc/tree-ssa-math-opts.c ++++ b/gcc/tree-ssa-math-opts.c +@@ -3290,6 +3290,27 @@ convert_mult_to_fma (gimple *mul_stmt, tree op1, tree op2, + } + } + ++/* Check if the corresponding operation has wider equivalent on the target. */ ++ ++static bool ++wider_optab_check_p (optab op, machine_mode mode, int unsignedp) ++{ ++ machine_mode wider_mode; ++ FOR_EACH_WIDER_MODE (wider_mode, mode) ++ { ++ machine_mode next_mode; ++ if (optab_handler (op, wider_mode) != CODE_FOR_nothing ++ || (op == smul_optab ++ && GET_MODE_WIDER_MODE (wider_mode).exists (&next_mode) ++ && (find_widening_optab_handler ((unsignedp ++ ? umul_widen_optab ++ : smul_widen_optab), ++ next_mode, mode)))) ++ return true; ++ } ++ ++ return false; ++} + + /* Helper function of match_uaddsub_overflow. 
Return 1 + if USE_STMT is unsigned overflow check ovf != 0 for +@@ -3390,12 +3411,18 @@ match_uaddsub_overflow (gimple_stmt_iterator *gsi, gimple *stmt, + gimple *use_stmt; + + gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR); ++ optab op = code == PLUS_EXPR ? uaddv4_optab : usubv4_optab; ++ machine_mode mode = TYPE_MODE (type); ++ int unsignedp = TYPE_UNSIGNED (type); + if (!INTEGRAL_TYPE_P (type) +- || !TYPE_UNSIGNED (type) ++ || !unsignedp + || has_zero_uses (lhs) +- || has_single_use (lhs) +- || optab_handler (code == PLUS_EXPR ? uaddv4_optab : usubv4_optab, +- TYPE_MODE (type)) == CODE_FOR_nothing) ++ || has_single_use (lhs)) ++ return false; ++ ++ if (optab_handler (op, mode) == CODE_FOR_nothing ++ && (!flag_uaddsub_overflow_match_all ++ || !wider_optab_check_p (op, mode, unsignedp))) + return false; + + FOR_EACH_IMM_USE_FAST (use_p, iter, lhs) +-- +2.33.0 + diff --git a/0145-Match-double-sized-mul-pattern.patch b/0145-Match-double-sized-mul-pattern.patch new file mode 100644 index 0000000..f04d6ce --- /dev/null +++ b/0145-Match-double-sized-mul-pattern.patch @@ -0,0 +1,488 @@ +From 3be7a26a08772d014f54f7b1a0555ccca91115d6 Mon Sep 17 00:00:00 2001 +From: Pronin Alexander 00812787 +Date: Wed, 25 Jan 2023 15:04:07 +0300 +Subject: [PATCH 22/33] Match double sized mul pattern + +--- + gcc/match.pd | 136 +++++++++++++++++++++ + gcc/testsuite/gcc.dg/double_sized_mul-1.c | 141 ++++++++++++++++++++++ + gcc/testsuite/gcc.dg/double_sized_mul-2.c | 62 ++++++++++ + gcc/tree-ssa-math-opts.c | 80 ++++++++++++ + 4 files changed, 419 insertions(+) + create mode 100644 gcc/testsuite/gcc.dg/double_sized_mul-1.c + create mode 100644 gcc/testsuite/gcc.dg/double_sized_mul-2.c + +diff --git a/gcc/match.pd b/gcc/match.pd +index e98cd02e0..74f8ab999 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -6390,3 +6390,139 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + to the number of trailing zeroes. 
*/ + (match (ctz_table_index @1 @2 @3) + (rshift (mult (bit_and:c (negate @1) @1) INTEGER_CST@2) INTEGER_CST@3)) ++ ++/* Match multiplication with double sized result. ++ ++ Consider the following calculations: ++ arg0 * arg1 = (2^(bit_size/2) * arg0_hi + arg0_lo) ++ * (2^(bit_size/2) * arg1_hi + arg1_lo) ++ arg0 * arg1 = 2^bit_size * arg0_hi * arg1_hi ++ + 2^(bit_size/2) * (arg0_hi * arg1_lo + arg0_lo * arg1_hi) ++ + arg0_lo * arg1_lo ++ ++ The products of high and low parts fits in bit_size values, thus they are ++ placed in high and low parts of result respectively. ++ ++ The sum of the mixed products may overflow, so we need a detection for that. ++ Also it has a bit_size/2 offset, thus it intersects with both high and low ++ parts of result. Overflow detection constant is bit_size/2 due to this. ++ ++ With this info: ++ arg0 * arg1 = 2^bit_size * arg0_hi * arg1_hi ++ + 2^(bit_size/2) * middle ++ + 2^bit_size * possible_middle_overflow ++ + arg0_lo * arg1_lo ++ arg0 * arg1 = 2^bit_size * (arg0_hi * arg1_hi + possible_middle_overflow) ++ + 2^(bit_size/2) * (2^(bit_size/2) * middle_hi + middle_lo) ++ + arg0_lo * arg1_lo ++ arg0 * arg1 = 2^bit_size * (arg0_hi * arg1_hi + middle_hi ++ + possible_middle_overflow) ++ + 2^(bit_size/2) * middle_lo ++ + arg0_lo * arg1_lo ++ ++ The last sum can produce overflow for the high result part. With this: ++ arg0 * arg1 = 2^bit_size * (arg0_hi * arg1_hi + possible_middle_overflow ++ + possible_res_lo_overflow + middle_hi) ++ + res_lo ++ = res_hi + res_lo ++ ++ This formula is quite big to fit into one match pattern with all of the ++ combinations of terms inside it. There are many helpers for better code ++ readability. ++ ++ The simplification basis is res_hi: assuming that res_lo only is not ++ real practical case for such calculations. ++ ++ Overflow handling is done via matching complex calculations: ++ the realpart and imagpart are quite handy here. */ ++/* Match low and high parts of the argument. 
*/ ++(match (double_size_mul_arg_lo @0 @1) ++ (bit_and @0 INTEGER_CST@1) ++ (if (wi::to_wide (@1) ++ == wi::mask (TYPE_PRECISION (type) / 2, false, TYPE_PRECISION (type))))) ++(match (double_size_mul_arg_hi @0 @1) ++ (rshift @0 INTEGER_CST@1) ++ (if (wi::to_wide (@1) == TYPE_PRECISION (type) / 2))) ++ ++/* Match various argument parts products. */ ++(match (double_size_mul_lolo @0 @1) ++ (mult@4 (double_size_mul_arg_lo @0 @2) (double_size_mul_arg_lo @1 @3)) ++ (if (single_use (@4)))) ++(match (double_size_mul_hihi @0 @1) ++ (mult@4 (double_size_mul_arg_hi @0 @2) (double_size_mul_arg_hi @1 @3)) ++ (if (single_use (@4)))) ++(match (double_size_mul_lohi @0 @1) ++ (mult:c@4 (double_size_mul_arg_lo @0 @2) (double_size_mul_arg_hi @1 @3)) ++ (if (single_use (@4)))) ++ ++/* Match complex middle sum. */ ++(match (double_size_mul_middle_complex @0 @1) ++ (IFN_ADD_OVERFLOW@2 (double_size_mul_lohi @0 @1) (double_size_mul_lohi @1 @0)) ++ (if (num_imm_uses (@2) == 2))) ++ ++/* Match real middle results. */ ++(match (double_size_mul_middle @0 @1) ++ (realpart@2 (double_size_mul_middle_complex @0 @1)) ++ (if (num_imm_uses (@2) == 2))) ++(match (double_size_mul_middleres_lo @0 @1) ++ (lshift@3 (double_size_mul_middle @0 @1) INTEGER_CST@2) ++ (if (wi::to_wide (@2) == TYPE_PRECISION (type) / 2 ++ && single_use (@3)))) ++(match (double_size_mul_middleres_hi @0 @1) ++ (rshift@3 (double_size_mul_middle @0 @1) INTEGER_CST@2) ++ (if (wi::to_wide (@2) == TYPE_PRECISION (type) / 2 ++ && single_use (@3)))) ++ ++/* Match low result part. */ ++/* Number of uses may be < 2 in case when we are interested in ++ high part only. */ ++(match (double_size_mul_res_lo_complex @0 @1) ++ (IFN_ADD_OVERFLOW:c@2 ++ (double_size_mul_lolo:c @0 @1) (double_size_mul_middleres_lo @0 @1)) ++ (if (num_imm_uses (@2) <= 2))) ++(match (double_size_mul_res_lo @0 @1) ++ (realpart (double_size_mul_res_lo_complex @0 @1))) ++ ++/* Match overflow terms. 
*/ ++(match (double_size_mul_overflow_check_lo @0 @1 @5) ++ (convert@4 (ne@3 ++ (imagpart@2 (double_size_mul_res_lo_complex@5 @0 @1)) integer_zerop)) ++ (if (single_use (@2) && single_use (@3) && single_use (@4)))) ++(match (double_size_mul_overflow_check_hi @0 @1) ++ (lshift@6 (convert@5 (ne@4 ++ (imagpart@3 (double_size_mul_middle_complex @0 @1)) integer_zerop)) ++ INTEGER_CST@2) ++ (if (wi::to_wide (@2) == TYPE_PRECISION (type) / 2 ++ && single_use (@3) && single_use (@4) && single_use (@5) ++ && single_use (@6)))) ++ ++/* Match all possible permutations for high result part calculations. */ ++(for op1 (double_size_mul_hihi ++ double_size_mul_overflow_check_hi ++ double_size_mul_middleres_hi) ++ op2 (double_size_mul_overflow_check_hi ++ double_size_mul_middleres_hi ++ double_size_mul_hihi) ++ op3 (double_size_mul_middleres_hi ++ double_size_mul_hihi ++ double_size_mul_overflow_check_hi) ++ (match (double_size_mul_candidate @0 @1 @2 @3) ++ (plus:c@2 ++ (plus:c@4 (double_size_mul_overflow_check_lo @0 @1 @3) (op1:c @0 @1)) ++ (plus:c@5 (op2:c @0 @1) (op3:c @0 @1))) ++ (if (single_use (@4) && single_use (@5)))) ++ (match (double_size_mul_candidate @0 @1 @2 @3) ++ (plus:c@2 (double_size_mul_overflow_check_lo @0 @1 @3) ++ (plus:c@4 (op1:c @0 @1) ++ (plus:c@5 (op2:c @0 @1) (op3:c @0 @1)))) ++ (if (single_use (@4) && single_use (@5)))) ++ (match (double_size_mul_candidate @0 @1 @2 @3) ++ (plus:c@2 (op1:c @0 @1) ++ (plus:c@4 (double_size_mul_overflow_check_lo @0 @1 @3) ++ (plus:c@5 (op2:c @0 @1) (op3:c @0 @1)))) ++ (if (single_use (@4) && single_use (@5)))) ++ (match (double_size_mul_candidate @0 @1 @2 @3) ++ (plus:c@2 (op1:c @0 @1) ++ (plus:c@4 (op2:c @0 @1) ++ (plus:c@5 (double_size_mul_overflow_check_lo @0 @1 @3) (op3:c @0 @1)))) ++ (if (single_use (@4) && single_use (@5))))) +diff --git a/gcc/testsuite/gcc.dg/double_sized_mul-1.c b/gcc/testsuite/gcc.dg/double_sized_mul-1.c +new file mode 100644 +index 000000000..4d475cc8a +--- /dev/null ++++ 
b/gcc/testsuite/gcc.dg/double_sized_mul-1.c +@@ -0,0 +1,141 @@ ++/* { dg-do compile } */ ++/* fif-conversion-gimple and fuaddsub-overflow-match-all are required for ++ proper overflow detection in some cases. */ ++/* { dg-options "-O2 -fif-conversion-gimple -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */ ++#include <stdint.h> ++ ++typedef unsigned __int128 uint128_t; ++ ++uint16_t mul16 (uint8_t a, uint8_t b) ++{ ++ uint8_t a_lo = a & 0xF; ++ uint8_t b_lo = b & 0xF; ++ uint8_t a_hi = a >> 4; ++ uint8_t b_hi = b >> 4; ++ uint8_t lolo = a_lo * b_lo; ++ uint8_t lohi = a_lo * b_hi; ++ uint8_t hilo = a_hi * b_lo; ++ uint8_t hihi = a_hi * b_hi; ++ uint8_t middle = hilo + lohi; ++ uint8_t middle_hi = middle >> 4; ++ uint8_t middle_lo = middle << 4; ++ uint8_t res_lo = lolo + middle_lo; ++ uint8_t res_hi = hihi + middle_hi; ++ res_hi += (res_lo < middle_lo ? 1 : 0); ++ res_hi += (middle < hilo ? 0x10 : 0); ++ uint16_t res = ((uint16_t) res_hi) << 8; ++ res += res_lo; ++ return res; ++} ++ ++uint32_t mul32 (uint16_t a, uint16_t b) ++{ ++ uint16_t a_lo = a & 0xFF; ++ uint16_t b_lo = b & 0xFF; ++ uint16_t a_hi = a >> 8; ++ uint16_t b_hi = b >> 8; ++ uint16_t lolo = a_lo * b_lo; ++ uint16_t lohi = a_lo * b_hi; ++ uint16_t hilo = a_hi * b_lo; ++ uint16_t hihi = a_hi * b_hi; ++ uint16_t middle = hilo + lohi; ++ uint16_t middle_hi = middle >> 8; ++ uint16_t middle_lo = middle << 8; ++ uint16_t res_lo = lolo + middle_lo; ++ uint16_t res_hi = hihi + middle_hi; ++ res_hi += (res_lo < middle_lo ? 1 : 0); ++ res_hi += (middle < hilo ? 
0x100 : 0); ++ uint32_t res = ((uint32_t) res_hi) << 16; ++ res += res_lo; ++ return res; ++} ++ ++uint64_t mul64 (uint32_t a, uint32_t b) ++{ ++ uint32_t a_lo = a & 0xFFFF; ++ uint32_t b_lo = b & 0xFFFF; ++ uint32_t a_hi = a >> 16; ++ uint32_t b_hi = b >> 16; ++ uint32_t lolo = a_lo * b_lo; ++ uint32_t lohi = a_lo * b_hi; ++ uint32_t hilo = a_hi * b_lo; ++ uint32_t hihi = a_hi * b_hi; ++ uint32_t middle = hilo + lohi; ++ uint32_t middle_hi = middle >> 16; ++ uint32_t middle_lo = middle << 16; ++ uint32_t res_lo = lolo + middle_lo; ++ uint32_t res_hi = hihi + middle_hi; ++ res_hi += (res_lo < middle_lo ? 1 : 0); ++ res_hi += (middle < hilo ? 0x10000 : 0); ++ uint64_t res = ((uint64_t) res_hi) << 32; ++ res += res_lo; ++ return res; ++} ++ ++uint128_t mul128 (uint64_t a, uint64_t b) ++{ ++ uint64_t a_lo = a & 0xFFFFFFFF; ++ uint64_t b_lo = b & 0xFFFFFFFF; ++ uint64_t a_hi = a >> 32; ++ uint64_t b_hi = b >> 32; ++ uint64_t lolo = a_lo * b_lo; ++ uint64_t lohi = a_lo * b_hi; ++ uint64_t hilo = a_hi * b_lo; ++ uint64_t hihi = a_hi * b_hi; ++ uint64_t middle = hilo + lohi; ++ uint64_t middle_hi = middle >> 32; ++ uint64_t middle_lo = middle << 32; ++ uint64_t res_lo = lolo + middle_lo; ++ uint64_t res_hi = hihi + middle_hi; ++ res_hi += (res_lo < middle_lo ? 1 : 0); ++ res_hi += (middle < hilo ? 0x100000000 : 0); ++ uint128_t res = ((uint128_t) res_hi) << 64; ++ res += res_lo; ++ return res; ++} ++ ++uint64_t mul64_perm (uint32_t a, uint32_t b) ++{ ++ uint32_t a_lo = a & 0xFFFF; ++ uint32_t b_lo = b & 0xFFFF; ++ uint32_t a_hi = a >> 16; ++ uint32_t b_hi = b >> 16; ++ uint32_t lolo = a_lo * b_lo; ++ uint32_t lohi = a_lo * b_hi; ++ uint32_t hilo = a_hi * b_lo; ++ uint32_t hihi = a_hi * b_hi; ++ uint32_t middle = hilo + lohi; ++ uint32_t middle_hi = middle >> 16; ++ uint32_t middle_lo = middle << 16; ++ uint32_t res_lo = lolo + middle_lo; ++ uint32_t res_hi = hihi + middle_hi; ++ res_hi = res_lo < middle_lo ? res_hi + 1 : res_hi; ++ res_hi = middle < hilo ? 
res_hi + 0x10000 : res_hi; ++ uint64_t res = ((uint64_t) res_hi) << 32; ++ res += res_lo; ++ return res; ++} ++ ++uint128_t mul128_perm (uint64_t a, uint64_t b) ++{ ++ uint64_t a_lo = a & 0xFFFFFFFF; ++ uint64_t b_lo = b & 0xFFFFFFFF; ++ uint64_t a_hi = a >> 32; ++ uint64_t b_hi = b >> 32; ++ uint64_t lolo = a_lo * b_lo; ++ uint64_t lohi = a_lo * b_hi; ++ uint64_t hilo = a_hi * b_lo; ++ uint64_t hihi = a_hi * b_hi; ++ uint64_t middle = hilo + lohi; ++ uint64_t middle_hi = middle >> 32; ++ uint64_t middle_lo = middle << 32; ++ uint64_t res_lo = lolo + middle_lo; ++ uint64_t res_hi = hihi + middle_hi; ++ res_hi = res_lo < middle_lo ? res_hi + 1 : res_hi; ++ res_hi = middle < hilo ? res_hi + 0x100000000 : res_hi; ++ uint128_t res = ((uint128_t) res_hi) << 64; ++ res += res_lo; ++ return res; ++} ++ ++/* { dg-final { scan-tree-dump-times "double sized mul optimized: 1" 6 "widening_mul" } } */ +diff --git a/gcc/testsuite/gcc.dg/double_sized_mul-2.c b/gcc/testsuite/gcc.dg/double_sized_mul-2.c +new file mode 100644 +index 000000000..cc6e5af25 +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/double_sized_mul-2.c +@@ -0,0 +1,62 @@ ++/* { dg-do compile } */ ++/* fif-conversion-gimple is required for proper overflow detection ++ in some cases. 
*/ ++/* { dg-options "-O2 -fif-conversion-gimple -fuaddsub-overflow-match-all -fdump-tree-widening_mul-stats" } */ ++#include <stdint.h> ++ ++typedef unsigned __int128 uint128_t; ++typedef struct uint256_t ++{ ++ uint128_t lo; ++ uint128_t hi; ++} uint256_t; ++ ++uint64_t mul64_double_use (uint32_t a, uint32_t b) ++{ ++ uint32_t a_lo = a & 0xFFFF; ++ uint32_t b_lo = b & 0xFFFF; ++ uint32_t a_hi = a >> 16; ++ uint32_t b_hi = b >> 16; ++ uint32_t lolo = a_lo * b_lo; ++ uint32_t lohi = a_lo * b_hi; ++ uint32_t hilo = a_hi * b_lo; ++ uint32_t hihi = a_hi * b_hi; ++ uint32_t middle = hilo + lohi; ++ uint32_t middle_hi = middle >> 16; ++ uint32_t middle_lo = middle << 16; ++ uint32_t res_lo = lolo + middle_lo; ++ uint32_t res_hi = hihi + middle_hi; ++ res_hi += (res_lo < middle_lo ? 1 : 0); ++ res_hi += (middle < hilo ? 0x10000 : 0); ++ uint64_t res = ((uint64_t) res_hi) << 32; ++ res += res_lo; ++ return res + lolo; ++} ++ ++uint256_t mul256 (uint128_t a, uint128_t b) ++{ ++ uint128_t a_lo = a & 0xFFFFFFFFFFFFFFFF; ++ uint128_t b_lo = b & 0xFFFFFFFFFFFFFFFF; ++ uint128_t a_hi = a >> 64; ++ uint128_t b_hi = b >> 64; ++ uint128_t lolo = a_lo * b_lo; ++ uint128_t lohi = a_lo * b_hi; ++ uint128_t hilo = a_hi * b_lo; ++ uint128_t hihi = a_hi * b_hi; ++ uint128_t middle = hilo + lohi; ++ uint128_t middle_hi = middle >> 64; ++ uint128_t middle_lo = middle << 64; ++ uint128_t res_lo = lolo + middle_lo; ++ uint128_t res_hi = hihi + middle_hi; ++ res_hi += (res_lo < middle_lo ? 1 : 0); ++ /* Workaround for the "constant is too big" warning. */ ++ uint128_t overflow_tmp = (middle < hilo ? 
1 : 0); ++ overflow_tmp <<= 64; ++ res_hi += overflow_tmp; ++ uint256_t res; ++ res.lo = res_lo; ++ res.hi = res_hi; ++ return res; ++} ++ ++/* { dg-final { scan-tree-dump-not "double sized mul optimized" "widening_mul" } } */ +diff --git a/gcc/tree-ssa-math-opts.c b/gcc/tree-ssa-math-opts.c +index 716bf9e35..a81d7501c 100644 +--- a/gcc/tree-ssa-math-opts.c ++++ b/gcc/tree-ssa-math-opts.c +@@ -182,6 +182,9 @@ static struct + + /* Number of divmod calls inserted. */ + int divmod_calls_inserted; ++ ++ /* Number of optimized double sized multiplications. */ ++ int double_sized_mul_optimized; + } widen_mul_stats; + + /* The instance of "struct occurrence" representing the highest +@@ -3708,6 +3711,78 @@ convert_to_divmod (gassign *stmt) + return true; + } + ++/* Pattern matcher for double sized multiplication defined in match.pd. */ ++extern bool gimple_double_size_mul_candidate (tree, tree*, tree (*)(tree)); ++ ++static bool ++convert_double_size_mul (gimple_stmt_iterator *gsi, gimple *stmt) ++{ ++ gimple *use_stmt, *complex_res_lo; ++ gimple_stmt_iterator insert_before; ++ imm_use_iterator use_iter; ++ tree match[4]; // arg0, arg1, res_hi, complex_res_lo ++ tree arg0, arg1, widen_mult, new_type, tmp; ++ tree lhs = gimple_assign_lhs (stmt); ++ location_t loc = UNKNOWN_LOCATION; ++ machine_mode mode; ++ ++ if (!gimple_double_size_mul_candidate (lhs, match, NULL)) ++ return false; ++ ++ new_type = build_nonstandard_integer_type ( ++ TYPE_PRECISION (TREE_TYPE (match[0])) * 2, 1); ++ mode = TYPE_MODE (new_type); ++ ++ /* Early return if the target multiplication doesn't exist on target. */ ++ if (optab_handler (smul_optab, mode) == CODE_FOR_nothing ++ && !wider_optab_check_p (smul_optab, mode, 1)) ++ return false; ++ ++ /* Determine the point where the wide multiplication ++ should be inserted. Complex low res is OK since it is required ++ by both high and low part getters, thus it dominates both of them. 
*/ ++ complex_res_lo = SSA_NAME_DEF_STMT (match[3]); ++ insert_before = gsi_for_stmt (complex_res_lo); ++ gsi_next (&insert_before); ++ ++ /* Create the widen multiplication. */ ++ arg0 = build_and_insert_cast (&insert_before, loc, new_type, match[0]); ++ arg1 = build_and_insert_cast (&insert_before, loc, new_type, match[1]); ++ widen_mult = build_and_insert_binop (&insert_before, loc, "widen_mult", ++ MULT_EXPR, arg0, arg1); ++ ++ /* Find the mult low part getter. */ ++ FOR_EACH_IMM_USE_STMT (use_stmt, use_iter, match[3]) ++ if (gimple_assign_rhs_code (use_stmt) == REALPART_EXPR) ++ BREAK_FROM_IMM_USE_STMT (use_iter); ++ ++ /* Create high and low (if needed) parts extractors. */ ++ /* Low part. */ ++ if (use_stmt) ++ { ++ loc = gimple_location (use_stmt); ++ tmp = build_and_insert_cast (&insert_before, loc, ++ TREE_TYPE (gimple_get_lhs (use_stmt)), ++ widen_mult); ++ gassign *new_stmt = gimple_build_assign (gimple_get_lhs (use_stmt), ++ NOP_EXPR, tmp); ++ gsi_replace (&insert_before, new_stmt, true); ++ } ++ ++ /* High part. */ ++ loc = gimple_location (stmt); ++ tmp = build_and_insert_binop (gsi, loc, "widen_mult_hi", ++ RSHIFT_EXPR, widen_mult, ++ build_int_cst (new_type, ++ TYPE_PRECISION (new_type) / 2)); ++ tmp = build_and_insert_cast (gsi, loc, TREE_TYPE (lhs), tmp); ++ gassign *new_stmt = gimple_build_assign (lhs, NOP_EXPR, tmp); ++ gsi_replace (gsi, new_stmt, true); ++ ++ widen_mul_stats.double_sized_mul_optimized++; ++ return true; ++} ++ + /* Find integer multiplications where the operands are extended from + smaller types, and replace the MULT_EXPR with a WIDEN_MULT_EXPR + where appropriate. 
*/ +@@ -3801,6 +3876,9 @@ math_opts_dom_walker::after_dom_children (basic_block bb) + break; + + case PLUS_EXPR: ++ if (convert_double_size_mul (&gsi, stmt)) ++ break; ++ __attribute__ ((fallthrough)); + case MINUS_EXPR: + if (!convert_plusminus_to_widen (&gsi, stmt, code)) + match_uaddsub_overflow (&gsi, stmt, code); +@@ -3892,6 +3970,8 @@ pass_optimize_widening_mul::execute (function *fun) + widen_mul_stats.fmas_inserted); + statistics_counter_event (fun, "divmod calls inserted", + widen_mul_stats.divmod_calls_inserted); ++ statistics_counter_event (fun, "double sized mul optimized", ++ widen_mul_stats.double_sized_mul_optimized); + + return cfg_changed ? TODO_cleanup_cfg : 0; + } +-- +2.33.0 + From 8f8eb202662d4f87440285389b5ee758d469f4be Mon Sep 17 00:00:00 2001 From: wangding16 Date: Wed, 6 Dec 2023 11:51:13 +0800 Subject: [PATCH 3/7] [Sync] Sync patch from openeuler/gcc 0146-LOOP-CRC32-Add-Crc32-Optimization-in-Gzip-For-crc32-.patch --- ...rc32-Optimization-in-Gzip-For-crc32-.patch | 2354 +++++++++++++++++ 1 file changed, 2354 insertions(+) create mode 100644 0146-LOOP-CRC32-Add-Crc32-Optimization-in-Gzip-For-crc32-.patch diff --git a/0146-LOOP-CRC32-Add-Crc32-Optimization-in-Gzip-For-crc32-.patch b/0146-LOOP-CRC32-Add-Crc32-Optimization-in-Gzip-For-crc32-.patch new file mode 100644 index 0000000..a9a8e94 --- /dev/null +++ b/0146-LOOP-CRC32-Add-Crc32-Optimization-in-Gzip-For-crc32-.patch @@ -0,0 +1,2354 @@ +From 179412c66d0cdd6a48ef1c29acae90908102a1c9 Mon Sep 17 00:00:00 2001 +From: xingyushuai +Date: Mon, 24 Apr 2023 09:34:35 +0800 +Subject: [PATCH 08/13] [LOOP CRC32]Add Crc32 Optimization in Gzip For crc32 + algorithm in APBC int_gzip. Match crc32 lookup table algorithm. 
An example + for crc32 lookup table alg: ```c do { c = crc_32_tab[((int)c ^ (*s++)) & + 0xff] ^ (c >> 8); } while (--n); + +Usage: `gcc -O3 -march=armv8.1-a -floop-crc yourfile.c` +Note: The cpu you use needs to support the crc32 instructions +--- + gcc/config/aarch64/aarch64-builtins.c | 29 + + gcc/config/aarch64/aarch64-protos.h | 1 + + gcc/config/aarch64/aarch64.c | 12 + + gcc/doc/tm.texi | 9 + + gcc/doc/tm.texi.in | 2 + + gcc/match.pd | 146 +-- + gcc/passes.def | 2 +- + gcc/target.def | 14 + + gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c | 85 -- + .../tree-ssa/loop-crc-1.c.042t.loop_crc | 90 -- + gcc/testsuite/gcc.dg/tree-ssa/loop-crc-2.c | 88 -- + .../tree-ssa/loop-crc-4.c.042t.loop_crc | 0 + .../loop-crc-calculation-check-fail.c | 156 --- + ...crc-calculation-check-fail.c.042t.loop_crc | 64 -- + .../loop-crc-calculation-check-fail.s | 329 ------- + ...crc-3.c => loop-crc-loop-condition-fail.c} | 6 +- + ...op-crc-4.c => loop-crc-loop-form-fail-2.c} | 7 +- + .../gcc.dg/tree-ssa/loop-crc-loop-form-fail.c | 3 +- + .../gcc.dg/tree-ssa/loop-crc-sucess.c | 7 +- + .../tree-ssa/loop-crc-table-check-fail.c | 3 +- + gcc/tree-ssa-loop-crc.c | 903 +++++++++++++++--- + 21 files changed, 873 insertions(+), 1083 deletions(-) + delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c + delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c.042t.loop_crc + delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-2.c + delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-4.c.042t.loop_crc + delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.c + delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.c.042t.loop_crc + delete mode 100644 gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.s + rename gcc/testsuite/gcc.dg/tree-ssa/{loop-crc-3.c => loop-crc-loop-condition-fail.c} (97%) + rename gcc/testsuite/gcc.dg/tree-ssa/{loop-crc-4.c => loop-crc-loop-form-fail-2.c} (95%) + +diff --git 
a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c +index d92157dff..1e8b046da 100644 +--- a/gcc/config/aarch64/aarch64-builtins.c ++++ b/gcc/config/aarch64/aarch64-builtins.c +@@ -441,6 +441,12 @@ typedef struct + #define VAR1(T, N, MAP, A) \ + AARCH64_SIMD_BUILTIN_##T##_##N##A, + ++enum aarch64_crc_builtins{ ++ AARCH64_BUILTIN_CRC32B, ++ AARCH64_BUILTIN_CRC32H, ++ AARCH64_BUILTIN_CRC32W, ++}; ++ + enum aarch64_builtins + { + AARCH64_BUILTIN_MIN, +@@ -1321,6 +1327,29 @@ aarch64_general_builtin_decl (unsigned code, bool) + + return aarch64_builtin_decls[code]; + } ++/* Implement TARGET_GET_CRC_BUILTIN_CODE */ ++unsigned ++get_crc_builtin_code(unsigned code, bool) ++{ ++ if (code > AARCH64_BUILTIN_CRC32W) ++ return AARCH64_BUILTIN_MIN; ++ ++ unsigned res = AARCH64_BUILTIN_MIN; ++ switch (code) { ++ case AARCH64_BUILTIN_CRC32B: ++ res = AARCH64_BUILTIN_crc32b; ++ break; ++ case AARCH64_BUILTIN_CRC32H: ++ res = AARCH64_BUILTIN_crc32h; ++ break; ++ case AARCH64_BUILTIN_CRC32W: ++ res = AARCH64_BUILTIN_crc32w; ++ break; ++ default: ++ break; ++ } ++ return res; ++} + + typedef enum + { +diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h +index 9b6d309a7..a0ca662bc 100644 +--- a/gcc/config/aarch64/aarch64-protos.h ++++ b/gcc/config/aarch64/aarch64-protos.h +@@ -723,6 +723,7 @@ tree aarch64_general_fold_builtin (unsigned int, tree, unsigned int, tree *); + gimple *aarch64_general_gimple_fold_builtin (unsigned int, gcall *); + rtx aarch64_general_expand_builtin (unsigned int, tree, rtx, int); + tree aarch64_general_builtin_decl (unsigned, bool); ++unsigned get_crc_builtin_code(unsigned , bool); + tree aarch64_general_builtin_rsqrt (unsigned int); + tree aarch64_builtin_vectorized_function (unsigned int, tree, tree); + +diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c +index cbdde11b0..b8407c612 100644 +--- a/gcc/config/aarch64/aarch64.c ++++ b/gcc/config/aarch64/aarch64.c +@@ -13735,6 
+13735,15 @@ aarch64_builtin_decl (unsigned int code, bool initialize_p) + gcc_unreachable (); + } + ++/* Implement TARGET_GET_CRC_BUILTIN_CODE. */ ++static unsigned ++aarch64_get_crc_builtin_code(unsigned code, bool initialize_p) ++{ ++ unsigned subcode = get_crc_builtin_code(code,initialize_p); ++ unsigned res = subcode << AARCH64_BUILTIN_SHIFT; ++ return res; ++} ++ + /* Return true if it is safe and beneficial to use the approximate rsqrt optabs + to optimize 1.0/sqrt. */ + +@@ -23911,6 +23920,9 @@ aarch64_run_selftests (void) + #undef TARGET_BUILTIN_DECL + #define TARGET_BUILTIN_DECL aarch64_builtin_decl + ++#undef TARGET_GET_CRC_BUILTIN_CODE ++#define TARGET_GET_CRC_BUILTIN_CODE aarch64_get_crc_builtin_code ++ + #undef TARGET_BUILTIN_RECIPROCAL + #define TARGET_BUILTIN_RECIPROCAL aarch64_builtin_reciprocal + +diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi +index 0508fce57..b46418d0b 100644 +--- a/gcc/doc/tm.texi ++++ b/gcc/doc/tm.texi +@@ -11610,6 +11610,15 @@ If @var{code} is out of range the function should return + @code{error_mark_node}. + @end deftypefn + ++@deftypefn {Target Hook} unsigned TARGET_GET_CRC_BUILTIN_CODE (unsigned @var{code}, bool @var{initialize_p}) ++Define this hook to get crc32 builtin code. It should be a function that ++returns the crc32 builtin function code @var{code}. ++If there is no such builtin and it cannot be initialized at this time ++if @var{initialize_p} is true the function should return @code{NULL_TREE}. ++If @var{code} is out of range the function should return ++@code{error_mark_node}. ++@end deftypefn ++ + @deftypefn {Target Hook} rtx TARGET_EXPAND_BUILTIN (tree @var{exp}, rtx @var{target}, rtx @var{subtarget}, machine_mode @var{mode}, int @var{ignore}) + + Expand a call to a machine specific built-in function that was set up by +diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in +index 3b70ea484..2663547c7 100644 +--- a/gcc/doc/tm.texi.in ++++ b/gcc/doc/tm.texi.in +@@ -7941,6 +7941,8 @@ to by @var{ce_info}. 
+ + @hook TARGET_BUILTIN_DECL + ++@hook TARGET_GET_CRC_BUILTIN_CODE ++ + @hook TARGET_EXPAND_BUILTIN + + @hook TARGET_RESOLVE_OVERLOADED_BUILTIN +diff --git a/gcc/match.pd b/gcc/match.pd +index 87b316953..0f92003f7 100644 +--- a/gcc/match.pd ++++ b/gcc/match.pd +@@ -3487,160 +3487,17 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT) + ) + #endif + +- +-#if GIMPLE +-(if (canonicalize_math_p ()) +-/* These patterns are mostly used by PHIOPT to move some operations outside of +- the if statements. They should be done late because it gives jump threading +- and few other passes to reduce what is going on. */ +-/* a ? x op POW2 : x -> x op (a ? POW2 : 0). */ +- (for op (plus minus bit_ior bit_xor lshift rshift lrotate rrotate) +- (simplify +- (cond @0 (op:s @1 INTEGER_CST@2) @1) +- /* powerof2cst */ +- (if (INTEGRAL_TYPE_P (type) && integer_pow2p (@2)) +- (with { +- tree shift = build_int_cst (integer_type_node, tree_log2 (@2)); +- } +- (op @1 (lshift (convert (convert:boolean_type_node @0)) { shift; }))) +- ) +- ) +- ) +-) +-#endif +- +-#if GIMPLE +-/* These patterns are mostly used by FORWPROP to move some operations outside of +- the if statements. They should be done late because it gives jump threading +- and few other passes to reduce what is going on. 
*/ +-/* Mul64 is defined as a multiplication algorithm which compute two 64-bit integers to one 128-bit integer +- (i64 ResLo, i64 ResHi) = Mul64(i64 In0, i64 In1) { +- In0Lo = In0(D) & 4294967295; +- In0Hi = In0(D) >> 32; +- In1Lo = In1(D) & 4294967295; +- In1Hi = In1(D) >> 32; +- Mull_01 = In0Hi * In1Lo; +- Addc = In0Lo * In1Hi + Mull_01; +- addc32 = Addc << 32; +- ResLo = In0Lo * In1Lo + addc32; +- ResHi = ((long unsigned int) (addc32 > ResLo)) + +- (((long unsigned int) (Mull_01 > Addc)) << 32) + (Addc >> 32) + In0Hi * In1Hi; +- } */ +- (simplify +- (plus +- (plus +- (convert +- (gt @10 +- (plus +- (mult @4 @6) +- (lshift@10 @9 @3)))) +- (lshift +- (convert +- (gt @8 @9)) @3)) +- (plus@11 +- (rshift +- (plus@9 +- (mult (bit_and@4 SSA_NAME@0 @2) @7) +- (mult@8 @5 (bit_and@6 SSA_NAME@1 INTEGER_CST@2))) @3) +- (mult (rshift@5 SSA_NAME@0 @3) +- (rshift@7 SSA_NAME@1 INTEGER_CST@3)))) +- (if (INTEGRAL_TYPE_P (type) && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1) && +- TYPE_PRECISION (type) == 64) +- (with { +- tree i128_type = build_nonstandard_integer_type (128, TYPE_UNSIGNED (type)); +- tree shift = build_int_cst (integer_type_node, 64); +- //direct_internal_fn_supported_p (UMULH, type, OPTIMIZE_FOR_BOTH) +- } +- (convert:type (rshift +- (mult (convert:i128_type @0) (convert:i128_type @1)) { shift; }))) +- ) +- ) +- +- /* (i64 ResLo, i64 ResHi) = Mul64(i64 In0, i64 In1) { +- In0Lo = In0(D) & 4294967295; +- In0Hi = In0(D) >> 32; +- In1Lo = In1(D) & 4294967295; +- In1Hi = In1(D) >> 32; +- Mull_01 = In0Hi * In1Lo; +- Addc = In0Lo * In1Hi + Mull_01; +- addc32 = Addc << 32; +- ResLo = In0(D) * In1(D); +- ResHi = ((long unsigned int) (addc32 > ResLo)) + +- (((long unsigned int) (Mull_01 > Addc)) << 32) + (Addc >> 32) + In0Hi * In1Hi; +- } */ +- (simplify +- (plus +- (plus +- (convert +- (gt (lshift@10 @9 @3) +- (mult @0 @1))) +- (lshift +- (convert +- (gt @8 @9)) @3)) +- (plus@11 +- (rshift +- (plus@9 +- (mult (bit_and@4 SSA_NAME@0 @2) @7) +- (mult@8 @5 
(bit_and@6 SSA_NAME@1 INTEGER_CST@2))) @3) +- (mult (rshift@5 SSA_NAME@0 @3) +- (rshift@7 SSA_NAME@1 INTEGER_CST@3)))) +- (if (INTEGRAL_TYPE_P (type) && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1) && +- TYPE_PRECISION (type) == 64) +- (with { +- tree i128_type = build_nonstandard_integer_type (128, TYPE_UNSIGNED (type)); +- tree shift = build_int_cst (integer_type_node, 64); +- //direct_internal_fn_supported_p (UMULH, type, OPTIMIZE_FOR_BOTH) +- } +- (convert:type (rshift +- (mult (convert:i128_type @0) (convert:i128_type @1)) { shift; }))) +- ) +- ) +-#endif +- +-#if GIMPLE +-/* These patterns are mostly used by FORWPROP to move some operations outside of +- the if statements. They should be done late because it gives jump threading +- and few other passes to reduce what is going on. */ +- /* +- In0Lo = In0(D) & 4294967295; +- In0Hi = In0(D) >> 32; +- In1Lo = In1(D) & 4294967295; +- In1Hi = In1(D) >> 32; +- Addc = In0Lo * In1Hi + In0Hi * In1Lo; +- addc32 = Addc << 32; +- ResLo = In0Lo * In1Lo + addc32 +- */ +- (simplify +- (plus (mult @4 @5) +- (lshift +- (plus +- (mult (bit_and@4 SSA_NAME@0 @2) (rshift SSA_NAME@1 @3)) +- (mult (rshift SSA_NAME@0 @3) (bit_and@5 SSA_NAME@1 INTEGER_CST@2))) INTEGER_CST@3)) +- (if (INTEGRAL_TYPE_P (type) && INTEGRAL_TYPE_P (TREE_TYPE (@0)) && types_match (@0, @1) && +- TYPE_PRECISION (type) == 64) +- (with { +- tree i128_type = build_nonstandard_integer_type (128, TYPE_UNSIGNED (type)); +- tree shift = build_int_cst (integer_type_node, 64); +- //direct_internal_fn_supported_p (UMULH, type, OPTIMIZE_FOR_BOTH) +- } +- (mult (convert:type @0) (convert:type @1))) +- ) +- ) +-#endif +- +- + #if GIMPLE + /* Try to match */ + /* + _4 = (int) _3; //NOP_EXPR (SSA_NAME @2) + _5 = _4 ^ c_10; //BIT_XOR_EXPR (SSA_NAME@1, SSA_NAME) +-_6 = _5 & 255; //BIT_XOR_EXPR (SSA_NAME, INTEGER_CST@3) ++_6 = _5 & 255; //BIT_AND_EXPR (SSA_NAME, INTEGER_CST@3) + */ + (match (crc_match_index @1 @2 @3) + (bit_and (bit_xor (nop SSA_NAME@2) SSA_NAME@1) 
INTEGER_CST@3) + (if (INTEGRAL_TYPE_P (type) && tree_to_uhwi(@3) == 255)) + ) +- + #endif + + #if GIMPLE +@@ -3653,7 +3510,6 @@ c_19 = _7 ^ _8; // BIT_XOR_EXPR (SSA_NAME@3, SSA_NAME) + (bit_xor SSA_NAME@3 (rshift SSA_NAME@1 INTEGER_CST@2)) + (if (INTEGRAL_TYPE_P (type) && tree_to_uhwi(@2) == 8)) + ) +- + #endif + + /* Simplification moved from fold_cond_expr_with_comparison. It may also +diff --git a/gcc/passes.def b/gcc/passes.def +index 7abd946ce..df7d65733 100644 +--- a/gcc/passes.def ++++ b/gcc/passes.def +@@ -92,7 +92,7 @@ along with GCC; see the file COPYING3. If not see + NEXT_PASS (pass_cd_dce); + NEXT_PASS (pass_phiopt, true /* early_p */); + NEXT_PASS (pass_array_widen_compare); +- NEXT_PASS (pass_loop_crc); ++ NEXT_PASS (pass_loop_crc); + NEXT_PASS (pass_tail_recursion); + NEXT_PASS (pass_convert_switch); + NEXT_PASS (pass_cleanup_eh); +diff --git a/gcc/target.def b/gcc/target.def +index 202056411..34d3561bd 100644 +--- a/gcc/target.def ++++ b/gcc/target.def +@@ -2421,6 +2421,20 @@ If @var{code} is out of range the function should return\n\ + @code{error_mark_node}.", + tree, (unsigned code, bool initialize_p), NULL) + ++/* Initialize (if INITIALIZE_P is true) and return the real code of ++ target-specific built-in function . ++ Return NULL if that is not possible. Return error_mark_node if CODE ++ is outside of the range of valid crc32 codes. */ ++DEFHOOK ++(get_crc_builtin_code, ++ "Define this hook to get crc32 builtin code. It should be a function that\n\ ++returns the crc32 builtin function code @var{code}.\n\ ++If there is no such builtin and it cannot be initialized at this time\n\ ++if @var{initialize_p} is true the function should return @code{NULL_TREE}.\n\ ++If @var{code} is out of range the function should return\n\ ++@code{error_mark_node}.", ++ unsigned , (unsigned code, bool initialize_p), NULL) ++ + /* Expand a target-specific builtin. 
*/ + DEFHOOK + (expand_builtin, +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c +deleted file mode 100644 +index 07f9e01ec..000000000 +--- a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c ++++ /dev/null +@@ -1,85 +0,0 @@ +-/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ +-/* { dg-options "-O3 -mabi=lp64 -floop-crc -fdump-tree-loop_crc-details" } */ +- +-#include +-#include +-typedef unsigned long ulg; +-typedef unsigned char uch; +- +-static const ulg crc_32_tab[] = { +- 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, +- 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, +- 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, +- 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, +- 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, +- 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, +- 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, +- 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, +- 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, +- 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, +- 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, +- 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, +- 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, +- 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, +- 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, +- 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, +- 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, +- 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, +- 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, +- 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, +- 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, +- 0x346ed9fcL, 0xad678846L, 
0xda60b8d0L, 0x44042d73L, 0x33031de5L, +- 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, +- 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, +- 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, +- 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, +- 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, +- 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, +- 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, +- 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, +- 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, +- 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, +- 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, +- 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, +- 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, +- 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, +- 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, +- 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, +- 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, +- 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, +- 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, +- 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, +- 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, +- 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, +- 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, +- 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, +- 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, +- 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, +- 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, +- 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, +- 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 
0x5a05df1bL, +- 0x2d02ef8dL +-}; +- +-ulg updcrc(s, n) +- uch *s; /* pointer to bytes to pump through */ +- unsigned n; /* number of bytes in s[] */ +-{ +- register ulg c; /* temporary variable */ +- +- static ulg crc = (ulg)0xffffffffL; /* shift register contents */ +- +- if (s == NULL) { +- c = 0xffffffffL; +- } else { +- c = crc; +- if (n) +- do { +- c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8); +- } while (--n); +- } +- crc = c; +- return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ +-} +-/* { dg-final { scan-tree-dump-times "Processing loop" 1 "loop_crc"} } */ +-/* { dg-final { scan-tree-dump-times "the loop can be optimized" 1 "loop_crc"} } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c.042t.loop_crc b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c.042t.loop_crc +deleted file mode 100644 +index c726059f3..000000000 +--- a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-1.c.042t.loop_crc ++++ /dev/null +@@ -1,90 +0,0 @@ +- +-;; Function updcrc (updcrc, funcdef_no=0, decl_uid=3687, cgraph_uid=1, symbol_order=1) +- +-;; 2 loops found +-;; +-;; Loop 0 +-;; header 0, latch 1 +-;; depth 0, outer -1 +-;; nodes: 0 1 2 3 6 4 7 5 +-;; +-;; Loop 1 +-;; header 4, latch 7 +-;; depth 1, outer 0 +-;; nodes: 4 7 +-;; 2 succs { 5 3 } +-;; 3 succs { 6 5 } +-;; 6 succs { 4 } +-;; 4 succs { 7 5 } +-;; 7 succs { 4 } +-;; 5 succs { 1 } +- +-Starting the loop_crc pass +-====================================== +-Processing loop 1: +-====================================== +-;; +-;; Loop 1 +-;; header 4, latch 7 +-;; depth 1, outer 0 +-;; nodes: 4 7 +- +- +-The 1th loop form is success matched,and the loop can be optimized. 
+-updcrc (uch * s, unsigned int n) +-{ +- static ulg crc = 4294967295; +- register ulg c; +- unsigned char _2; +- long unsigned int _3; +- long unsigned int _4; +- long unsigned int _5; +- long unsigned int _6; +- long unsigned int _7; +- ulg _21; +- +- : +- if (s_12(D) == 0B) +- goto ; [INV] +- else +- goto ; [INV] +- +- : +- c_14 = crc; +- if (n_15(D) != 0) +- goto ; [INV] +- else +- goto ; [INV] +- +- : +- +- : +- # s_8 = PHI +- # n_9 = PHI +- # c_10 = PHI +- s_16 = s_8 + 1; +- _2 = *s_8; +- _3 = (long unsigned int) _2; +- _4 = _3 ^ c_10; +- _5 = _4 & 255; +- _6 = crc_32_tab[_5]; +- _7 = c_10 >> 8; +- c_17 = _6 ^ _7; +- n_18 = n_9 + 4294967295; +- if (n_18 != 0) +- goto ; [INV] +- else +- goto ; [INV] +- +- : +- goto ; [100.00%] +- +- : +- # c_11 = PHI <4294967295(2), c_14(3), c_17(4)> +- crc = c_11; +- _21 = c_11 ^ 4294967295; +- return _21; +- +-} +- +- +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-2.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-2.c +deleted file mode 100644 +index f73c4d550..000000000 +--- a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-2.c ++++ /dev/null +@@ -1,88 +0,0 @@ +-/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ +-/* { dg-options "-O3 -mabi=lp64 -floop-crc -fdump-tree-loop_crc-details" } */ +- +-#include +-#include +-typedef unsigned long ulg; +-typedef unsigned char uch; +- +-static const ulg crc_32_tab[] = { +- 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, +- 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, +- 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, +- 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, +- 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, +- 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, +- 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, +- 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, +- 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, +- 
0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, +- 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, +- 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, +- 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, +- 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, +- 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, +- 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, +- 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, +- 0x1b01a57bL, 0x8208f4c1L, 0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, +- 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, +- 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, +- 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, +- 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, +- 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, +- 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, +- 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, +- 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, +- 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, +- 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, +- 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, +- 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, +- 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, +- 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, +- 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, +- 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, +- 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, +- 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, +- 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, +- 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, +- 0x2cd99e8bL, 0x5bdeae1dL, 
0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, +- 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, +- 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, +- 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, +- 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, +- 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, +- 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, +- 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, +- 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 0x40df0b66L, +- 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, +- 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, +- 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, +- 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, +- 0x2d02ef8dL +-}; +- +-ulg updcrc(s, n) +- uch *s; /* pointer to bytes to pump through */ +- unsigned n; /* number of bytes in s[] */ +-{ +- register ulg c; /* temporary variable */ +- +- static ulg crc = (ulg)0xffffffffL; /* shift register contents */ +- +- if (s == NULL) { +- c = 0xffffffffL; +- } else { +- c = crc; +- if (n) +- do { +- c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8); +- for (int i = 0; i < 5; i++) { +- c++; +- } +- +- } while (--n); +- } +- crc = c; +- return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ +-} +-/* { dg-final { scan-tree-dump-times "Wrong loop form for crc matching." 
1 "loop_crc"} } */ +\ No newline at end of file +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-4.c.042t.loop_crc b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-4.c.042t.loop_crc +deleted file mode 100644 +index e69de29bb..000000000 +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.c +deleted file mode 100644 +index 71b25f537..000000000 +--- a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.c ++++ /dev/null +@@ -1,156 +0,0 @@ +-/* { dg-do compile { target {{ aarch64*-*-linux* } && lp64 } } } */ +-/* { dg-options "-O3 -mabi=lp64 -floop-crc -fdump-tree-loop_crc-details" } */ +- +-#include +-#include +-typedef unsigned long ulg; +-typedef unsigned char uch; +- +-static const ulg crc_32_tab[] = { +- 0x00000000L, 0x77073096L, 0xee0e612cL, 0x990951baL, 0x076dc419L, +- 0x706af48fL, 0xe963a535L, 0x9e6495a3L, 0x0edb8832L, 0x79dcb8a4L, +- 0xe0d5e91eL, 0x97d2d988L, 0x09b64c2bL, 0x7eb17cbdL, 0xe7b82d07L, +- 0x90bf1d91L, 0x1db71064L, 0x6ab020f2L, 0xf3b97148L, 0x84be41deL, +- 0x1adad47dL, 0x6ddde4ebL, 0xf4d4b551L, 0x83d385c7L, 0x136c9856L, +- 0x646ba8c0L, 0xfd62f97aL, 0x8a65c9ecL, 0x14015c4fL, 0x63066cd9L, +- 0xfa0f3d63L, 0x8d080df5L, 0x3b6e20c8L, 0x4c69105eL, 0xd56041e4L, +- 0xa2677172L, 0x3c03e4d1L, 0x4b04d447L, 0xd20d85fdL, 0xa50ab56bL, +- 0x35b5a8faL, 0x42b2986cL, 0xdbbbc9d6L, 0xacbcf940L, 0x32d86ce3L, +- 0x45df5c75L, 0xdcd60dcfL, 0xabd13d59L, 0x26d930acL, 0x51de003aL, +- 0xc8d75180L, 0xbfd06116L, 0x21b4f4b5L, 0x56b3c423L, 0xcfba9599L, +- 0xb8bda50fL, 0x2802b89eL, 0x5f058808L, 0xc60cd9b2L, 0xb10be924L, +- 0x2f6f7c87L, 0x58684c11L, 0xc1611dabL, 0xb6662d3dL, 0x76dc4190L, +- 0x01db7106L, 0x98d220bcL, 0xefd5102aL, 0x71b18589L, 0x06b6b51fL, +- 0x9fbfe4a5L, 0xe8b8d433L, 0x7807c9a2L, 0x0f00f934L, 0x9609a88eL, +- 0xe10e9818L, 0x7f6a0dbbL, 0x086d3d2dL, 0x91646c97L, 0xe6635c01L, +- 0x6b6b51f4L, 0x1c6c6162L, 0x856530d8L, 0xf262004eL, 0x6c0695edL, +- 0x1b01a57bL, 0x8208f4c1L, 
0xf50fc457L, 0x65b0d9c6L, 0x12b7e950L, +- 0x8bbeb8eaL, 0xfcb9887cL, 0x62dd1ddfL, 0x15da2d49L, 0x8cd37cf3L, +- 0xfbd44c65L, 0x4db26158L, 0x3ab551ceL, 0xa3bc0074L, 0xd4bb30e2L, +- 0x4adfa541L, 0x3dd895d7L, 0xa4d1c46dL, 0xd3d6f4fbL, 0x4369e96aL, +- 0x346ed9fcL, 0xad678846L, 0xda60b8d0L, 0x44042d73L, 0x33031de5L, +- 0xaa0a4c5fL, 0xdd0d7cc9L, 0x5005713cL, 0x270241aaL, 0xbe0b1010L, +- 0xc90c2086L, 0x5768b525L, 0x206f85b3L, 0xb966d409L, 0xce61e49fL, +- 0x5edef90eL, 0x29d9c998L, 0xb0d09822L, 0xc7d7a8b4L, 0x59b33d17L, +- 0x2eb40d81L, 0xb7bd5c3bL, 0xc0ba6cadL, 0xedb88320L, 0x9abfb3b6L, +- 0x03b6e20cL, 0x74b1d29aL, 0xead54739L, 0x9dd277afL, 0x04db2615L, +- 0x73dc1683L, 0xe3630b12L, 0x94643b84L, 0x0d6d6a3eL, 0x7a6a5aa8L, +- 0xe40ecf0bL, 0x9309ff9dL, 0x0a00ae27L, 0x7d079eb1L, 0xf00f9344L, +- 0x8708a3d2L, 0x1e01f268L, 0x6906c2feL, 0xf762575dL, 0x806567cbL, +- 0x196c3671L, 0x6e6b06e7L, 0xfed41b76L, 0x89d32be0L, 0x10da7a5aL, +- 0x67dd4accL, 0xf9b9df6fL, 0x8ebeeff9L, 0x17b7be43L, 0x60b08ed5L, +- 0xd6d6a3e8L, 0xa1d1937eL, 0x38d8c2c4L, 0x4fdff252L, 0xd1bb67f1L, +- 0xa6bc5767L, 0x3fb506ddL, 0x48b2364bL, 0xd80d2bdaL, 0xaf0a1b4cL, +- 0x36034af6L, 0x41047a60L, 0xdf60efc3L, 0xa867df55L, 0x316e8eefL, +- 0x4669be79L, 0xcb61b38cL, 0xbc66831aL, 0x256fd2a0L, 0x5268e236L, +- 0xcc0c7795L, 0xbb0b4703L, 0x220216b9L, 0x5505262fL, 0xc5ba3bbeL, +- 0xb2bd0b28L, 0x2bb45a92L, 0x5cb36a04L, 0xc2d7ffa7L, 0xb5d0cf31L, +- 0x2cd99e8bL, 0x5bdeae1dL, 0x9b64c2b0L, 0xec63f226L, 0x756aa39cL, +- 0x026d930aL, 0x9c0906a9L, 0xeb0e363fL, 0x72076785L, 0x05005713L, +- 0x95bf4a82L, 0xe2b87a14L, 0x7bb12baeL, 0x0cb61b38L, 0x92d28e9bL, +- 0xe5d5be0dL, 0x7cdcefb7L, 0x0bdbdf21L, 0x86d3d2d4L, 0xf1d4e242L, +- 0x68ddb3f8L, 0x1fda836eL, 0x81be16cdL, 0xf6b9265bL, 0x6fb077e1L, +- 0x18b74777L, 0x88085ae6L, 0xff0f6a70L, 0x66063bcaL, 0x11010b5cL, +- 0x8f659effL, 0xf862ae69L, 0x616bffd3L, 0x166ccf45L, 0xa00ae278L, +- 0xd70dd2eeL, 0x4e048354L, 0x3903b3c2L, 0xa7672661L, 0xd06016f7L, +- 0x4969474dL, 0x3e6e77dbL, 0xaed16a4aL, 0xd9d65adcL, 
0x40df0b66L, +- 0x37d83bf0L, 0xa9bcae53L, 0xdebb9ec5L, 0x47b2cf7fL, 0x30b5ffe9L, +- 0xbdbdf21cL, 0xcabac28aL, 0x53b39330L, 0x24b4a3a6L, 0xbad03605L, +- 0xcdd70693L, 0x54de5729L, 0x23d967bfL, 0xb3667a2eL, 0xc4614ab8L, +- 0x5d681b02L, 0x2a6f2b94L, 0xb40bbe37L, 0xc30c8ea1L, 0x5a05df1bL, +- 0x2d02ef8dL +-}; +- +-int test[5] = {0}; +- +-ulg updcrc(s, n) +- uch *s; +- unsigned n; +-{ +- register ulg c; +- +- static ulg crc = (ulg)0xffffffffL; +- int a = 0; +- if (s == NULL) { +- c = 0xffffffffL; +- } else { +- c = crc; +- if (n) +- do { +- a++; +- c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8) ; +- } while (--n) ; +- } +- crc = c; +- return c ^ 0xffffffffL*a; +-} +- +-ulg updcrc1(s, n) +- uch *s; +- unsigned n; +-{ +- register ulg c; +- +- static ulg crc = (ulg)0xffffffffL; +- if (s == NULL) { +- c = 0xffffffffL; +- } else { +- c = crc; +- unsigned n_back = n; +- if (n) +- do { +- c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8) ; +- n = n - 2; +- } while (n != 0) ; +- } +- +- crc = c; +- return c ^ 0xffffffffL; +-} +- +-ulg updcrc2(s, n) +- uch *s; +- unsigned n; +-{ +- register ulg c; +- +- static ulg crc = (ulg)0xffffffffL; +- if (s == NULL) { +- c = 0xffffffffL; +- } else { +- c = crc; +- unsigned n_back = n; +- if (n) +- do { +- c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8) + 1; +- } while (--n) ; +- } +- +- crc = c; +- return c ^ 0xffffffffL; +-} +-/* +-ulg updcrc3(s, n) +- uch *s; +- int n; +-{ +- register ulg c; +- +- static ulg crc = (ulg)0xffffffffL; +- if (s == NULL) { +- c = 0xffffffffL; +- } else { +- c = crc; +- do { +- c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8); +- --n; +- } while (n ) ; +- } +- +- crc = c; +- return c ^ 0xffffffffL; +-}*/ +-/* { dg-final { scan-tree-dump-times "num of phi noeds check failed." 1 "loop_crc"} } */ +-/* { dg-final { scan-tree-dump-times "evolution pattern check failed." 1 "loop_crc"} } */ +-/* { dg-final { scan-tree-dump-times "calculation pattern check failed." 
1 "loop_crc"} } */ +- +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.c.042t.loop_crc b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.c.042t.loop_crc +deleted file mode 100644 +index 6d52a8684..000000000 +--- a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.c.042t.loop_crc ++++ /dev/null +@@ -1,64 +0,0 @@ +- +-;; Function updcrc3 (updcrc3, funcdef_no=0, decl_uid=3687, cgraph_uid=1, symbol_order=1) +- +-;; 2 loops found +-;; +-;; Loop 0 +-;; header 0, latch 1 +-;; depth 0, outer -1 +-;; nodes: 0 1 2 3 4 5 +-;; +-;; Loop 1 +-;; header 4, latch 4 +-;; depth 1, outer 0 +-;; nodes: 4 +-;; 2 succs { 5 3 } +-;; 3 succs { 4 5 } +-;; 4 succs { 4 } +-;; 5 succs { 1 } +- +-Starting the loop_crc pass +-====================================== +-Processing loop 1: +-====================================== +-;; +-;; Loop 1 +-;; header 4, latch 4 +-;; depth 1, outer 0 +-;; nodes: 4 +- +- +- +-Wrong loop form for crc matching. +-updcrc3 (uch * s, unsigned int n) +-{ +- unsigned int n_back; +- static ulg crc = 4294967295; +- register ulg c; +- ulg _22; +- +- : +- if (s_12(D) == 0B) +- goto ; [INV] +- else +- goto ; [INV] +- +- : +- c_14 = crc; +- if (n_15(D) != 0) +- goto ; [INV] +- else +- goto ; [INV] +- +- : +- goto ; [100.00%] +- +- : +- # c_11 = PHI <4294967295(2), c_14(3)> +- crc = c_11; +- _22 = c_11 ^ 4294967295; +- return _22; +- +-} +- +- +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.s b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.s +deleted file mode 100644 +index cae934bfe..000000000 +--- a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-calculation-check-fail.s ++++ /dev/null +@@ -1,329 +0,0 @@ +- .arch armv8-a +- .file "loop-crc-calculation-check-fail.c" +- .text +- .section .rodata +- .align 3 +- .type crc_32_tab, %object +- .size crc_32_tab, 2048 +-crc_32_tab: +- .xword 0 +- .xword 1996959894 +- .xword 3993919788 +- .xword 2567524794 +- .xword 124634137 +- .xword 
1886057615 +- .xword 3915621685 +- .xword 2657392035 +- .xword 249268274 +- .xword 2044508324 +- .xword 3772115230 +- .xword 2547177864 +- .xword 162941995 +- .xword 2125561021 +- .xword 3887607047 +- .xword 2428444049 +- .xword 498536548 +- .xword 1789927666 +- .xword 4089016648 +- .xword 2227061214 +- .xword 450548861 +- .xword 1843258603 +- .xword 4107580753 +- .xword 2211677639 +- .xword 325883990 +- .xword 1684777152 +- .xword 4251122042 +- .xword 2321926636 +- .xword 335633487 +- .xword 1661365465 +- .xword 4195302755 +- .xword 2366115317 +- .xword 997073096 +- .xword 1281953886 +- .xword 3579855332 +- .xword 2724688242 +- .xword 1006888145 +- .xword 1258607687 +- .xword 3524101629 +- .xword 2768942443 +- .xword 901097722 +- .xword 1119000684 +- .xword 3686517206 +- .xword 2898065728 +- .xword 853044451 +- .xword 1172266101 +- .xword 3705015759 +- .xword 2882616665 +- .xword 651767980 +- .xword 1373503546 +- .xword 3369554304 +- .xword 3218104598 +- .xword 565507253 +- .xword 1454621731 +- .xword 3485111705 +- .xword 3099436303 +- .xword 671266974 +- .xword 1594198024 +- .xword 3322730930 +- .xword 2970347812 +- .xword 795835527 +- .xword 1483230225 +- .xword 3244367275 +- .xword 3060149565 +- .xword 1994146192 +- .xword 31158534 +- .xword 2563907772 +- .xword 4023717930 +- .xword 1907459465 +- .xword 112637215 +- .xword 2680153253 +- .xword 3904427059 +- .xword 2013776290 +- .xword 251722036 +- .xword 2517215374 +- .xword 3775830040 +- .xword 2137656763 +- .xword 141376813 +- .xword 2439277719 +- .xword 3865271297 +- .xword 1802195444 +- .xword 476864866 +- .xword 2238001368 +- .xword 4066508878 +- .xword 1812370925 +- .xword 453092731 +- .xword 2181625025 +- .xword 4111451223 +- .xword 1706088902 +- .xword 314042704 +- .xword 2344532202 +- .xword 4240017532 +- .xword 1658658271 +- .xword 366619977 +- .xword 2362670323 +- .xword 4224994405 +- .xword 1303535960 +- .xword 984961486 +- .xword 2747007092 +- .xword 3569037538 +- .xword 1256170817 +- .xword 
1037604311 +- .xword 2765210733 +- .xword 3554079995 +- .xword 1131014506 +- .xword 879679996 +- .xword 2909243462 +- .xword 3663771856 +- .xword 1141124467 +- .xword 855842277 +- .xword 2852801631 +- .xword 3708648649 +- .xword 1342533948 +- .xword 654459306 +- .xword 3188396048 +- .xword 3373015174 +- .xword 1466479909 +- .xword 544179635 +- .xword 3110523913 +- .xword 3462522015 +- .xword 1591671054 +- .xword 702138776 +- .xword 2966460450 +- .xword 3352799412 +- .xword 1504918807 +- .xword 783551873 +- .xword 3082640443 +- .xword 3233442989 +- .xword 3988292384 +- .xword 2596254646 +- .xword 62317068 +- .xword 1957810842 +- .xword 3939845945 +- .xword 2647816111 +- .xword 81470997 +- .xword 1943803523 +- .xword 3814918930 +- .xword 2489596804 +- .xword 225274430 +- .xword 2053790376 +- .xword 3826175755 +- .xword 2466906013 +- .xword 167816743 +- .xword 2097651377 +- .xword 4027552580 +- .xword 2265490386 +- .xword 503444072 +- .xword 1762050814 +- .xword 4150417245 +- .xword 2154129355 +- .xword 426522225 +- .xword 1852507879 +- .xword 4275313526 +- .xword 2312317920 +- .xword 282753626 +- .xword 1742555852 +- .xword 4189708143 +- .xword 2394877945 +- .xword 397917763 +- .xword 1622183637 +- .xword 3604390888 +- .xword 2714866558 +- .xword 953729732 +- .xword 1340076626 +- .xword 3518719985 +- .xword 2797360999 +- .xword 1068828381 +- .xword 1219638859 +- .xword 3624741850 +- .xword 2936675148 +- .xword 906185462 +- .xword 1090812512 +- .xword 3747672003 +- .xword 2825379669 +- .xword 829329135 +- .xword 1181335161 +- .xword 3412177804 +- .xword 3160834842 +- .xword 628085408 +- .xword 1382605366 +- .xword 3423369109 +- .xword 3138078467 +- .xword 570562233 +- .xword 1426400815 +- .xword 3317316542 +- .xword 2998733608 +- .xword 733239954 +- .xword 1555261956 +- .xword 3268935591 +- .xword 3050360625 +- .xword 752459403 +- .xword 1541320221 +- .xword 2607071920 +- .xword 3965973030 +- .xword 1969922972 +- .xword 40735498 +- .xword 2617837225 +- .xword 
3943577151 +- .xword 1913087877 +- .xword 83908371 +- .xword 2512341634 +- .xword 3803740692 +- .xword 2075208622 +- .xword 213261112 +- .xword 2463272603 +- .xword 3855990285 +- .xword 2094854071 +- .xword 198958881 +- .xword 2262029012 +- .xword 4057260610 +- .xword 1759359992 +- .xword 534414190 +- .xword 2176718541 +- .xword 4139329115 +- .xword 1873836001 +- .xword 414664567 +- .xword 2282248934 +- .xword 4279200368 +- .xword 1711684554 +- .xword 285281116 +- .xword 2405801727 +- .xword 4167216745 +- .xword 1634467795 +- .xword 376229701 +- .xword 2685067896 +- .xword 3608007406 +- .xword 1308918612 +- .xword 956543938 +- .xword 2808555105 +- .xword 3495958263 +- .xword 1231636301 +- .xword 1047427035 +- .xword 2932959818 +- .xword 3654703836 +- .xword 1088359270 +- .xword 936918000 +- .xword 2847714899 +- .xword 3736837829 +- .xword 1202900863 +- .xword 817233897 +- .xword 3183342108 +- .xword 3401237130 +- .xword 1404277552 +- .xword 615818150 +- .xword 3134207493 +- .xword 3453421203 +- .xword 1423857449 +- .xword 601450431 +- .xword 3009837614 +- .xword 3294710456 +- .xword 1567103746 +- .xword 711928724 +- .xword 3020668471 +- .xword 3272380065 +- .xword 1510334235 +- .xword 755167117 +- .text +- .align 2 +- .global updcrc3 +- .type updcrc3, %function +-updcrc3: +-.LFB0: +- .cfi_startproc +- str x19, [sp, -48]! 
+- .cfi_def_cfa_offset 48 +- .cfi_offset 19, -48 +- str x0, [sp, 24] +- str w1, [sp, 20] +- ldr x0, [sp, 24] +- cmp x0, 0 +- bne .L2 +- mov x19, 4294967295 +- b .L3 +-.L2: +- adrp x0, crc.0 +- add x0, x0, :lo12:crc.0 +- ldr x19, [x0] +- ldr w0, [sp, 20] +- str w0, [sp, 44] +- ldr w0, [sp, 20] +- cmp w0, 0 +- beq .L3 +-.L4: +- ldr x0, [sp, 24] +- add x1, x0, 1 +- str x1, [sp, 24] +- ldrb w0, [x0] +- and x0, x0, 255 +- eor x0, x19, x0 +- and x1, x0, 255 +- adrp x0, crc_32_tab +- add x0, x0, :lo12:crc_32_tab +- ldr x1, [x0, x1, lsl 3] +- lsr x0, x19, 8 +- eor x19, x1, x0 +- ldr w0, [sp, 20] +- sub w0, w0, #1 +- str w0, [sp, 20] +- ldr w0, [sp, 20] +- cmp w0, 999 +- bls .L4 +-.L3: +- adrp x0, crc.0 +- add x0, x0, :lo12:crc.0 +- str x19, [x0] +- eor x0, x19, 4294967295 +- ldr x19, [sp], 48 +- .cfi_restore 19 +- .cfi_def_cfa_offset 0 +- ret +- .cfi_endproc +-.LFE0: +- .size updcrc3, .-updcrc3 +- .data +- .align 3 +- .type crc.0, %object +- .size crc.0, 8 +-crc.0: +- .xword 4294967295 +- .ident "GCC: (Kunpeng gcc 10.3.1-2.3.0.b006) 10.3.1" +- .section .note.GNU-stack,"",@progbits +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-3.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-condition-fail.c +similarity index 97% +rename from gcc/testsuite/gcc.dg/tree-ssa/loop-crc-3.c +rename to gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-condition-fail.c +index 70eb1b814..fefa949f9 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-3.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-condition-fail.c +@@ -74,12 +74,12 @@ ulg updcrc(s, n) + } else { + c = crc; + if (n) +- do { +- c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8); ++ if (n) do { ++ c = crc_32_tab[((int)c ^ (*s++)) & 0xff] ^ (c >> 8); + } while (--n || c != 0) ; + } + crc = c; + exit1: + return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ + } +-/* { dg-final { scan-tree-dump-times "Wrong loop form for crc matching." 
1 "loop_crc"} } */ +\ No newline at end of file ++/* { dg-final { scan-tree-dump-times "Wrong loop form for crc matching." 1 "loop_crc"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-4.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail-2.c +similarity index 95% +rename from gcc/testsuite/gcc.dg/tree-ssa/loop-crc-4.c +rename to gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail-2.c +index 1d7e0a319..b37446ec5 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-4.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail-2.c +@@ -75,8 +75,8 @@ ulg updcrc(s, n) + } else { + c = crc; + if (n) +- do { +- c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8) * test[c%5]; ++ if (n) do { ++ c = crc_32_tab[((int)c ^ (*s++)) & 0xff] ^ (c >> 8); + } while (--n) ; + } + do { +@@ -86,4 +86,5 @@ ulg updcrc(s, n) + crc = c; + return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ + } +-/* { dg-final { scan-tree-dump-times "Table check fail. not only single array is read." 2 "loop_crc"} } */ +\ No newline at end of file ++/* { dg-final { scan-tree-dump-times "Table check fail. not only single array is read." 2 "loop_crc"} } */ ++/* { dg-final { scan-tree-dump-times "Wrong crc table for crc matching." 1 "loop_crc"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail.c +index b59704e31..3dc500a46 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-loop-form-fail.c +@@ -108,4 +108,5 @@ ulg updcrc1(s, n) + crc = c; + return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ + } +-/* { dg-final { scan-tree-dump-times "Wrong loop form for crc matching." 2 "loop_crc"} } */ +\ No newline at end of file ++/* { dg-final { scan-tree-dump-times "Table check fail. not only single array is read." 1 "loop_crc"} } */ ++/* { dg-final { scan-tree-dump-times "Wrong crc table for crc matching." 
2 "loop_crc"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-sucess.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-sucess.c +index e1e16eaf2..8b556efc8 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-sucess.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-sucess.c +@@ -73,12 +73,11 @@ ulg updcrc(s, n) + c = 0xffffffffL; + } else { + c = crc; +- if (n) +- do { +- c = crc_32_tab[(c ^ (*s++)) & 0xff] ^ (c >> 8); ++ if (n) do { ++ c = crc_32_tab[((int)c ^ (*s++)) & 0xff] ^ (c >> 8); + } while (--n); + } + crc = c; + return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ + } +-/* { dg-final { scan-tree-dump-times "the loop can be optimized" 1 "loop_crc"} } */ +\ No newline at end of file ++/* { dg-final { scan-tree-dump-times "The 1th loop form is successmatched,and the loop can be optimized." 1 "loop_crc"} } */ +diff --git a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-table-check-fail.c b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-table-check-fail.c +index f03a4fa82..de21f4553 100644 +--- a/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-table-check-fail.c ++++ b/gcc/testsuite/gcc.dg/tree-ssa/loop-crc-table-check-fail.c +@@ -110,4 +110,5 @@ ulg updcrc1(s, n) + return c ^ 0xffffffffL; /* (instead of ~c for 64-bit machines) */ + } + /* { dg-final { scan-tree-dump-times "Table check fail. not only single array is read." 2 "loop_crc"} } */ +-/* { dg-final { scan-tree-dump-times "Table check fail. Table not matching." 1 "loop_crc"} } */ +\ No newline at end of file ++/* { dg-final { scan-tree-dump-times "Wrong crc table for crc matching." 3 "loop_crc"} } */ ++/* { dg-final { scan-tree-dump-times "Table check fail. Table not matching." 1 "loop_crc"} } */ +diff --git a/gcc/tree-ssa-loop-crc.c b/gcc/tree-ssa-loop-crc.c +index 4982384c6..8225c2fa5 100644 +--- a/gcc/tree-ssa-loop-crc.c ++++ b/gcc/tree-ssa-loop-crc.c +@@ -1,5 +1,5 @@ +-/* Array widen compare. +- Copyright (C) 2022-2022 Free Software Foundation, Inc. ++/* loop crc. 
++ Copyright (C) 2023-2023 Free Software Foundation, Inc. + + This file is part of GCC. + +@@ -42,13 +42,235 @@ along with GCC; see the file COPYING3. If not see + #include "print-tree.h" + #include "cfghooks.h" + #include "gimple-fold.h" ++#include "diagnostic-core.h" ++ ++/* This pass handles scenarios similar to the following: ++ulg updcrc(s, n) ++ uch *s; ++ unsigned n; ++{ ++ register ulg c; ++ ++ static ulg crc = (ulg)0xffffffffL; ++ ++ if (s == NULL) { ++ c = 0xffffffffL; ++ } else { ++ c = crc; ++ if (n) do { ++ c = crc_32_tab[((int)c ^ (*s++)) & 0xff] ^ (c >> 8); ++ } while (--n); ++ } ++ crc = c; ++ return c ^ 0xffffffffL; ++} ++ ++If the hardware supports the crc instruction, then the pass completes the ++conversion of the above scenario into: ++ ++#define SIZE_U32 sizeof(uint32_t) ++unsigned long updcrc(s, n) ++ unsigned char *s; ++ unsigned n; ++{ ++ register unsigned long c; ++ ++ static unsigned long crc = (unsigned long)0xffffffffL; ++ ++ if (s == NULL) { ++ c = 0xffffffffL; ++ } else { ++ c = crc; ++ if (n) ++ { ++ uint32_t nn = n/SIZE_U32; ++ do{ ++ c = __crc32w(c,*((uint32_t *)s)); ++ s += SIZE_U32; ++ }while(--nn); ++ if (n & sizeof(uint16_t)) { ++ c = __crc32h(c, *((uint16_t *)s)); ++ s += sizeof(uint16_t); ++ } ++ if (n & sizeof(uint8_t)) ++ c = __crc32b(c, *s); ++ } ++ } ++ crc = c; ++ return c ^ 0xffffffffL; ++} ++ ++This pass is to complete the conversion of such scenarios from the internal ++perspective of the compiler: ++1)match_crc_loop:The function completes the screening of such scenarios; ++2)convert_to_new_loop:The function completes the conversion of ++ origin_loop to new loops, and removes origin_loop; ++3)origin_loop_info: The structure is used to record important information ++ of origin_loop: such as loop exit, initial value of induction ++ variable, etc; ++4) create_new_loops: The function is used as the key content of the pass ++ to complete the creation of new loops. */ + +-/* Match.pd function to match the ctz expression. 
*/ + extern bool gimple_crc_match_index (tree, tree *, tree (*)(tree)); + extern bool gimple_crc_match_res (tree, tree *, tree (*)(tree)); + + static gimple *crc_table_read_stmt = NULL; + ++static gphi* phi_s = NULL; ++static gphi* phi_c = NULL; ++static tree nn_tree = NULL; ++ ++enum aarch64_crc_builtins ++{ ++ AARCH64_BUILTIN_CRC32B, ++ AARCH64_BUILTIN_CRC32H, ++ AARCH64_BUILTIN_CRC32W, ++}; ++ ++/* The useful information of origin loop. */ ++struct origin_loop_info ++{ ++ tree limit; /* The limit index of the array in the old loop. */ ++ tree base_n; /* The initial value of the old loop. */ ++ tree base_s; /* The initial value of the old loop. */ ++ tree base_c; /* The initial value of the old loop. */ ++ edge entry_edge; /* The edge into the old loop. */ ++ edge exit_edge; /* The edge outto the old loop. */ ++ basic_block exit_bb; ++}; ++ ++typedef struct origin_loop_info origin_loop_info; ++ ++static origin_loop_info origin_loop; ++hash_map n_map; ++hash_map nn_map; ++hash_map s_map; ++hash_map c_map; ++hash_map crc_map; ++ ++/* Initialize the origin_loop structure. */ ++static void ++init_origin_loop_structure () ++{ ++ origin_loop.entry_edge = NULL; ++ origin_loop.exit_edge = NULL; ++ origin_loop.exit_bb = NULL; ++ origin_loop.limit = NULL; ++ origin_loop.base_n = NULL; ++ origin_loop.base_s = NULL; ++ origin_loop.base_c = NULL; ++} ++ ++/* Get the edge that first entered the loop. */ ++static edge ++get_loop_preheader_edge (class loop *loop) ++{ ++ edge e; ++ edge_iterator ei; ++ ++ FOR_EACH_EDGE (e, ei, loop->header->preds) ++ if (e->src != loop->latch) ++ break; ++ ++ return e; ++} ++ ++/* Returns true if t is SSA_NAME and user variable exists. */ ++ ++static bool ++ssa_name_var_p (tree t) ++{ ++ if (!t || TREE_CODE (t) != SSA_NAME) ++ return false; ++ if (SSA_NAME_VAR (t)) ++ return true; ++ return false; ++} ++ ++/* Returns true if t1 and t2 are SSA_NAME and belong to the same variable. 
*/ ++ ++static bool ++same_ssa_name_var_p (tree t1, tree t2) ++{ ++ if (!ssa_name_var_p (t1) || !ssa_name_var_p (t2)) ++ return false; ++ if (SSA_NAME_VAR (t1) == SSA_NAME_VAR (t2)) ++ return true; ++ return false; ++} ++ ++/* Get origin loop induction variable upper bound. */ ++ ++static bool ++get_iv_upper_bound (gimple *stmt) ++{ ++ if (origin_loop.limit != NULL || origin_loop.base_n != NULL) ++ return false; ++ ++ tree lhs = gimple_cond_lhs (stmt); ++ tree rhs = gimple_cond_rhs (stmt); ++ ++ if (TREE_CODE (TREE_TYPE (lhs)) != INTEGER_TYPE ++ || TREE_CODE (TREE_TYPE (rhs)) != INTEGER_TYPE) ++ return false; ++ ++ /* TODO: Currently, the input restrictions on lhs and rhs are implemented ++ through PARM_DECL. We may consider relax the restrictions later, and ++ we need to consider the overall adaptation scenario and adding test ++ cases. */ ++ if (ssa_name_var_p (lhs) && TREE_CODE (SSA_NAME_VAR (lhs)) == PARM_DECL) ++ { ++ origin_loop.limit = rhs; ++ origin_loop.base_n = lhs; ++ } ++ else ++ return false; ++ ++ if (origin_loop.limit != NULL && origin_loop.base_n != NULL) ++ return true; ++ ++ return false; ++} ++ ++/* Get origin loop info. 
*/ ++static bool ++get_origin_loop_info(class loop *loop) ++{ ++ vec edges; ++ edges = get_loop_exit_edges (loop); ++ origin_loop.exit_edge = edges[0]; ++ origin_loop.exit_bb = origin_loop.exit_edge->dest; ++ origin_loop.entry_edge = get_loop_preheader_edge(loop); ++ origin_loop.base_s = PHI_ARG_DEF_FROM_EDGE(phi_s,origin_loop.entry_edge); ++ origin_loop.base_c = PHI_ARG_DEF_FROM_EDGE(phi_c,origin_loop.entry_edge); ++ ++ basic_block preheader_bb; ++ preheader_bb = origin_loop.entry_edge->src; ++ ++ if(preheader_bb->preds->length() != 1) ++ return false; ++ ++ edge entry_pre_bb_edge; ++ entry_pre_bb_edge = EDGE_PRED (preheader_bb, 0); ++ ++ basic_block pre_preheader_bb; ++ pre_preheader_bb = entry_pre_bb_edge->src; ++ ++ gimple_stmt_iterator gsi; ++ gimple *stmt; ++ bool get_upper_bound = false; ++ for (gsi = gsi_start_bb (pre_preheader_bb); !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ stmt = gsi_stmt (gsi); ++ if (stmt && gimple_code (stmt) == GIMPLE_COND ++ && get_iv_upper_bound (stmt)) { ++ get_upper_bound = true; ++ break; ++ } ++ } ++ ++ return get_upper_bound; ++} + + /* The loop form check will check the entire loop control flow + It should be a loop that: +@@ -102,7 +324,8 @@ only_one_array_read (class loop *loop, tree &crc_table) + if (gimple_code (stmt) == GIMPLE_ASSIGN && + TREE_CODE(gimple_assign_rhs1 (stmt)) == ARRAY_REF) + { +- if (crc_table == NULL) ++ if (crc_table == NULL && ++ gimple_assign_rhs1 (stmt)->base.readonly_flag) + { + crc_table = gimple_assign_rhs1 (stmt); + crc_table_read_stmt = stmt; +@@ -174,15 +397,18 @@ static const unsigned HOST_WIDE_INT crc_32_tab[] = { + static bool + match_crc_table (tree crc_table) + { ++ const unsigned LOW_BOUND = 0; ++ const unsigned UP_BOUND = 255; ++ const unsigned ELEMENT_SIZE = 8; + unsigned HOST_WIDE_INT lb = tree_to_uhwi (array_ref_low_bound (crc_table)); + unsigned HOST_WIDE_INT ub = tree_to_uhwi (array_ref_up_bound (crc_table)); + unsigned HOST_WIDE_INT es = tree_to_uhwi (array_ref_element_size 
(crc_table)); +- if (lb != 0 || ub != 255 || es != 8) ++ if (lb != LOW_BOUND || ub != UP_BOUND || es != ELEMENT_SIZE) + return false; + + tree decl = TREE_OPERAND (crc_table, 0); + tree ctor = ctor_for_folding(decl); +- for (int i = 0; i < 255; i++) { ++ for (int i = lb; i <= ub; i++) { + unsigned HOST_WIDE_INT val = tree_to_uhwi (CONSTRUCTOR_ELT (ctor,i)->value); + if (crc_32_tab[i] != val) + return false; +@@ -273,6 +499,7 @@ check_evolution_pattern (class loop* loop, gphi *capture[]) + if (s != NULL) + return false; + s = capture[i]; ++ phi_s = s; + } + else if (evolution_pattern_plus_with_p(loop, capture[i], 4294967295)) + { +@@ -285,6 +512,7 @@ check_evolution_pattern (class loop* loop, gphi *capture[]) + if (c != NULL) + return false; + c = capture[i]; ++ phi_c = c; + } + } + +@@ -314,14 +542,19 @@ check_calculation_pattern (class loop* loop, gphi *capture[]) + _5 = _4 ^ c_10; //BIT_XOR_EXPR (SSA_NAME, PHI @1) + _6 = _5 & 255; //BIT_XOR_EXPR (SSA_NAME, INTEGER_CST@3) + */ +- + if (!gimple_crc_match_index(index, res_ops, NULL)) + return false; +- gimple *s_res_stmt = SSA_NAME_DEF_STMT(res_ops[1]); +- tree s_res = TREE_OPERAND(gimple_assign_rhs1(s_res_stmt),0); +- if (res_ops[0] != gimple_phi_result (c) || +- s_res != gimple_phi_result (s)) ++ gimple *s_res_stmt = SSA_NAME_DEF_STMT (res_ops[0]); ++ if (!s_res_stmt) ++ return false; ++ gimple *s_def_stmt = SSA_NAME_DEF_STMT (gimple_assign_rhs1 (s_res_stmt)); ++ if (!s_def_stmt) + return false; ++ tree s_res = TREE_OPERAND (gimple_assign_rhs1 (s_def_stmt), 0); ++ if (res_ops[1] != gimple_phi_result (c) || s_res != gimple_phi_result (s)) ++ { ++ return false; ++ } + + /* Try to match + _8 = c_12 >> 8; // RSHIFT_EXPR (SSA_NAME @1, INTEGER_CST @2) +@@ -333,7 +566,11 @@ check_calculation_pattern (class loop* loop, gphi *capture[]) + return false; + if (res_ops[0] != gimple_phi_result (c) + || res_ops[2] != gimple_assign_lhs(crc_table_read_stmt)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf 
(dump_file, "\n gimple_crc_match_res pattern check failed.\n"); + return false; ++ } + + return true; + } +@@ -419,101 +656,91 @@ crc_loop_body_check (class loop *loop) + return false; + } + return true; +-/* gphi *phi; +- gphi_iterator gsi; +- int num_of_phi = 0; +- //s, n, c; +- //only 3 phi nodes are there, every one of the phi nodes comming from 2 edge only, one from preheader, one from latch +- // s increase by 1 every itoration +- // n decrease by 1 every itoration +- // The final one is c, which is the result, should be used for the start of the later pattern matching +- for (gsi = gsi_start_phis(loop->header); !gsi_end_p(gsi); gsi_next(&gsi)) +- { +- phi = gsi.phi(); ++} + +- if (phi) num_of_phi++; +- if (num_of_phi > 3) return false; // more then 3 phi node +- if (gimple_phi_num_args(phi) > 2) // more than 2 edges other then one backedge and one preheader edge +- return false; +- //capture[num_of_phi - 1] = gimple_phi_result(phi); +- capture[num_of_phi - 1] = phi; +- } +- if (num_of_phi != 3) return false; // phi node should be 3 */ +- // Find the envolution pattern for s and n, try to match the identity of these variable +-/* gphi *s=NULL; +- gphi *n=NULL; +- gphi *c=NULL; ++/* Check the prev_bb of prev_bb of loop header. 
The prev_bb we are trying to match is + +- for (int i = 0; i < 3; i++) +- { +- if (evolution_pattern_plus_with_p(loop, capture[i], 1)) +- { +- if(s != NULL) +- return false; +- s = capture[i]; +- } +- else if (evolution_pattern_plus_with_p(loop, capture[i], 4294967295)) +- { +- if(n != NULL) +- return false; +- n = capture[i]; +- } +- else +- { +- if(c != NULL) +- return false; +- c = capture[i]; +- } +- } ++c_15 = crc; ++if (n_16(D) != 0) ++ goto ; [INV] ++else ++ goto ; [INV] + +- // some envolution pattern cannot find +- if (!n || !s || !c) +- return false; +- gphi *s=capture[0]; +- gphi *n=capture[1]; +- gphi *c=capture[2]; +- tree res_ops[3]; +- tree index = TREE_OPERAND (gimple_assign_rhs1 (crc_table_read_stmt), 1); ++ In this case , we must be sure that the n is not zero. ++ so the match condition is ++ 1、the n is not zero. + +- /* Try to match +- _1 = (int) c_12; //NOP_EXPR (SSA_NAME @1) +- _4 = (int) _3; //NOP_EXPR (SSA_NAME @2) +- _5 = _1 ^ _4; //BIT_XOR_EXPR (SSA_NAME, SSA_NAME) +- _6 = _5 & 255; //BIT_XOR_EXPR (SSA_NAME, INTEGER_CST@3) ++ : ++if (s_13(D) == 0B) ++ goto ; [INV] ++else ++ goto ; [INV] + +- +- if (!gimple_crc_match_index(index, res_ops, NULL)) ++ In this case, we must be sure the s is not NULL. ++ so the match condition is ++ 1、the s is not NULL. 
++*/ ++static bool ++crc_prev_bb_of_loop_header_check(class loop *loop) ++{ ++ basic_block header = loop->header; ++ basic_block prev_header_bb = header->prev_bb; ++ if(NULL == prev_header_bb) ++ { + return false; +- gimple *s_res_stmt = SSA_NAME_DEF_STMT(res_ops[1]); +- tree s_res = TREE_OPERAND(gimple_assign_rhs1(s_res_stmt),0); +- if (res_ops[0] != gimple_phi_result (c) || +- s_res != gimple_phi_result (s)) ++ } ++ ++ basic_block prev_prev_header_bb = prev_header_bb->prev_bb; ++ if(NULL == prev_prev_header_bb) ++ { + return false; ++ } ++ ++ gimple_stmt_iterator gsi; ++ gimple *stmt; ++ bool res = false; ++ for (gsi = gsi_start_bb (prev_prev_header_bb); !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ stmt = gsi_stmt (gsi); ++ if (stmt == NULL) ++ return false; + +- /* +-_8 = c_12 >> 8; // RSHIFT_EXPR (SSA_NAME @1, INTEGER_CST @2) +-c_19 = _7 ^ _8; // BIT_XOR_EXPR (SSA_NAME@3, SSA_NAME) ++ if (gimple_code (stmt) == GIMPLE_COND && ++ gimple_cond_code(stmt) == NE_EXPR && ++ TREE_CODE(gimple_cond_rhs (stmt)) == INTEGER_CST && ++ tree_int_cst_sgn(gimple_cond_rhs (stmt)) == 0 ) ++ { ++ res = true; ++ break; ++ } ++ } + +- edge backedge = find_edge(loop->latch, loop->header); +- tree updated_c = PHI_ARG_DEF_FROM_EDGE (c, backedge); +- if (!gimple_crc_match_res(updated_c, res_ops, NULL)) +- return false; +- if (res_ops[0] != gimple_phi_result (c) +- || res_ops[2] != gimple_assign_lhs(crc_table_read_stmt)) ++ if(!res) ++ { + return false; ++ } + +- // try match n as the induction variable +- // The proceed condition for back edge is n != 0 +- gimple *cond_stmt = gsi_stmt (gsi_last_bb (loop->header)); +- if (!cond_stmt || gimple_code (cond_stmt) != GIMPLE_COND || gimple_cond_code (cond_stmt) != NE_EXPR +- || gimple_cond_lhs (cond_stmt) != PHI_ARG_DEF_FROM_EDGE (n, backedge) +- || tree_to_uhwi(gimple_cond_rhs (cond_stmt)) != 0) ++ basic_block first_bb = prev_prev_header_bb->prev_bb; ++ if(NULL == first_bb) + return false; +- +- return true; +- */ +-} + ++ for (gsi = 
gsi_start_bb (first_bb); !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ stmt = gsi_stmt (gsi); ++ if (stmt == NULL) ++ return false; ++ ++ if (gimple_code (stmt) == GIMPLE_COND && ++ gimple_cond_code(stmt) == EQ_EXPR && ++ TREE_CODE(gimple_cond_rhs (stmt)) == INTEGER_CST && ++ tree_int_cst_sgn(gimple_cond_rhs (stmt)) == 0 ) ++ { ++ return true; ++ } ++ } ++ ++ return false; ++} + + static bool + match_crc_loop (class loop *loop) +@@ -536,13 +763,463 @@ match_crc_loop (class loop *loop) + fprintf (dump_file, "\nWrong loop body for crc matching.\n"); + return false; + } ++ if(!crc_prev_bb_of_loop_header_check(loop)) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ fprintf (dump_file, "\nWrong prev basic_blocks of loop header for crc matching.\n"); ++ return false; ++ } ++ ++ init_origin_loop_structure(); ++ if(!get_origin_loop_info(loop)) ++ return false; ++ + return true; + } + ++static void ++create_new_bb (basic_block &new_bb, basic_block after_bb, ++ basic_block dominator_bb, class loop *outer) ++{ ++ new_bb = create_empty_bb (after_bb); ++ add_bb_to_loop (new_bb, outer); ++ set_immediate_dominator (CDI_DOMINATORS, new_bb, dominator_bb); ++} ++ ++static void ++change_preheader_bb(edge entry_edge) ++{ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gimple* g; ++ tree lhs1; ++ ++ lhs1 = create_tmp_var(TREE_TYPE(origin_loop.base_n),"nn"); ++ lhs1 = make_ssa_name(lhs1); ++ gsi = gsi_last_bb (entry_edge->src); ++ g = gimple_build_assign(lhs1,RSHIFT_EXPR,origin_loop.base_n, ++ build_int_cst (TREE_TYPE (origin_loop.base_n), 2)); ++ gimple_seq_add_stmt(&stmts,g); ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++ nn_tree = lhs1; ++ set_current_def(nn_tree, lhs1); ++ nn_map.put (entry_edge->src, lhs1); ++} ++ ++static gphi* ++create_phi_node_for_bb(tree old_name, basic_block bb) ++{ ++ gphi *phi = create_phi_node(NULL_TREE, bb); ++ create_new_def_for(old_name, phi, gimple_phi_result_ptr(phi)); ++ return phi; ++} ++ ++static gimple* 
++call_builtin_fun(int code,tree& lhs, tree arg1, tree arg2) ++{ ++ unsigned int builtin_code = targetm.get_crc_builtin_code(code, true);// 根据code获取到正确的builtin_fun_code ++ tree fn = targetm.builtin_decl(builtin_code,true); // get the decl of __builtin_aarch64_crc32w ++ if (!fn || fn == error_mark_node) ++ fatal_error (input_location, ++ "target specific builtin not available"); ++ gimple* call_builtin = gimple_build_call(fn, 2, arg1, arg2); // _40 = __builtin_aarch64_crc32* (_1, _2); ++ lhs = make_ssa_name (unsigned_type_node); ++ gimple_call_set_lhs(call_builtin,lhs); ++ ++ return call_builtin; ++} ++ ++/* Create loop_header and loop_latch for new loop ++ : ++ # s_14 = PHI ++ # c_16 = PHI ++ # nn_19 = PHI ++ _1 = (unsigned int) c_16; ++ _2 = MEM[(uint32_t *)s_14]; ++ _40 = __builtin_aarch64_crc32w (_1, _2); ++ c_29 = (long unsigned int) _40; ++ s_30 = s_14 + 4; ++ nn_31 = nn_19 + 4294967295; ++ if (nn_31 != 0) ++ The IR of bb is as above. */ ++static void ++create_loop_bb(basic_block& loop_bb, basic_block after_bb, ++ basic_block dominator_bb, class loop *outer, edge entry_edge) ++{ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gimple* g; ++ gphi* phi_s_loop; ++ gphi* phi_c_loop; ++ gphi* phi_nn_loop; ++ ++ create_new_bb(loop_bb, after_bb, dominator_bb, outer); ++ redirect_edge_and_branch(entry_edge, loop_bb); ++ gsi = gsi_last_bb(loop_bb); ++ tree entry_nn = get_current_def(nn_tree); ++ phi_s_loop = create_phi_node_for_bb(origin_loop.base_s, loop_bb); ++ phi_c_loop = create_phi_node_for_bb(origin_loop.base_c, loop_bb); ++ phi_nn_loop = create_phi_node_for_bb(entry_nn, loop_bb); ++ ++ tree res_s = gimple_phi_result(phi_s_loop); ++ tree res_nn = gimple_phi_result(phi_nn_loop); ++ tree lhs1 = gimple_build(&stmts, NOP_EXPR, unsigned_type_node, ++ gimple_phi_result(phi_c_loop)); ++ g = gimple_build_assign(make_ssa_name(unsigned_type_node), ++ fold_build2(MEM_REF,unsigned_type_node,res_s, ++ build_int_cst (build_pointer_type (unsigned_type_node), 0))); ++ 
gimple_seq_add_stmt(&stmts, g); ++ tree lhs2 = gimple_assign_lhs(g); // _2 = MEM[(uint32_t *)s_14]; ++ unsigned int code = AARCH64_BUILTIN_CRC32W; ++ tree lhs3; ++ gimple* build_crc32w = call_builtin_fun(code,lhs3, lhs1, lhs2); ++ crc_map.put(loop_bb, lhs3); ++ gimple_seq_add_stmt(&stmts,build_crc32w); ++ ++ tree lhs4 = copy_ssa_name(origin_loop.base_c); ++ g = gimple_build_assign(lhs4, NOP_EXPR, lhs3); ++ gimple_seq_add_stmt(&stmts, g); ++ c_map.put(loop_bb, lhs4); ++ ++ tree lhs5 = copy_ssa_name(origin_loop.base_s); ++ g = gimple_build_assign(lhs5, POINTER_PLUS_EXPR, res_s, ++ build_int_cst (sizetype, 4)); ++ gimple_seq_add_stmt(&stmts, g); ++ s_map.put(loop_bb, lhs5); ++ ++ tree lhs6 = copy_ssa_name(nn_tree); ++ g = gimple_build_assign(lhs6, PLUS_EXPR, res_nn, ++ build_int_cst (TREE_TYPE (res_nn), 4294967295)); ++ gimple_seq_add_stmt(&stmts,g); ++ nn_map.put(loop_bb, lhs6); ++ ++ gcond* cond_stmt = gimple_build_cond (NE_EXPR, lhs6, origin_loop.limit, ++ NULL_TREE, NULL_TREE); ++ gimple_seq_add_stmt (&stmts, cond_stmt); ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++} ++ ++/* : ++ # c_6 = PHI ++ # s_46 = PHI ++ _44 = n_26(D) & 2; ++ if (_44 != 0) ++ The IR of bb is as above. 
*/ ++static void ++create_cond_bb(basic_block& cond_bb, basic_block after_bb, ++ basic_block dominator_bb, class loop *outer){ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gphi* phi_s_loop; ++ gphi* phi_c_loop; ++ ++ create_new_bb(cond_bb, after_bb, dominator_bb, outer); ++ gsi = gsi_last_bb(cond_bb); ++ tree entry_nn = get_current_def(nn_tree); ++ phi_s_loop = create_phi_node_for_bb(origin_loop.base_s, cond_bb); ++ phi_c_loop = create_phi_node_for_bb(origin_loop.base_c, cond_bb); ++ tree res_s = gimple_phi_result(phi_s_loop); ++ set_current_def(origin_loop.base_s, res_s); ++ s_map.put(cond_bb, res_s); ++ tree res_c = gimple_phi_result(phi_c_loop); ++ set_current_def(origin_loop.base_c, res_c); ++ c_map.put(cond_bb, res_c); ++ ++ tree lhs1 = gimple_build(&stmts, BIT_AND_EXPR, TREE_TYPE(origin_loop.base_n), ++ origin_loop.base_n, build_int_cst (TREE_TYPE (origin_loop.base_n), 2)); ++ gcond* cond_stmt = gimple_build_cond (NE_EXPR, lhs1, origin_loop.limit, ++ NULL_TREE, NULL_TREE); ++ gimple_seq_add_stmt (&stmts, cond_stmt); ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++} ++ ++/* : ++ _7 = MEM[(uint16_t *)s_46]; ++ _41 = __builtin_aarch64_crc32h (_8, _7); ++ c_33 = (long unsigned int) _41; ++ s_34 = s_30 + 2; ++ The IR of bb is as above.*/ ++static void ++create_cond_true_bb(basic_block& cond_true_bb, basic_block after_bb, ++ basic_block dominator_bb, class loop *outer){ ++ gimple_seq stmts = NULL; ++ gimple* g; ++ gimple_stmt_iterator gsi; ++ ++ create_new_bb(cond_true_bb, after_bb, dominator_bb, outer); ++ gsi = gsi_last_bb(cond_true_bb); ++ tree s_46 = *(s_map.get(after_bb)); ++ g = gimple_build_assign(make_ssa_name(short_unsigned_type_node), ++ fold_build2(MEM_REF,short_unsigned_type_node,s_46, ++ build_int_cst (build_pointer_type (short_unsigned_type_node), 0))); ++ gimple_seq_add_stmt(&stmts,g); ++ tree lhs1 = gimple_assign_lhs(g); // _7 = MEM[(uint16_t *)s_46]; ++ unsigned int code = AARCH64_BUILTIN_CRC32H; ++ tree lhs2; ++ gimple* 
call_builtin = call_builtin_fun(code, lhs2,*(crc_map.get(cond_true_bb->prev_bb->prev_bb)),lhs1); ++ crc_map.put(cond_true_bb,lhs2); ++ gimple_seq_add_stmt(&stmts, call_builtin); ++ ++ tree lhs3 = copy_ssa_name(origin_loop.base_c); ++ g = gimple_build_assign(lhs3, NOP_EXPR, lhs2); ++ gimple_seq_add_stmt(&stmts, g); ++ c_map.put(cond_true_bb, lhs3); ++ ++ tree lhs5 = copy_ssa_name(s_46); ++ g = gimple_build_assign(lhs5, POINTER_PLUS_EXPR, s_46, ++ build_int_cst (sizetype, 2)); // s_30 + 2; ++ gimple_seq_add_stmt(&stmts, g); ++ s_map.put(cond_true_bb, lhs5); ++ ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++ s_map.put(cond_true_bb, lhs5); ++} ++ ++/* : ++ # s_15 = PHI ++ # c_17 = PHI ++ _3 = n_26(D) & 1; ++ if (_3 != 0) ++ The IR of bb is as above.*/ ++static void ++create_cond_false_bb(basic_block& cond_false_bb, basic_block after_bb, ++ basic_block dominator_bb, class loop *outer) ++{ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gphi* phi_s_cond_true_bb; ++ gphi* phi_c_cond_true_bb; ++ ++ create_new_bb(cond_false_bb, after_bb, dominator_bb, outer); ++ make_single_succ_edge(after_bb, cond_false_bb, EDGE_FALLTHRU); ++ ++ tree entry_s = get_current_def(origin_loop.base_s); ++ phi_s_cond_true_bb = create_phi_node_for_bb(entry_s, cond_false_bb); ++ tree entry_c = get_current_def(origin_loop.base_c); ++ phi_c_cond_true_bb = create_phi_node_for_bb(entry_c, cond_false_bb); ++ tree res_s = gimple_phi_result(phi_s_cond_true_bb); ++ set_current_def(origin_loop.base_s, res_s); ++ s_map.put(cond_false_bb, res_s); ++ tree res_c = gimple_phi_result(phi_c_cond_true_bb); ++ set_current_def(origin_loop.base_c, res_c); ++ c_map.put(cond_false_bb, res_c); ++ ++ gsi = gsi_last_bb(cond_false_bb); ++ tree lhs1 = gimple_build(&stmts, BIT_AND_EXPR, TREE_TYPE(origin_loop.base_n), ++ origin_loop.base_n, build_int_cst (TREE_TYPE (origin_loop.base_n), 1)); ++ gcond* cond_stmt = gimple_build_cond (NE_EXPR, lhs1, origin_loop.limit, ++ NULL_TREE, NULL_TREE); ++ 
gimple_seq_add_stmt (&stmts, cond_stmt); ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++} ++ ++/* : ++ _11 = (unsigned int) c_17; ++ _12 = *s_15; ++ _42 = __builtin_aarch64_crc32b (_11, _12); ++ c_36 = (long unsigned int) _42; ++ The IR of bb is as above. */ ++static void ++create_lastcond_true_bb(basic_block& new_bb, basic_block after_bb, ++ basic_block dominator_bb, class loop *outer){ ++ gimple_seq stmts = NULL; ++ gimple_stmt_iterator gsi; ++ gimple* g; ++ ++ create_new_bb(new_bb, after_bb, dominator_bb, outer); ++ gsi = gsi_last_bb(new_bb); ++ ++ tree lhs1 = gimple_build(&stmts, NOP_EXPR, unsigned_type_node, ++ get_current_def(origin_loop.base_c)); ++ tree lhs2; ++ tree s_15 = get_current_def(origin_loop.base_s); ++ g = gimple_build_assign (make_ssa_name (unsigned_char_type_node), ++ fold_build2 (MEM_REF, unsigned_char_type_node, s_15, ++ build_int_cst (TREE_TYPE(s_15), 0))); ++ gimple_seq_add_stmt (&stmts, g); ++ lhs2 = gimple_assign_lhs (g); ++ ++ unsigned int code = AARCH64_BUILTIN_CRC32B; ++ tree lhs3; ++ gimple* call_builtin = call_builtin_fun(code, lhs3, lhs1, lhs2); ++ crc_map.put(new_bb,lhs3); ++ gimple_seq_add_stmt(&stmts,call_builtin); ++ ++ tree lhs4 = copy_ssa_name(origin_loop.base_c); ++ g = gimple_build_assign(lhs4, NOP_EXPR, lhs3); ++ gimple_seq_add_stmt(&stmts, g); ++ c_map.put(new_bb, lhs4); ++ ++ gsi_insert_seq_after (&gsi, stmts, GSI_NEW_STMT); ++} ++ ++static bool ++optional_add_phi_arg(gphi * phi, tree phi_res, tree phi_arg, edge e) ++{ ++ location_t loc; ++ if (same_ssa_name_var_p (phi_arg, phi_res)) ++ { ++ if (virtual_operand_p (phi_arg)) ++ loc = UNKNOWN_LOCATION; ++ else ++ loc = gimple_location (SSA_NAME_DEF_STMT (phi_arg)); ++ add_phi_arg (phi, phi_arg, e, loc); ++ ++ return true; ++ } ++ ++ return false; ++} ++ ++/* Add phi_arg for bb with phi node. 
*/ ++static void ++update_phi_nodes (basic_block bb) ++{ ++ edge e; ++ edge_iterator ei; ++ gphi *phi; ++ gphi_iterator gsi; ++ tree res; ++ ++ for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi)) ++ { ++ phi = gsi.phi (); ++ res = gimple_phi_result (phi); ++ ++ FOR_EACH_EDGE (e, ei, bb->preds) ++ { ++ if (PHI_ARG_DEF_FROM_EDGE (phi, e)) ++ continue; ++ tree var_c; ++ tree* ptr_var_c = c_map.get (e->src); ++ if(ptr_var_c == NULL) ++ { ++ var_c = origin_loop.base_c; ++ } else { ++ var_c = *ptr_var_c; ++ } ++ if(optional_add_phi_arg(phi, res, var_c, e)) ++ continue; ++ ++ tree var_nn; ++ tree* ptr_var_nn = nn_map.get (e->src); ++ if(ptr_var_nn == NULL) ++ { ++ var_nn = nn_tree; ++ } else { ++ var_nn = *ptr_var_nn; ++ } ++ if(optional_add_phi_arg(phi, res, var_nn, e)) ++ continue; ++ ++ tree var_s; ++ tree* ptr_var_s = s_map.get (e->src); ++ if(ptr_var_s == NULL) ++ { ++ var_s = origin_loop.base_s; ++ } else { ++ var_s = *ptr_var_s; ++ } ++ if(optional_add_phi_arg(phi, res, var_s, e)) ++ continue; ++ } ++ } ++} ++ ++static void ++create_new_loops(edge entry_edge) ++{ ++ class loop* new_loop = NULL; ++ basic_block loop_bb, cond_bb, cond_true_bb, cond_false_bb, lastcond_true_bb; ++ class loop *outer = entry_edge->src->loop_father; ++ change_preheader_bb(entry_edge); ++ ++ create_loop_bb(loop_bb, entry_edge->src, entry_edge->src, outer, entry_edge); ++ create_cond_bb(cond_bb, loop_bb, loop_bb, outer); ++ make_edge(loop_bb, loop_bb, EDGE_TRUE_VALUE); ++ make_edge(loop_bb, cond_bb, EDGE_FALSE_VALUE); ++ update_phi_nodes(loop_bb); ++ ++ new_loop = alloc_loop (); ++ new_loop->header = loop_bb; ++ new_loop->latch = loop_bb; ++ add_loop (new_loop, outer); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "\nPrint byte new loop %d:\n", new_loop->num); ++ flow_loop_dump (new_loop, dump_file, NULL, 1); ++ fprintf (dump_file, "\n\n"); ++ } ++ ++ create_cond_true_bb(cond_true_bb, cond_bb, cond_bb, outer); ++ make_edge(cond_bb, cond_true_bb, 
EDGE_TRUE_VALUE); ++ create_cond_false_bb(cond_false_bb, cond_true_bb, cond_bb, outer); ++ make_edge(cond_bb, cond_false_bb, EDGE_FALSE_VALUE); ++ update_phi_nodes(cond_bb); ++ update_phi_nodes(cond_false_bb); ++ create_lastcond_true_bb(lastcond_true_bb, cond_false_bb, cond_false_bb, outer); ++ make_edge(cond_false_bb, lastcond_true_bb, EDGE_TRUE_VALUE); ++ make_edge(cond_false_bb, origin_loop.exit_bb, EDGE_FALSE_VALUE); ++ make_single_succ_edge(lastcond_true_bb, origin_loop.exit_bb, EDGE_FALLTHRU); ++ ++ update_phi_nodes(origin_loop.exit_bb); ++ remove_edge(origin_loop.exit_edge); ++} ++ ++/* Clear information about the original loop. */ ++static void ++remove_origin_loop(class loop* loop) ++{ ++ basic_block* body = get_loop_body_in_dom_order(loop); ++ unsigned n = loop->num_nodes; ++ for(int i = 0; i < n; ++i) ++ { ++ delete_basic_block(body[i]); ++ } ++ free(body); ++ delete_loop(loop); ++} ++ ++/* Make sure that the dominance relationship of the newly inserted cfg ++ is not missing. */ ++static void ++update_loop_dominator(cdi_direction dir) ++{ ++ gcc_assert (dom_info_available_p (dir)); ++ ++ basic_block bb; ++ FOR_EACH_BB_FN (bb, cfun) ++ { ++ basic_block imm_bb = get_immediate_dominator (dir, bb); ++ if (!imm_bb || bb == origin_loop.exit_bb) ++ { ++ set_immediate_dominator (CDI_DOMINATORS, bb, ++ recompute_dominator (CDI_DOMINATORS, bb)); ++ continue; ++ } ++ } ++} ++ ++/* Perform the conversion of origin_loop to new_loop. */ ++static void ++convert_to_new_loop (class loop *loop) ++{ ++ create_new_loops (origin_loop.entry_edge); ++ remove_origin_loop (loop); ++ update_loop_dominator (CDI_DOMINATORS); ++ update_ssa (TODO_update_ssa); ++} ++ + /* The main entry of loop crc optimizes. 
*/ + static unsigned int + tree_ssa_loop_crc () + { ++ if(TARGET_CRC32 == false){ ++ warning (OPT____,"The loop-crc optimization is not working."\ ++ "You should make sure that the specified architecture supports"\ ++ " crc:-march=armv8.1-a"); ++ return 0; ++ } + unsigned int todo = 0; + class loop *loop; + +@@ -553,28 +1230,28 @@ tree_ssa_loop_crc () + } + + FOR_EACH_LOOP (loop, LI_FROM_INNERMOST) ++ { ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ fprintf (dump_file, "======================================\n"); ++ fprintf (dump_file, "Processing loop %d:\n", loop->num); ++ fprintf (dump_file, "======================================\n"); ++ flow_loop_dump (loop, dump_file, NULL, 1); ++ fprintf (dump_file, "\n\n"); ++ } ++ ++ if (match_crc_loop (loop)) + { + if (dump_file && (dump_flags & TDF_DETAILS)) +- { +- fprintf (dump_file, "======================================\n"); +- fprintf (dump_file, "Processing loop %d:\n", loop->num); +- fprintf (dump_file, "======================================\n"); +- flow_loop_dump (loop, dump_file, NULL, 1); +- fprintf (dump_file, "\n\n"); +- } +- +- if (match_crc_loop (loop)) +- { +- if (dump_file && (dump_flags & TDF_DETAILS)) +- { +- fprintf (dump_file, "The %dth loop form is success matched," +- "and the loop can be optimized.\n", +- loop->num); +- } +- +- convert_to_new_loop (loop); +- } ++ { ++ fprintf (dump_file, "The %dth loop form is success matched," ++ "and the loop can be optimized.\n", ++ loop->num); ++ } ++ ++ convert_to_new_loop (loop); + } ++ } + + todo |= (TODO_update_ssa); + return todo; +@@ -641,4 +1318,4 @@ gimple_opt_pass * + make_pass_loop_crc (gcc::context *ctxt) + { + return new pass_loop_crc (ctxt); +-} +\ No newline at end of file ++} +-- +2.33.0 + From c396b7ffabbadd0ec9da99ce55470c2f53cb1805 Mon Sep 17 00:00:00 2001 From: wangding16 Date: Wed, 6 Dec 2023 11:52:14 +0800 Subject: [PATCH 4/7] [Sync] Sync patch from openeuler/gcc 0147-add-insn-defs-and-correct-costs-for-cmlt-generation.patch 
0148-Introduce-RTL-ifcvt-enhancements.patch 0149-Add-more-flexible-check-for-pointer-aliasing-during-.patch --- ...nd-correct-costs-for-cmlt-generation.patch | 194 +++++++ 0148-Introduce-RTL-ifcvt-enhancements.patch | 502 ++++++++++++++++++ ...e-check-for-pointer-aliasing-during-.patch | 239 +++++++++ 3 files changed, 935 insertions(+) create mode 100644 0147-add-insn-defs-and-correct-costs-for-cmlt-generation.patch create mode 100644 0148-Introduce-RTL-ifcvt-enhancements.patch create mode 100644 0149-Add-more-flexible-check-for-pointer-aliasing-during-.patch diff --git a/0147-add-insn-defs-and-correct-costs-for-cmlt-generation.patch b/0147-add-insn-defs-and-correct-costs-for-cmlt-generation.patch new file mode 100644 index 0000000..b03ad48 --- /dev/null +++ b/0147-add-insn-defs-and-correct-costs-for-cmlt-generation.patch @@ -0,0 +1,194 @@ +From 80b7de670da46d8921118799904cba4a0753bb72 Mon Sep 17 00:00:00 2001 +From: Diachkov Ilia WX1215920 +Date: Wed, 23 Aug 2023 15:03:00 +0300 +Subject: [PATCH 09/13] add insn defs and correct costs for cmlt generation + +--- + gcc/config/aarch64/aarch64-simd.md | 48 +++++++++++++++++++++++++++++ + gcc/config/aarch64/aarch64.c | 15 +++++++++ + gcc/config/aarch64/aarch64.opt | 4 +++ + gcc/config/aarch64/iterators.md | 3 +- + gcc/config/aarch64/predicates.md | 25 +++++++++++++++ + gcc/testsuite/gcc.dg/combine-cmlt.c | 20 ++++++++++++ + 6 files changed, 114 insertions(+), 1 deletion(-) + create mode 100755 gcc/testsuite/gcc.dg/combine-cmlt.c + +diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md +index 6049adc3f..f4213fd62 100644 +--- a/gcc/config/aarch64/aarch64-simd.md ++++ b/gcc/config/aarch64/aarch64-simd.md +@@ -4719,6 +4719,54 @@ + [(set_attr "type" "neon_compare, neon_compare_zero")] + ) + ++;; Use cmlt to replace vector arithmetic operations like this (SImode example): ++;; B = (((A >> 15) & 0x00010001) << 16) - ((A >> 15) & 0x00010001) ++;; TODO: maybe extend to scalar operations or other 
cm** instructions. ++ ++(define_insn "*aarch64_cmlt_as_arith" ++ [(set (match_operand: 0 "register_operand" "=w") ++ (minus: ++ (ashift: ++ (and: ++ (lshiftrt: ++ (match_operand:VDQHSD 1 "register_operand" "w") ++ (match_operand:VDQHSD 2 "half_size_minus_one_operand")) ++ (match_operand:VDQHSD 3 "cmlt_arith_mask_operand")) ++ (match_operand:VDQHSD 4 "half_size_operand")) ++ (and: ++ (lshiftrt: ++ (match_dup 1) ++ (match_dup 2)) ++ (match_dup 3))))] ++ "TARGET_SIMD && flag_cmlt_arith" ++ "cmlt\t%0., %1., #0" ++ [(set_attr "type" "neon_compare_zero")] ++) ++ ++;; The helper definition that allows combiner to use the previous pattern. ++ ++(define_insn_and_split "*aarch64_cmlt_tmp" ++ [(set (match_operand: 0 "register_operand" "=w") ++ (and: ++ (lshiftrt: ++ (match_operand:VDQHSD 1 "register_operand" "w") ++ (match_operand:VDQHSD 2 "half_size_minus_one_operand")) ++ (match_operand:VDQHSD 3 "cmlt_arith_mask_operand")))] ++ "TARGET_SIMD && flag_cmlt_arith" ++ "#" ++ "&& reload_completed" ++ [(set (match_operand: 0 "register_operand") ++ (lshiftrt: ++ (match_operand:VDQHSD 1 "register_operand") ++ (match_operand:VDQHSD 2 "half_size_minus_one_operand"))) ++ (set (match_dup 0) ++ (and: ++ (match_dup 0) ++ (match_operand:VDQHSD 3 "cmlt_arith_mask_operand")))] ++ "" ++ [(set_attr "type" "neon_compare_zero")] ++) ++ + (define_insn_and_split "aarch64_cmdi" + [(set (match_operand:DI 0 "register_operand" "=w,w,r") + (neg:DI +diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c +index cbdde11b0..7a00a0817 100644 +--- a/gcc/config/aarch64/aarch64.c ++++ b/gcc/config/aarch64/aarch64.c +@@ -12659,6 +12659,21 @@ cost_minus: + return true; + } + ++ /* Detect aarch64_cmlt_as_arith instruction. Now only this pattern ++ matches the condition. The costs of cmlt and sub instructions ++ are comparable, so we are not increasing the cost here. 
*/ ++ if (flag_cmlt_arith && GET_CODE (op0) == ASHIFT ++ && GET_CODE (op1) == AND) ++ { ++ rtx op0_subop0 = XEXP (op0, 0); ++ if (rtx_equal_p (op0_subop0, op1)) ++ { ++ rtx lshrt_op = XEXP (op0_subop0, 0); ++ if (GET_CODE (lshrt_op) == LSHIFTRT) ++ return true; ++ } ++ } ++ + /* Look for SUB (extended register). */ + if (is_a (mode, &int_mode) + && aarch64_rtx_arith_op_extract_p (op1, int_mode)) +diff --git a/gcc/config/aarch64/aarch64.opt b/gcc/config/aarch64/aarch64.opt +index bb888461a..c42494036 100644 +--- a/gcc/config/aarch64/aarch64.opt ++++ b/gcc/config/aarch64/aarch64.opt +@@ -273,6 +273,10 @@ Use an immediate to offset from the stack protector guard register, sp_el0. + This option is for use with fstack-protector-strong and not for use in + user-land code. + ++mcmlt-arith ++Target Report Var(flag_cmlt_arith) Optimization Init(0) ++Use SIMD cmlt instruction to perform some arithmetic/logic calculations. ++ + TargetVariable + long aarch64_stack_protector_guard_offset = 0 + +diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md +index 0a7145281..d3be06c6f 100644 +--- a/gcc/config/aarch64/iterators.md ++++ b/gcc/config/aarch64/iterators.md +@@ -1228,7 +1228,8 @@ + (V2DI "2s")]) + + ;; Register suffix narrowed modes for VQN. +-(define_mode_attr V2ntype [(V8HI "16b") (V4SI "8h") ++(define_mode_attr V2ntype [(V4HI "8b") (V2SI "4h") ++ (V8HI "16b") (V4SI "8h") + (V2DI "4s")]) + + ;; Widened modes of vector modes. 
+diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md +index 1754b1eff..de58562a7 100644 +--- a/gcc/config/aarch64/predicates.md ++++ b/gcc/config/aarch64/predicates.md +@@ -47,6 +47,31 @@ + return CONST_INT_P (op) && IN_RANGE (INTVAL (op), 1, 3); + }) + ++(define_predicate "half_size_minus_one_operand" ++ (match_code "const_vector") ++{ ++ op = unwrap_const_vec_duplicate (op); ++ unsigned int size = GET_MODE_UNIT_BITSIZE (mode) / 2; ++ return CONST_INT_P (op) && (UINTVAL (op) == size - 1); ++}) ++ ++(define_predicate "half_size_operand" ++ (match_code "const_vector") ++{ ++ op = unwrap_const_vec_duplicate (op); ++ unsigned int size = GET_MODE_UNIT_BITSIZE (mode) / 2; ++ return CONST_INT_P (op) && (UINTVAL (op) == size); ++}) ++ ++(define_predicate "cmlt_arith_mask_operand" ++ (match_code "const_vector") ++{ ++ op = unwrap_const_vec_duplicate (op); ++ unsigned int size = GET_MODE_UNIT_BITSIZE (mode) / 2; ++ unsigned long long mask = ((unsigned long long) 1 << size) | 1; ++ return CONST_INT_P (op) && (UINTVAL (op) == mask); ++}) ++ + (define_predicate "subreg_lowpart_operator" + (ior (match_code "truncate") + (and (match_code "subreg") +diff --git a/gcc/testsuite/gcc.dg/combine-cmlt.c b/gcc/testsuite/gcc.dg/combine-cmlt.c +new file mode 100755 +index 000000000..b4c9a37ff +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/combine-cmlt.c +@@ -0,0 +1,20 @@ ++/* { dg-do compile { target aarch64-*-* } } */ ++/* { dg-options "-O3 -mcmlt-arith" } */ ++ ++/* The test checks usage of cmlt insns for arithmetic/logic calculations ++ * in foo (). It's inspired by sources of x264 codec. 
*/ ++ ++typedef unsigned short int uint16_t; ++typedef unsigned int uint32_t; ++ ++void foo( uint32_t *a, uint32_t *b) ++{ ++ for (unsigned i = 0; i < 4; i++) ++ { ++ uint32_t s = ((a[i]>>((8 * sizeof(uint16_t))-1)) ++ &(((uint32_t)1<<(8 * sizeof(uint16_t)))+1))*((uint16_t)-1); ++ b[i] = (a[i]+s)^s; ++ } ++} ++ ++/* { dg-final { scan-assembler-times {cmlt\t} 1 } } */ +-- +2.33.0 + diff --git a/0148-Introduce-RTL-ifcvt-enhancements.patch b/0148-Introduce-RTL-ifcvt-enhancements.patch new file mode 100644 index 0000000..1bc1aca --- /dev/null +++ b/0148-Introduce-RTL-ifcvt-enhancements.patch @@ -0,0 +1,502 @@ +From df68d120a049049671e44f6cda51e96a9a82c613 Mon Sep 17 00:00:00 2001 +From: Chernonog Vyacheslav 00812786 +Date: Mon, 28 Nov 2022 14:16:48 +0300 +Subject: [PATCH 10/13] Introduce RTL ifcvt enhancements + +It is controlled by option -fifcvt-allow-complicated-cmps, allowing +ifcvt to deal with complicated cmps like + if (cmp) + X = reg1 + else + X = reg2 + reg3 +and + if (cmp) + X = reg1 + reg3 + Y = reg2 + reg4 + Z = reg3 + +Parameter -param=ifcvt-allow-register-renaming=[0,1,2] allows ifcvt to +aggressively rename registers in basic blocks. +* 0: does not allow ifcvt to rename registers +* 1: allows ifcvt to rename registers in then and else bb +* 2: allows to rename registers in condition and else/then bb +--- + gcc/ifcvt.c | 298 ++++++++++++++++++++++++++++++++++++++----------- + gcc/params.opt | 8 ++ + 2 files changed, 240 insertions(+), 66 deletions(-) + +diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c +index 2452f231c..50a73a7ca 100644 +--- a/gcc/ifcvt.c ++++ b/gcc/ifcvt.c +@@ -1,5 +1,5 @@ + /* If-conversion support. +- Copyright (C) 2000-2020 Free Software Foundation, Inc. ++ Copyright (C) 2000-2022 Free Software Foundation, Inc. + + This file is part of GCC. + +@@ -876,7 +876,9 @@ noce_emit_store_flag (struct noce_if_info *if_info, rtx x, int reversep, + } + + /* Don't even try if the comparison operands or the mode of X are weird. 
*/ +- if (cond_complex || !SCALAR_INT_MODE_P (GET_MODE (x))) ++ if (!param_ifcvt_allow_complicated_cmps ++ && (cond_complex ++ || !SCALAR_INT_MODE_P (GET_MODE (x)))) + return NULL_RTX; + + return emit_store_flag (x, code, XEXP (cond, 0), +@@ -1743,8 +1745,9 @@ noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code, + + /* Don't even try if the comparison operands are weird + except that the target supports cbranchcc4. */ +- if (! general_operand (cmp_a, GET_MODE (cmp_a)) +- || ! general_operand (cmp_b, GET_MODE (cmp_b))) ++ if (! param_ifcvt_allow_complicated_cmps ++ && (! general_operand (cmp_a, GET_MODE (cmp_a)) ++ || ! general_operand (cmp_b, GET_MODE (cmp_b)))) + { + if (!have_cbranchcc4 + || GET_MODE_CLASS (GET_MODE (cmp_a)) != MODE_CC +@@ -1915,19 +1918,6 @@ noce_try_cmove (struct noce_if_info *if_info) + return FALSE; + } + +-/* Return true if X contains a conditional code mode rtx. */ +- +-static bool +-contains_ccmode_rtx_p (rtx x) +-{ +- subrtx_iterator::array_type array; +- FOR_EACH_SUBRTX (iter, array, x, ALL) +- if (GET_MODE_CLASS (GET_MODE (*iter)) == MODE_CC) +- return true; +- +- return false; +-} +- + /* Helper for bb_valid_for_noce_process_p. Validate that + the rtx insn INSN is a single set that does not set + the conditional register CC and is in general valid for +@@ -1946,7 +1936,6 @@ insn_valid_noce_process_p (rtx_insn *insn, rtx cc) + /* Currently support only simple single sets in test_bb. */ + if (!sset + || !noce_operand_ok (SET_DEST (sset)) +- || contains_ccmode_rtx_p (SET_DEST (sset)) + || !noce_operand_ok (SET_SRC (sset))) + return false; + +@@ -1960,13 +1949,17 @@ insn_valid_noce_process_p (rtx_insn *insn, rtx cc) + in this function. 
*/ + + static bool +-bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename) ++bbs_ok_for_cmove_arith (basic_block bb_a, ++ basic_block bb_b, ++ rtx to_rename, ++ bitmap conflict_regs) + { + rtx_insn *a_insn; + bitmap bba_sets = BITMAP_ALLOC (®_obstack); +- ++ bitmap intersections = BITMAP_ALLOC (®_obstack); + df_ref def; + df_ref use; ++ rtx_insn *last_a = last_active_insn (bb_a, FALSE); + + FOR_BB_INSNS (bb_a, a_insn) + { +@@ -1976,30 +1969,25 @@ bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename) + rtx sset_a = single_set (a_insn); + + if (!sset_a) +- { +- BITMAP_FREE (bba_sets); +- return false; +- } ++ goto end_cmove_arith_check_and_fail; ++ if (a_insn == last_a) ++ continue; + /* Record all registers that BB_A sets. */ + FOR_EACH_INSN_DEF (def, a_insn) + if (!(to_rename && DF_REF_REG (def) == to_rename)) + bitmap_set_bit (bba_sets, DF_REF_REGNO (def)); + } + ++ bitmap_and (intersections, df_get_live_in (bb_b), bba_sets); + rtx_insn *b_insn; +- + FOR_BB_INSNS (bb_b, b_insn) + { + if (!active_insn_p (b_insn)) + continue; +- + rtx sset_b = single_set (b_insn); + + if (!sset_b) +- { +- BITMAP_FREE (bba_sets); +- return false; +- } ++ goto end_cmove_arith_check_and_fail; + + /* Make sure this is a REG and not some instance + of ZERO_EXTRACT or SUBREG or other dangerous stuff. +@@ -2011,25 +1999,34 @@ bbs_ok_for_cmove_arith (basic_block bb_a, basic_block bb_b, rtx to_rename) + if (MEM_P (SET_DEST (sset_b))) + gcc_assert (rtx_equal_p (SET_DEST (sset_b), to_rename)); + else if (!REG_P (SET_DEST (sset_b))) +- { +- BITMAP_FREE (bba_sets); +- return false; +- } ++ goto end_cmove_arith_check_and_fail; + +- /* If the insn uses a reg set in BB_A return false. */ ++ /* If the insn uses a reg set in BB_A return false ++ or try to collect register list for renaming. 
*/ + FOR_EACH_INSN_USE (use, b_insn) + { +- if (bitmap_bit_p (bba_sets, DF_REF_REGNO (use))) ++ if (bitmap_bit_p (intersections, DF_REF_REGNO (use))) + { +- BITMAP_FREE (bba_sets); +- return false; ++ if (param_ifcvt_allow_register_renaming < 1) ++ goto end_cmove_arith_check_and_fail; ++ ++ /* Those regs should be renamed. We can't rename CC reg, but ++ possibly we can provide combined comparison in the future. */ ++ if (GET_MODE_CLASS (GET_MODE (DF_REF_REG (use))) == MODE_CC) ++ goto end_cmove_arith_check_and_fail; ++ bitmap_set_bit (conflict_regs, DF_REF_REGNO (use)); + } + } +- + } + + BITMAP_FREE (bba_sets); ++ BITMAP_FREE (intersections); + return true; ++ ++end_cmove_arith_check_and_fail: ++ BITMAP_FREE (bba_sets); ++ BITMAP_FREE (intersections); ++ return false; + } + + /* Emit copies of all the active instructions in BB except the last. +@@ -2084,6 +2081,134 @@ noce_emit_bb (rtx last_insn, basic_block bb, bool simple) + return true; + } + ++/* This function tries to rename regs that intersect with considered bb. */ ++ ++static bool ++noce_rename_regs_in_cond (struct noce_if_info *if_info, bitmap cond_rename_regs) ++{ ++ bool success = true; ++ if (bitmap_empty_p (cond_rename_regs)) ++ return true; ++ if (param_ifcvt_allow_register_renaming < 2) ++ return false; ++ df_ref use; ++ rtx_insn* cmp_insn = if_info->cond_earliest; ++ /* Jump instruction as a condition currently unsupported. 
*/ ++ if (JUMP_P (cmp_insn)) ++ return false; ++ rtx_insn* before_cmp = PREV_INSN (cmp_insn); ++ start_sequence (); ++ rtx_insn *copy_of_cmp = as_a <rtx_insn *> (copy_rtx (cmp_insn)); ++ basic_block cmp_block = BLOCK_FOR_INSN (cmp_insn); ++ FOR_EACH_INSN_USE (use, cmp_insn) ++ { ++ if (bitmap_bit_p (cond_rename_regs, DF_REF_REGNO (use))) ++ { ++ rtx use_reg = DF_REF_REG (use); ++ rtx tmp = gen_reg_rtx (GET_MODE (use_reg)); ++ if (!validate_replace_rtx (use_reg, tmp, copy_of_cmp)) ++ { ++ end_sequence (); ++ return false; ++ } ++ noce_emit_move_insn (tmp, use_reg); ++ } ++ } ++ ++ emit_insn (PATTERN (copy_of_cmp)); ++ rtx_insn *seq = get_insns (); ++ unshare_all_rtl_in_chain (seq); ++ end_sequence (); ++ ++ emit_insn_after_setloc (seq, before_cmp, INSN_LOCATION (cmp_insn)); ++ delete_insn_and_edges (cmp_insn); ++ rtx_insn* insn; ++ FOR_BB_INSNS (cmp_block, insn) ++ df_insn_rescan (insn); ++ ++ if_info->cond = noce_get_condition (if_info->jump, ++ &copy_of_cmp, ++ if_info->then_else_reversed); ++ if_info->cond_earliest = copy_of_cmp; ++ if_info->rev_cond = NULL_RTX; ++ ++ return success; ++} ++ ++/* This function tries to rename regs that intersect with considered bb. */ ++static bool ++noce_rename_regs_in_bb (basic_block test_bb, bitmap rename_regs) ++{ ++ if (bitmap_empty_p (rename_regs)) ++ return true; ++ rtx_insn* insn; ++ rtx_insn *last_insn = last_active_insn (test_bb, FALSE); ++ bool res = true; ++ start_sequence (); ++ FOR_BB_INSNS (test_bb, insn) ++ { ++ if (!active_insn_p (insn)) ++ continue; ++ /* Only ssets are supported for now. */ ++ rtx sset = single_set (insn); ++ gcc_assert (sset); ++ rtx x = SET_DEST (sset); ++ if (!REG_P (x) || bitmap_bit_p (rename_regs, REGNO (x))) ++ continue; ++ ++ machine_mode mode = GET_MODE (x); ++ rtx tmp = gen_reg_rtx (mode); ++ if (!validate_replace_rtx_part (x, tmp, &SET_DEST (sset), insn)) ++ { ++ gcc_assert (insn != last_insn); ++ /* We can generate additional move for such case, ++ but it will increase register pressure. 
++ For now just stop transformation. */ ++ rtx result_rtx = SET_DEST (single_set (last_insn)); ++ if (REG_P (result_rtx) && (x != result_rtx)) ++ { ++ res = false; ++ break; ++ } ++ if (!validate_replace_rtx (x, tmp, insn)) ++ gcc_unreachable (); ++ noce_emit_move_insn (tmp,x); ++ } ++ set_used_flags (insn); ++ rtx_insn* rename_candidate; ++ for (rename_candidate = NEXT_INSN (insn); ++ rename_candidate && rename_candidate!= NEXT_INSN (BB_END (test_bb)); ++ rename_candidate = NEXT_INSN (rename_candidate)) ++ { ++ if (!reg_overlap_mentioned_p (x, rename_candidate)) ++ continue; ++ ++ int replace_res = TRUE; ++ if (rename_candidate == last_insn) ++ { ++ validate_replace_src_group (x, tmp, rename_candidate); ++ replace_res = apply_change_group (); ++ } ++ else ++ replace_res = validate_replace_rtx (x, tmp, rename_candidate); ++ gcc_assert (replace_res); ++ set_used_flags (rename_candidate); ++ ++ } ++ set_used_flags (x); ++ set_used_flags (tmp); ++ ++ } ++ rtx_insn *seq = get_insns (); ++ unshare_all_rtl_in_chain (seq); ++ end_sequence (); ++ emit_insn_before_setloc (seq, first_active_insn (test_bb), ++ INSN_LOCATION (first_active_insn (test_bb))); ++ FOR_BB_INSNS (test_bb, insn) ++ df_insn_rescan (insn); ++ return res; ++} ++ + /* Try more complex cases involving conditional_move. 
*/ + + static int +@@ -2166,11 +2291,29 @@ noce_try_cmove_arith (struct noce_if_info *if_info) + std::swap (then_bb, else_bb); + } + } +- ++ bitmap else_bb_rename_regs = BITMAP_ALLOC (&reg_obstack); ++ bitmap then_bb_rename_regs = BITMAP_ALLOC (&reg_obstack); + if (then_bb && else_bb +- && (!bbs_ok_for_cmove_arith (then_bb, else_bb, if_info->orig_x) +- || !bbs_ok_for_cmove_arith (else_bb, then_bb, if_info->orig_x))) +- return FALSE; ++ && (!bbs_ok_for_cmove_arith (then_bb, else_bb, ++ if_info->orig_x, ++ then_bb_rename_regs) ++ || !bbs_ok_for_cmove_arith (else_bb, then_bb, ++ if_info->orig_x, ++ else_bb_rename_regs))) ++ { ++ BITMAP_FREE (then_bb_rename_regs); ++ BITMAP_FREE (else_bb_rename_regs); ++ return FALSE; ++ } ++ bool prepass_renaming = true; ++ prepass_renaming |= noce_rename_regs_in_bb (then_bb, then_bb_rename_regs); ++ prepass_renaming |= noce_rename_regs_in_bb (else_bb, else_bb_rename_regs); ++ ++ BITMAP_FREE (then_bb_rename_regs); ++ BITMAP_FREE (else_bb_rename_regs); ++ ++ if (!prepass_renaming) ++ return FALSE; + + start_sequence (); + +@@ -2178,7 +2321,6 @@ noce_try_cmove_arith (struct noce_if_info *if_info) + came from the test block. The non-empty complex block that we will + emit might clobber the register used by B or A, so move it to a pseudo + first. */ +- + rtx tmp_a = NULL_RTX; + rtx tmp_b = NULL_RTX; + +@@ -3052,7 +3194,8 @@ noce_operand_ok (const_rtx op) + + static bool + bb_valid_for_noce_process_p (basic_block test_bb, rtx cond, +- unsigned int *cost, bool *simple_p) ++ unsigned int *cost, bool *simple_p, ++ bitmap cond_rename_regs) + { + if (!test_bb) + return false; +@@ -3086,10 +3229,10 @@ bb_valid_for_noce_process_p (basic_block test_bb, rtx cond, + rtx_insn *prev_last_insn = PREV_INSN (last_insn); + gcc_assert (prev_last_insn); + +- /* For now, disallow setting x multiple times in test_bb. 
*/ +- if (REG_P (x) && reg_set_between_p (x, first_insn, prev_last_insn)) ++ if (REG_P (x) ++ && reg_set_between_p (x, first_insn, prev_last_insn) ++ && param_ifcvt_allow_register_renaming < 1) + return false; +- + bitmap test_bb_temps = BITMAP_ALLOC (&reg_obstack); + + /* The regs that are live out of test_bb. */ +@@ -3099,25 +3242,35 @@ bb_valid_for_noce_process_p (basic_block test_bb, rtx cond, + rtx_insn *insn; + FOR_BB_INSNS (test_bb, insn) + { +- if (insn != last_insn) +- { +- if (!active_insn_p (insn)) +- continue; ++ if (insn == last_insn) ++ continue; ++ if (!active_insn_p (insn)) ++ continue; + +- if (!insn_valid_noce_process_p (insn, cc)) +- goto free_bitmap_and_fail; ++ if (!insn_valid_noce_process_p (insn, cc)) ++ goto free_bitmap_and_fail; + +- rtx sset = single_set (insn); +- gcc_assert (sset); ++ rtx sset = single_set (insn); ++ gcc_assert (sset); + +- if (contains_mem_rtx_p (SET_SRC (sset)) +- || !REG_P (SET_DEST (sset)) +- || reg_overlap_mentioned_p (SET_DEST (sset), cond)) +- goto free_bitmap_and_fail; ++ if (contains_mem_rtx_p (SET_SRC (sset)) ++ || !REG_P (SET_DEST (sset))) ++ goto free_bitmap_and_fail; + +- potential_cost += pattern_cost (sset, speed_p); +- bitmap_set_bit (test_bb_temps, REGNO (SET_DEST (sset))); ++ if (reg_overlap_mentioned_p (SET_DEST (sset), cond)) ++ { ++ if (param_ifcvt_allow_register_renaming < 1) ++ goto free_bitmap_and_fail; ++ rtx sset_dest = SET_DEST (sset); ++ if (REG_P (sset_dest) ++ && (GET_MODE_CLASS (GET_MODE (sset_dest)) != MODE_CC)) ++ bitmap_set_bit (cond_rename_regs, REGNO (sset_dest)); ++ else ++ goto free_bitmap_and_fail; + } ++ potential_cost += pattern_cost (sset, speed_p); ++ if (SET_DEST (sset) != SET_DEST (last_set)) ++ bitmap_set_bit (test_bb_temps, REGNO (SET_DEST (sset))); + } + + /* If any of the intermediate results in test_bb are live after test_bb +@@ -3475,14 +3628,27 @@ noce_process_if_block (struct noce_if_info *if_info) + + bool speed_p = optimize_bb_for_speed_p (test_bb); + unsigned int 
then_cost = 0, else_cost = 0; ++ bitmap cond_rename_regs = BITMAP_ALLOC (&reg_obstack); + if (!bb_valid_for_noce_process_p (then_bb, cond, &then_cost, +- &if_info->then_simple)) +- return false; ++ &if_info->then_simple, cond_rename_regs)) ++ { ++ BITMAP_FREE (cond_rename_regs); ++ return false; ++ } + + if (else_bb + && !bb_valid_for_noce_process_p (else_bb, cond, &else_cost, +- &if_info->else_simple)) ++ &if_info->else_simple, cond_rename_regs)) ++ { ++ BITMAP_FREE (cond_rename_regs); ++ return false; ++ } ++ ++ if (!noce_rename_regs_in_cond (if_info, cond_rename_regs)) + return false; ++ cond = if_info->cond; ++ ++ BITMAP_FREE (cond_rename_regs); + + if (speed_p) + if_info->original_cost += average_cost (then_cost, else_cost, +@@ -5426,7 +5592,7 @@ if_convert (bool after_combine) + { + basic_block bb; + int pass; +- ++ cleanup_cfg (CLEANUP_EXPENSIVE); + if (optimize == 1) + { + df_live_add_problem (); +diff --git a/gcc/params.opt b/gcc/params.opt +index 83fd705ee..345f9b3ff 100644 +--- a/gcc/params.opt ++++ b/gcc/params.opt +@@ -574,6 +574,14 @@ Maximum permissible cost for the sequence that would be generated by the RTL if- + Common Joined UInteger Var(param_max_rtl_if_conversion_unpredictable_cost) Init(40) IntegerRange(0, 200) Param Optimization + Maximum permissible cost for the sequence that would be generated by the RTL if-conversion pass for a branch that is considered unpredictable. + ++-param=ifcvt-allow-complicated-cmps= ++Common Joined UInteger Var(param_ifcvt_allow_complicated_cmps) IntegerRange(0, 1) Param Optimization ++Allow RTL if-conversion pass to deal with complicated cmps (can increase compilation time). ++ ++-param=ifcvt-allow-register-renaming= ++Common Joined UInteger Var(param_ifcvt_allow_register_renaming) IntegerRange(0, 2) Param Optimization ++Allow RTL if-conversion pass to aggressively rename registers in basic blocks. Sometimes additional moves will be created. 
++ + -param=max-sched-extend-regions-iters= + Common Joined UInteger Var(param_max_sched_extend_regions_iters) Param Optimization + The maximum number of iterations through CFG to extend regions. +-- +2.33.0 + diff --git a/0149-Add-more-flexible-check-for-pointer-aliasing-during-.patch b/0149-Add-more-flexible-check-for-pointer-aliasing-during-.patch new file mode 100644 index 0000000..2d53e18 --- /dev/null +++ b/0149-Add-more-flexible-check-for-pointer-aliasing-during-.patch @@ -0,0 +1,239 @@ +From f43bdfbdcfdeb425a0bd303f4787a13323fd2934 Mon Sep 17 00:00:00 2001 +From: vchernon +Date: Wed, 27 Sep 2023 11:07:29 +0800 +Subject: [PATCH 11/13] Add more flexible check for pointer aliasing during + vectorization + +It takes minimum between number of iteration and segment length and helps to +speed up loops with small number of iterations when only tail can be vectorized. +--- + gcc/params.opt | 5 ++ + .../sve/var_stride_flexible_segment_len_1.c | 23 +++++++ + gcc/tree-data-ref.c | 68 +++++++++++++------ + gcc/tree-data-ref.h | 11 ++- + gcc/tree-vect-data-refs.c | 14 +++- + 5 files changed, 95 insertions(+), 26 deletions(-) + create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/var_stride_flexible_segment_len_1.c + +diff --git a/gcc/params.opt b/gcc/params.opt +index 83fd705ee..7f335a94b 100644 +--- a/gcc/params.opt ++++ b/gcc/params.opt +@@ -964,6 +964,11 @@ Maximum number of loop peels to enhance alignment of data references in a loop. + Common Joined UInteger Var(param_vect_max_version_for_alias_checks) Init(10) Param Optimization + Bound on number of runtime checks inserted by the vectorizer's loop versioning for alias check. + ++-param=vect-alias-flexible-segment-len= ++Common Joined UInteger Var(param_flexible_seg_len) Init(0) IntegerRange(0, 1) Param Optimization ++Use a minimum length of different segments. Currently the minimum between ++iteration number and vectorization length is chosen by this param. 
++ + -param=vect-max-version-for-alignment-checks= + Common Joined UInteger Var(param_vect_max_version_for_alignment_checks) Init(6) Param Optimization + Bound on number of runtime checks inserted by the vectorizer's loop versioning for alignment check. +diff --git a/gcc/testsuite/gcc.target/aarch64/sve/var_stride_flexible_segment_len_1.c b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_flexible_segment_len_1.c +new file mode 100644 +index 000000000..894f075f3 +--- /dev/null ++++ b/gcc/testsuite/gcc.target/aarch64/sve/var_stride_flexible_segment_len_1.c +@@ -0,0 +1,23 @@ ++/* { dg-do compile } */ ++/* { dg-options "-O2 -ftree-vectorize --param=vect-alias-flexible-segment-len=1" } */ ++ ++#define TYPE int ++#define SIZE 257 ++ ++void __attribute__ ((weak)) ++f (TYPE *x, TYPE *y, unsigned short n, long m __attribute__((unused))) ++{ ++ for (int i = 0; i < SIZE; ++i) ++ x[i * n] += y[i * n]; ++} ++ ++/* { dg-final { scan-assembler {\tld1w\tz[0-9]+} } } */ ++/* { dg-final { scan-assembler {\tst1w\tz[0-9]+} } } */ ++/* { dg-final { scan-assembler {\tldr\tw[0-9]+} } } */ ++/* { dg-final { scan-assembler {\tstr\tw[0-9]+} } } */ ++/* Should use a WAR check that multiplies by (VF-2)*4 rather than ++ an overlap check that multiplies by (257-1)*4. */ ++/* { dg-final { scan-assembler {\tcntb\t(x[0-9]+)\n.*\tsub\tx[0-9]+, \1, #8\n.*\tmul\tx[0-9]+,[^\n]*\1} } } */ ++/* One range check and a check for n being zero. */ ++/* { dg-final { scan-assembler-times {\t(?:cmp|tst)\t} 2 } } */ ++/* { dg-final { scan-assembler-times {\tccmp\t} 1 } } */ +diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c +index 2cb54def8..8c5f1048c 100644 +--- a/gcc/tree-data-ref.c ++++ b/gcc/tree-data-ref.c +@@ -2071,31 +2071,14 @@ create_intersect_range_checks_index (class loop *loop, tree *cond_expr, + same arguments. Try to optimize cases in which the second access + is a write and in which some overlap is valid. 
*/ + +-static bool +-create_waw_or_war_checks (tree *cond_expr, ++static void ++create_waw_or_war_checks2 (tree *cond_expr, tree seg_len_a, + const dr_with_seg_len_pair_t &alias_pair) + { + const dr_with_seg_len& dr_a = alias_pair.first; + const dr_with_seg_len& dr_b = alias_pair.second; + +- /* Check for cases in which: +- +- (a) DR_B is always a write; +- (b) the accesses are well-ordered in both the original and new code +- (see the comment above the DR_ALIAS_* flags for details); and +- (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */ +- if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) +- return false; +- +- /* Check for equal (but possibly variable) steps. */ + tree step = DR_STEP (dr_a.dr); +- if (!operand_equal_p (step, DR_STEP (dr_b.dr))) +- return false; +- +- /* Make sure that we can operate on sizetype without loss of precision. */ +- tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr)); +- if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype)) +- return false; + + /* All addresses involved are known to have a common alignment ALIGN. + We can therefore subtract ALIGN from an exclusive endpoint to get +@@ -2112,9 +2095,6 @@ create_waw_or_war_checks (tree *cond_expr, + fold_convert (ssizetype, indicator), + ssize_int (0)); + +- /* Get lengths in sizetype. */ +- tree seg_len_a +- = fold_convert (sizetype, rewrite_to_non_trapping_overflow (dr_a.seg_len)); + step = fold_convert (sizetype, rewrite_to_non_trapping_overflow (step)); + + /* Each access has the following pattern: +@@ -2221,6 +2201,50 @@ create_waw_or_war_checks (tree *cond_expr, + *cond_expr = fold_build2 (GT_EXPR, boolean_type_node, subject, limit); + if (dump_enabled_p ()) + dump_printf (MSG_NOTE, "using an address-based WAR/WAW test\n"); ++} ++ ++/* This is a wrapper function for create_waw_or_war_checks2. 
*/ ++static bool ++create_waw_or_war_checks (tree *cond_expr, ++ const dr_with_seg_len_pair_t &alias_pair) ++{ ++ const dr_with_seg_len& dr_a = alias_pair.first; ++ const dr_with_seg_len& dr_b = alias_pair.second; ++ ++ /* Check for cases in which: ++ ++ (a) DR_B is always a write; ++ (b) the accesses are well-ordered in both the original and new code ++ (see the comment above the DR_ALIAS_* flags for details); and ++ (c) the DR_STEPs describe all access pairs covered by ALIAS_PAIR. */ ++ if (alias_pair.flags & ~(DR_ALIAS_WAR | DR_ALIAS_WAW)) ++ return false; ++ ++ /* Check for equal (but possibly variable) steps. */ ++ tree step = DR_STEP (dr_a.dr); ++ if (!operand_equal_p (step, DR_STEP (dr_b.dr))) ++ return false; ++ ++ /* Make sure that we can operate on sizetype without loss of precision. */ ++ tree addr_type = TREE_TYPE (DR_BASE_ADDRESS (dr_a.dr)); ++ if (TYPE_PRECISION (addr_type) != TYPE_PRECISION (sizetype)) ++ return false; ++ ++ /* Get lengths in sizetype. */ ++ tree seg_len_a ++ = fold_convert (sizetype, ++ rewrite_to_non_trapping_overflow (dr_a.seg_len)); ++ create_waw_or_war_checks2 (cond_expr, seg_len_a, alias_pair); ++ if (param_flexible_seg_len && dr_a.seg_len != dr_a.seg_len2) ++ { ++ tree seg_len2_a ++ = fold_convert (sizetype, ++ rewrite_to_non_trapping_overflow (dr_a.seg_len2)); ++ tree cond_expr2; ++ create_waw_or_war_checks2 (&cond_expr2, seg_len2_a, alias_pair); ++ *cond_expr = fold_build2 (TRUTH_OR_EXPR, boolean_type_node, ++ *cond_expr, cond_expr2); ++ } + return true; + } + +diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h +index 771d20fbb..5903ce66a 100644 +--- a/gcc/tree-data-ref.h ++++ b/gcc/tree-data-ref.h +@@ -208,12 +208,19 @@ class dr_with_seg_len + public: + dr_with_seg_len (data_reference_p d, tree len, unsigned HOST_WIDE_INT size, + unsigned int a) +- : dr (d), seg_len (len), access_size (size), align (a) {} +- ++ : dr (d), seg_len (len), seg_len2 (len), access_size (size), align (a) ++ {} ++ dr_with_seg_len 
(data_reference_p d, tree len, tree len2, ++ unsigned HOST_WIDE_INT size, unsigned int a) ++ : dr (d), seg_len (len), seg_len2 (len2), access_size (size), align (a) ++ {} + data_reference_p dr; + /* The offset of the last access that needs to be checked minus + the offset of the first. */ + tree seg_len; ++ /* The second version of segment length. Currently this is used to ++ soften checks for a small number of iterations. */ ++ tree seg_len2; + /* A value that, when added to abs (SEG_LEN), gives the total number of + bytes in the segment. */ + poly_uint64 access_size; +diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c +index e4466a4f3..1b8a03c9c 100644 +--- a/gcc/tree-vect-data-refs.c ++++ b/gcc/tree-vect-data-refs.c +@@ -3498,6 +3498,7 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo) + { + poly_uint64 lower_bound; + tree segment_length_a, segment_length_b; ++ tree segment_length2_a, segment_length2_b; + unsigned HOST_WIDE_INT access_size_a, access_size_b; + unsigned int align_a, align_b; + +@@ -3598,6 +3599,8 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo) + { + segment_length_a = size_zero_node; + segment_length_b = size_zero_node; ++ segment_length2_a = size_zero_node; ++ segment_length2_b = size_zero_node; + } + else + { +@@ -3606,8 +3609,15 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo) + length_factor = scalar_loop_iters; + else + length_factor = size_int (vect_factor); ++ /* In any case we should remember scalar_loop_iters ++ this helps to create flexible aliasing check ++ for small number of iterations. 
*/ + segment_length_a = vect_vfa_segment_size (dr_info_a, length_factor); + segment_length_b = vect_vfa_segment_size (dr_info_b, length_factor); ++ segment_length2_a ++ = vect_vfa_segment_size (dr_info_a, scalar_loop_iters); ++ segment_length2_b ++ = vect_vfa_segment_size (dr_info_b, scalar_loop_iters); + } + access_size_a = vect_vfa_access_size (dr_info_a); + access_size_b = vect_vfa_access_size (dr_info_b); +@@ -3652,9 +3662,9 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo) + } + + dr_with_seg_len dr_a (dr_info_a->dr, segment_length_a, +- access_size_a, align_a); ++ segment_length2_a, access_size_a, align_a); + dr_with_seg_len dr_b (dr_info_b->dr, segment_length_b, +- access_size_b, align_b); ++ segment_length2_b, access_size_b, align_b); + /* Canonicalize the order to be the one that's needed for accurate + RAW, WAR and WAW flags, in cases where the data references are + well-ordered. The order doesn't really matter otherwise, +-- +2.33.0 + From 15fb19070f0eeccb7bc87a789059a42d249e05da Mon Sep 17 00:00:00 2001 From: wangding16 Date: Wed, 6 Dec 2023 11:53:06 +0800 Subject: [PATCH 5/7] [Sync] Sync patch from openeuler/gcc 0150-Implement-propagation-of-permutations-in-fwprop.patch --- ...ropagation-of-permutations-in-fwprop.patch | 1050 +++++++++++++++++ 1 file changed, 1050 insertions(+) create mode 100644 0150-Implement-propagation-of-permutations-in-fwprop.patch diff --git a/0150-Implement-propagation-of-permutations-in-fwprop.patch b/0150-Implement-propagation-of-permutations-in-fwprop.patch new file mode 100644 index 0000000..005730e --- /dev/null +++ b/0150-Implement-propagation-of-permutations-in-fwprop.patch @@ -0,0 +1,1050 @@ +From 07aa5f889dc8bc3e642affe21dcfc197ad7d8b3b Mon Sep 17 00:00:00 2001 +From: Diachkov Ilia +Date: Sun, 3 Sep 2023 05:52:32 +0800 +Subject: [PATCH 12/13] Implement propagation of permutations in fwprop + +It is an implementation of permutation forward propagation, which is a +transformation designed to decrease the 
number of vector permutation +instructions in vectorized code, moving the permutations over arithmetic +operations. +--- + gcc/config/aarch64/aarch64-simd.md | 26 + + gcc/params.opt | 4 + + gcc/testsuite/gcc.dg/vect/transpose-9.c | 56 ++ + gcc/tree-ssa-forwprop.c | 891 ++++++++++++++++++++++++ + 4 files changed, 977 insertions(+) + create mode 100755 gcc/testsuite/gcc.dg/vect/transpose-9.c + +diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md +index 6049adc3f..af6d3ebf6 100644 +--- a/gcc/config/aarch64/aarch64-simd.md ++++ b/gcc/config/aarch64/aarch64-simd.md +@@ -4615,6 +4615,19 @@ + [(set_attr "type" "neon_shift_imm_long")] + ) + ++(define_insn "*aarch64_simd_vec_unpacks_lo_shiftsi" ++ [(set (match_operand:V4SI 0 "register_operand" "=w") ++ (ashift:V4SI ++ (sign_extend:V4SI ++ (vec_select:V4HI ++ (match_operand:V8HI 1 "register_operand" "w") ++ (match_operand:V8HI 2 "vect_par_cnst_lo_half" ""))) ++ (match_operand:V4SI 3 "aarch64_simd_rshift_imm" "Dr")))] ++ "TARGET_SIMD" ++ "shll\t%0.4s, %1.4h, #%3" ++ [(set_attr "type" "neon_compare_zero")] ++) ++ + ;; vshll_high_n + + (define_insn "aarch64_shll2_n" +@@ -4632,6 +4645,19 @@ + [(set_attr "type" "neon_shift_imm_long")] + ) + ++(define_insn "*aarch64_simd_vec_unpacks_hi_shiftsi" ++ [(set (match_operand:V4SI 0 "register_operand" "=w") ++ (ashift:V4SI ++ (sign_extend:V4SI ++ (vec_select:V4HI ++ (match_operand:V8HI 1 "register_operand" "w") ++ (match_operand:V8HI 2 "vect_par_cnst_hi_half" ""))) ++ (match_operand:V4SI 3 "aarch64_simd_rshift_imm" "Dr")))] ++ "TARGET_SIMD" ++ "shll2\t%0.4s, %1.8h, #%3" ++ [(set_attr "type" "neon_compare_zero")] ++) ++ + ;; vrshr_n + + (define_insn "aarch64_shr_n" +diff --git a/gcc/params.opt b/gcc/params.opt +index 83fd705ee..a87f6f00a 100644 +--- a/gcc/params.opt ++++ b/gcc/params.opt +@@ -852,6 +852,10 @@ Maximum size, in storage units, of an aggregate which should be considered for s + Common Joined UInteger Var(param_sra_max_propagations) Param 
Optimization Init(32) + Maximum number of artificial accesses to enable forward propagation that Scalar Replacement of Aggregates will keep for one local variable. + ++-param=tree-forwprop-perm= ++Common Joined UInteger Var(param_tree_forwprop_perm) Param Optimization Init(0) ++Propagate permutations in vectorized code on tree forward propagation. ++ + -param=ssa-name-def-chain-limit= + Common Joined UInteger Var(param_ssa_name_def_chain_limit) Init(512) Param Optimization + The maximum number of SSA_NAME assignments to follow in determining a value. +diff --git a/gcc/testsuite/gcc.dg/vect/transpose-9.c b/gcc/testsuite/gcc.dg/vect/transpose-9.c +new file mode 100755 +index 000000000..f20a67c6e +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/vect/transpose-9.c +@@ -0,0 +1,56 @@ ++/* { dg-do compile { target { aarch64*-*-linux* } } } */ ++/* { dg-additional-options "-mtune=tsv110 --param=tree-forwprop-perm=1 -fdump-tree-forwprop-details" } */ ++/* { dg-require-effective-target vect_int } */ ++#include ++#include ++#include "tree-vect.h" ++ ++typedef unsigned short int sum_t; ++typedef unsigned int sum2_t; ++typedef long int intptr_t; ++typedef unsigned char data; ++#define BITS_PER_SUM (8 * sizeof(sum_t)) ++ ++static sum2_t bar(sum2_t a ) ++{ ++ sum2_t s = ((a>>(BITS_PER_SUM-1))&(((sum2_t)1<>BITS_PER_SUM)) >> 1; ++} ++/* { dg-final { scan-tree-dump "Initial permutations were reduced:" "forwprop4" } } */ ++/* { dg-final { scan-tree-dump "Permutations were moved through binary operations:" "forwprop4" } } */ ++ +diff --git a/gcc/tree-ssa-forwprop.c b/gcc/tree-ssa-forwprop.c +index ba0b55f4a..92ef5d036 100644 +--- a/gcc/tree-ssa-forwprop.c ++++ b/gcc/tree-ssa-forwprop.c +@@ -2225,6 +2225,893 @@ simplify_permutation (gimple_stmt_iterator *gsi) + return 0; + } + ++/* Compare the UID of two gimple stmts for sorting in ascending order. 
*/ ++ ++static int ++gimple_uid_cmp (const void *ptr0, const void *ptr1) ++{ ++ const gimple *stmt0 = *(gimple * const *) ptr0; ++ const gimple *stmt1 = *(gimple * const *) ptr1; ++ ++ if (gimple_uid (stmt0) < gimple_uid (stmt1)) ++ return -1; ++ else if (gimple_uid (stmt0) > gimple_uid (stmt1)) ++ return 1; ++ return 0; ++} ++ ++/* Find a source permutation statement in backward direction through a chain of ++ unary, single or binary operations. In the last case only one variable ++ operand is allowed. If it's found, return true and save the statement in ++ perm_stmts, otherwise return false. */ ++ ++static bool ++find_src_perm_stmt (tree op, auto_vec &perm_stmts) ++{ ++ gimple *stmt; ++ while ((stmt = get_prop_source_stmt (op, false, NULL))) ++ { ++ if (!can_propagate_from (stmt)) ++ return false; ++ ++ if (gimple_assign_rhs_code (stmt) == VEC_PERM_EXPR) ++ { ++ perm_stmts.safe_push (stmt); ++ return true; ++ } ++ ++ /* TODO: check vector length and element size. */ ++ enum tree_code code = gimple_assign_rhs_code (stmt); ++ switch (get_gimple_rhs_class (code)) ++ { ++ case GIMPLE_TERNARY_RHS: ++ return false; ++ case GIMPLE_BINARY_RHS: ++ { ++ tree op1 = gimple_assign_rhs1 (stmt); ++ tree op2 = gimple_assign_rhs2 (stmt); ++ bool is_cst_op1 = is_gimple_constant (op1); ++ bool is_cst_op2 = is_gimple_constant (op2); ++ if ((is_cst_op1 && is_cst_op2) || (!is_cst_op1 && !is_cst_op2)) ++ return false; ++ op = !is_cst_op1 && is_cst_op2 ? op1 : op2; ++ break; ++ } ++ case GIMPLE_UNARY_RHS: ++ case GIMPLE_SINGLE_RHS: ++ op = gimple_assign_rhs1 (stmt); ++ break; ++ default: ++ gcc_unreachable (); ++ } ++ if (TREE_CODE (op) != SSA_NAME) ++ return false; ++ } ++ return false; ++} ++ ++/* Check the stmt is binary operation and find initial permutations for both ++ of its sources. 
*/ ++ ++static bool ++find_initial_permutations (gimple_stmt_iterator *gsi, tree &type, ++ auto_vec &perm_stmts) ++{ ++ gimple *stmt = gsi_stmt (*gsi); ++ enum tree_code code = gimple_assign_rhs_code (stmt); ++ ++ // TODO: support other initial binary operations. ++ gcc_checking_assert (code == PLUS_EXPR || code == MINUS_EXPR); ++ ++ type = TREE_TYPE (gimple_assign_lhs (stmt)); ++ if (!VECTOR_TYPE_P (type)) ++ return false; ++ tree op1 = gimple_assign_rhs1 (stmt); ++ tree op2 = gimple_assign_rhs2 (stmt); ++ if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME ++ || TREE_TYPE (op1) != type || TREE_TYPE (op2) != type || op1 == op2) ++ return false; ++ ++ if (find_src_perm_stmt (op1, perm_stmts) ++ && find_src_perm_stmt (op2, perm_stmts)) ++ return true; ++ return false; ++} ++ ++/* Check if the permutation statement is suitable for the transformation. */ ++ ++static bool ++check_perm_stmt (gimple *stmt, tree type, vec *perm_stmts, ++ vec *src_vects) ++{ ++ if (!stmt || !can_propagate_from (stmt)) ++ return false; ++ ++ enum tree_code code = gimple_assign_rhs_code (stmt); ++ if (code != VEC_PERM_EXPR) ++ return false; ++ ++ tree op3 = gimple_assign_rhs3 (stmt); ++ tree op1 = gimple_assign_rhs1 (stmt); ++ tree op2 = gimple_assign_rhs2 (stmt); ++ if (TREE_CODE (op1) != SSA_NAME || TREE_CODE (op2) != SSA_NAME ++ || TREE_CODE (op3) != VECTOR_CST) ++ return false; ++ if (type != NULL_TREE && (TREE_TYPE (op1) != type ++ || TREE_TYPE (op2) != type)) ++ return false; ++ if (perm_stmts) ++ perm_stmts->safe_push (stmt); ++ if (src_vects) ++ { ++ src_vects->safe_push (op1); ++ src_vects->safe_push (op2); ++ } ++ return true; ++} ++ ++/* Collect permutation stmts preceding the given stmt. 
*/ ++ ++static bool ++find_perm_set (gimple *stmt, tree type, vec &perm_stmts, ++ vec &src_vects) ++{ ++ auto_vec ops; ++ if (!check_perm_stmt (stmt, NULL, NULL, &ops)) ++ return false; ++ ++ unsigned i; ++ tree op; ++ bool single_use_op = false; ++ FOR_EACH_VEC_ELT (ops, i, op) ++ { ++ /* Skip if we already processed the same operand. */ ++ if (i > 0 && ops[i] == ops[i - 1]) ++ continue; ++ /* Find one permutation stmt. */ ++ gimple *def_stmt = get_prop_source_stmt (op, false, &single_use_op); ++ if (!check_perm_stmt (def_stmt, type, &perm_stmts, &src_vects)) ++ return false; ++ if (single_use_op || src_vects.length () <= 1) ++ return false; ++ unsigned last_i = src_vects.length () - 1; ++ unsigned before_last_i = src_vects.length () - 2; ++ ++ /* Find one more permutation stmt. */ ++ gimple *use_stmt; ++ imm_use_iterator iter; ++ FOR_EACH_IMM_USE_STMT (use_stmt, iter, src_vects[before_last_i]) ++ if (use_stmt != def_stmt) ++ BREAK_FROM_IMM_USE_STMT (iter); ++ if (!use_stmt || use_stmt == def_stmt ++ || gimple_assign_rhs_code (use_stmt) != VEC_PERM_EXPR ++ || src_vects[before_last_i] != gimple_assign_rhs1 (use_stmt) ++ || src_vects[last_i] != gimple_assign_rhs2 (use_stmt)) ++ return false; ++ perm_stmts.safe_push (use_stmt); ++ } ++ return true; ++} ++ ++/* Walk permutation pattern and make a vector of permutation indices. */ ++ ++static bool ++make_vec_of_indices (vec &perm_pattern, vec &perm_indices) ++{ ++ unsigned i, j; ++ tree tree_it; ++ FOR_EACH_VEC_ELT (perm_pattern, i, tree_it) ++ { ++ unsigned HOST_WIDE_INT nelts; ++ if (!VECTOR_CST_NELTS (tree_it).is_constant (&nelts)) ++ return false; ++ for (j = 0; j < nelts; j++) ++ { ++ tree val = VECTOR_CST_ELT (tree_it, j); ++ gcc_checking_assert (TREE_CODE (val) == INTEGER_CST); ++ perm_indices.safe_push (TREE_INT_CST_LOW (val)); ++ } ++ } ++ return true; ++} ++ ++/* Check or collect a permutation pattern in the provided perm_stmts depending ++ on the passed parameters. 
If collect_pattern is true, collect permutation ++ vectors to pattern. In other case, check the pattern suits perm_stmts. */ ++ ++static bool ++check_or_collect_perm_pattern (vec &perm_stmts, vec &pattern, ++ bool collect_pattern) ++{ ++ unsigned i, j; ++ gimple *stmt_it; ++ tree tree_it; ++ FOR_EACH_VEC_ELT (perm_stmts, i, stmt_it) ++ { ++ gcc_assert (gimple_assign_rhs_code (stmt_it) == VEC_PERM_EXPR); ++ tree perm_vec = gimple_assign_rhs3 (stmt_it); ++ bool found = false; ++ FOR_EACH_VEC_ELT (pattern, j, tree_it) ++ if (operand_equal_p (tree_it, perm_vec)) ++ { ++ found = true; ++ break; ++ } ++ if (collect_pattern && !found) ++ pattern.safe_push (perm_vec); ++ else ++ gcc_assert (found); ++ if (i % pattern.length () != j) ++ return false; ++ } ++ return true; ++} ++ ++/* Identify the permutation pattern and check it. For now, we are checking ++ only transposition permutations with no more than 2 lines in their patterns. ++ Collect permutation const vectors and the second permutation stmts. */ ++ ++static bool ++check_perm_pattern (vec &first_perm_stmts, vec &perm_pattern, ++ vec &second_perm_stmts) ++{ ++ unsigned i, j; ++ gimple *stmt_it; ++ if (!check_or_collect_perm_pattern (first_perm_stmts, perm_pattern, true)) ++ return false; ++ ++ if (perm_pattern.length () == 0 || perm_pattern.length () > 2) ++ return false; ++ ++ /* Find the second permutation stmts. */ ++ hash_set visited; ++ FOR_EACH_VEC_ELT (first_perm_stmts, i, stmt_it) ++ { ++ tree dst = gimple_assign_lhs (stmt_it); ++ use_operand_p use_p; ++ imm_use_iterator iter; ++ FOR_EACH_IMM_USE_FAST (use_p, iter, dst) ++ { ++ gimple *stmt_it2 = USE_STMT (use_p); ++ if (visited.contains (stmt_it2)) ++ continue; ++ second_perm_stmts.safe_push (stmt_it2); ++ visited.add (stmt_it2); ++ } ++ } ++ second_perm_stmts.qsort (gimple_uid_cmp); ++ ++ if (first_perm_stmts.length () != second_perm_stmts.length ()) ++ return false; ++ ++ /* Check that all second_perm_stmts are VEC_PERM_EXPR. 
*/ ++ FOR_EACH_VEC_ELT (second_perm_stmts, i, stmt_it) ++ if (gimple_assign_rhs_code (stmt_it) != VEC_PERM_EXPR) ++ return false; ++ ++ /* Check permutation pattern on the second permutation stmts. */ ++ if (!check_or_collect_perm_pattern (second_perm_stmts, perm_pattern, false)) ++ return false; ++ ++ /* Check values of permutation indices. */ ++ auto_vec perm_indices (vector_cst_encoded_nelts (perm_pattern[0]) ++ * perm_pattern.length ()); ++ if (!make_vec_of_indices (perm_pattern, perm_indices)) ++ return false; ++ ++ unsigned val, half_len = perm_indices.length () / 2; ++ FOR_EACH_VEC_ELT (perm_indices, j, val) ++ if (val != (j % 2 ? half_len + j / 2 : j / 2)) ++ return false; ++ ++ /* Check the correspondence of defs in first_perm_stmts and uses in ++ second_perm_stmts. */ ++ tree type1 = TREE_TYPE (gimple_assign_lhs (first_perm_stmts[0])); ++ tree type2 = TREE_TYPE (gimple_assign_lhs (second_perm_stmts[0])); ++ if (type1 != type2) ++ return false; ++ ++ unsigned HOST_WIDE_INT len = TYPE_VECTOR_SUBPARTS (type1).to_constant (); ++ FOR_EACH_VEC_ELT (second_perm_stmts, i, stmt_it) ++ { ++ /* Vectors of first/second perm stmts consist of blocks, each block ++ transposes its own set of input vectors. J corresponds to the number ++ of such block in the vector. */ ++ unsigned j = (i / len) * len; ++ gimple *src_stmt1 = first_perm_stmts[j + (i - j) / 2]; ++ gimple *src_stmt2 = first_perm_stmts[j + (i - j) / 2 + len / 2]; ++ if (gimple_assign_rhs1 (stmt_it) != gimple_assign_lhs (src_stmt1) ++ || gimple_assign_rhs2 (stmt_it) != gimple_assign_lhs (src_stmt2)) ++ return false; ++ } ++ return true; ++} ++ ++/* For the given vector of stmts find all immediate def or use stmts. ++ It uses SSA and don't go trough loads/stores. 
*/ ++ ++static bool ++find_next_stmts (auto_vec &stmts, auto_vec &next_stmts, ++ bool is_forward, bool skip_perms) ++{ ++ unsigned i; ++ gimple *stmt_it; ++ hash_set new_stmt_set; ++ FOR_EACH_VEC_ELT (stmts, i, stmt_it) ++ { ++ if (is_forward) ++ { ++ tree lhs = gimple_assign_lhs (stmt_it); ++ if (!lhs || TREE_CODE (lhs) != SSA_NAME) ++ continue; ++ imm_use_iterator iter; ++ gimple *use_stmt; ++ FOR_EACH_IMM_USE_STMT (use_stmt, iter, lhs) ++ if (!new_stmt_set.contains (use_stmt)) ++ { ++ new_stmt_set.add (use_stmt); ++ if (!skip_perms ++ || gimple_assign_rhs_code (use_stmt) != VEC_PERM_EXPR) ++ next_stmts.safe_push (use_stmt); ++ } ++ } ++ else ++ { ++ tree rhs; ++ auto_vec rhs_vec (3); ++ if ((rhs = gimple_assign_rhs1 (stmt_it))) ++ rhs_vec.quick_push (rhs); ++ if ((rhs = gimple_assign_rhs2 (stmt_it))) ++ rhs_vec.quick_push (rhs); ++ if ((rhs = gimple_assign_rhs3 (stmt_it))) ++ rhs_vec.quick_push (rhs); ++ unsigned j; ++ FOR_EACH_VEC_ELT (rhs_vec, j, rhs) ++ { ++ if (TREE_CODE (rhs) == VIEW_CONVERT_EXPR) ++ rhs = TREE_OPERAND (rhs, 0); ++ if (TREE_CODE (rhs) != SSA_NAME) ++ continue; ++ gimple *def_stmt = get_prop_source_stmt (rhs, false, NULL); ++ if (!def_stmt) ++ return false; ++ if (new_stmt_set.contains (def_stmt)) ++ continue; ++ new_stmt_set.add (def_stmt); ++ if (!skip_perms ++ || gimple_assign_rhs_code (def_stmt) != VEC_PERM_EXPR) ++ next_stmts.safe_push (def_stmt); ++ } ++ } ++ } ++ return true; ++} ++ ++/* Check if stmts in the vector have similar code and type. Process only ++ assign stmts. */ ++ ++static bool ++check_stmts_similarity (auto_vec &stmts, enum tree_code &code) ++{ ++ code = NOP_EXPR; ++ tree type = NULL_TREE; ++ unsigned i; ++ gimple *stmt_it; ++ FOR_EACH_VEC_ELT (stmts, i, stmt_it) ++ { ++ if (!is_gimple_assign (stmt_it)) ++ return false; ++ tree lhs = gimple_assign_lhs (stmt_it); ++ enum tree_code code2 = gimple_assign_rhs_code (stmt_it); ++ if (type != NULL_TREE) ++ { ++ /* Unpack lo/hi are the same for the analysis. 
*/ ++ if (((code2 != VEC_UNPACK_LO_EXPR && code2 != VEC_UNPACK_HI_EXPR) ++ || (code != VEC_UNPACK_LO_EXPR && code != VEC_UNPACK_HI_EXPR)) ++ && (!lhs || type != TREE_TYPE (lhs) ++ || (code != NOP_EXPR && code != code2))) ++ return false; ++ } ++ else if (lhs) ++ type = TREE_TYPE (lhs); ++ if (code == NOP_EXPR) ++ code = code2; ++ } ++ return true; ++} ++ ++/* Check that the order of definitions of first_stmts and uses of second_stmts ++ is the same. */ ++ ++static bool ++check_def_use_order (vec &first_stmts, vec &second_stmts) ++{ ++ first_stmts.qsort (gimple_uid_cmp); ++ second_stmts.qsort (gimple_uid_cmp); ++ unsigned len1 = first_stmts.length (); ++ unsigned len2 = second_stmts.length (); ++ ++ /* Skip if one of the blocks is empty or the second block is permutaions. */ ++ if (!len1 || !len2 ++ || gimple_assign_rhs_code (second_stmts[0]) == VEC_PERM_EXPR) ++ return true; ++ ++ unsigned i; ++ gimple *stmt_it; ++ FOR_EACH_VEC_ELT (first_stmts, i, stmt_it) ++ { ++ tree op = gimple_assign_lhs (stmt_it); ++ imm_use_iterator iter; ++ gimple *stmt; ++ FOR_EACH_IMM_USE_STMT (stmt, iter, op) ++ { ++ if ((len1 == len2 && stmt != second_stmts[i]) ++ || (len1 == len2 * 2 && stmt != second_stmts[i % len2])) ++ RETURN_FROM_IMM_USE_STMT (iter, false); ++ enum tree_code code = gimple_assign_rhs_code (stmt); ++ if ((len1 * 2 == len2) ++ && ((code == VEC_UNPACK_LO_EXPR && stmt != second_stmts[2 * i]) ++ || (code == VEC_UNPACK_HI_EXPR ++ && stmt != second_stmts[2 * i + 1]))) ++ RETURN_FROM_IMM_USE_STMT (iter, false); ++ } ++ } ++ return true; ++} ++ ++/* Check similarity of stmts in the block of arithmetic operations. 
*/ ++ ++static bool ++check_arithmetic_block (vec &initial_perm_stmts, unsigned nstmts) ++{ ++ auto_vec next_stmts (nstmts); ++ auto_vec prev_stmts (nstmts); ++ ++ enum tree_code code; ++ unsigned i; ++ gimple *stmt_it; ++ FOR_EACH_VEC_ELT (initial_perm_stmts, i, stmt_it) ++ prev_stmts.quick_push (stmt_it); ++ ++ do ++ { ++ next_stmts.block_remove (0, next_stmts.length ()); ++ if (!find_next_stmts (prev_stmts, next_stmts, false, true)) ++ return false; ++ ++ /* Check that types and codes of all stmts in the list are the same. */ ++ if (!check_stmts_similarity (next_stmts, code)) ++ return false; ++ /* Check that the order of all operands is the same. */ ++ if (!check_def_use_order (next_stmts, prev_stmts)) ++ return false; ++ prev_stmts.block_remove (0, prev_stmts.length ()); ++ ++ FOR_EACH_VEC_ELT (next_stmts, i, stmt_it) ++ prev_stmts.safe_push (stmt_it); ++ } ++ while (code != NOP_EXPR); ++ ++ return true; ++} ++ ++/* Find two blocks of permutations on two sets of input vectors which are ++ used in the same vectorized arithmetic operations after the permutaion: ++ Va1...VaN = PERM{P1} (Sa1...SaN) ++ Vb1...VbN = PERM{P1} (Sb1...SbN) ++ Vc1...VcN = binops (Va1...VaN, Vb1...VbN) ++ The goal of the transformation is to execute the block of permutations ++ only once on the result of the arithmetic operations: ++ Va1...VaN = binops (Sa1...SaN, Sb1...SbN) ++ Vc1...VcN = PERM{P1} (Va1...VaN) ++ ++ Currently the analysis looks for transposition permutations that consist ++ of two layers of statements e.g.: ++ Vt1 = PERM { 0, 4, 1, 5 } Sa1, Sa2 // the first ++ Vt2 = PERM { 2, 6, 3, 7 } Sa1, Sa2 ++ Vt3 = PERM { 0, 4, 1, 5 } Sa3, Sa4 ++ Vt4 = PERM { 2, 6, 3, 7 } Sa3, Sa4 ++ Va1 = PERM { 0, 4, 1, 5 } Vt1, Vt3 // the second ++ Va2 = PERM { 2, 6, 3, 7 } Vt1, Vt3 ++ Va3 = PERM { 0, 4, 1, 5 } Vt2, Vt4 ++ Va4 = PERM { 2, 6, 3, 7 } Vt2, Vt4 ++ Permutation stmts are collected in first_perm_stmts and second_perm_stmts ++ vectors correspondinglys. 
++ ++ Arithmetic operations may contain several stmts for one pair of input source ++ vectors e.g.: ++ Vtmp1 = unop (Va1) ++ Vtmp2 = binop (Vb1, const) ++ Vc1 = binop (Vtmp1, Vtmp2) ++ The last stmts of each sequence in the arithmetic block are collected ++ in final_arith_stmts. */ ++ ++static bool ++analyze_perm_fwprop (tree type, unsigned HOST_WIDE_INT nelts, ++ vec &stmts, auto_vec &src_vects, ++ auto_vec &perm_pattern, ++ auto_vec &final_arith_stmts, ++ auto_vec &second_perm_stmts) ++{ ++ gcc_checking_assert (stmts.length () == 2); ++ auto_vec first_perm_stmts (nelts * 2); ++ if (!find_perm_set (stmts[0], type, first_perm_stmts, src_vects) ++ || !find_perm_set (stmts[1], type, first_perm_stmts, src_vects)) ++ return false; ++ first_perm_stmts.qsort (gimple_uid_cmp); ++ ++ /* Determine permutation pattern. */ ++ if (!check_perm_pattern (first_perm_stmts, perm_pattern, second_perm_stmts)) ++ return false; ++ ++ /* Find all arithmetic stmts. */ ++ unsigned i; ++ gimple *stmt_it; ++ auto_vec all_arith_stmts (nelts * 2); ++ hash_set visited; ++ FOR_EACH_VEC_ELT (second_perm_stmts, i, stmt_it) ++ { ++ tree dst = gimple_assign_lhs (stmt_it); ++ use_operand_p use_p; ++ gimple *use_stmt; ++ if (!single_imm_use (dst, &use_p, &use_stmt)) ++ return false; ++ all_arith_stmts.quick_push (use_stmt); ++ visited.add (use_stmt); ++ } ++ ++ /* Select final arithmetic stmts. */ ++ FOR_EACH_VEC_ELT (all_arith_stmts, i, stmt_it) ++ { ++ tree dst = gimple_assign_lhs (stmt_it); ++ use_operand_p use_p; ++ imm_use_iterator iter; ++ bool use_only_outside_arith_stmts = true; ++ FOR_EACH_IMM_USE_FAST (use_p, iter, dst) ++ if (visited.contains (USE_STMT (use_p))) ++ { ++ use_only_outside_arith_stmts = false; ++ break; ++ } ++ if (use_only_outside_arith_stmts) ++ final_arith_stmts.quick_push (stmt_it); ++ } ++ ++ /* Check that all results has the same arithmetic patterns. 
*/ ++ if (!check_arithmetic_block (final_arith_stmts, nelts)) ++ return false; ++ ++ if (final_arith_stmts.length () < nelts) ++ return false; ++ return true; ++} ++ ++/* Substitute uses of stmts' results by new_uses. */ ++ ++static void ++substitute_uses (vec &stmts, vec &new_uses) ++{ ++ gcc_checking_assert (stmts.length () == new_uses.length ()); ++ unsigned i; ++ gimple *stmt_it; ++ FOR_EACH_VEC_ELT (stmts, i, stmt_it) ++ { ++ tree op = gimple_assign_lhs (stmt_it); ++ imm_use_iterator iter; ++ gimple *use_stmt; ++ FOR_EACH_IMM_USE_STMT (use_stmt, iter, op) ++ { ++ use_operand_p use_p; ++ FOR_EACH_IMM_USE_ON_STMT (use_p, iter) ++ SET_USE (use_p, new_uses[i]); ++ update_stmt (use_stmt); ++ } ++ } ++} ++ ++/* Propagate permutations through the block of arithmetic operations. */ ++ ++static void ++fwprop_perms (tree type, auto_vec &src_vects, ++ auto_vec &perm_pattern, ++ auto_vec &final_arith_stmts, ++ auto_vec &second_perm_stmts) ++{ ++ /* Build new permutation stmts after the block of arithmetic stmts. 
*/ ++ gimple_seq new_stmts = NULL; ++ unsigned perm_block_size = final_arith_stmts.length (); ++ auto_vec new_first_perm_vals (perm_block_size); ++ hash_set new_stmts_set; ++ unsigned i, perm_pattern_size = perm_pattern.length (); ++ for (i = 0; i < perm_block_size; i++) ++ { ++ tree op0 = gimple_assign_lhs (final_arith_stmts[i / 2]); ++ unsigned idx = i / 2 + perm_block_size / 2; ++ tree op1 = gimple_assign_lhs (final_arith_stmts[idx]); ++ tree res = gimple_build (&new_stmts, VEC_PERM_EXPR, type, op0, op1, ++ perm_pattern[i % perm_pattern_size]); ++ new_first_perm_vals.quick_push (res); ++ new_stmts_set.add (gimple_seq_last (new_stmts)); ++ } ++ auto_vec new_second_perm_vals (perm_block_size); ++ for (i = 0; i < perm_block_size; i++) ++ { ++ tree op0 = new_first_perm_vals[i / 2]; ++ tree op1 = new_first_perm_vals[i / 2 + perm_block_size/ 2]; ++ tree res = gimple_build (&new_stmts, VEC_PERM_EXPR, type, op0, op1, ++ perm_pattern[i % perm_pattern_size]); ++ new_second_perm_vals.quick_push (res); ++ new_stmts_set.add (gimple_seq_last (new_stmts)); ++ } ++ ++ gimple_stmt_iterator g = gsi_for_stmt (final_arith_stmts.last ()); ++ gsi_insert_seq_after (&g, new_stmts, GSI_SAME_STMT); ++ ++ /* Replace old uses of the arithmetic block results by destinations of ++ the new permutation block. */ ++ gimple *stmt_it; ++ FOR_EACH_VEC_ELT (final_arith_stmts, i, stmt_it) ++ { ++ tree op0 = gimple_assign_lhs (final_arith_stmts[i]); ++ imm_use_iterator iter; ++ gimple *use_stmt; ++ use_operand_p use_p; ++ FOR_EACH_IMM_USE_STMT (use_stmt, iter, op0) ++ { ++ if (new_stmts_set.contains (use_stmt)) ++ continue; ++ FOR_EACH_IMM_USE_ON_STMT (use_p, iter) ++ SET_USE (use_p, new_second_perm_vals[i]); ++ update_stmt (use_stmt); ++ } ++ } ++ ++ /* Disconnect the old permutation stmts. */ ++ substitute_uses (second_perm_stmts, src_vects); ++} ++ ++/* Find the permutation stmts in the forward or backward direction (in terms of ++ def/use graph) starting from the vector of initial stmts. 
Count reduction ++ stmts (i.e. binary operations) if they can change the number of processed ++ elements. */ ++ ++static bool ++find_perm_stmts (vec &initial_stmts, unsigned nstmts, ++ vec &final_perm_stmts, bool is_forward, ++ unsigned &nreduct) ++{ ++ auto_vec next_stmts (nstmts); ++ auto_vec prev_stmts (nstmts); ++ ++ nreduct = 0; ++ enum tree_code code; ++ unsigned i; ++ gimple *stmt_it; ++ FOR_EACH_VEC_ELT (initial_stmts, i, stmt_it) ++ prev_stmts.quick_push (stmt_it); ++ ++ do ++ { ++ next_stmts.block_remove (0, next_stmts.length ()); ++ if (!find_next_stmts (prev_stmts, next_stmts, is_forward, false)) ++ return false; ++ ++ /* Check that types and codes of all stmts in the list are the same. */ ++ if (!check_stmts_similarity (next_stmts, code)) ++ return false; ++ ++ /* TODO: don't take into account binary operations with constants. */ ++ if (TREE_CODE_CLASS (code) == tcc_binary) ++ nreduct += 1; ++ ++ if (is_forward ? !check_def_use_order (prev_stmts, next_stmts) ++ : !check_def_use_order (next_stmts, prev_stmts)) ++ return false; ++ ++ prev_stmts.block_remove (0, prev_stmts.length ()); ++ ++ FOR_EACH_VEC_ELT (next_stmts, i, stmt_it) ++ prev_stmts.safe_push (stmt_it); ++ } ++ while (code != NOP_EXPR && code != VEC_PERM_EXPR); ++ ++ if (code != VEC_PERM_EXPR) ++ return false; ++ ++ FOR_EACH_VEC_ELT (next_stmts, i, stmt_it) ++ final_perm_stmts.safe_push (stmt_it); ++ final_perm_stmts.qsort (gimple_uid_cmp); ++ return true; ++} ++ ++/* Check if the initial and the final permutations can be optimized i.e. ++ the initial permutation can be removed with the modification of ++ the final one. 
*/ ++ ++static bool ++can_reduce_permutations (unsigned init_nelts, vec &perm_pattern, ++ vec &init_perm_stmts) ++{ ++ auto_vec perm_indices (init_nelts); ++ if (!make_vec_of_indices (perm_pattern, perm_indices)) ++ return false; ++ unsigned i, j; ++ gimple *stmt_it; ++ unsigned perm_vec_size = perm_indices.length (); ++ FOR_EACH_VEC_ELT (init_perm_stmts, i, stmt_it) ++ { ++ gcc_assert (gimple_assign_rhs_code (stmt_it) == VEC_PERM_EXPR); ++ tree perm_vec2 = gimple_assign_rhs3 (stmt_it); ++ unsigned HOST_WIDE_INT mask_elts; ++ if (!VECTOR_CST_NELTS (perm_vec2).is_constant (&mask_elts)) ++ return false; ++ for (j = 0; j < mask_elts; j++) ++ { ++ tree val = VECTOR_CST_ELT (perm_vec2, j); ++ gcc_assert (TREE_CODE (val) == INTEGER_CST); ++ unsigned HOST_WIDE_INT int_val = TREE_INT_CST_LOW (val); ++ if (int_val != perm_indices[j % perm_vec_size] ++ + (j / perm_vec_size) * perm_vec_size) ++ return false; ++ } ++ } ++ return true; ++} ++ ++/* Find permutation blocks before and after arithmetic operations and decide ++ if the number of permutations can be reduced, e.g: ++ Va1...VaN = PERM{P1} (Sa1...SaN) ++ Vb1...VbM = some operations (Va1...VaN) ++ Vb1...VbM = PERM{P2} (Sb1...SbM) ++ can be transformed to: ++ Vb1...VbM = some operations (Va1...VaN) ++ Vb1...VbM = PERM{P3} (Sb1...SbM) ++ ++ Currently it supports initial permutations like this: ++ Va1 = PERM { 0, 4, 1, 5, 2, 6, 3, 7, 8, 12, 9, 13, 10, 14, 11, 15} Sa1 ++ and transposition permutations with two layers of permutation stmts as ++ final permutaions. ++ ++ Operations between permutations can include unary and binary arithmetic, ++ element conversions and vector packing/unpacking. 
*/ ++ ++static bool ++analyze_perm_reduction (unsigned HOST_WIDE_INT nelts, ++ vec &perm_stmts, ++ vec &init_perm_stmts, ++ vec &second_perm_stmts) ++{ ++ auto_vec first_perm_stmts (nelts * 2); ++ if (!check_perm_stmt (perm_stmts[0], NULL_TREE, &first_perm_stmts, NULL) ++ || !check_perm_stmt (perm_stmts[1], NULL_TREE, &first_perm_stmts, NULL)) ++ return false; ++ ++ unsigned nreduct; ++ auto_vec final_perm_stmts (nelts * 2); ++ if (!find_perm_stmts (first_perm_stmts, nelts, final_perm_stmts, true, ++ nreduct)) ++ return false; ++ ++ if (!find_perm_stmts (final_perm_stmts, nelts, init_perm_stmts, false, ++ nreduct)) ++ return false; ++ ++ /* Check number of elemetns in the inital and final data block. */ ++ tree init_elem_type = TREE_TYPE (gimple_assign_lhs (init_perm_stmts[0])); ++ unsigned init_nelts = TYPE_VECTOR_SUBPARTS (init_elem_type).to_constant () ++ * init_perm_stmts.length (); ++ tree final_elem_type = TREE_TYPE (gimple_assign_lhs (final_perm_stmts[0])); ++ unsigned final_nelts = TYPE_VECTOR_SUBPARTS (final_elem_type).to_constant () ++ * final_perm_stmts.length (); ++ if (init_nelts != final_nelts * (1 + nreduct)) ++ return false; ++ ++ /* Check the final permutations and detect its pattern. */ ++ auto_vec perm_pattern (nelts); ++ if (!check_perm_pattern (final_perm_stmts, perm_pattern, second_perm_stmts)) ++ return false; ++ ++ return can_reduce_permutations (init_nelts, perm_pattern, init_perm_stmts); ++} ++ ++/* Do the optimization: skip the initial permutation and change the order ++ of destinations after the second layer of permutation statements in ++ the final permutation block. 
*/ ++ ++static void ++reduce_perms (vec &init_perm_stmts, vec &second_perm_stmts) ++{ ++ unsigned i; ++ gimple *stmt_it; ++ auto_vec new_srcs (init_perm_stmts.length ()); ++ FOR_EACH_VEC_ELT (init_perm_stmts, i, stmt_it) ++ new_srcs.quick_push (gimple_assign_rhs1 (stmt_it)); ++ substitute_uses (init_perm_stmts, new_srcs); ++ ++ unsigned half = second_perm_stmts.length () / 2; ++ auto_vec new_dsts (second_perm_stmts.length ()); ++ FOR_EACH_VEC_ELT (second_perm_stmts, i, stmt_it) ++ { ++ unsigned idx = i < half ? i << 1 : ((i - half) << 1) + 1; ++ new_dsts.quick_push (gimple_assign_lhs (second_perm_stmts[idx])); ++ } ++ ++ FOR_EACH_VEC_ELT (second_perm_stmts, i, stmt_it) ++ { ++ gimple_assign_set_lhs (stmt_it, new_dsts[i]); ++ update_stmt (stmt_it); ++ } ++} ++ ++/* Optimize permutations in the following two cases: ++ 1. Recognize the same permutations of two sets of vectors with subsequent ++ binary arithmetic operations on them: ++ V1 = PERM{1} (S1); ++ V2 = PERM{1} (S2); ++ V3 = V1 binop V2; ++ then move the permutation after the operations: ++ V0 = S1 binop S2; ++ V3 = PERM{1} V0; ++ 2. Detect the first permutation before some operations on a set of vectors ++ and the second one after the operations: ++ V1 = PERM{1} (S1) ++ V2 = set of operations (V1) ++ V3 = PERM{2} (V2) ++ try to reduce them: ++ V2 = set of operations (S1) ++ V3 = PERM{3} (V2) ++ Return true if the optimization is successful. 
*/ ++ ++static bool ++propagate_permutations (gimple_stmt_iterator *gsi) ++{ ++ tree type; ++ auto_vec perm_stmts (2); ++ ++ if (!find_initial_permutations (gsi, type, perm_stmts)) ++ return false; ++ ++ unsigned HOST_WIDE_INT nelts = TYPE_VECTOR_SUBPARTS (type).to_constant (); ++ auto_vec final_arith_stmts (nelts * 2); ++ auto_vec second_perm_stmts (nelts * 2); ++ auto_vec src_vects (nelts * 2); ++ auto_vec perm_pattern (nelts); ++ if (analyze_perm_fwprop (type, nelts, perm_stmts, src_vects, perm_pattern, ++ final_arith_stmts, second_perm_stmts)) ++ { ++ fwprop_perms (type, src_vects, perm_pattern, final_arith_stmts, ++ second_perm_stmts); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ unsigned i; ++ gimple *stmt_it; ++ fprintf (dump_file, "Permutations were moved through " ++ "binary operations:\n"); ++ FOR_EACH_VEC_ELT (second_perm_stmts, i, stmt_it) ++ print_gimple_stmt (dump_file, stmt_it, 0); ++ } ++ return true; ++ } ++ ++ auto_vec init_perm_stmts (nelts * 2); ++ auto_vec final_perm_stmts (nelts * 2); ++ if (analyze_perm_reduction (nelts, perm_stmts, init_perm_stmts, ++ final_perm_stmts)) ++ { ++ reduce_perms (init_perm_stmts, final_perm_stmts); ++ if (dump_file && (dump_flags & TDF_DETAILS)) ++ { ++ unsigned i; ++ gimple *stmt_it; ++ fprintf (dump_file, "Initial permutations were reduced:\n"); ++ FOR_EACH_VEC_ELT (init_perm_stmts, i, stmt_it) ++ print_gimple_stmt (dump_file, stmt_it, 0); ++ } ++ return true; ++ } ++ return false; ++} ++ + /* Get the BIT_FIELD_REF definition of VAL, if any, looking through + conversions with code CONV_CODE or update it if still ERROR_MARK. + Return NULL_TREE if no such matching def was found. 
*/ +@@ -3155,6 +4042,10 @@ pass_forwprop::execute (function *fun) + || code == BIT_XOR_EXPR) + && simplify_rotate (&gsi)) + changed = true; ++ else if ((code == PLUS_EXPR || code == MINUS_EXPR) ++ && param_tree_forwprop_perm ++ && propagate_permutations (&gsi)) ++ changed = true; + else if (code == VEC_PERM_EXPR) + { + int did_something = simplify_permutation (&gsi); +-- +2.33.0 + From 602c07bfd07a13196d85a1bf5de22b3c22b596bd Mon Sep 17 00:00:00 2001 From: wangding16 Date: Wed, 6 Dec 2023 11:53:24 +0800 Subject: [PATCH 6/7] [Sync] Sync patch from openeuler/gcc 0151-Fix-bugs-and-add-tests-for-RTL-ifcvt.patch --- ...Fix-bugs-and-add-tests-for-RTL-ifcvt.patch | 381 ++++++++++++++++++ 1 file changed, 381 insertions(+) create mode 100644 0151-Fix-bugs-and-add-tests-for-RTL-ifcvt.patch diff --git a/0151-Fix-bugs-and-add-tests-for-RTL-ifcvt.patch b/0151-Fix-bugs-and-add-tests-for-RTL-ifcvt.patch new file mode 100644 index 0000000..b228e2e --- /dev/null +++ b/0151-Fix-bugs-and-add-tests-for-RTL-ifcvt.patch @@ -0,0 +1,381 @@ +From 4bcb19923cdcb042d66057766d661ef68bf70e92 Mon Sep 17 00:00:00 2001 +From: Chernonog Vyacheslav 00812786 +Date: Wed, 29 Mar 2023 05:22:17 +0300 +Subject: [PATCH 13/13] Fix bugs and add tests for RTL ifcvt + +1. Fix bug in rtl ifcvt that run pass despite renaming failure. +2. Fix bug that prevent final set register to be renamed. +3. Clean up dominance info before runnig cleanup_cfg to avoid fixup + invalid dominance info. +4. Remove duplicated cleanup_cfg. +5. Add tests. 
+--- + gcc/common.opt | 4 + + gcc/ifcvt.c | 88 ++++++++++++------- + gcc/params.opt | 4 - + .../gcc.c-torture/execute/ifcvt-renaming-1.c | 38 ++++++++ + gcc/testsuite/gcc.dg/ifcvt-6.c | 29 ++++++ + 5 files changed, 128 insertions(+), 35 deletions(-) + create mode 100644 gcc/testsuite/gcc.c-torture/execute/ifcvt-renaming-1.c + create mode 100644 gcc/testsuite/gcc.dg/ifcvt-6.c + +diff --git a/gcc/common.opt b/gcc/common.opt +index 6f0ed7cea..92d3a1986 100644 +--- a/gcc/common.opt ++++ b/gcc/common.opt +@@ -3534,4 +3534,8 @@ fipa-ra + Common Report Var(flag_ipa_ra) Optimization + Use caller save register across calls if possible. + ++fifcvt-allow-complicated-cmps ++Common Report Var(flag_ifcvt_allow_complicated_cmps) Optimization ++Allow RTL if-conversion pass to deal with complicated cmps (can increase compilation time). ++ + ; This comment is to ensure we retain the blank line above. +diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c +index 50a73a7ca..209987ebc 100644 +--- a/gcc/ifcvt.c ++++ b/gcc/ifcvt.c +@@ -876,7 +876,7 @@ noce_emit_store_flag (struct noce_if_info *if_info, rtx x, int reversep, + } + + /* Don't even try if the comparison operands or the mode of X are weird. */ +- if (!param_ifcvt_allow_complicated_cmps ++ if (!flag_ifcvt_allow_complicated_cmps + && (cond_complex + || !SCALAR_INT_MODE_P (GET_MODE (x)))) + return NULL_RTX; +@@ -1745,7 +1745,7 @@ noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code, + + /* Don't even try if the comparison operands are weird + except that the target supports cbranchcc4. */ +- if (! param_ifcvt_allow_complicated_cmps ++ if (! flag_ifcvt_allow_complicated_cmps + && (! general_operand (cmp_a, GET_MODE (cmp_a)) + || ! general_operand (cmp_b, GET_MODE (cmp_b)))) + { +@@ -1918,6 +1918,19 @@ noce_try_cmove (struct noce_if_info *if_info) + return FALSE; + } + ++/* Return true if X contains a conditional code mode rtx. 
*/ ++ ++static bool ++contains_ccmode_rtx_p (rtx x) ++{ ++ subrtx_iterator::array_type array; ++ FOR_EACH_SUBRTX (iter, array, x, ALL) ++ if (GET_MODE_CLASS (GET_MODE (*iter)) == MODE_CC) ++ return true; ++ ++ return false; ++} ++ + /* Helper for bb_valid_for_noce_process_p. Validate that + the rtx insn INSN is a single set that does not set + the conditional register CC and is in general valid for +@@ -1936,6 +1949,8 @@ insn_valid_noce_process_p (rtx_insn *insn, rtx cc) + /* Currently support only simple single sets in test_bb. */ + if (!sset + || !noce_operand_ok (SET_DEST (sset)) ++ || (!flag_ifcvt_allow_complicated_cmps ++ && contains_ccmode_rtx_p (SET_DEST (sset))) + || !noce_operand_ok (SET_SRC (sset))) + return false; + +@@ -1974,8 +1989,7 @@ bbs_ok_for_cmove_arith (basic_block bb_a, + continue; + /* Record all registers that BB_A sets. */ + FOR_EACH_INSN_DEF (def, a_insn) +- if (!(to_rename && DF_REF_REG (def) == to_rename)) +- bitmap_set_bit (bba_sets, DF_REF_REGNO (def)); ++ bitmap_set_bit (bba_sets, DF_REF_REGNO (def)); + } + + bitmap_and (intersections, df_get_live_in (bb_b), bba_sets); +@@ -1984,6 +1998,7 @@ bbs_ok_for_cmove_arith (basic_block bb_a, + { + if (!active_insn_p (b_insn)) + continue; ++ + rtx sset_b = single_set (b_insn); + + if (!sset_b) +@@ -2081,7 +2096,12 @@ noce_emit_bb (rtx last_insn, basic_block bb, bool simple) + return true; + } + +-/* This function tries to rename regs that intersect with considered bb. */ ++/* This function tries to rename regs that intersect with considered bb ++ inside condition expression. Condition expression will be moved down ++ if the optimization will be applied, so it is essential to be sure that ++ all intersected registers will be renamed otherwise transformation ++ can't be applied. Function returns true if renaming was successful ++ and optimization can proceed futher. 
*/ + + static bool + noce_rename_regs_in_cond (struct noce_if_info *if_info, bitmap cond_rename_regs) +@@ -2092,11 +2112,11 @@ noce_rename_regs_in_cond (struct noce_if_info *if_info, bitmap cond_rename_regs) + if (param_ifcvt_allow_register_renaming < 2) + return false; + df_ref use; +- rtx_insn* cmp_insn = if_info->cond_earliest; ++ rtx_insn *cmp_insn = if_info->cond_earliest; + /* Jump instruction as a condion currently unsupported. */ + if (JUMP_P (cmp_insn)) + return false; +- rtx_insn* before_cmp = PREV_INSN (cmp_insn); ++ rtx_insn *before_cmp = PREV_INSN (cmp_insn); + start_sequence (); + rtx_insn *copy_of_cmp = as_a <rtx_insn *> (copy_rtx (cmp_insn)); + basic_block cmp_block = BLOCK_FOR_INSN (cmp_insn); +@@ -2122,7 +2142,7 @@ noce_rename_regs_in_cond (struct noce_if_info *if_info, bitmap cond_rename_regs) + + emit_insn_after_setloc (seq, before_cmp, INSN_LOCATION (cmp_insn)); + delete_insn_and_edges (cmp_insn); +- rtx_insn* insn; ++ rtx_insn *insn; + FOR_BB_INSNS (cmp_block, insn) + df_insn_rescan (insn); + +@@ -2135,13 +2155,15 @@ noce_rename_regs_in_cond (struct noce_if_info *if_info, bitmap cond_rename_regs) + return success; + } + +-/* This function tries to rename regs that intersect with considered bb. */ ++/* This function tries to rename regs that intersect with considered bb. ++ return true if the renaming was successful and optimization can ++ proceed futher, false otherwise.
*/ + static bool + noce_rename_regs_in_bb (basic_block test_bb, bitmap rename_regs) + { + if (bitmap_empty_p (rename_regs)) + return true; +- rtx_insn* insn; ++ rtx_insn *insn; + rtx_insn *last_insn = last_active_insn (test_bb, FALSE); + bool res = true; + start_sequence (); +@@ -2153,7 +2175,7 @@ noce_rename_regs_in_bb (basic_block test_bb, bitmap rename_regs) + rtx sset = single_set (insn); + gcc_assert (sset); + rtx x = SET_DEST (sset); +- if (!REG_P (x) || bitmap_bit_p (rename_regs, REGNO (x))) ++ if (!REG_P (x) || !bitmap_bit_p (rename_regs, REGNO (x))) + continue; + + machine_mode mode = GET_MODE (x); +@@ -2175,7 +2197,7 @@ noce_rename_regs_in_bb (basic_block test_bb, bitmap rename_regs) + noce_emit_move_insn (tmp,x); + } + set_used_flags (insn); +- rtx_insn* rename_candidate; ++ rtx_insn *rename_candidate; + for (rename_candidate = NEXT_INSN (insn); + rename_candidate && rename_candidate!= NEXT_INSN (BB_END (test_bb)); + rename_candidate = NEXT_INSN (rename_candidate)) +@@ -2193,17 +2215,16 @@ noce_rename_regs_in_bb (basic_block test_bb, bitmap rename_regs) + replace_res = validate_replace_rtx (x, tmp, rename_candidate); + gcc_assert (replace_res); + set_used_flags (rename_candidate); +- + } + set_used_flags (x); + set_used_flags (tmp); +- + } +- rtx_insn *seq = get_insns (); +- unshare_all_rtl_in_chain (seq); +- end_sequence (); +- emit_insn_before_setloc (seq, first_active_insn (test_bb), +- INSN_LOCATION (first_active_insn (test_bb))); ++ ++ rtx_insn *seq = get_insns (); ++ unshare_all_rtl_in_chain (seq); ++ end_sequence (); ++ emit_insn_before_setloc (seq, first_active_insn (test_bb), ++ INSN_LOCATION (first_active_insn (test_bb))); + FOR_BB_INSNS (test_bb, insn) + df_insn_rescan (insn); + return res; +@@ -2305,9 +2326,10 @@ noce_try_cmove_arith (struct noce_if_info *if_info) + BITMAP_FREE (else_bb_rename_regs); + return FALSE; + } +- bool prepass_renaming = true; +- prepass_renaming |= noce_rename_regs_in_bb (then_bb, then_bb_rename_regs); +- 
prepass_renaming |= noce_rename_regs_in_bb (else_bb, else_bb_rename_regs); ++ bool prepass_renaming = noce_rename_regs_in_bb (then_bb, ++ then_bb_rename_regs) ++ && noce_rename_regs_in_bb (else_bb, ++ else_bb_rename_regs); + + BITMAP_FREE (then_bb_rename_regs); + BITMAP_FREE (else_bb_rename_regs); +@@ -2321,6 +2343,7 @@ noce_try_cmove_arith (struct noce_if_info *if_info) + came from the test block. The non-empty complex block that we will + emit might clobber the register used by B or A, so move it to a pseudo + first. */ ++ + rtx tmp_a = NULL_RTX; + rtx tmp_b = NULL_RTX; + +@@ -3233,6 +3256,7 @@ bb_valid_for_noce_process_p (basic_block test_bb, rtx cond, + && reg_set_between_p (x, first_insn, prev_last_insn) + && param_ifcvt_allow_register_renaming < 1) + return false; ++ + bitmap test_bb_temps = BITMAP_ALLOC (&reg_obstack); + + /* The regs that are live out of test_bb. */ +@@ -3268,9 +3292,10 @@ bb_valid_for_noce_process_p (basic_block test_bb, rtx cond, + else + goto free_bitmap_and_fail; + } +- potential_cost += pattern_cost (sset, speed_p); +- if (SET_DEST (sset) != SET_DEST (last_set)) +- bitmap_set_bit (test_bb_temps, REGNO (SET_DEST (sset))); ++ ++ potential_cost += pattern_cost (sset, speed_p); ++ if (SET_DEST (sset) != SET_DEST (last_set)) ++ bitmap_set_bit (test_bb_temps, REGNO (SET_DEST (sset))); + } + + /* If any of the intermediate results in test_bb are live after test_bb +@@ -3645,11 +3670,12 @@ noce_process_if_block (struct noce_if_info *if_info) + } + + if (!noce_rename_regs_in_cond (if_info, cond_rename_regs)) +- return false; +- cond = if_info->cond; +- ++ { ++ BITMAP_FREE (cond_rename_regs); ++ return false; ++ } + BITMAP_FREE (cond_rename_regs); +- ++ cond = if_info->cond; + if (speed_p) + if_info->original_cost += average_cost (then_cost, else_cost, + find_edge (test_bb, then_bb)); +@@ -5592,12 +5618,13 @@ if_convert (bool after_combine) + { + basic_block bb; + int pass; +- cleanup_cfg (CLEANUP_EXPENSIVE); ++ + if (optimize == 1) + { +
df_live_add_problem (); + df_live_set_all_dirty (); + } ++ cleanup_cfg (CLEANUP_EXPENSIVE); + + /* Record whether we are after combine pass. */ + ifcvt_after_combine = after_combine; +@@ -5702,7 +5729,6 @@ rest_of_handle_if_conversion (void) + dump_reg_info (dump_file); + dump_flow_info (dump_file, dump_flags); + } +- cleanup_cfg (CLEANUP_EXPENSIVE); + if_convert (false); + if (num_updated_if_blocks) + /* Get rid of any dead CC-related instructions. */ +diff --git a/gcc/params.opt b/gcc/params.opt +index 345f9b3ff..272a0eb2b 100644 +--- a/gcc/params.opt ++++ b/gcc/params.opt +@@ -574,10 +574,6 @@ Maximum permissible cost for the sequence that would be generated by the RTL if- + Common Joined UInteger Var(param_max_rtl_if_conversion_unpredictable_cost) Init(40) IntegerRange(0, 200) Param Optimization + Maximum permissible cost for the sequence that would be generated by the RTL if-conversion pass for a branch that is considered unpredictable. + +--param=ifcvt-allow-complicated-cmps= +-Common Joined UInteger Var(param_ifcvt_allow_complicated_cmps) IntegerRange(0, 1) Param Optimization +-Allow RTL if-conversion pass to deal with complicated cmps (can increase compilation time). +- + -param=ifcvt-allow-register-renaming= + Common Joined UInteger Var(param_ifcvt_allow_register_renaming) IntegerRange(0, 2) Param Optimization + Allow RTL if-conversion pass to aggressively rename registers in basic blocks. Sometimes additional moves will be created. 
+diff --git a/gcc/testsuite/gcc.c-torture/execute/ifcvt-renaming-1.c b/gcc/testsuite/gcc.c-torture/execute/ifcvt-renaming-1.c +new file mode 100644 +index 000000000..761c8ab7e +--- /dev/null ++++ b/gcc/testsuite/gcc.c-torture/execute/ifcvt-renaming-1.c +@@ -0,0 +1,38 @@ ++ ++extern void abort(void); ++ ++__attribute__ ((noinline)) ++int foo (int x, int y, int z, int a, int b) ++{ ++ if (a < 2) ++ { ++ if (a == 0) ++ { ++ if (x - y < 0) ++ x = x - y + z; ++ else ++ x = x - y; ++ } ++ else ++ { ++ if (x + y >= z) ++ x = x + y - z; ++ else ++ x = x + y; ++ } ++ } ++ return x; ++} ++ ++int main(void) ++{ ++ if (foo (5,10,7,0,1) != 2) // x - y + z = -5 + 7 = 2 ++ abort (); ++ if (foo (50,10,7,0,1) != 40) // x - y = 40 ++ abort (); ++ if (foo (5,10,7,1,1) != 8) // x + y - z = 5 + 10 - 7 = 8 ++ abort (); ++ if (foo (5,10,70,1,1) != 15) // x + y = 15 ++ abort (); ++ return 0; ++} +diff --git a/gcc/testsuite/gcc.dg/ifcvt-6.c b/gcc/testsuite/gcc.dg/ifcvt-6.c +new file mode 100644 +index 000000000..7d2a8d58b +--- /dev/null ++++ b/gcc/testsuite/gcc.dg/ifcvt-6.c +@@ -0,0 +1,29 @@ ++/* { dg-do compile { target { aarch64*-*-* } } } */ ++/* { dg-options "-fdump-rtl-ce1 -O2 -fifcvt-allow-complicated-cmps --param max-rtl-if-conversion-unpredictable-cost=100 --param max-rtl-if-conversion-predictable-cost=100 --param=ifcvt-allow-register-renaming=2 " } */ ++ ++typedef unsigned int uint16_t; ++ ++uint16_t ++foo (uint16_t x, uint16_t y, uint16_t z, uint16_t a, ++ uint16_t b, uint16_t c, uint16_t d) ++{ ++ int i = 1; ++ int j = 1; ++ if (a > b) ++ { ++ j = x; ++ if (b > c) ++ i = y; ++ else ++ i = z; ++ } ++ else ++ { ++ j = y; ++ if (c > d) ++ i = z; ++ } ++ return i * j; ++} ++ ++/* { dg-final { scan-rtl-dump "7 true changes made" "ce1" } } */ +-- +2.33.0 + From 0e0248a528c7d4682d3b68dd12ce92605f42d9a8 Mon Sep 17 00:00:00 2001 From: wangding16 Date: Wed, 6 Dec 2023 11:53:59 +0800 Subject: [PATCH 7/7] [Sync] Sync patch from openeuler/gcc gcc.spec --- gcc.spec | 28 
+++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/gcc.spec b/gcc.spec index f1cd95e..0c11ad2 100644 --- a/gcc.spec +++ b/gcc.spec @@ -61,7 +61,7 @@ Summary: Various compilers (C, C++, Objective-C, ...) Name: gcc Version: %{gcc_version} -Release: 39 +Release: 40 License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD URL: https://gcc.gnu.org @@ -249,6 +249,16 @@ Patch138: 0138-Fix-ICE-bugs-in-transpose-test-cases-with-vector-ind.patch Patch139: 0139-Fix-errors-on-testsuite-c-c-tests-and-505.mcf_r.patch Patch140: 0140-Fix-an-error-in-memory-allocation-deallocation.patch Patch141: 0141-Fix-warnings-and-errors-with-debug-prints.patch +Patch142: 0142-crc-loop-optimization-initial.patch +Patch143: 0143-Perform-early-if-conversion-of-simple-arithmetic.patch +Patch144: 0144-Add-option-to-allow-matching-uaddsub-overflow-for-wi.patch +Patch145: 0145-Match-double-sized-mul-pattern.patch +Patch146: 0146-LOOP-CRC32-Add-Crc32-Optimization-in-Gzip-For-crc32-.patch +Patch147: 0147-add-insn-defs-and-correct-costs-for-cmlt-generation.patch +Patch148: 0148-Introduce-RTL-ifcvt-enhancements.patch +Patch149: 0149-Add-more-flexible-check-for-pointer-aliasing-during-.patch +Patch150: 0150-Implement-propagation-of-permutations-in-fwprop.patch +Patch151: 0151-Fix-bugs-and-add-tests-for-RTL-ifcvt.patch %global gcc_target_platform %{_arch}-linux-gnu @@ -843,6 +853,16 @@ not stable, so plugins must be rebuilt any time GCC is updated. %patch139 -p1 %patch140 -p1 %patch141 -p1 +%patch142 -p1 +%patch143 -p1 +%patch144 -p1 +%patch145 -p1 +%patch146 -p1 +%patch147 -p1 +%patch148 -p1 +%patch149 -p1 +%patch150 -p1 +%patch151 -p1 %build @@ -2867,6 +2887,12 @@ end %doc rpm.doc/changelogs/libcc1/ChangeLog* %changelog +* Wed Dec 6 2023 Wang Ding - 10.3.1-40 +- Type:Spec +- ID:NA +- SUG:NA +- DESC: Sync patch from openeuler/gcc + * Wed Nov 29 2023 Mingchuan Wu - 10.3.1-39 - Type:Spec - ID:NA