[Sync] Sync patches from openeuler/gcc

Xiong Zhou 2023-12-12 11:18:26 +08:00
parent de52087262
commit 1003614af1
8 changed files with 11949 additions and 1 deletion

File diff suppressed because it is too large


@@ -0,0 +1,477 @@
From 1e886b98ff7ffdac023dcee8645717f2849d2eb7 Mon Sep 17 00:00:00 2001
From: Diachkov Ilia WX1215920 <diachkov.ilia1@huawei-partners.com>
Date: Wed, 25 Oct 2023 18:12:28 +0300
Subject: [PATCH 1/6] Add maxmin and uzp1/uzp2 combining
---
gcc/config/aarch64/aarch64-simd.md | 339 +++++++++++++++++++++++++-
gcc/config/aarch64/predicates.md | 19 ++
gcc/testsuite/gcc.dg/combine-maxmin.c | 46 ++++
3 files changed, 399 insertions(+), 5 deletions(-)
create mode 100755 gcc/testsuite/gcc.dg/combine-maxmin.c
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 6049adc3f..7f707de57 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -1034,6 +1034,82 @@
[(set_attr "type" "neon_shift_imm<q>")]
)
+;; Simplify sign extension followed by truncation for the shift+neg operation.
+
+(define_insn_and_split "*aarch64_sshr_neg_v8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=w")
+ (vec_concat:V8HI
+ (truncate:V4HI
+ (ashiftrt:V4SI
+ (neg:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_operand:V8HI 1 "register_operand")
+ (match_operand:V8HI 3 "vect_par_cnst_lo_half"))))
+ (match_operand:V4SI 2 "maxmin_arith_shift_operand")))
+ (truncate:V4HI
+ (ashiftrt:V4SI
+ (neg:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 1)
+ (match_operand:V8HI 4 "vect_par_cnst_hi_half"))))
+ (match_dup 2)))))]
+ "TARGET_SIMD"
+ "#"
+ "&& true"
+ [(set (match_operand:V8HI 0 "register_operand" "=w")
+ (ashiftrt:V8HI
+ (neg:V8HI
+ (match_operand:V8HI 1 "register_operand" "w"))
+ (match_operand:V8HI 2 "aarch64_simd_imm_minus_one")))]
+ {
+ /* Reduce the shift amount to smaller mode. */
+ int val = INTVAL (CONST_VECTOR_ENCODED_ELT (operands[2], 0))
+ - (GET_MODE_UNIT_BITSIZE (GET_MODE (operands[2])) / 2);
+ operands[2] = aarch64_simd_gen_const_vector_dup (V8HImode, val);
+ }
+ [(set_attr "type" "multiple")]
+)
+
+;; The helper definition that allows the combiner to use the previous pattern.
+
+(define_insn_and_split "*aarch64_sshr_neg_tmpv8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=w")
+ (vec_concat:V8HI
+ (truncate:V4HI
+ (ashiftrt:V4SI
+ (neg:V4SI
+ (match_operand:V4SI 1 "register_operand" "w"))
+ (match_operand:V4SI 2 "maxmin_arith_shift_operand")))
+ (truncate:V4HI
+ (ashiftrt:V4SI
+ (neg:V4SI
+ (match_operand:V4SI 3 "register_operand" "w"))
+ (match_dup 2)))))]
+ "TARGET_SIMD"
+ "#"
+ "&& true"
+ [(set (match_operand:V4SI 1 "register_operand" "=w")
+ (ashiftrt:V4SI
+ (neg:V4SI
+ (match_dup 1))
+ (match_operand:V4SI 2 "maxmin_arith_shift_operand")))
+ (set (match_operand:V4SI 3 "register_operand" "=w")
+ (ashiftrt:V4SI
+ (neg:V4SI
+ (match_dup 3))
+ (match_dup 2)))
+ (set (match_operand:V8HI 0 "register_operand" "=w")
+ (vec_concat:V8HI
+ (truncate:V4HI
+ (match_dup 1))
+ (truncate:V4HI
+ (match_dup 3))))]
+ ""
+ [(set_attr "type" "multiple")]
+)
+
(define_insn "*aarch64_simd_sra<mode>"
[(set (match_operand:VDQ_I 0 "register_operand" "=w")
(plus:VDQ_I
@@ -1459,6 +1535,78 @@
[(set_attr "type" "neon_minmax<q>")]
)
+;; Use sequential smax+smin to replace vector arithmetic operations like this:
+;; a = ((x & ~((1 << 8)-1)) ? (-x)>>31 & ((1 << 8)-1) : x);
+;; TODO: maybe extend to scalar operations.
+
+(define_insn_and_split "*aarch64_maxmin_arith<mode>"
+ [(set (match_operand:VDQHSD 0 "register_operand" "=w")
+ (xor:VDQHSD
+ (and:VDQHSD
+ (xor:VDQHSD
+ (ashiftrt:VDQHSD
+ (neg:VDQHSD
+ (match_operand:VDQHSD 1 "register_operand"))
+ (match_operand:VDQHSD 2 "maxmin_arith_shift_operand"))
+ (match_dup 1))
+ (neg:VDQHSD
+ (eq:VDQHSD
+ (and:VDQHSD
+ (match_dup 1)
+ (match_operand:VDQHSD 3 "aarch64_bic_imm_for_maxmin"))
+ (match_operand:VDQHSD 4 "aarch64_simd_or_scalar_imm_zero"))))
+ (ashiftrt:VDQHSD
+ (neg:VDQHSD
+ (match_dup 1))
+ (match_dup 2))))]
+ "TARGET_SIMD && !reload_completed"
+ "#"
+ "&& true"
+ [(set (match_operand:VDQHSD 5 "register_operand" "w") (match_dup 3))
+ (set (match_operand:VDQHSD 6 "register_operand" "w") (match_dup 4))
+ (set (match_operand:VDQHSD 0 "register_operand" "=w")
+ (smax:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")
+ (match_operand:VDQHSD 6 "register_operand" "w")))
+ (set (match_operand:VDQHSD 0 "register_operand" "=w")
+ (smin:VDQHSD (match_operand:VDQHSD 0 "register_operand" "w")
+ (match_operand:VDQHSD 5 "register_operand" "w")))]
+ {
+ if (can_create_pseudo_p ())
+ {
+ int val = INTVAL (CONST_VECTOR_ENCODED_ELT (operands[3], 0));
+ operands[3] = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+ ~val);
+ operands[5] = gen_reg_rtx (<MODE>mode);
+ operands[6] = gen_reg_rtx (<MODE>mode);
+ }
+ else
+ FAIL;
+ }
+ [(set_attr "type" "neon_minmax<q>")]
+)
+
+;; The helper definition that allows the combiner to use the previous pattern.
+
+(define_insn_and_split "*aarch64_maxmin_tmp<mode>"
+ [(set (match_operand:VDQHSD 0 "register_operand" "=w")
+ (ashiftrt:VDQHSD
+ (neg:VDQHSD
+ (match_operand:VDQHSD 1 "register_operand" "w"))
+ (match_operand:VDQHSD 2 "maxmin_arith_shift_operand")))]
+ "TARGET_SIMD"
+ "#"
+ "&& reload_completed"
+ [(set (match_operand:VDQHSD 0 "register_operand")
+ (neg:VDQHSD
+ (match_operand:VDQHSD 1 "register_operand" "w")))
+ (set (match_dup 0)
+ (ashiftrt:VDQHSD
+ (match_dup 0)
+ (match_operand:VDQHSD 2 "maxmin_arith_shift_operand")))]
+ ""
+ [(set_attr "type" "neon_minmax<q>")]
+)
+
;; Pairwise FP Max/Min operations.
(define_insn "aarch64_<maxmin_uns>p<mode>"
[(set (match_operand:VHSDF 0 "register_operand" "=w")
@@ -1599,7 +1747,8 @@
DONE;
})
-;; For quads.
+;; For quads. Use UZP1 on the narrower type, which discards the high part of
+;; each wide element.
(define_insn "vec_pack_trunc_<mode>"
[(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
@@ -1609,12 +1758,32 @@
"TARGET_SIMD"
{
if (BYTES_BIG_ENDIAN)
- return "xtn\\t%0.<Vntype>, %2.<Vtype>\;xtn2\\t%0.<V2ntype>, %1.<Vtype>";
+ return "uzp1\\t%0.<V2ntype>, %2.<V2ntype>, %1.<V2ntype>";
else
- return "xtn\\t%0.<Vntype>, %1.<Vtype>\;xtn2\\t%0.<V2ntype>, %2.<Vtype>";
+ return "uzp1\\t%0.<V2ntype>, %1.<V2ntype>, %2.<V2ntype>";
}
- [(set_attr "type" "multiple")
- (set_attr "length" "8")]
+ [(set_attr "type" "neon_permute<q>")
+ (set_attr "length" "4")]
+)
+
+(define_insn "vec_pack_trunc_shifted_<mode>"
+ [(set (match_operand:<VNARROWQ2> 0 "register_operand" "=&w")
+ (vec_concat:<VNARROWQ2>
+ (truncate:<VNARROWQ>
+ (ashiftrt:VQN (match_operand:VQN 1 "register_operand" "w")
+ (match_operand:VQN 2 "half_size_operand" "w")))
+ (truncate:<VNARROWQ>
+ (ashiftrt:VQN (match_operand:VQN 3 "register_operand" "w")
+ (match_operand:VQN 4 "half_size_operand" "w")))))]
+ "TARGET_SIMD"
+ {
+ if (BYTES_BIG_ENDIAN)
+ return "uzp2\\t%0.<V2ntype>, %3.<V2ntype>, %1.<V2ntype>";
+ else
+ return "uzp2\\t%0.<V2ntype>, %1.<V2ntype>, %3.<V2ntype>";
+ }
+ [(set_attr "type" "neon_permute<q>")
+ (set_attr "length" "4")]
)
;; Widening operations.
@@ -4852,6 +5021,166 @@
[(set_attr "type" "neon_tst<q>")]
)
+;; Simplify sign extension followed by truncation for the cmtst-like operation.
+
+(define_insn_and_split "*aarch64_cmtst_arith_v8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=w")
+ (vec_concat:V8HI
+ (plus:V4HI
+ (truncate:V4HI
+ (eq:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (and:V8HI
+ (match_operand:V8HI 1 "register_operand")
+ (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin"))
+ (match_operand:V8HI 3 "vect_par_cnst_lo_half")))
+ (match_operand:V4SI 4 "aarch64_simd_or_scalar_imm_zero")))
+ (match_operand:V4HI 5 "aarch64_simd_imm_minus_one"))
+ (plus:V4HI
+ (truncate:V4HI
+ (eq:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (and:V8HI
+ (match_dup 1)
+ (match_dup 2))
+ (match_operand:V8HI 6 "vect_par_cnst_hi_half")))
+ (match_dup 4)))
+ (match_dup 5))))]
+ "TARGET_SIMD && !reload_completed"
+ "#"
+ "&& true"
+ [(set (match_operand:V8HI 6 "register_operand" "=w")
+ (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin"))
+ (set (match_operand:V8HI 0 "register_operand" "=w")
+ (plus:V8HI
+ (eq:V8HI
+ (and:V8HI
+ (match_operand:V8HI 1 "register_operand" "w")
+ (match_dup 6))
+ (match_operand:V8HI 4 "aarch64_simd_imm_zero"))
+ (match_operand:V8HI 5 "aarch64_simd_imm_minus_one")))]
+ {
+ if (can_create_pseudo_p ())
+ {
+ int val = INTVAL (CONST_VECTOR_ENCODED_ELT (operands[4], 0));
+ operands[4] = aarch64_simd_gen_const_vector_dup (V8HImode, val);
+ int val2 = INTVAL (CONST_VECTOR_ENCODED_ELT (operands[5], 0));
+ operands[5] = aarch64_simd_gen_const_vector_dup (V8HImode, val2);
+
+ operands[6] = gen_reg_rtx (V8HImode);
+ }
+ else
+ FAIL;
+ }
+ [(set_attr "type" "neon_tst_q")]
+)
+
+;; Three helper definitions that allow the combiner to use the previous pattern.
+
+(define_insn_and_split "*aarch64_cmtst_arith_tmp_lo_v8hi"
+ [(set (match_operand:V4SI 0 "register_operand" "=w")
+ (neg:V4SI
+ (eq:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (and:V8HI
+ (match_operand:V8HI 1 "register_operand")
+ (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin"))
+ (match_operand:V8HI 3 "vect_par_cnst_lo_half")))
+ (match_operand:V4SI 4 "aarch64_simd_or_scalar_imm_zero"))))]
+ "TARGET_SIMD && !reload_completed"
+ "#"
+ "&& true"
+ [(set (match_operand:V8HI 5 "register_operand" "=w")
+ (and:V8HI
+ (match_operand:V8HI 1 "register_operand")
+ (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin")))
+ (set (match_operand:V4SI 0 "register_operand" "=w")
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 5)
+ (match_operand:V8HI 3 "vect_par_cnst_lo_half"))))
+ (set (match_dup 0)
+ (neg:V4SI
+ (eq:V4SI
+ (match_dup 0)
+ (match_operand:V4SI 4 "aarch64_simd_or_scalar_imm_zero"))))]
+ {
+ if (can_create_pseudo_p ())
+ operands[5] = gen_reg_rtx (V8HImode);
+ else
+ FAIL;
+ }
+ [(set_attr "type" "multiple")]
+)
+
+(define_insn_and_split "*aarch64_cmtst_arith_tmp_hi_v8hi"
+ [(set (match_operand:V4SI 0 "register_operand" "=w")
+ (neg:V4SI
+ (eq:V4SI
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (and:V8HI
+ (match_operand:V8HI 1 "register_operand")
+ (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin"))
+ (match_operand:V8HI 3 "vect_par_cnst_hi_half")))
+ (match_operand:V4SI 4 "aarch64_simd_or_scalar_imm_zero"))))]
+ "TARGET_SIMD && !reload_completed"
+ "#"
+ "&& true"
+ [(set (match_operand:V8HI 5 "register_operand" "=w")
+ (and:V8HI
+ (match_operand:V8HI 1 "register_operand")
+ (match_operand:V8HI 2 "aarch64_bic_imm_for_maxmin")))
+ (set (match_operand:V4SI 0 "register_operand" "=w")
+ (sign_extend:V4SI
+ (vec_select:V4HI
+ (match_dup 5)
+ (match_operand:V8HI 3 "vect_par_cnst_hi_half"))))
+ (set (match_dup 0)
+ (neg:V4SI
+ (eq:V4SI
+ (match_dup 0)
+ (match_operand:V4SI 4 "aarch64_simd_or_scalar_imm_zero"))))]
+ {
+ if (can_create_pseudo_p ())
+ operands[5] = gen_reg_rtx (V8HImode);
+ else
+ FAIL;
+ }
+ [(set_attr "type" "multiple")]
+)
+
+(define_insn_and_split "*aarch64_cmtst_arith_tmpv8hi"
+ [(set (match_operand:V8HI 0 "register_operand" "=w")
+ (vec_concat:V8HI
+ (truncate:V4HI
+ (not:V4SI
+ (match_operand:V4SI 1 "register_operand" "w")))
+ (truncate:V4HI
+ (not:V4SI
+ (match_operand:V4SI 2 "register_operand" "w")))))]
+ "TARGET_SIMD"
+ "#"
+ "&& true"
+ [(set (match_operand:V4SI 1 "register_operand" "=w")
+ (not:V4SI
+ (match_dup 1)))
+ (set (match_operand:V4SI 2 "register_operand" "=w")
+ (not:V4SI
+ (match_dup 2)))
+ (set (match_operand:V8HI 0 "register_operand" "=w")
+ (vec_concat:V8HI
+ (truncate:V4HI
+ (match_dup 1))
+ (truncate:V4HI
+ (match_dup 2))))]
+ ""
+ [(set_attr "type" "multiple")]
+)
+
(define_insn_and_split "aarch64_cmtstdi"
[(set (match_operand:DI 0 "register_operand" "=w,r")
(neg:DI
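A scalar sketch (illustrative only, not part of the patch) of what the two packing patterns above, vec_pack_trunc_<mode> and the new vec_pack_trunc_shifted_<mode>, compute on little-endian targets; big-endian swaps the operand order exactly as the patterns do, and the function names below are editorial:

#include <stdint.h>

/* Model of vec_pack_trunc_<mode>: plain truncation keeps the low half of
   every wide element, which is what UZP1 extracts on the narrow .8h view
   of the two inputs.  */
void
pack_trunc (uint16_t out[8], const uint32_t a[4], const uint32_t b[4])
{
  for (int i = 0; i < 4; i++)
    {
      out[i]     = (uint16_t) a[i];   /* low 16 bits of lane i of a */
      out[i + 4] = (uint16_t) b[i];   /* low 16 bits of lane i of b */
    }
}

/* Model of vec_pack_trunc_shifted_<mode>: an arithmetic shift by half the
   element width followed by truncation keeps the high half of every wide
   element instead, which is what UZP2 extracts.  */
void
pack_trunc_shifted (uint16_t out[8], const int32_t a[4], const int32_t b[4])
{
  for (int i = 0; i < 4; i++)
    {
      out[i]     = (uint16_t) (a[i] >> 16);
      out[i + 4] = (uint16_t) (b[i] >> 16);
    }
}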
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 1754b1eff..3cd83334b 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -91,6 +91,25 @@
(match_test "aarch64_simd_valid_immediate (op, NULL,
AARCH64_CHECK_ORR)"))))
+(define_predicate "aarch64_bic_imm_for_maxmin"
+ (match_code "const_vector")
+{
+ if (!aarch64_simd_valid_immediate (op, NULL, AARCH64_CHECK_BIC))
+ return false;
+ op = unwrap_const_vec_duplicate (op);
+ unsigned int size = GET_MODE_UNIT_BITSIZE (mode);
+ return CONST_INT_P (op)
+ && ((~UINTVAL (op)) < (((long unsigned int) 1 << size) - 1));
+})
+
+(define_predicate "maxmin_arith_shift_operand"
+ (match_code "const_vector")
+{
+ op = unwrap_const_vec_duplicate (op);
+ unsigned int size = GET_MODE_UNIT_BITSIZE (mode) - 1;
+ return CONST_INT_P (op) && (UINTVAL (op) == size);
+})
+
(define_predicate "aarch64_reg_or_bic_imm"
(ior (match_operand 0 "register_operand")
(and (match_code "const_vector")
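The two new predicates only accept duplicated constant vectors; restated as plain C over the duplicated element value (a sketch for illustration, not GCC code, with the function names invented here):

#include <stdbool.h>
#include <stdint.h>

/* maxmin_arith_shift_operand accepts only the "element width - 1" shift
   count, e.g. 15 for V8HI or 31 for V4SI -- the (-x)>>31 of the clip
   idiom.  `val' stands for the duplicated element value.  */
static bool
maxmin_arith_shift_ok (uint64_t val, unsigned elem_bits)
{
  return val == elem_bits - 1;
}

/* aarch64_bic_imm_for_maxmin additionally requires a valid BIC immediate
   encoding (aarch64_simd_valid_immediate in the real predicate); the
   arithmetic part shown here demands that the complement of the
   sign-extended mask is small, e.g. ~0xff...ff00 == 0xff for an 8-bit
   clip range.  Assumes elem_bits < 64.  */
static bool
bic_imm_for_maxmin_ok (uint64_t val, unsigned elem_bits)
{
  return ~val < ((1ULL << elem_bits) - 1);
}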
diff --git a/gcc/testsuite/gcc.dg/combine-maxmin.c b/gcc/testsuite/gcc.dg/combine-maxmin.c
new file mode 100755
index 000000000..06bce7029
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/combine-maxmin.c
@@ -0,0 +1,46 @@
+/* { dg-do compile { target aarch64-*-* } } */
+/* { dg-options "-O3 -fdump-rtl-combine-all" } */
+
+/* The test checks the usage of smax/smin insns for clip evaluation and
+ * uzp1/uzp2 insns for vector element narrowing.  It is inspired by
+ * the x264 codec sources. */
+
+typedef unsigned char uint8_t;
+typedef long int intptr_t;
+typedef signed short int int16_t;
+
+static __attribute__((always_inline)) inline uint8_t clip (int x )
+{
+ return ( (x & ~((1 << 8)-1)) ? (-x)>>31 & ((1 << 8)-1) : x );
+}
+
+void hf (uint8_t *dsth, uint8_t *dstv, uint8_t *dstc, uint8_t *src,
+ intptr_t stride, int width, int height, int16_t *buf)
+{
+ const int pad = (8 > 9) ? (-10 * ((1 << 8)-1)) : 0;
+ for( int y = 0; y < height; y++ ) {
+ for( int x = -2; x < width+3; x++ ) {
+ int v = ((src)[x-2*stride] + (src)[x+3*stride] - 5*((src)[x-stride]
+ + (src)[x+2*stride]) + 20*((src)[x] + (src)[x+stride]));
+ dstv[x] = clip ( (v + 16) >> 5 );
+ buf[x+2] = v + pad;
+ }
+ for( int x = 0; x < width; x++ )
+ dstc[x] = clip ((((buf+2)[x-2*1] + (buf+2)[x+3*1] - 5*((buf+2)[x-1]
+ + (buf+2)[x+2*1]) + 20*((buf+2)[x] + (buf+2)[x+1]))
+ - 32*pad + 512) >> 10);
+ for( int x = 0; x < width; x++ )
+ dsth[x] = clip ((((src)[x-2*1] + (src)[x+3*1] - 5*((src)[x-1]
+ + (src)[x+2*1]) + 20*((src)[x] + (src)[x+1]))
+ + 16) >> 5);
+ dsth += stride;
+ dstv += stride;
+ dstc += stride;
+ src += stride;
+ }
+}
+
+/* { dg-final { scan-assembler-times {smax\t} 4 } } */
+/* { dg-final { scan-assembler-times {smin\t} 4 } } */
+/* { dg-final { scan-assembler-times {cmtst\t} 2 } } */
+/* { dg-final { scan-assembler-times {uzp1\t} 6 } } */
--
2.33.0
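The scalar identity the patch above exploits: the branchless clip from the testcase is simply a clamp to [0, 255], which the new pattern rewrites as SMAX with zero followed by SMIN with the mask. A small self-checking sketch (illustrative, not part of the patch):

#include <assert.h>

/* The branchless clip from the testcase: any bit above the low 8 set
   means x is outside [0, 255]; (-x) >> 31 is then all-ones for x > 255
   and zero for x < 0 (assumes arithmetic right shift of negative ints,
   as GCC provides, and |x| well below INT_MAX).  */
static inline int
clip_branchless (int x)
{
  return (x & ~((1 << 8) - 1)) ? ((-x) >> 31) & ((1 << 8) - 1) : x;
}

/* What the new pattern emits, element-wise: SMAX with 0 followed by
   SMIN with 255, i.e. an ordinary clamp.  */
static inline int
clip_minmax (int x)
{
  int t = x > 0 ? x : 0;
  return t < 255 ? t : 255;
}

int
main (void)
{
  for (int x = -100000; x <= 100000; x++)
    assert (clip_branchless (x) == clip_minmax (x));
  return 0;
}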

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@@ -0,0 +1,233 @@
From 3a48cd1be0915a0fabbfb3a30bd9b67ccd5c65d3 Mon Sep 17 00:00:00 2001
From: Diachkov Ilia WX1215920 <diachkov.ilia1@huawei-partners.com>
Date: Tue, 12 Dec 2023 10:41:12 +0800
Subject: [PATCH 6/6] Implement AES pattern matching
---
gcc/Makefile.in | 1 +
gcc/common.opt | 4 ++++
gcc/config/aarch64/aarch64.c | 24 +++++++++++++++++++++
gcc/doc/tm.texi | 29 +++++++++++++++++++++++++
gcc/doc/tm.texi.in | 12 +++++++++++
gcc/passes.def | 1 +
gcc/target.def | 41 ++++++++++++++++++++++++++++++++++++
gcc/timevar.def | 1 +
gcc/tree-pass.h | 1 +
9 files changed, 114 insertions(+)
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 31bf2cde2..75b28722e 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1288,6 +1288,7 @@ OBJS = \
cgraphunit.o \
cgraphclones.o \
combine.o \
+ crypto-accel.o \
combine-stack-adj.o \
compare-elim.o \
context.o \
diff --git a/gcc/common.opt b/gcc/common.opt
index 36b016253..eb995f701 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -1069,6 +1069,10 @@ floop-crc
Common Report Var(flag_loop_crc) Optimization
Do the loop crc conversion.
+fcrypto-accel-aes
+Common Report Var(flag_crypto_accel_aes) Init(0) Optimization
+Perform crypto acceleration AES pattern matching.
+
fauto-inc-dec
Common Report Var(flag_auto_inc_dec) Init(1) Optimization
Generate auto-inc/dec instructions.
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index ae9e0802b..75efbcb97 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -23894,6 +23894,30 @@ is_aarch64_stp_insn (int icode)
return false;
}
+machine_mode
+aarch64_get_v16qi_mode ()
+{
+ return V16QImode;
+}
+
+#undef TARGET_GET_V16QI_MODE
+#define TARGET_GET_V16QI_MODE aarch64_get_v16qi_mode
+
+#undef TARGET_GEN_REV32V16QI
+#define TARGET_GEN_REV32V16QI gen_aarch64_rev32v16qi
+
+#undef TARGET_GEN_AESEV16QI
+#define TARGET_GEN_AESEV16QI gen_aarch64_crypto_aesev16qi
+
+#undef TARGET_GEN_AESDV16QI
+#define TARGET_GEN_AESDV16QI gen_aarch64_crypto_aesdv16qi
+
+#undef TARGET_GEN_AESMCV16QI
+#define TARGET_GEN_AESMCV16QI gen_aarch64_crypto_aesmcv16qi
+
+#undef TARGET_GEN_AESIMCV16QI
+#define TARGET_GEN_AESIMCV16QI gen_aarch64_crypto_aesimcv16qi
+
#undef TARGET_IS_LDP_INSN
#define TARGET_IS_LDP_INSN is_aarch64_ldp_insn
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index ac1d665c5..4a998aa76 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -11870,6 +11870,35 @@ object files that are not referenced from @code{main} and uses export
lists.
@end defmac
+@deftypefn {Target Hook} machine_mode TARGET_GET_V16QI_MODE ()
+This function returns the mode for a vector of 16 byte elements if the target supports it.
+@end deftypefn
+
+@deftypefn {Target Hook} rtx TARGET_GEN_REV32V16QI (rtx @var{dest}, rtx @var{src})
+This function generates the byte-reverse instruction
+ for a vector of 16 byte elements if the target supports it.
+@end deftypefn
+
+@deftypefn {Target Hook} rtx TARGET_GEN_AESEV16QI (rtx @var{dest}, rtx @var{src1}, rtx @var{src2})
+This function generates the AES encryption instruction
+ for a vector of 16 byte elements if the target supports it.
+@end deftypefn
+
+@deftypefn {Target Hook} rtx TARGET_GEN_AESDV16QI (rtx @var{dest}, rtx @var{src1}, rtx @var{src2})
+This function generates the AES decryption instruction
+ for a vector of 16 byte elements if the target supports it.
+@end deftypefn
+
+@deftypefn {Target Hook} rtx TARGET_GEN_AESMCV16QI (rtx @var{dest}, rtx @var{src})
+This function generates the AES mix columns instruction
+ for a vector of 16 byte elements if the target supports it.
+@end deftypefn
+
+@deftypefn {Target Hook} rtx TARGET_GEN_AESIMCV16QI (rtx @var{dest}, rtx @var{src})
+This function generates the AES inverse mix columns instruction
+ for a vector of 16 byte elements if the target supports it.
+@end deftypefn
+
@deftypefn {Target Hook} bool TARGET_IS_LDP_INSN (int @var{icode})
Return true if icode is corresponding to any of the LDP instruction types.
@end deftypefn
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index 0cd70dda4..f7094d8c2 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -8010,6 +8010,18 @@ object files that are not referenced from @code{main} and uses export
lists.
@end defmac
+@hook TARGET_GET_V16QI_MODE
+
+@hook TARGET_GEN_REV32V16QI
+
+@hook TARGET_GEN_AESEV16QI
+
+@hook TARGET_GEN_AESDV16QI
+
+@hook TARGET_GEN_AESMCV16QI
+
+@hook TARGET_GEN_AESIMCV16QI
+
@hook TARGET_IS_LDP_INSN
@hook TARGET_IS_STP_INSN
diff --git a/gcc/passes.def b/gcc/passes.def
index ba13d897c..da5d71646 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -448,6 +448,7 @@ along with GCC; see the file COPYING3. If not see
NEXT_PASS (pass_rtl_fwprop_addr);
NEXT_PASS (pass_inc_dec);
NEXT_PASS (pass_initialize_regs);
+ NEXT_PASS (pass_crypto_accel);
NEXT_PASS (pass_ud_rtl_dce);
NEXT_PASS (pass_combine);
NEXT_PASS (pass_if_after_combine);
diff --git a/gcc/target.def b/gcc/target.def
index 48c8a8234..b4dff78ea 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -2727,6 +2727,47 @@ modes and they have different conditional execution capability, such as ARM.",
bool, (void),
default_have_conditional_execution)
+DEFHOOK
+(get_v16qi_mode,
+ "This function get the 16 byte elements vector mode if target supports this.",
+ machine_mode, (),
+ NULL)
+
+DEFHOOK
+(gen_rev32v16qi,
+ "This function generate the byte reverse instruction\n\
+ of 16 byte elements vector if target supports this.",
+ rtx, (rtx dest, rtx src),
+ NULL)
+
+DEFHOOK
+(gen_aesev16qi,
+ "This function generate the AES encryption instruction\n\
+ of 16 byte elements vector if target supports this.",
+ rtx, (rtx dest, rtx src1, rtx src2),
+ NULL)
+
+DEFHOOK
+(gen_aesdv16qi,
+ "This function generate the AES decryption instruction\n\
+ of 16 byte elements vector if target supports this.",
+ rtx, (rtx dest, rtx src1, rtx src2),
+ NULL)
+
+DEFHOOK
+(gen_aesmcv16qi,
+ "This function generate the AES mix columns instruction\n\
+ of 16 byte elements vector if target supports this.",
+ rtx, (rtx dest, rtx src),
+ NULL)
+
+DEFHOOK
+(gen_aesimcv16qi,
+ "This function generate the AES inversed mix columns instruction\n\
+ of 16 byte elements vector if target supports this.",
+ rtx, (rtx dest, rtx src),
+ NULL)
+
DEFHOOK
(is_ldp_insn,
"Return true if icode is corresponding to any of the LDP instruction types.",
diff --git a/gcc/timevar.def b/gcc/timevar.def
index 24caf1b5d..9ca74dffe 100644
--- a/gcc/timevar.def
+++ b/gcc/timevar.def
@@ -258,6 +258,7 @@ DEFTIMEVAR (TV_AUTO_INC_DEC , "auto inc dec")
DEFTIMEVAR (TV_CSE2 , "CSE 2")
DEFTIMEVAR (TV_BRANCH_PROB , "branch prediction")
DEFTIMEVAR (TV_COMBINE , "combiner")
+DEFTIMEVAR (TV_CRYPTO_ACCEL , "crypto accel")
DEFTIMEVAR (TV_IFCVT , "if-conversion")
DEFTIMEVAR (TV_MODE_SWITCH , "mode switching")
DEFTIMEVAR (TV_SMS , "sms modulo scheduling")
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 232a3fdf6..29dc7e34b 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -570,6 +570,7 @@ extern rtl_opt_pass *make_pass_cse2 (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_df_initialize_opt (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_df_initialize_no_opt (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_reginfo_init (gcc::context *ctxt);
+extern rtl_opt_pass *make_pass_crypto_accel (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_inc_dec (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_stack_ptr_mod (gcc::context *ctxt);
extern rtl_opt_pass *make_pass_initialize_regs (gcc::context *ctxt);
--
2.33.0
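How a pass consumes these hooks is not shown in this commit (crypto-accel.c itself arrives with the separate "AES-Add-lost-files" patch listed in the spec below), but the hook signatures in target.def imply a usage pattern like the following sketch. Everything here beyond the hook names and signatures is an assumption, and the usual GCC RTL-pass headers (rtl.h, target.h, emit-rtl.h, ...) are taken as already included:

/* Minimal sketch, NOT the actual crypto-accel.c.  `state' and
   `round_key' are illustrative V16QI pseudo-register names.  */
static void
emit_aes_encrypt_round (rtx state, rtx round_key)
{
  /* All of the new hooks default to NULL, so targets other than
     aarch64 are simply skipped.  */
  if (!targetm.get_v16qi_mode
      || !targetm.gen_aesev16qi
      || !targetm.gen_aesmcv16qi)
    return;

  machine_mode v16qi = targetm.get_v16qi_mode ();
  rtx tmp = gen_reg_rtx (v16qi);

  /* One AES round: AESE (AddRoundKey + SubBytes + ShiftRows) followed
     by AESMC (MixColumns), mapped onto the aarch64 generators the
     patch registers above.  */
  emit_insn (targetm.gen_aesev16qi (tmp, state, round_key));
  emit_insn (targetm.gen_aesmcv16qi (state, tmp));
}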

File diff suppressed because it is too large


@@ -61,7 +61,7 @@
Summary: Various compilers (C, C++, Objective-C, ...)
Name: gcc
Version: %{gcc_version}
Release: 42
Release: 43
License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD
URL: https://gcc.gnu.org
@@ -261,6 +261,13 @@ Patch150: 0150-Implement-propagation-of-permutations-in-fwprop.patch
Patch151: 0151-Fix-bugs-and-add-tests-for-RTL-ifcvt.patch
Patch152: 0152-Add-LLC-Allocation-Pass.patch
Patch153: 0153-LLC-add-extending-outer-loop.patch
Patch154: 0154-Loop-CRC32-Judge-null-on-pointers-and-solving-coding.patch
Patch155: 0155-Add-maxmin-and-uzp1-uzp2-combining.patch
Patch156: 0156-add-icp-optimization.patch
Patch157: 0157-Add-split-complex-instructions-pass.patch
Patch158: 0158-Implement-IPA-prefetch-optimization.patch
Patch159: 0159-Implement-AES-pattern-matching.patch
Patch160: 0160-AES-Add-lost-files.patch
%global gcc_target_platform %{_arch}-linux-gnu
@@ -867,6 +874,13 @@ not stable, so plugins must be rebuilt any time GCC is updated.
%patch151 -p1
%patch152 -p1
%patch153 -p1
%patch154 -p1
%patch155 -p1
%patch156 -p1
%patch157 -p1
%patch158 -p1
%patch159 -p1
%patch160 -p1
%build
@@ -2891,6 +2905,12 @@ end
%doc rpm.doc/changelogs/libcc1/ChangeLog*
%changelog
* Tue Dec 12 2023 Xiong Zhou <xiongzhou4@huawei.com> - 10.3.1-43
- Type:Spec
- ID:NA
- SUG:NA
- DESC: Sync patches from openeuler/gcc
* Tue Dec 12 2023 Shujian Zhao <zhaoshujian@huawei.com> - 10.3.1-42
- Type:Spec
- ID:NA