Add feedback-directed LLC allocation and support for the LLC prefetch instruction
This commit is contained in:
parent
22381869b4
commit
80f6018297
69
0190-sync-LLC-difference-between-source-and-patch-code.patch
Normal file
69
0190-sync-LLC-difference-between-source-and-patch-code.patch
Normal file
@ -0,0 +1,69 @@
|
||||
From 0f667a2f934023d2dd1636572f2dc8391334d7f8 Mon Sep 17 00:00:00 2001
|
||||
From: liuf9 <liufeiyang6@huawei.com>
|
||||
Date: Wed, 29 May 2024 20:14:02 +0800
|
||||
Subject: [PATCH] b
|
||||
|
||||
---
|
||||
gcc/tree-ssa-llc-allocate.c | 13 ++++++-------
|
||||
1 file changed, 6 insertions(+), 7 deletions(-)
|
||||
|
||||
diff --git a/gcc/tree-ssa-llc-allocate.c b/gcc/tree-ssa-llc-allocate.c
|
||||
index 297790a..62b5f18 100644
|
||||
--- a/gcc/tree-ssa-llc-allocate.c
|
||||
+++ b/gcc/tree-ssa-llc-allocate.c
|
||||
@@ -1527,7 +1527,7 @@ trace_ref_dimension_and_loop_bounds (data_ref &mem_ref)
|
||||
|
||||
tree
|
||||
get_cur_loop_niters (map<class loop*, vector<data_ref> > &loop_refs,
|
||||
- class loop* loop)
|
||||
+ class loop* loop)
|
||||
{
|
||||
if (loop_refs.count (loop) == 0)
|
||||
return NULL_TREE;
|
||||
@@ -1565,7 +1565,6 @@ trace_outer_loop_depth (tree niters, unsigned start_depth)
|
||||
{
|
||||
/* Trace the SSA that define this niter. */
|
||||
def_stmt = SSA_NAME_DEF_STMT (niters);
|
||||
- enum gimple_code stmt_code = gimple_code (def_stmt);
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
fprintf (dump_file, "ssa_name of niters: ");
|
||||
@@ -1575,7 +1574,8 @@ trace_outer_loop_depth (tree niters, unsigned start_depth)
|
||||
fprintf (dump_file, "\n");
|
||||
}
|
||||
/* Termination condition of dfs. Return the depth of the bb block. */
|
||||
- if (stmt_code == GIMPLE_PHI || stmt_code == GIMPLE_NOP)
|
||||
+ if (gimple_code (def_stmt) == GIMPLE_PHI
|
||||
+ || gimple_code (def_stmt) == GIMPLE_NOP)
|
||||
{
|
||||
basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (niters));
|
||||
if (def_bb == NULL || def_bb->loop_father == NULL)
|
||||
@@ -1584,13 +1584,13 @@ trace_outer_loop_depth (tree niters, unsigned start_depth)
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
fprintf (dump_file, "Stop tracing the outer loop depth, ");
|
||||
- fprintf (dump_file, "current depth: %d, current bb: %d\n", \
|
||||
+ fprintf (dump_file, "current depth: %d, current bb: %d\n",
|
||||
ret_depth, def_bb->index);
|
||||
}
|
||||
return ret_depth;
|
||||
}
|
||||
/* 'ASSIGN': Use dfs to trace the rhs of the assignment statement. */
|
||||
- else if (stmt_code == GIMPLE_ASSIGN)
|
||||
+ else if (gimple_code (def_stmt) == GIMPLE_ASSIGN)
|
||||
{
|
||||
tree rhs = gimple_assign_rhs1 (def_stmt);
|
||||
if (TREE_CODE (rhs) == TARGET_MEM_REF)
|
||||
@@ -1605,8 +1605,7 @@ trace_outer_loop_depth (tree niters, unsigned start_depth)
|
||||
/* 'ASSIGN': start from 1 because op[0] is the lhs. */
|
||||
for (unsigned i = 1; i < operand_num; i++)
|
||||
{
|
||||
- tree subtree = GIMPLE_CHECK2<const gassign *>
|
||||
- (def_stmt)->op[i];
|
||||
+ tree subtree = dyn_cast<gassign *>(def_stmt)->op[i];
|
||||
if (subtree == NULL)
|
||||
continue;
|
||||
unsigned depth = trace_outer_loop_depth (subtree, \
|
||||
--
|
||||
2.33.0
|
||||
|
||||
640
0191-LLC-Allocation-Bugfix-Fix-ambiguous-reference-due-to.patch
Normal file
640
0191-LLC-Allocation-Bugfix-Fix-ambiguous-reference-due-to.patch
Normal file
@ -0,0 +1,640 @@
|
||||
From 5acce23a2d8412df874f78d0b703c9643d15ecc2 Mon Sep 17 00:00:00 2001
|
||||
From: liuf9 <liufeiyang6@huawei.com>
|
||||
Date: Tue, 27 Feb 2024 15:40:06 +0800
|
||||
Subject: [PATCH 1/4] [LLC Allocation][Bugfix] Fix ambiguous reference due to
|
||||
namespace.
|
||||
|
||||
---
|
||||
gcc/tree-ssa-llc-allocate.c | 189 +++++++++++++++++++-----------------
|
||||
1 file changed, 98 insertions(+), 91 deletions(-)
|
||||
|
||||
diff --git a/gcc/tree-ssa-llc-allocate.c b/gcc/tree-ssa-llc-allocate.c
|
||||
index 62b5f18ad..0b4ad637d 100644
|
||||
--- a/gcc/tree-ssa-llc-allocate.c
|
||||
+++ b/gcc/tree-ssa-llc-allocate.c
|
||||
@@ -79,8 +79,6 @@ const unsigned int WRITE_COST = 2;
|
||||
|
||||
namespace {
|
||||
|
||||
-using namespace std;
|
||||
-
|
||||
/* loop bound info of the memory reference located. */
|
||||
struct loop_bound
|
||||
{
|
||||
@@ -144,7 +142,7 @@ struct data_ref
|
||||
tree step;
|
||||
|
||||
/* loop boundary info of each dimension. */
|
||||
- vector<loop_bound> loop_bounds;
|
||||
+ std::vector<loop_bound> loop_bounds;
|
||||
|
||||
/* memory data size, Unit: MB. */
|
||||
double data_size;
|
||||
@@ -191,7 +189,7 @@ struct data_ref
|
||||
/* Add ref node and print. */
|
||||
|
||||
void
|
||||
-add_ref (vector<data_ref> &references, tree op, gimple *stmt,
|
||||
+add_ref (std::vector<data_ref> &references, tree op, gimple *stmt,
|
||||
bool vectorize_p, bool read_p)
|
||||
{
|
||||
data_ref ref;
|
||||
@@ -210,7 +208,7 @@ add_ref (vector<data_ref> &references, tree op, gimple *stmt,
|
||||
/* Get the references from the simple call (vectorization type). */
|
||||
|
||||
void
|
||||
-get_references_in_gimple_call (gimple *stmt, vector<data_ref> &references)
|
||||
+get_references_in_gimple_call (gimple *stmt, std::vector<data_ref> &references)
|
||||
{
|
||||
if (gimple_code (stmt) != GIMPLE_CALL)
|
||||
return;
|
||||
@@ -276,7 +274,7 @@ get_references_in_gimple_call (gimple *stmt, vector<data_ref> &references)
|
||||
/* Stores the locations of memory references in STMT to REFERENCES. */
|
||||
|
||||
void
|
||||
-get_references_in_stmt (gimple *stmt, vector<data_ref> &references)
|
||||
+get_references_in_stmt (gimple *stmt, std::vector<data_ref> &references)
|
||||
{
|
||||
if (!gimple_vuse (stmt))
|
||||
return;
|
||||
@@ -326,7 +324,7 @@ struct loop_filter_out_flag
|
||||
|
||||
/* Check whether an external node is used. */
|
||||
|
||||
-bool use_ext_node_p (const vector<data_ref> &references,
|
||||
+bool use_ext_node_p (const std::vector<data_ref> &references,
|
||||
unsigned int &start)
|
||||
{
|
||||
expanded_location cfun_xloc
|
||||
@@ -352,7 +350,7 @@ bool use_ext_node_p (const vector<data_ref> &references,
|
||||
|
||||
bool
|
||||
filter_out_loop_by_stmt_p (loop_filter_out_flag &loop_filter, gimple *stmt,
|
||||
- const vector<data_ref> &references, unsigned int &start)
|
||||
+ const std::vector<data_ref> &references, unsigned int &start)
|
||||
{
|
||||
expanded_location xloc = expand_location (stmt->location);
|
||||
/* check use_ext_call. */
|
||||
@@ -431,7 +429,7 @@ dump_loop_filter_out_flag (loop_filter_out_flag &loop_filter)
|
||||
/* Get references in loop. */
|
||||
|
||||
bool
|
||||
-get_references_in_loop (vector<data_ref> &references,
|
||||
+get_references_in_loop (std::vector<data_ref> &references,
|
||||
loop_filter_out_flag &loop_filter,
|
||||
class loop *loop)
|
||||
{
|
||||
@@ -501,7 +499,7 @@ estimate_loop_insns (class loop *loop, eni_weights *weights)
|
||||
/* Check whether the memory access is dense. */
|
||||
|
||||
bool
|
||||
-dense_memory_p (const vector<data_ref> &references, class loop *loop)
|
||||
+dense_memory_p (const std::vector<data_ref> &references, class loop *loop)
|
||||
{
|
||||
int ref_count = references.size ();
|
||||
unsigned int ninsns = estimate_loop_insns (loop, &eni_size_weights);
|
||||
@@ -550,11 +548,12 @@ dense_memory_p (const vector<data_ref> &references, class loop *loop)
|
||||
/* Analyze the inner loop and get the loop with dense memory access. */
|
||||
|
||||
void
|
||||
-analyze_loop_dense_memory (vector<class loop *> &kernels,
|
||||
- map<class loop *, vector<data_ref> > &kernels_refs,
|
||||
- class loop *loop)
|
||||
+analyze_loop_dense_memory (std::vector<class loop *> &kernels,
|
||||
+ std::map<class loop *,
|
||||
+ std::vector<data_ref> > &kernels_refs,
|
||||
+ class loop *loop)
|
||||
{
|
||||
- vector<data_ref> references;
|
||||
+ std::vector<data_ref> references;
|
||||
number_of_latch_executions (loop);
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
@@ -589,8 +588,9 @@ analyze_loop_dense_memory (vector<class loop *> &kernels,
|
||||
/* Analyze the inner loop and get the loop with dense memory access. */
|
||||
|
||||
bool
|
||||
-get_dense_memory_kernels (vector<class loop *> &kernels,
|
||||
- map<class loop *, vector<data_ref> > &kernels_refs)
|
||||
+get_dense_memory_kernels (std::vector<class loop *> &kernels,
|
||||
+ std::map<class loop *,
|
||||
+ std::vector<data_ref> > &kernels_refs)
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "\nPhase 1: get_dense_memory_kernels\n\n");
|
||||
@@ -631,7 +631,8 @@ generic_decl_p (tree expr)
|
||||
Add different initial node based on different gimple statements. */
|
||||
|
||||
void
|
||||
-add_worklist (vector<tree> &worklist, set<tree> &walked, gimple *def_stmt)
|
||||
+add_worklist (std::vector<tree> &worklist, std::set<tree> &walked,
|
||||
+ gimple *def_stmt)
|
||||
{
|
||||
if (gimple_code (def_stmt) == GIMPLE_PHI)
|
||||
{
|
||||
@@ -715,8 +716,8 @@ add_worklist (vector<tree> &worklist, set<tree> &walked, gimple *def_stmt)
|
||||
*/
|
||||
|
||||
void
|
||||
-trace_base_var_helper (tree arg, set<tree> &walked,
|
||||
- map<tree, int>& base_var_candid)
|
||||
+trace_base_var_helper (tree arg, std::set<tree> &walked,
|
||||
+ std::map<tree, int>& base_var_candid)
|
||||
{
|
||||
if (arg == NULL)
|
||||
return;
|
||||
@@ -765,7 +766,7 @@ trace_base_var_helper (tree arg, set<tree> &walked,
|
||||
print_gimple_stmt (dump_file, def_stmt, 0, TDF_SLIM);
|
||||
}
|
||||
|
||||
- vector<tree> worklist;
|
||||
+ std::vector<tree> worklist;
|
||||
add_worklist (worklist, walked, def_stmt);
|
||||
for (unsigned i = 0; i < worklist.size (); ++i)
|
||||
trace_base_var_helper (worklist[i], walked, base_var_candid);
|
||||
@@ -781,9 +782,9 @@ trace_base_var_helper (tree arg, set<tree> &walked,
|
||||
(e.g., criterion 2: 1 -> any odd number). */
|
||||
|
||||
bool
|
||||
-trace_base_var (tree &var, tree arg, set<tree> &walked)
|
||||
+trace_base_var (tree &var, tree arg, std::set<tree> &walked)
|
||||
{
|
||||
- map<tree, int> base_var_candid;
|
||||
+ std::map<tree, int> base_var_candid;
|
||||
trace_base_var_helper (arg, walked, base_var_candid);
|
||||
bool is_tracing_unusual = false;
|
||||
if (base_var_candid.size () == 1)
|
||||
@@ -791,7 +792,7 @@ trace_base_var (tree &var, tree arg, set<tree> &walked)
|
||||
else
|
||||
{
|
||||
is_tracing_unusual = true;
|
||||
- for (const pair<tree, int>& base_var_count : base_var_candid)
|
||||
+ for (const std::pair<tree, int>& base_var_count : base_var_candid)
|
||||
if (base_var_count.second == 1)
|
||||
var = base_var_count.first;
|
||||
}
|
||||
@@ -800,7 +801,7 @@ trace_base_var (tree &var, tree arg, set<tree> &walked)
|
||||
fprintf (dump_file, "Traced variables at ");
|
||||
print_generic_expr (dump_file, arg, TDF_SLIM);
|
||||
fprintf (dump_file, ":\n");
|
||||
- for (const pair<tree, int>& base_var_count : base_var_candid)
|
||||
+ for (const std::pair<tree, int>& base_var_count : base_var_candid)
|
||||
fprintf (dump_file, "%s:%d, ", get_name (base_var_count.first),
|
||||
base_var_count.second);
|
||||
fprintf (dump_file, "\n");
|
||||
@@ -817,7 +818,7 @@ trace_base_var (tree &var, tree arg, set<tree> &walked)
|
||||
/* Tracing direct memory reference information. */
|
||||
|
||||
bool
|
||||
-trace_direct_mem_ref (data_ref &mem_ref, set <gimple *> &traced_ref_stmt)
|
||||
+trace_direct_mem_ref (data_ref &mem_ref, std::set<gimple *> &traced_ref_stmt)
|
||||
{
|
||||
if (TREE_CODE (mem_ref.ref) != TARGET_MEM_REF)
|
||||
return false;
|
||||
@@ -829,7 +830,7 @@ trace_direct_mem_ref (data_ref &mem_ref, set <gimple *> &traced_ref_stmt)
|
||||
mem_ref.index = TREE_OPERAND (mem_ref.ref, 2);
|
||||
mem_ref.step = TREE_OPERAND (mem_ref.ref, 3);
|
||||
|
||||
- set<tree> walked;
|
||||
+ std::set<tree> walked;
|
||||
if (mem_ref.var == NULL_TREE
|
||||
&& !trace_base_var (mem_ref.var, mem_ref.base, walked))
|
||||
return false;
|
||||
@@ -843,7 +844,7 @@ trace_direct_mem_ref (data_ref &mem_ref, set <gimple *> &traced_ref_stmt)
|
||||
If true, it is an indirect access. */
|
||||
|
||||
bool
|
||||
-trace_indirect_operand (tree arg, set<gimple *> &traced_ref_stmt)
|
||||
+trace_indirect_operand (tree arg, std::set<gimple *> &traced_ref_stmt)
|
||||
{
|
||||
if (TREE_CODE (arg) != SSA_NAME)
|
||||
return false;
|
||||
@@ -889,7 +890,7 @@ trace_indirect_operand (tree arg, set<gimple *> &traced_ref_stmt)
|
||||
|
||||
bool
|
||||
trace_indirect_ptr (tree &base, tree &index, tree arg,
|
||||
- set<gimple *> traced_ref_stmt)
|
||||
+ std::set<gimple *> traced_ref_stmt)
|
||||
{
|
||||
gimple *def_stmt = SSA_NAME_DEF_STMT (arg);
|
||||
|
||||
@@ -922,7 +923,7 @@ trace_indirect_ptr (tree &base, tree &index, tree arg,
|
||||
|
||||
bool
|
||||
trace_indirect_array (tree &base, tree &index,
|
||||
- set<gimple *> traced_ref_stmt, tree ref)
|
||||
+ std::set<gimple *> traced_ref_stmt, tree ref)
|
||||
{
|
||||
if (TREE_CODE (ref) != ARRAY_REF)
|
||||
return false;
|
||||
@@ -937,7 +938,7 @@ trace_indirect_array (tree &base, tree &index,
|
||||
|
||||
bool
|
||||
trace_indirect_mem_ref (data_ref &mem_ref,
|
||||
- set <gimple *> &traced_ref_stmt)
|
||||
+ std::set<gimple *> &traced_ref_stmt)
|
||||
{
|
||||
/* Processing of vectorization types. */
|
||||
if (mem_ref.vectorize_p)
|
||||
@@ -947,7 +948,7 @@ trace_indirect_mem_ref (data_ref &mem_ref,
|
||||
{
|
||||
mem_ref.base = gimple_call_arg (mem_ref.stmt, 0);
|
||||
mem_ref.regular_p = false;
|
||||
- set<tree> walked;
|
||||
+ std::set<tree> walked;
|
||||
if (mem_ref.var == NULL_TREE
|
||||
&& !trace_base_var (mem_ref.var, mem_ref.base, walked))
|
||||
return false;
|
||||
@@ -983,7 +984,7 @@ trace_indirect_mem_ref (data_ref &mem_ref,
|
||||
mem_ref.base = base;
|
||||
mem_ref.index = index;
|
||||
mem_ref.regular_p = false;
|
||||
- set<tree> walked;
|
||||
+ std::set<tree> walked;
|
||||
if (mem_ref.var == NULL_TREE
|
||||
&& !trace_base_var (mem_ref.var, mem_ref.base, walked))
|
||||
return false;
|
||||
@@ -1002,7 +1003,7 @@ trace_indirect_mem_ref (data_ref &mem_ref,
|
||||
*/
|
||||
|
||||
void
|
||||
-trace_ref_info (data_ref &mem_ref, set <gimple *> &traced_ref_stmt)
|
||||
+trace_ref_info (data_ref &mem_ref, std::set<gimple *> &traced_ref_stmt)
|
||||
{
|
||||
enum tree_code ref_code = TREE_CODE (mem_ref.ref);
|
||||
if (/* Vectorized and non-vectorized direct access. */
|
||||
@@ -1041,7 +1042,8 @@ trace_ref_info (data_ref &mem_ref, set <gimple *> &traced_ref_stmt)
|
||||
/* Trace all references in the loop. */
|
||||
|
||||
void
|
||||
-trace_loop_refs_info (vector<data_ref> &refs, set <gimple *> &traced_ref_stmt)
|
||||
+trace_loop_refs_info (std::vector<data_ref> &refs,
|
||||
+ std::set<gimple *> &traced_ref_stmt)
|
||||
{
|
||||
for (unsigned i = 0; i < refs.size (); ++i)
|
||||
{
|
||||
@@ -1058,9 +1060,9 @@ trace_loop_refs_info (vector<data_ref> &refs, set <gimple *> &traced_ref_stmt)
|
||||
/* Tracing and sorting reference groups. */
|
||||
|
||||
void
|
||||
-trace_data_refs_info (vector<class loop *> &kernels,
|
||||
- map<class loop*, vector<data_ref> > &loop_refs,
|
||||
- set <gimple *> &traced_ref_stmt)
|
||||
+trace_data_refs_info (std::vector<class loop *> &kernels,
|
||||
+ std::map<class loop*, std::vector<data_ref> > &loop_refs,
|
||||
+ std::set<gimple *> &traced_ref_stmt)
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "\nPhase 2: trace_all_references_info\n\n");
|
||||
@@ -1140,7 +1142,8 @@ loop_bound_iv_p (tree t, tree &outer_loop_t)
|
||||
/* add worklist and walked list. */
|
||||
|
||||
void
|
||||
-add_worklist_walked (vector<tree> &worklist, set<tree> &walked, tree node)
|
||||
+add_worklist_walked (std::vector<tree> &worklist, std::set<tree> &walked,
|
||||
+ tree node)
|
||||
{
|
||||
if (!walked.count (node))
|
||||
{
|
||||
@@ -1154,7 +1157,8 @@ add_worklist_walked (vector<tree> &worklist, set<tree> &walked, tree node)
|
||||
/* check bound iv and add worklist. */
|
||||
|
||||
void
|
||||
-check_bound_iv_and_add_worklist (vector<tree> &worklist, set<tree> &walked,
|
||||
+check_bound_iv_and_add_worklist (std::vector<tree> &worklist,
|
||||
+ std::set<tree> &walked,
|
||||
tree t, data_ref &mem_ref)
|
||||
{
|
||||
if (t == NULL_TREE || TREE_CODE (t) != SSA_NAME)
|
||||
@@ -1216,9 +1220,9 @@ trace_loop_bound_iv (data_ref &mem_ref)
|
||||
mem_ref.loop_bounds.push_back (
|
||||
loop_bound (mem_ref.index, SSA_NAME_DEF_STMT (mem_ref.index)));
|
||||
|
||||
- vector<tree> worklist;
|
||||
+ std::vector<tree> worklist;
|
||||
worklist.push_back (mem_ref.base);
|
||||
- set<tree> walked;
|
||||
+ std::set<tree> walked;
|
||||
|
||||
while (worklist.size ())
|
||||
{
|
||||
@@ -1509,11 +1513,11 @@ trace_ref_dimension_and_loop_bounds (data_ref &mem_ref)
|
||||
loop_bound_dump (dump_file, mem_ref.loop_bounds[i]);
|
||||
|
||||
if (niters == NULL_TREE || niters == chrec_dont_know)
|
||||
- mem_ref.calc_by = min (mem_ref.calc_by, UNHANDLE_CALC);
|
||||
+ mem_ref.calc_by = std::min (mem_ref.calc_by, UNHANDLE_CALC);
|
||||
else if (TREE_CODE (niters) != INTEGER_CST)
|
||||
- mem_ref.calc_by = min (mem_ref.calc_by, RUNTIME_CALC);
|
||||
+ mem_ref.calc_by = std::min (mem_ref.calc_by, RUNTIME_CALC);
|
||||
else
|
||||
- mem_ref.calc_by = min (mem_ref.calc_by, STATIC_CALC);
|
||||
+ mem_ref.calc_by = std::min (mem_ref.calc_by, STATIC_CALC);
|
||||
}
|
||||
|
||||
if (mem_ref.calc_by == RUNTIME_CALC)
|
||||
@@ -1526,12 +1530,12 @@ trace_ref_dimension_and_loop_bounds (data_ref &mem_ref)
|
||||
Return NULL_TREE if not found. */
|
||||
|
||||
tree
|
||||
-get_cur_loop_niters (map<class loop*, vector<data_ref> > &loop_refs,
|
||||
+get_cur_loop_niters (std::map<class loop*, std::vector<data_ref> > &loop_refs,
|
||||
class loop* loop)
|
||||
{
|
||||
if (loop_refs.count (loop) == 0)
|
||||
return NULL_TREE;
|
||||
- vector<loop_bound> bounds = loop_refs[loop][0].loop_bounds;
|
||||
+ std::vector<loop_bound> bounds = loop_refs[loop][0].loop_bounds;
|
||||
return bounds.size () ? bounds[0].niters : NULL_TREE;
|
||||
}
|
||||
|
||||
@@ -1575,7 +1579,7 @@ trace_outer_loop_depth (tree niters, unsigned start_depth)
|
||||
}
|
||||
/* Termination condition of dfs. Return the depth of the bb block. */
|
||||
if (gimple_code (def_stmt) == GIMPLE_PHI
|
||||
- || gimple_code (def_stmt) == GIMPLE_NOP)
|
||||
+ || gimple_code (def_stmt) == GIMPLE_NOP)
|
||||
{
|
||||
basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (niters));
|
||||
if (def_bb == NULL || def_bb->loop_father == NULL)
|
||||
@@ -1610,7 +1614,7 @@ trace_outer_loop_depth (tree niters, unsigned start_depth)
|
||||
continue;
|
||||
unsigned depth = trace_outer_loop_depth (subtree, \
|
||||
start_depth);
|
||||
- min_depth = MIN (min_depth, depth);
|
||||
+ min_depth = std::min (min_depth, depth);
|
||||
}
|
||||
return min_depth;
|
||||
}
|
||||
@@ -1648,7 +1652,7 @@ trace_outer_loop_depth (tree niters, unsigned start_depth)
|
||||
if (subtree == NULL)
|
||||
continue;
|
||||
unsigned depth = trace_outer_loop_depth (subtree, start_depth);
|
||||
- min_depth = MIN (min_depth, depth);
|
||||
+ min_depth = std::min (min_depth, depth);
|
||||
}
|
||||
return min_depth;
|
||||
}
|
||||
@@ -1668,7 +1672,7 @@ trace_outer_loop_depth (tree niters, unsigned start_depth)
|
||||
/* Traces the ref dimension information in each loop. */
|
||||
|
||||
void
|
||||
-analyze_loop_refs_dimension (vector<data_ref> &refs)
|
||||
+analyze_loop_refs_dimension (std::vector<data_ref> &refs)
|
||||
{
|
||||
for (unsigned i = 0; i < refs.size (); ++i)
|
||||
{
|
||||
@@ -1689,9 +1693,10 @@ analyze_loop_refs_dimension (vector<data_ref> &refs)
|
||||
*/
|
||||
|
||||
bool
|
||||
-analyze_nested_kernels (vector<class loop *> &kernels,
|
||||
- map<class loop*, vector<data_ref> > &loop_refs,
|
||||
- set <gimple *> &traced_ref_stmt)
|
||||
+analyze_nested_kernels (std::vector<class loop *> &kernels,
|
||||
+ std::map<class loop*,
|
||||
+ std::vector<data_ref> > &loop_refs,
|
||||
+ std::set<gimple *> &traced_ref_stmt)
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "\nPhase 3: analyze_nested_kernels\n\n");
|
||||
@@ -1840,7 +1845,7 @@ next_high_probability_bb (basic_block bb)
|
||||
/* Dump loop header bb. */
|
||||
|
||||
void
|
||||
-dump_loop_headers (const char *name, vector<class loop *> &loops)
|
||||
+dump_loop_headers (const char *name, std::vector<class loop *> &loops)
|
||||
{
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
@@ -1855,15 +1860,15 @@ dump_loop_headers (const char *name, vector<class loop *> &loops)
|
||||
/* Combine and sort candidate loops. */
|
||||
|
||||
bool
|
||||
-filter_and_sort_kernels (vector<class loop *> &sorted_kernels,
|
||||
- vector<class loop *> &kernels)
|
||||
+filter_and_sort_kernels (std::vector<class loop *> &sorted_kernels,
|
||||
+ std::vector<class loop *> &kernels)
|
||||
{
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
fprintf (dump_file, "\nPhase 4: filter_and_sort_kernels:\n\n");
|
||||
|
||||
- set<basic_block> end_bb;
|
||||
- list<basic_block> walked_header_bb; /* Used to record nested loops. */
|
||||
- set<int> walked_non_header_bb_idx;
|
||||
+ std::set<basic_block> end_bb;
|
||||
+ std::list<basic_block> walked_header_bb; /* Used to record nested loops. */
|
||||
+ std::set<int> walked_non_header_bb_idx;
|
||||
|
||||
for (unsigned i = 0; i < kernels.size (); ++i)
|
||||
{
|
||||
@@ -1875,7 +1880,7 @@ filter_and_sort_kernels (vector<class loop *> &sorted_kernels,
|
||||
|
||||
if (!param_filter_kernels)
|
||||
{
|
||||
- for (vector<class loop *>::iterator it = kernels.begin ();
|
||||
+ for (std::vector<class loop *>::iterator it = kernels.begin ();
|
||||
it != kernels.end (); ++it)
|
||||
sorted_kernels.push_back (*it);
|
||||
}
|
||||
@@ -1985,10 +1990,10 @@ struct ref_group
|
||||
110: read, regular, non-parallel
|
||||
111: read, regular, parallel
|
||||
*/
|
||||
- map<int, vector<data_ref> > ref_use;
|
||||
+ std::map<int, std::vector<data_ref> > ref_use;
|
||||
|
||||
/* scores for different memory references. */
|
||||
- vector<ref_score> ref_scores;
|
||||
+ std::vector<ref_score> ref_scores;
|
||||
|
||||
ref_group ()
|
||||
{
|
||||
@@ -2003,10 +2008,10 @@ struct ref_group
|
||||
/* calculate reuse level. */
|
||||
|
||||
unsigned int
|
||||
-calculate_reuse_level (map<int, vector<data_ref> > &var_use)
|
||||
+calculate_reuse_level (std::map<int, std::vector<data_ref> > &var_use)
|
||||
{
|
||||
unsigned int level = 0;
|
||||
- for (map<int, vector<data_ref> >::iterator it = var_use.begin ();
|
||||
+ for (std::map<int, std::vector<data_ref> >::iterator it = var_use.begin ();
|
||||
it != var_use.end (); ++it)
|
||||
{
|
||||
unsigned int parallel = 1;
|
||||
@@ -2043,13 +2048,13 @@ ref_group_reuse_cmp (const ref_group &a, const ref_group &b)
|
||||
/* Sort reference groups. */
|
||||
|
||||
void
|
||||
-sort_ref_groups (vector<ref_group> &ref_groups,
|
||||
- map<tree, ref_group> &ref_groups_map)
|
||||
+sort_ref_groups (std::vector<ref_group> &ref_groups,
|
||||
+ std::map<tree, ref_group> &ref_groups_map)
|
||||
{
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
fprintf (dump_file, "\nsort_ref_groups_by_reuse_level\n");
|
||||
|
||||
- for (map<tree, ref_group>::iterator it = ref_groups_map.begin ();
|
||||
+ for (std::map<tree, ref_group>::iterator it = ref_groups_map.begin ();
|
||||
it != ref_groups_map.end (); ++it)
|
||||
{
|
||||
(*it).second.reuse_level = calculate_reuse_level ((*it).second.ref_use);
|
||||
@@ -2062,7 +2067,7 @@ sort_ref_groups (vector<ref_group> &ref_groups,
|
||||
}
|
||||
}
|
||||
|
||||
- sort (ref_groups.begin (), ref_groups.end (), ref_group_reuse_cmp);
|
||||
+ std::sort (ref_groups.begin (), ref_groups.end (), ref_group_reuse_cmp);
|
||||
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
{
|
||||
@@ -2111,7 +2116,7 @@ enum data_attribute
|
||||
If the reference group is not found, create a group. */
|
||||
|
||||
void
|
||||
-record_mem_ref (map<tree, ref_group> &ref_groups, data_ref &mem_ref)
|
||||
+record_mem_ref (std::map<tree, ref_group> &ref_groups, data_ref &mem_ref)
|
||||
{
|
||||
unsigned int index = (mem_ref.parallel_p << DA_PARALLEL)
|
||||
+ (mem_ref.regular_p << DA_REGULAR) + (mem_ref.read_p << DA_READ);
|
||||
@@ -2127,9 +2132,9 @@ record_mem_ref (map<tree, ref_group> &ref_groups, data_ref &mem_ref)
|
||||
/* Ref_groups' calc_by depends on the inserted mem_ref's calc_by.
|
||||
Runtime issue requires the specified mem_ref's calc_by to be >= 1.
|
||||
Temporarily modified ref_group's first_use after sorting mem_refs. */
|
||||
- ref_groups[mem_ref.var].calc_by = max (ref_groups[mem_ref.var].calc_by,
|
||||
+ ref_groups[mem_ref.var].calc_by = std::max (ref_groups[mem_ref.var].calc_by,
|
||||
mem_ref.calc_by);
|
||||
- ref_groups[mem_ref.var].var_size = max (ref_groups[mem_ref.var].var_size,
|
||||
+ ref_groups[mem_ref.var].var_size = std::max (ref_groups[mem_ref.var].var_size,
|
||||
mem_ref.data_size);
|
||||
ref_groups[mem_ref.var].ref_use[index].push_back (mem_ref);
|
||||
|
||||
@@ -2182,15 +2187,16 @@ data_ref_reuse_cmp (const ref_score &a, const ref_score &b)
|
||||
order of the customized sorting scheme. */
|
||||
|
||||
void
|
||||
-sort_mem_ref_in_ref_group (map<tree, ref_group> &ref_groups_map)
|
||||
+sort_mem_ref_in_ref_group (std::map<tree, ref_group> &ref_groups_map)
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "\nsorted data_references:\n");
|
||||
- for (map<tree, ref_group>::iterator it = ref_groups_map.begin ();
|
||||
+ for (std::map<tree, ref_group>::iterator it = ref_groups_map.begin ();
|
||||
it != ref_groups_map.end (); ++it)
|
||||
{
|
||||
- vector<ref_score> &ref_scores = (*it).second.ref_scores;
|
||||
- stable_sort (ref_scores.begin (), ref_scores.end (), data_ref_reuse_cmp);
|
||||
+ std::vector<ref_score> &ref_scores = (*it).second.ref_scores;
|
||||
+ std::stable_sort (ref_scores.begin (), ref_scores.end (),
|
||||
+ data_ref_reuse_cmp);
|
||||
/* Update ref_group's first_use and calc_by with the first mem_ref after
|
||||
sorting. */
|
||||
(*it).second.first_use = (*it).second.ref_scores[0].d_ref;
|
||||
@@ -2214,14 +2220,15 @@ sort_mem_ref_in_ref_group (map<tree, ref_group> &ref_groups_map)
|
||||
/* Tracing and sorting reference groups. */
|
||||
|
||||
bool
|
||||
-record_and_sort_ref_groups (vector<ref_group> &ref_groups,
|
||||
- vector<class loop *> &kernels,
|
||||
- map<class loop*, vector<data_ref> > &loop_refs)
|
||||
+record_and_sort_ref_groups (std::vector<ref_group> &ref_groups,
|
||||
+ std::vector<class loop *> &kernels,
|
||||
+ std::map<class loop*,
|
||||
+ std::vector<data_ref> > &loop_refs)
|
||||
{
|
||||
if (dump_file)
|
||||
fprintf (dump_file, "\nPhase 5: trace_all_references_details\n\n");
|
||||
|
||||
- map<tree, ref_group> ref_groups_map;
|
||||
+ std::map<tree, ref_group> ref_groups_map;
|
||||
|
||||
for (unsigned i = 0; i < kernels.size (); ++i)
|
||||
{
|
||||
@@ -2395,7 +2402,7 @@ issue_builtin_prefetch (data_ref &mem_ref)
|
||||
determination of the ARM SVE architecture before SVE hint insertion. */
|
||||
|
||||
void
|
||||
-static_issue (vector<ref_group> &ref_groups, int num_issue_var)
|
||||
+static_issue (std::vector<ref_group> &ref_groups, int num_issue_var)
|
||||
{
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
fprintf (dump_file, "static issue\n");
|
||||
@@ -2425,8 +2432,8 @@ static_issue (vector<ref_group> &ref_groups, int num_issue_var)
|
||||
a COND_EXPR. */
|
||||
|
||||
tree
|
||||
-calc_stmts_gen (vector<ref_group> &ref_groups, gimple_seq &cond_expr_stmt_list,
|
||||
- int num_issue_var)
|
||||
+calc_stmts_gen (std::vector<ref_group> &ref_groups,
|
||||
+ gimple_seq &cond_expr_stmt_list, int num_issue_var)
|
||||
{
|
||||
/* Accumulated keep size. */
|
||||
tree total_size = build_real_from_int_cst
|
||||
@@ -2483,7 +2490,7 @@ calc_stmts_gen (vector<ref_group> &ref_groups, gimple_seq &cond_expr_stmt_list,
|
||||
/* Runtime form insertion and issue instruction. */
|
||||
|
||||
void
|
||||
-runtime_issue (vector<ref_group> &ref_groups, int num_issue_var)
|
||||
+runtime_issue (std::vector<ref_group> &ref_groups, int num_issue_var)
|
||||
{
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
fprintf (dump_file, "runtime issue\n");
|
||||
@@ -2547,7 +2554,7 @@ runtime_issue (vector<ref_group> &ref_groups, int num_issue_var)
|
||||
/* Issue llc hints through prefetch instructions. */
|
||||
|
||||
void
|
||||
-issue_llc_hint (vector<ref_group> &ref_groups)
|
||||
+issue_llc_hint (std::vector<ref_group> &ref_groups)
|
||||
{
|
||||
if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
fprintf (dump_file, "issue_llc_hint:\n");
|
||||
@@ -2567,7 +2574,7 @@ issue_llc_hint (vector<ref_group> &ref_groups)
|
||||
if (ref_groups.size () == 0)
|
||||
return;
|
||||
|
||||
- int num_issue_var = min (param_issue_topn,
|
||||
+ int num_issue_var = std::min (param_issue_topn,
|
||||
static_cast<int>(ref_groups.size ()));
|
||||
if (num_issue_var < param_issue_topn
|
||||
&& dump_file && (dump_flags & TDF_DETAILS))
|
||||
@@ -2583,7 +2590,7 @@ issue_llc_hint (vector<ref_group> &ref_groups)
|
||||
}
|
||||
calc_type topn_calc_type = STATIC_CALC;
|
||||
for (int i = 0; i < num_issue_var; ++i)
|
||||
- topn_calc_type = min (topn_calc_type, ref_groups[i].calc_by);
|
||||
+ topn_calc_type = std::min (topn_calc_type, ref_groups[i].calc_by);
|
||||
|
||||
if (topn_calc_type == STATIC_CALC)
|
||||
{
|
||||
@@ -2616,22 +2623,22 @@ issue_llc_hint (vector<ref_group> &ref_groups)
|
||||
void
|
||||
llc_allocate (void)
|
||||
{
|
||||
- map<class loop *, vector<data_ref> > kernels_refs;
|
||||
- vector<class loop *> kernels;
|
||||
+ std::map<class loop *, std::vector<data_ref> > kernels_refs;
|
||||
+ std::vector<class loop *> kernels;
|
||||
if (!get_dense_memory_kernels (kernels, kernels_refs))
|
||||
return;
|
||||
|
||||
- set <gimple *> traced_ref_stmt;
|
||||
+ std::set<gimple *> traced_ref_stmt;
|
||||
trace_data_refs_info (kernels, kernels_refs, traced_ref_stmt);
|
||||
|
||||
if (!analyze_nested_kernels (kernels, kernels_refs, traced_ref_stmt))
|
||||
return;
|
||||
|
||||
- vector<class loop *> sorted_kernels;
|
||||
+ std::vector<class loop *> sorted_kernels;
|
||||
if (!filter_and_sort_kernels (sorted_kernels, kernels))
|
||||
return;
|
||||
|
||||
- vector<ref_group> ref_groups;
|
||||
+ std::vector<ref_group> ref_groups;
|
||||
if (!record_and_sort_ref_groups (ref_groups, sorted_kernels, kernels_refs))
|
||||
return;
|
||||
|
||||
--
|
||||
2.33.0
|
||||
|
||||
3503
0192-Add-feedback-directed-filter_and_sort_kernels-in-Pha.patch
Normal file
3503
0192-Add-feedback-directed-filter_and_sort_kernels-in-Pha.patch
Normal file
File diff suppressed because it is too large
Load Diff
827
0193-Add-prefetch-level-parameter-to-specify-the-last-lev.patch
Normal file
827
0193-Add-prefetch-level-parameter-to-specify-the-last-lev.patch
Normal file
@ -0,0 +1,827 @@
|
||||
From 599d6f94c11fd906cfbabbd7ba4e5e2e5642cac9 Mon Sep 17 00:00:00 2001
|
||||
From: yzyssdd <yuzeyang4@huawei.com>
|
||||
Date: Tue, 28 May 2024 10:43:20 +0800
|
||||
Subject: [PATCH 2/2] Add prefetch level parameter to specify the last level
|
||||
cache. Add l4 inst and deja case
|
||||
|
||||
---
|
||||
gcc/builtins.c | 82 +++++++++++++++++++
|
||||
gcc/builtins.def | 1 +
|
||||
gcc/config/aarch64/aarch64-protos.h | 6 +-
|
||||
gcc/config/aarch64/aarch64.md | 39 +++++++++
|
||||
gcc/dce.c | 1 +
|
||||
gcc/hsa-gen.c | 4 +-
|
||||
gcc/ipa-pure-const.c | 1 +
|
||||
gcc/params.opt | 5 ++
|
||||
gcc/print-rtl.c | 6 ++
|
||||
gcc/rtl.def | 9 ++
|
||||
gcc/rtl.h | 4 +
|
||||
gcc/rtlanal.c | 2 +
|
||||
gcc/sched-deps.c | 4 +-
|
||||
gcc/target-insns.def | 1 +
|
||||
.../llc-prefetch-full-pldl1keep.c | 15 ++++
|
||||
.../llc-prefetch-full-pldl1strm.c | 15 ++++
|
||||
.../llc-prefetch-full-pldl2keep.c | 15 ++++
|
||||
.../llc-prefetch-full-pldl2strm.c | 15 ++++
|
||||
.../llc-prefetch-full-pldl3keep.c | 15 ++++
|
||||
.../llc-prefetch-full-pldl3strm.c | 15 ++++
|
||||
.../llc-prefetch-full-pldl4keep.c | 15 ++++
|
||||
.../llc-prefetch-full-pldl4strm.c | 15 ++++
|
||||
.../llc-prefetch-full-pstl1keep.c | 15 ++++
|
||||
.../llc-prefetch-full-pstl1strm.c | 15 ++++
|
||||
.../llc-prefetch-full-pstl2keep.c | 15 ++++
|
||||
.../llc-prefetch-full-pstl2strm.c | 15 ++++
|
||||
.../llc-prefetch-full-pstl3keep.c | 15 ++++
|
||||
.../llc-prefetch-full-pstl3strm.c | 15 ++++
|
||||
.../llc-prefetch-full-pstl4keep.c | 15 ++++
|
||||
.../llc-prefetch-full-pstl4strm.c | 15 ++++
|
||||
gcc/tree-ssa-llc-allocate.c | 54 ++++++++++--
|
||||
31 files changed, 449 insertions(+), 10 deletions(-)
|
||||
create mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pldl1keep.c
|
||||
create mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pldl1strm.c
|
||||
create mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pldl2keep.c
|
||||
create mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pldl2strm.c
|
||||
create mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pldl3keep.c
|
||||
create mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pldl3strm.c
|
||||
create mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pldl4keep.c
|
||||
create mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pldl4strm.c
|
||||
create mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pstl1keep.c
|
||||
create mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pstl1strm.c
|
||||
create mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pstl2keep.c
|
||||
create mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pstl2strm.c
|
||||
create mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pstl3keep.c
|
||||
create mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pstl3strm.c
|
||||
create mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pstl4keep.c
|
||||
create mode 100644 gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pstl4strm.c
|
||||
|
||||
diff --git a/gcc/builtins.c b/gcc/builtins.c
|
||||
index 1b1c75cc1..ffbb2cae9 100644
|
||||
--- a/gcc/builtins.c
|
||||
+++ b/gcc/builtins.c
|
||||
@@ -1463,6 +1463,85 @@ expand_builtin_prefetch (tree exp)
|
||||
emit_insn (op0);
|
||||
}
|
||||
|
||||
+/* Expand a call to __builtin_prefetch_full. */
|
||||
+
|
||||
+static void
|
||||
+expand_builtin_prefetch_full (tree exp)
|
||||
+{
|
||||
+ tree arg0, arg1, arg2;
|
||||
+ int nargs;
|
||||
+ rtx op0, op1, op2;
|
||||
+
|
||||
+ if (!validate_arglist (exp, POINTER_TYPE, 0))
|
||||
+ return;
|
||||
+
|
||||
+ arg0 = CALL_EXPR_ARG (exp, 0);
|
||||
+
|
||||
+ /* Arguments 1 and 2 are optional; argument 1 (read/write) defaults to
|
||||
+ zero (read) and argument 2 (prfop index, 0-7) defaults to 3, which the
|
||||
+ aarch64 prefetch_full pattern emits as PLDL2STRM. */
|
||||
+ nargs = call_expr_nargs (exp);
|
||||
+ if (nargs > 1)
|
||||
+ arg1 = CALL_EXPR_ARG (exp, 1);
|
||||
+ else
|
||||
+ arg1 = integer_zero_node;
|
||||
+ if (nargs > 2)
|
||||
+ arg2 = CALL_EXPR_ARG (exp, 2);
|
||||
+ else
|
||||
+ arg2 = integer_three_node;
|
||||
+
|
||||
+ /* Argument 0 is an address. */
|
||||
+ op0 = expand_expr (arg0, NULL_RTX, Pmode, EXPAND_NORMAL);
|
||||
+
|
||||
+ /* Argument 1 (read/write flag) must be a compile-time constant int. */
|
||||
+ if (TREE_CODE (arg1) != INTEGER_CST)
|
||||
+ {
|
||||
+ error ("second argument to %<__builtin_prefetch_full%> must be a "
|
||||
+ "constant");
|
||||
+ arg1 = integer_zero_node;
|
||||
+ }
|
||||
+ op1 = expand_normal (arg1);
|
||||
+ /* Argument 1 must be either zero or one. */
|
||||
+ if (INTVAL (op1) != 0 && INTVAL (op1) != 1)
|
||||
+ {
|
||||
+ warning (0, "invalid second argument to %<__builtin_prefetch_full%>;"
|
||||
+ " using zero");
|
||||
+ op1 = const0_rtx;
|
||||
+ }
|
||||
+
|
||||
+ /* Argument 2 (prfop index) must be a compile-time constant int. */
|
||||
+ if (TREE_CODE (arg2) != INTEGER_CST)
|
||||
+ {
|
||||
+ error ("third argument to %<__builtin_prefetch_full%> must be a "
|
||||
+ "constant");
|
||||
+ arg2 = integer_zero_node;
|
||||
+ }
|
||||
+ op2 = expand_normal (arg2);
|
||||
+ /* Argument 2 must be 0-7. */
|
||||
+ if (INTVAL (op2) < 0 || INTVAL (op2) > 7)
|
||||
+ {
|
||||
+ warning (0, "invalid third argument to %<__builtin_prefetch_full%>; "
|
||||
+ "using zero");
|
||||
+ op2 = const0_rtx;
|
||||
+ }
|
||||
+
|
||||
+ if (targetm.have_prefetch_full ())
|
||||
+ {
|
||||
+ class expand_operand ops[3];
|
||||
+
|
||||
+ create_address_operand (&ops[0], op0);
|
||||
+ create_integer_operand (&ops[1], INTVAL (op1));
|
||||
+ create_integer_operand (&ops[2], INTVAL (op2));
|
||||
+ if (maybe_expand_insn (targetm.code_for_prefetch_full, 3, ops))
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ /* Don't do anything with direct references to volatile memory, but
|
||||
+ generate code to handle other side effects. */
|
||||
+ if (!MEM_P (op0) && side_effects_p (op0))
|
||||
+ emit_insn (op0);
|
||||
+}
|
||||
+
|
||||
/* Get a MEM rtx for expression EXP which is the address of an operand
|
||||
to be used in a string instruction (cmpstrsi, cpymemsi, ..). LEN is
|
||||
the maximum length of the block of memory that might be accessed or
|
||||
@@ -8386,6 +8465,9 @@ expand_builtin (tree exp, rtx target, rtx subtarget, machine_mode mode,
|
||||
case BUILT_IN_PREFETCH:
|
||||
expand_builtin_prefetch (exp);
|
||||
return const0_rtx;
|
||||
+ case BUILT_IN_PREFETCH_FULL:
|
||||
+ expand_builtin_prefetch_full (exp);
|
||||
+ return const0_rtx;
|
||||
|
||||
case BUILT_IN_INIT_TRAMPOLINE:
|
||||
return expand_builtin_init_trampoline (exp, true);
|
||||
diff --git a/gcc/builtins.def b/gcc/builtins.def
|
||||
index ee67ac15d..b89cec11f 100644
|
||||
--- a/gcc/builtins.def
|
||||
+++ b/gcc/builtins.def
|
||||
@@ -927,6 +927,7 @@ DEF_GCC_BUILTIN (BUILT_IN_POPCOUNTL, "popcountl", BT_FN_INT_ULONG, ATTR_C
|
||||
DEF_GCC_BUILTIN (BUILT_IN_POPCOUNTLL, "popcountll", BT_FN_INT_ULONGLONG, ATTR_CONST_NOTHROW_LEAF_LIST)
|
||||
DEF_EXT_LIB_BUILTIN (BUILT_IN_POSIX_MEMALIGN, "posix_memalign", BT_FN_INT_PTRPTR_SIZE_SIZE, ATTR_NOTHROW_NONNULL_LEAF)
|
||||
DEF_GCC_BUILTIN (BUILT_IN_PREFETCH, "prefetch", BT_FN_VOID_CONST_PTR_VAR, ATTR_NOVOPS_LEAF_LIST)
|
||||
+DEF_GCC_BUILTIN (BUILT_IN_PREFETCH_FULL, "prefetch_full", BT_FN_VOID_CONST_PTR_VAR, ATTR_NOVOPS_LEAF_LIST)
|
||||
DEF_LIB_BUILTIN (BUILT_IN_REALLOC, "realloc", BT_FN_PTR_PTR_SIZE, ATTR_ALLOC_WARN_UNUSED_RESULT_SIZE_2_NOTHROW_LEAF_LIST)
|
||||
DEF_GCC_BUILTIN (BUILT_IN_RETURN, "return", BT_FN_VOID_PTR, ATTR_NORETURN_NOTHROW_LEAF_LIST)
|
||||
DEF_GCC_BUILTIN (BUILT_IN_RETURN_ADDRESS, "return_address", BT_FN_PTR_UINT, ATTR_LEAF_LIST)
|
||||
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
|
||||
index 1a4fc2028..c8388f902 100644
|
||||
--- a/gcc/config/aarch64/aarch64-protos.h
|
||||
+++ b/gcc/config/aarch64/aarch64-protos.h
|
||||
@@ -455,12 +455,16 @@ extern struct tune_params aarch64_tune_params;
|
||||
T (PLDL2STRM, pldl2strm, 3) \
|
||||
T (PLDL3KEEP, pldl3keep, 4) \
|
||||
T (PLDL3STRM, pldl3strm, 5) \
|
||||
+ T (PLDL4KEEP, pldl4keep, 6) \
|
||||
+ T (PLDL4STRM, pldl4strm, 7) \
|
||||
T (PSTL1KEEP, pstl1keep, 8) \
|
||||
T (PSTL1STRM, pstl1strm, 9) \
|
||||
T (PSTL2KEEP, pstl2keep, 10) \
|
||||
T (PSTL2STRM, pstl2strm, 11) \
|
||||
T (PSTL3KEEP, pstl3keep, 12) \
|
||||
- T (PSTL3STRM, pstl3strm, 13)
|
||||
+ T (PSTL3STRM, pstl3strm, 13) \
|
||||
+ T (PSTL4KEEP, pstl4keep, 14) \
|
||||
+ T (PSTL4STRM, pstl4strm, 15)
|
||||
|
||||
#define AARCH64_SVENUM(UPPER, LOWER, VALUE) AARCH64_SV_##UPPER = VALUE,
|
||||
enum aarch64_svpattern {
|
||||
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
|
||||
index 38af8d000..2ec1c5d19 100644
|
||||
--- a/gcc/config/aarch64/aarch64.md
|
||||
+++ b/gcc/config/aarch64/aarch64.md
|
||||
@@ -831,6 +831,45 @@
|
||||
[(set_attr "type" "load_4")]
|
||||
)
|
||||
|
||||
+(define_insn "prefetch_full"
|
||||
+ [(prefetch_full (match_operand:DI 0 "aarch64_prefetch_operand" "Dp")
|
||||
+ (match_operand:QI 1 "const_int_operand" "")
|
||||
+ (match_operand:QI 2 "const_int_operand" ""))]
|
||||
+ ""
|
||||
+ {
|
||||
+ const char * pftype[2][8] =
|
||||
+ {
|
||||
+ {"prfm\\tPLDL1KEEP, %0",
|
||||
+ "prfm\\tPLDL1STRM, %0",
|
||||
+ "prfm\\tPLDL2KEEP, %0",
|
||||
+ "prfm\\tPLDL2STRM, %0",
|
||||
+ "prfm\\tPLDL3KEEP, %0",
|
||||
+ "prfm\\tPLDL3STRM, %0",
|
||||
+ "prfm\\tPLDL4KEEP, %0",
|
||||
+ "prfm\\tPLDL4STRM, %0"},
|
||||
+ {"prfm\\tPSTL1KEEP, %0",
|
||||
+ "prfm\\tPSTL1STRM, %0",
|
||||
+ "prfm\\tPSTL2KEEP, %0",
|
||||
+ "prfm\\tPSTL2STRM, %0",
|
||||
+ "prfm\\tPSTL3KEEP, %0",
|
||||
+ "prfm\\tPSTL3STRM, %0",
|
||||
+ "prfm\\tPSTL4KEEP, %0",
|
||||
+ "prfm\\tPSTL4STRM, %0"},
|
||||
+ };
|
||||
+
|
||||
+ int prfop = INTVAL (operands[2]);
|
||||
+
|
||||
+ gcc_assert (IN_RANGE (prfop, 0, 7));
|
||||
+
|
||||
+ /* PRFM accepts the same addresses as a 64-bit LDR so wrap
|
||||
+ the address into a DImode MEM so that aarch64_print_operand knows
|
||||
+ how to print it. */
|
||||
+ operands[0] = gen_rtx_MEM (DImode, operands[0]);
|
||||
+ return pftype[INTVAL(operands[1])][prfop];
|
||||
+ }
|
||||
+ [(set_attr "type" "load_4")]
|
||||
+)
|
||||
+
|
||||
(define_insn "trap"
|
||||
[(trap_if (const_int 1) (const_int 8))]
|
||||
""
|
||||
diff --git a/gcc/dce.c b/gcc/dce.c
|
||||
index a6a1599b5..aaa63b63a 100644
|
||||
--- a/gcc/dce.c
|
||||
+++ b/gcc/dce.c
|
||||
@@ -72,6 +72,7 @@ deletable_insn_p_1 (rtx body)
|
||||
switch (GET_CODE (body))
|
||||
{
|
||||
case PREFETCH:
|
||||
+ case PREFETCH_FULL:
|
||||
case TRAP_IF:
|
||||
/* The UNSPEC case was added here because the ia-64 claims that
|
||||
USEs do not work after reload and generates UNSPECS rather
|
||||
diff --git a/gcc/hsa-gen.c b/gcc/hsa-gen.c
|
||||
index 767badab6..c121aee8d 100644
|
||||
--- a/gcc/hsa-gen.c
|
||||
+++ b/gcc/hsa-gen.c
|
||||
@@ -5309,7 +5309,8 @@ gen_hsa_insns_for_call (gimple *stmt, hsa_bb *hbb)
|
||||
|
||||
/* Prefetch pass can create type-mismatching prefetch builtin calls which
|
||||
fail the gimple_call_builtin_p test above. Handle them here. */
|
||||
- if (fndecl_built_in_p (function_decl, BUILT_IN_PREFETCH))
|
||||
+ if (fndecl_built_in_p (function_decl, BUILT_IN_PREFETCH)
|
||||
+ || fndecl_built_in_p (function_decl, BUILT_IN_PREFETCH_FULL))
|
||||
return;
|
||||
|
||||
if (hsa_callable_function_p (function_decl))
|
||||
@@ -5723,6 +5724,7 @@ gen_hsa_insns_for_call (gimple *stmt, hsa_bb *hbb)
|
||||
break;
|
||||
}
|
||||
case BUILT_IN_PREFETCH:
|
||||
+ case BUILT_IN_PREFETCH_FULL:
|
||||
break;
|
||||
default:
|
||||
{
|
||||
diff --git a/gcc/ipa-pure-const.c b/gcc/ipa-pure-const.c
|
||||
index 564c6629c..0dc8e60a8 100644
|
||||
--- a/gcc/ipa-pure-const.c
|
||||
+++ b/gcc/ipa-pure-const.c
|
||||
@@ -534,6 +534,7 @@ special_builtin_state (enum pure_const_state_e *state, bool *looping,
|
||||
*state = IPA_CONST;
|
||||
return true;
|
||||
case BUILT_IN_PREFETCH:
|
||||
+ case BUILT_IN_PREFETCH_FULL:
|
||||
*looping = true;
|
||||
*state = IPA_CONST;
|
||||
return true;
|
||||
diff --git a/gcc/params.opt b/gcc/params.opt
|
||||
index 0c9a270b4..f128ae6a4 100644
|
||||
--- a/gcc/params.opt
|
||||
+++ b/gcc/params.opt
|
||||
@@ -1059,6 +1059,11 @@ Common Joined UInteger Var(param_outer_loop_num) Init(1) IntegerRange(1, 10) Par
|
||||
Maximum number of outer loops allowed to extend outer loops for loops that
|
||||
cannot recognize inner loop boundaries.
|
||||
|
||||
+-param=llc-level=
|
||||
+Common Joined UInteger Var(param_llc_level) Init(3) IntegerRange(3, 4)
|
||||
+Param Optimization
|
||||
+Specifies the cache level (3 or 4) treated as the last-level (HBM) cache when issuing LLC prefetch hints.
|
||||
+
|
||||
-param=filter-mode=
|
||||
Common Joined UInteger Var(param_filter_mode) Init(1) IntegerRange(0, 1) Param
|
||||
Set kernel filtering mode. Use basic block count by default; use branch probability mode when filter mode is turned off.
|
||||
diff --git a/gcc/print-rtl.c b/gcc/print-rtl.c
|
||||
index 611ea079c..4443caf4a 100644
|
||||
--- a/gcc/print-rtl.c
|
||||
+++ b/gcc/print-rtl.c
|
||||
@@ -1549,6 +1549,12 @@ print_exp (pretty_printer *pp, const_rtx x, int verbose)
|
||||
op[1] = XEXP (x, 1);
|
||||
op[2] = XEXP (x, 2);
|
||||
break;
|
||||
+ case PREFETCH_FULL:
|
||||
+ fun = "prefetch_full";
|
||||
+ op[0] = XEXP (x, 0);
|
||||
+ op[1] = XEXP (x, 1);
|
||||
+ op[2] = XEXP (x, 2);
|
||||
+ break;
|
||||
case UNSPEC:
|
||||
case UNSPEC_VOLATILE:
|
||||
{
|
||||
diff --git a/gcc/rtl.def b/gcc/rtl.def
|
||||
index 9754333ea..30fd1cf81 100644
|
||||
--- a/gcc/rtl.def
|
||||
+++ b/gcc/rtl.def
|
||||
@@ -282,6 +282,15 @@ DEF_RTL_EXPR(ADDR_DIFF_VEC, "addr_diff_vec", "eEee0", RTX_EXTRA)
|
||||
whose prefetch instructions do not support them. */
|
||||
DEF_RTL_EXPR(PREFETCH, "prefetch", "eee", RTX_EXTRA)
|
||||
|
||||
+/* Memory prefetch, with attributes supported on some targets.
|
||||
+ Operand 1 is the address of the memory to fetch.
|
||||
+ Operand 2 is 1 for a write access, 0 otherwise.
|
||||
+ Operand 3 is the prfop index (0-7): target cache level and keep/stream policy.
|
||||
+
|
||||
+ The attributes specified by operands 2 and 3 are ignored for targets
|
||||
+ whose prefetch instructions do not support them. */
|
||||
+DEF_RTL_EXPR(PREFETCH_FULL, "prefetch_full", "eee", RTX_EXTRA)
|
||||
+
|
||||
/* ----------------------------------------------------------------------
|
||||
At the top level of an instruction (perhaps under PARALLEL).
|
||||
---------------------------------------------------------------------- */
|
||||
diff --git a/gcc/rtl.h b/gcc/rtl.h
|
||||
index b29afca8d..fbcd05562 100644
|
||||
--- a/gcc/rtl.h
|
||||
+++ b/gcc/rtl.h
|
||||
@@ -2804,6 +2804,10 @@ do { \
|
||||
#define PREFETCH_SCHEDULE_BARRIER_P(RTX) \
|
||||
(RTL_FLAG_CHECK1 ("PREFETCH_SCHEDULE_BARRIER_P", (RTX), PREFETCH)->volatil)
|
||||
|
||||
+/* True if RTX is flagged to be a scheduling barrier. */
|
||||
+#define PREFETCH_FULL_SCHEDULE_BARRIER_P(RTX) \
|
||||
+ (RTL_FLAG_CHECK1 ("PREFETCH_FULL_SCHEDULE_BARRIER_P", (RTX), PREFETCH_FULL)->volatil)
|
||||
+
|
||||
/* Indicate whether the machine has any sort of auto increment addressing.
|
||||
If not, we can avoid checking for REG_INC notes. */
|
||||
|
||||
diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c
|
||||
index 0ebde7622..63bf1bf58 100644
|
||||
--- a/gcc/rtlanal.c
|
||||
+++ b/gcc/rtlanal.c
|
||||
@@ -1195,6 +1195,7 @@ reg_referenced_p (const_rtx x, const_rtx body)
|
||||
return reg_overlap_mentioned_p (x, TRAP_CONDITION (body));
|
||||
|
||||
case PREFETCH:
|
||||
+ case PREFETCH_FULL:
|
||||
return reg_overlap_mentioned_p (x, XEXP (body, 0));
|
||||
|
||||
case UNSPEC:
|
||||
@@ -2007,6 +2008,7 @@ note_uses (rtx *pbody, void (*fun) (rtx *, void *), void *data)
|
||||
return;
|
||||
|
||||
case PREFETCH:
|
||||
+ case PREFETCH_FULL:
|
||||
(*fun) (&XEXP (body, 0), data);
|
||||
return;
|
||||
|
||||
diff --git a/gcc/sched-deps.c b/gcc/sched-deps.c
|
||||
index 331af5ffd..cb5a64ed9 100644
|
||||
--- a/gcc/sched-deps.c
|
||||
+++ b/gcc/sched-deps.c
|
||||
@@ -2720,7 +2720,9 @@ sched_analyze_2 (class deps_desc *deps, rtx x, rtx_insn *insn)
|
||||
break;
|
||||
|
||||
case PREFETCH:
|
||||
- if (PREFETCH_SCHEDULE_BARRIER_P (x))
|
||||
+ case PREFETCH_FULL:
|
||||
+ if ((code == PREFETCH && PREFETCH_SCHEDULE_BARRIER_P (x))
|
||||
+ || (code == PREFETCH_FULL && PREFETCH_FULL_SCHEDULE_BARRIER_P (x)))
|
||||
reg_pending_barrier = TRUE_BARRIER;
|
||||
/* Prefetch insn contains addresses only. So if the prefetch
|
||||
address has no registers, there will be no dependencies on
|
||||
diff --git a/gcc/target-insns.def b/gcc/target-insns.def
|
||||
index 4d7eb92cf..e80361f0a 100644
|
||||
--- a/gcc/target-insns.def
|
||||
+++ b/gcc/target-insns.def
|
||||
@@ -77,6 +77,7 @@ DEF_TARGET_INSN (omp_simt_vote_any, (rtx x0, rtx x1))
|
||||
DEF_TARGET_INSN (omp_simt_xchg_bfly, (rtx x0, rtx x1, rtx x2))
|
||||
DEF_TARGET_INSN (omp_simt_xchg_idx, (rtx x0, rtx x1, rtx x2))
|
||||
DEF_TARGET_INSN (prefetch, (rtx x0, rtx x1, rtx x2))
|
||||
+DEF_TARGET_INSN (prefetch_full, (rtx x0, rtx x1, rtx x2))
|
||||
DEF_TARGET_INSN (probe_stack, (rtx x0))
|
||||
DEF_TARGET_INSN (probe_stack_address, (rtx x0))
|
||||
DEF_TARGET_INSN (prologue, (void))
|
||||
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pldl1keep.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pldl1keep.c
|
||||
new file mode 100644
|
||||
index 000000000..c0fa2db2f
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pldl1keep.c
|
||||
@@ -0,0 +1,15 @@
|
||||
+
|
||||
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
||||
+/* { dg-options " -S -O3 -march=armv8.2-a+sve -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param=outer-loop-nums=10 --param=issue-topn=4 --param=force-issue=1 --param=filter-kernels=0" } */
|
||||
+
|
||||
+
|
||||
+int val[100000];
|
||||
+int main(){
|
||||
+ for(int i=0;i<100000;i++){
|
||||
+ __builtin_prefetch_full(&val[i],0,0);
|
||||
+ val[i]=i+1;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler "PLDL1KEEP" } } */
|
||||
+
|
||||
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pldl1strm.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pldl1strm.c
|
||||
new file mode 100644
|
||||
index 000000000..bcd1113d1
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pldl1strm.c
|
||||
@@ -0,0 +1,15 @@
|
||||
+
|
||||
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
||||
+/* { dg-options "-O3 -march=armv8.2-a+sve -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param=outer-loop-nums=10 --param=issue-topn=4 --param=force-issue=1 --param=filter-kernels=0" } */
|
||||
+
|
||||
+
|
||||
+int val[100000];
|
||||
+int main(){
|
||||
+ for(int i=0;i<100000;i++){
|
||||
+ __builtin_prefetch_full(&val[i],0,1);
|
||||
+ val[i]=i+1;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler "PLDL1STRM" } } */
|
||||
+
|
||||
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pldl2keep.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pldl2keep.c
|
||||
new file mode 100644
|
||||
index 000000000..46702bfbc
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pldl2keep.c
|
||||
@@ -0,0 +1,15 @@
|
||||
+
|
||||
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
||||
+/* { dg-options "-O3 -march=armv8.2-a+sve -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param=outer-loop-nums=10 --param=issue-topn=4 --param=force-issue=1 --param=filter-kernels=0" } */
|
||||
+
|
||||
+
|
||||
+int val[100000];
|
||||
+int main(){
|
||||
+ for(int i=0;i<100000;i++){
|
||||
+ __builtin_prefetch_full(&val[i],0,2);
|
||||
+ val[i]=i+1;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler "PLDL2KEEP" } } */
|
||||
+
|
||||
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pldl2strm.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pldl2strm.c
|
||||
new file mode 100644
|
||||
index 000000000..e359ad178
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pldl2strm.c
|
||||
@@ -0,0 +1,15 @@
|
||||
+
|
||||
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
||||
+/* { dg-options "-O3 -march=armv8.2-a+sve -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param=outer-loop-nums=10 --param=issue-topn=4 --param=force-issue=1 --param=filter-kernels=0" } */
|
||||
+
|
||||
+
|
||||
+int val[100000];
|
||||
+int main(){
|
||||
+ for(int i=0;i<100000;i++){
|
||||
+ __builtin_prefetch_full(&val[i],0,3);
|
||||
+ val[i]=i+1;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler "PLDL2STRM" } } */
|
||||
+
|
||||
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pldl3keep.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pldl3keep.c
|
||||
new file mode 100644
|
||||
index 000000000..0a9dae090
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pldl3keep.c
|
||||
@@ -0,0 +1,15 @@
|
||||
+
|
||||
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
||||
+/* { dg-options "-O3 -march=armv8.2-a+sve -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param=outer-loop-nums=10 --param=issue-topn=4 --param=force-issue=1 --param=filter-kernels=0" } */
|
||||
+
|
||||
+
|
||||
+int val[100000];
|
||||
+int main(){
|
||||
+ for(int i=0;i<100000;i++){
|
||||
+ __builtin_prefetch_full(&val[i],0,4);
|
||||
+ val[i]=i+1;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler "PLDL3KEEP" } } */
|
||||
+
|
||||
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pldl3strm.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pldl3strm.c
|
||||
new file mode 100644
|
||||
index 000000000..58db40ba1
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pldl3strm.c
|
||||
@@ -0,0 +1,15 @@
|
||||
+
|
||||
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
||||
+/* { dg-options "-O3 -march=armv8.2-a+sve -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param=outer-loop-nums=10 --param=issue-topn=4 --param=force-issue=1 --param=filter-kernels=0" } */
|
||||
+
|
||||
+
|
||||
+int val[100000];
|
||||
+int main(){
|
||||
+ for(int i=0;i<100000;i++){
|
||||
+ __builtin_prefetch_full(&val[i],0,5);
|
||||
+ val[i]=i+1;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler "PLDL3STRM" } } */
|
||||
+
|
||||
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pldl4keep.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pldl4keep.c
|
||||
new file mode 100644
|
||||
index 000000000..6f6b7bbd4
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pldl4keep.c
|
||||
@@ -0,0 +1,15 @@
|
||||
+
|
||||
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
||||
+/* { dg-options "-O3 -march=armv8.2-a+sve -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param=outer-loop-nums=10 --param=issue-topn=4 --param=force-issue=1 --param=filter-kernels=0" } */
|
||||
+
|
||||
+
|
||||
+int val[100000];
|
||||
+int main(){
|
||||
+ for(int i=0;i<100000;i++){
|
||||
+ __builtin_prefetch_full(&val[i],0,6);
|
||||
+ val[i]=i+1;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler "PLDL4KEEP" } } */
|
||||
+
|
||||
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pldl4strm.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pldl4strm.c
|
||||
new file mode 100644
|
||||
index 000000000..b69b4a5e6
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pldl4strm.c
|
||||
@@ -0,0 +1,15 @@
|
||||
+
|
||||
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
||||
+/* { dg-options "-O3 -march=armv8.2-a+sve -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param=outer-loop-nums=10 --param=issue-topn=4 --param=force-issue=1 --param=filter-kernels=0" } */
|
||||
+
|
||||
+
|
||||
+int val[100000];
|
||||
+int main(){
|
||||
+ for(int i=0;i<100000;i++){
|
||||
+ __builtin_prefetch_full(&val[i],0,7);
|
||||
+ val[i]=i+1;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler "PLDL4STRM" } } */
|
||||
+
|
||||
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pstl1keep.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pstl1keep.c
|
||||
new file mode 100644
|
||||
index 000000000..f5a474eb5
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pstl1keep.c
|
||||
@@ -0,0 +1,15 @@
|
||||
+
|
||||
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
||||
+/* { dg-options "-O3 -march=armv8.2-a+sve -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param=outer-loop-nums=10 --param=issue-topn=4 --param=force-issue=1 --param=filter-kernels=0" } */
|
||||
+
|
||||
+
|
||||
+int val[100000];
|
||||
+int main(){
|
||||
+ for(int i=0;i<100000;i++){
|
||||
+ __builtin_prefetch_full(&val[i],1,0);
|
||||
+ val[i]=i+1;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler "PSTL1KEEP" } } */
|
||||
+
|
||||
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pstl1strm.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pstl1strm.c
|
||||
new file mode 100644
|
||||
index 000000000..6798824a9
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pstl1strm.c
|
||||
@@ -0,0 +1,15 @@
|
||||
+
|
||||
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
||||
+/* { dg-options "-O3 -march=armv8.2-a+sve -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param=outer-loop-nums=10 --param=issue-topn=4 --param=force-issue=1 --param=filter-kernels=0" } */
|
||||
+
|
||||
+
|
||||
+int val[100000];
|
||||
+int main(){
|
||||
+ for(int i=0;i<100000;i++){
|
||||
+ __builtin_prefetch_full(&val[i],1,1);
|
||||
+ val[i]=i+1;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler "PSTL1STRM" } } */
|
||||
+
|
||||
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pstl2keep.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pstl2keep.c
|
||||
new file mode 100644
|
||||
index 000000000..c19fcc830
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pstl2keep.c
|
||||
@@ -0,0 +1,15 @@
|
||||
+
|
||||
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
||||
+/* { dg-options "-O3 -march=armv8.2-a+sve -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param=outer-loop-nums=10 --param=issue-topn=4 --param=force-issue=1 --param=filter-kernels=0" } */
|
||||
+
|
||||
+
|
||||
+int val[100000];
|
||||
+int main(){
|
||||
+ for(int i=0;i<100000;i++){
|
||||
+ __builtin_prefetch_full(&val[i],1,2);
|
||||
+ val[i]=i+1;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler "PSTL2KEEP" } } */
|
||||
+
|
||||
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pstl2strm.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pstl2strm.c
|
||||
new file mode 100644
|
||||
index 000000000..dde160a28
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pstl2strm.c
|
||||
@@ -0,0 +1,15 @@
|
||||
+
|
||||
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
||||
+/* { dg-options "-O3 -march=armv8.2-a+sve -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param=outer-loop-nums=10 --param=issue-topn=4 --param=force-issue=1 --param=filter-kernels=0" } */
|
||||
+
|
||||
+
|
||||
+int val[100000];
|
||||
+int main(){
|
||||
+ for(int i=0;i<100000;i++){
|
||||
+ __builtin_prefetch_full(&val[i],1,3);
|
||||
+ val[i]=i+1;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler "PSTL2STRM" } } */
|
||||
+
|
||||
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pstl3keep.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pstl3keep.c
|
||||
new file mode 100644
|
||||
index 000000000..fa698243d
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pstl3keep.c
|
||||
@@ -0,0 +1,15 @@
|
||||
+
|
||||
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
||||
+/* { dg-options "-O3 -march=armv8.2-a+sve -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param=outer-loop-nums=10 --param=issue-topn=4 --param=force-issue=1 --param=filter-kernels=0" } */
|
||||
+
|
||||
+
|
||||
+int val[100000];
|
||||
+int main(){
|
||||
+ for(int i=0;i<100000;i++){
|
||||
+ __builtin_prefetch_full(&val[i],1,4);
|
||||
+ val[i]=i+1;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler "PSTL3KEEP" } } */
|
||||
+
|
||||
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pstl3strm.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pstl3strm.c
|
||||
new file mode 100644
|
||||
index 000000000..653f7786e
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pstl3strm.c
|
||||
@@ -0,0 +1,15 @@
|
||||
+
|
||||
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
||||
+/* { dg-options "-O3 -march=armv8.2-a+sve -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param=outer-loop-nums=10 --param=issue-topn=4 --param=force-issue=1 --param=filter-kernels=0" } */
|
||||
+
|
||||
+
|
||||
+int val[100000];
|
||||
+int main(){
|
||||
+ for(int i=0;i<100000;i++){
|
||||
+ __builtin_prefetch_full(&val[i],1,5);
|
||||
+ val[i]=i+1;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler "PSTL3STRM" } } */
|
||||
+
|
||||
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pstl4keep.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pstl4keep.c
|
||||
new file mode 100644
|
||||
index 000000000..16a3b6552
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pstl4keep.c
|
||||
@@ -0,0 +1,15 @@
|
||||
+
|
||||
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
||||
+/* { dg-options "-O3 -march=armv8.2-a+sve -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param=outer-loop-nums=10 --param=issue-topn=4 --param=force-issue=1 --param=filter-kernels=0" } */
|
||||
+
|
||||
+
|
||||
+int val[100000];
|
||||
+int main(){
|
||||
+ for(int i=0;i<100000;i++){
|
||||
+ __builtin_prefetch_full(&val[i],1,6);
|
||||
+ val[i]=i+1;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler "PSTL4KEEP" } } */
|
||||
+
|
||||
diff --git a/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pstl4strm.c b/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pstl4strm.c
|
||||
new file mode 100644
|
||||
index 000000000..60d671bf5
|
||||
--- /dev/null
|
||||
+++ b/gcc/testsuite/gcc.dg/llc-allocate/llc-prefetch-full-pstl4strm.c
|
||||
@@ -0,0 +1,15 @@
|
||||
+
|
||||
+/* { dg-do compile { target { aarch64*-*-linux* } } } */
|
||||
+/* { dg-options "-O3 -march=armv8.2-a+sve -static -fllc-allocate -fdump-tree-llc_allocate-details-lineno --param=outer-loop-nums=10 --param=issue-topn=4 --param=force-issue=1 --param=filter-kernels=0" } */
|
||||
+
|
||||
+
|
||||
+int val[100000];
|
||||
+int main(){
|
||||
+ for(int i=0;i<100000;i++){
|
||||
+ __builtin_prefetch_full(&val[i],1,7);
|
||||
+ val[i]=i+1;
|
||||
+ }
|
||||
+}
|
||||
+
|
||||
+/* { dg-final { scan-assembler "PSTL4STRM" } } */
|
||||
+
|
||||
diff --git a/gcc/tree-ssa-llc-allocate.c b/gcc/tree-ssa-llc-allocate.c
|
||||
index 107d5da26..75501f41c 100644
|
||||
--- a/gcc/tree-ssa-llc-allocate.c
|
||||
+++ b/gcc/tree-ssa-llc-allocate.c
|
||||
@@ -3271,8 +3271,19 @@ issue_mask_prefetch (gimple *stmt)
|
||||
target = gimple_call_arg (stmt, 3);
|
||||
else if (gimple_call_internal_fn (stmt) == IFN_MASK_LOAD)
|
||||
target = gimple_call_lhs (stmt);
|
||||
- /* 4: PLDL3KEEP. */
|
||||
- tree prfop = build_int_cst (TREE_TYPE (integer_zero_node), 4);
|
||||
+ tree prfop = NULL_TREE;
|
||||
+ if (param_llc_level == 3)
|
||||
+ /* For simulation, 4: PLDL3KEEP. */
|
||||
+ prfop = build_int_cst (TREE_TYPE (integer_zero_node), 4);
|
||||
+ else if (param_llc_level == 4)
|
||||
+ /* 6: PLDL4KEEP. */
|
||||
+ prfop = build_int_cst (TREE_TYPE (integer_zero_node), 6);
|
||||
+ else
|
||||
+ {
|
||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
+ fprintf (dump_file, "LLC cache levels are illegal.\n");
|
||||
+ return;
|
||||
+ }
|
||||
|
||||
/* add offset. */
|
||||
gimple_stmt_iterator si = gsi_for_stmt (stmt);
|
||||
@@ -3310,9 +3321,19 @@ issue_mask_gather_prefetch (gimple *stmt)
|
||||
tree scale = gimple_call_arg (stmt, 2);
|
||||
tree zero = gimple_call_arg (stmt, 3);
|
||||
tree final_mask = gimple_call_arg (stmt, 4);
|
||||
- tree prfop = build_int_cst (TREE_TYPE (integer_zero_node), 4);
|
||||
- tree target = gimple_call_lhs (stmt);
|
||||
+ tree prfop = NULL_TREE;
|
||||
+ if (param_llc_level == 3) // for simulation
|
||||
+ prfop = build_int_cst (TREE_TYPE (integer_zero_node), 4); // 4: PLDL3KEEP
|
||||
+ else if (param_llc_level == 4)
|
||||
+ prfop = build_int_cst (TREE_TYPE (integer_zero_node), 6); // 6: PLDL4KEEP
|
||||
+ else
|
||||
+ {
|
||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
+ fprintf (dump_file, "LLC cache levels are illegal.\n");
|
||||
+ return;
|
||||
+ }
|
||||
|
||||
+ tree target = gimple_call_lhs (stmt);
|
||||
/* add offset. */
|
||||
gimple_stmt_iterator si = gsi_for_stmt (stmt);
|
||||
if (target == NULL_TREE)
|
||||
@@ -3373,8 +3394,27 @@ issue_builtin_prefetch (data_ref &mem_ref)
|
||||
/* __builtin_prefetch (_68, 0, 1);
|
||||
1st param: *addr, 2nd param: write/read (1/0), 3rd param: temporal locality
|
||||
(high means strong locality) */
|
||||
- gcall *call = gimple_build_call (builtin_decl_explicit (BUILT_IN_PREFETCH), 3,
|
||||
- addr, integer_zero_node, integer_one_node);
|
||||
+ gcall *call = NULL;
|
||||
+ if (param_llc_level == 3)
|
||||
+ {
|
||||
+ /* For simulation.
|
||||
+ BUILT_IN_PREFETCH (addr, rw, locality). */
|
||||
+ call = gimple_build_call (builtin_decl_explicit (BUILT_IN_PREFETCH),
|
||||
+ 3, addr, integer_zero_node, integer_one_node);
|
||||
+ }
|
||||
+ else if (param_llc_level == 4)
|
||||
+ {
|
||||
+ tree prfop = build_int_cst (TREE_TYPE (integer_zero_node), 6);
|
||||
+ call = gimple_build_call (builtin_decl_explicit (BUILT_IN_PREFETCH_FULL),
|
||||
+ 3, addr, integer_zero_node, prfop);
|
||||
+ }
|
||||
+ else
|
||||
+ {
|
||||
+ if (dump_file && (dump_flags & TDF_DETAILS))
|
||||
+ fprintf (dump_file, "LLC cache levels are illegal.\n");
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
gsi_insert_after (&si, call, GSI_SAME_STMT);
|
||||
update_ssa (TODO_update_ssa_only_virtuals);
|
||||
}
|
||||
@@ -3724,7 +3764,7 @@ issue_llc_hint (std::vector<ref_group> &ref_groups,
|
||||
fprintf (dump_file, "issue_llc_hint:\n");
|
||||
|
||||
/* 1) If the issue-topn and force-issue options are available, top N var is
|
||||
- forcibly allocated and no runtime branch is generated.
|
||||
+ forcibly allocated, then no runtime branch is generated.
|
||||
2) If the issue-topn option is available and the size of top N var is
|
||||
statically known, top N is statically allocated and no runtime branch
|
||||
is generated.
|
||||
--
|
||||
2.33.0
|
||||
|
||||
16
gcc.spec
16
gcc.spec
@@ -61,7 +61,7 @@
|
||||
Summary: Various compilers (C, C++, Objective-C, ...)
|
||||
Name: gcc
|
||||
Version: %{gcc_version}
|
||||
Release: 54
|
||||
Release: 55
|
||||
License: GPLv3+ and GPLv3+ with exceptions and GPLv2+ with exceptions and LGPLv2+ and BSD
|
||||
URL: https://gcc.gnu.org
|
||||
|
||||
@@ -297,6 +297,10 @@ Patch186: 0186-Loop-CRC-Solving-the-problem-of-insufficient-CRC-tab.patch
|
||||
Patch187: 0187-Add-IPA-prefetch-test.patch
|
||||
Patch188: 0188-Fix-fails-in-ICP-for-src-openEuler-gcc-I90P7M-I91CZ8.patch
|
||||
Patch189: 0189-Add-hip11-CPU-pipeline-scheduling.patch
|
||||
Patch190: 0190-sync-LLC-difference-between-source-and-patch-code.patch
|
||||
Patch191: 0191-LLC-Allocation-Bugfix-Fix-ambiguous-reference-due-to.patch
|
||||
Patch192: 0192-Add-feedback-directed-filter_and_sort_kernels-in-Pha.patch
|
||||
Patch193: 0193-Add-prefetch-level-parameter-to-specify-the-last-lev.patch
|
||||
|
||||
%global gcc_target_platform %{_arch}-linux-gnu
|
||||
|
||||
@@ -939,6 +943,10 @@ not stable, so plugins must be rebuilt any time GCC is updated.
|
||||
%patch187 -p1
|
||||
%patch188 -p1
|
||||
%patch189 -p1
|
||||
%patch190 -p1
|
||||
%patch191 -p1
|
||||
%patch192 -p1
|
||||
%patch193 -p1
|
||||
|
||||
%build
|
||||
|
||||
@@ -2973,6 +2981,12 @@ end
|
||||
%doc rpm.doc/changelogs/libcc1/ChangeLog*
|
||||
|
||||
%changelog
|
||||
* Wed May 29 2024 yzyssdd <yuzeyang4@huawei.com> - 10.3.1-55
|
||||
- Type:SPEC
|
||||
- ID:NA
|
||||
- SUG:NA
|
||||
- DESC: Add feedback llc allocate and support llc prefetch instruction
|
||||
|
||||
* Thu Mar 14 2024 chenyuanfeng <yuanfeng.chen@shingroup.cn> - 10.3.1-54
|
||||
- Type: Spec
|
||||
- ID:NA
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user