gcc/0204-Try-to-use-AI-model-to-guide-optimization.patch
2024-06-22 15:57:01 +08:00

679 lines
21 KiB
Diff
Raw Permalink Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

From 25014ea924bfe3659e88195636ec08f87dd72c07 Mon Sep 17 00:00:00 2001
From: Mingchuan Wu <wumingchuan1992@foxmail.com>
Date: Fri, 21 Jun 2024 20:26:04 +0800
Subject: [PATCH] Try to use AI model to guide optimization.
---
gcc/Makefile.in | 1 +
gcc/common.opt | 7 ++
gcc/config/aarch64/aarch64.c | 130 ++++++++++++++++++++
gcc/ipa-hardware-detection.c | 228 +++++++++++++++++++++++++++++++++++
gcc/opts-common.c | 154 +++++++++++++++++++++++
gcc/opts.c | 11 ++
gcc/passes.def | 1 +
gcc/timevar.def | 1 +
gcc/tree-pass.h | 2 +
9 files changed, 535 insertions(+)
create mode 100644 gcc/ipa-hardware-detection.c
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index aed321d27..f21bc5f9a 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1398,6 +1398,7 @@ OBJS = \
inchash.o \
incpath.o \
init-regs.o \
+ ipa-hardware-detection.o \
internal-fn.o \
ipa-struct-reorg/ipa-struct-reorg.o \
ipa-cp.o \
diff --git a/gcc/common.opt b/gcc/common.opt
index aad6fb281..9b32ea50a 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -188,6 +188,9 @@ const char *main_input_basename
Variable
int main_input_baselength
+Variable
+bool optimize_machine
+
; Which options have been printed by --help.
Variable
char *help_printed
@@ -467,6 +470,10 @@ Ofast
Common Optimization
Optimize for speed disregarding exact standards compliance.
+Om
+Common Optimization
+Optimize for radical optimization for machines.
+
Og
Common Optimization
Optimize for debugging experience rather than speed or size.
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 2117326ba..e67e77e6a 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -14416,6 +14416,135 @@ aarch64_sve_adjust_stmt_cost (vect_cost_for_stmt kind,
return stmt_cost;
}
+/* Check whether in C language or LTO with only C language. */
+extern bool lang_c_p (void);
+
+static void /* Store the option and param values used for C input into OPTS; any earlier values of these fields are overwritten.  */
+override_C_optimize_options (struct gcc_options *opts)
+{
+ opts->x_flag_ipa_reorder_fields = 1;
+ opts->x_flag_ipa_struct_reorg = 6;
+ opts->x_struct_layout_optimize_level = 6;
+ opts->x_flag_gnu89_inline = 1;
+ opts->x_flag_ccmp2 = 1;
+ opts->x_flag_array_widen_compare = 1;
+ opts->x_flag_convert_minmax = 1; /* Also set by the C++ override set.  */
+ opts->x_flag_tree_slp_transpose_vectorize = 1;
+ opts->x_param_max_inline_insns_auto = 64; /* The C++ set uses 128.  */
+ opts->x_param_inline_unit_growth = 96; /* The C++ set uses 256.  */
+ opts->x_flag_cmlt_arith = 1; /* Also set by the C++ override set.  */
+}
+
+/* Check whether in CPP language or LTO with only CPP language.
+   Returns true for the C++ front end, and for the LTO front end when
+   every recorded translation unit has a "GNU C++" language string.  */
+static bool
+lang_cpp_p (void)
+{
+  const char *language_string = lang_hooks.name;
+  if (!language_string)
+    return false;
+
+  if (lang_GNU_CXX ())
+    return true;
+
+  /* Only the LTO front end ("GNU GIMPLE") needs the per-unit check.  */
+  if (strcmp (language_string, "GNU GIMPLE") != 0)
+    return false;
+
+  unsigned i = 0;
+  tree t = NULL_TREE;
+  FOR_EACH_VEC_SAFE_ELT (all_translation_units, i, t)
+    {
+      /* Test the unit's own language.  lang_hooks.name is "GNU GIMPLE"
+         on this path, so comparing it would reject every unit.  */
+      language_string = TRANSLATION_UNIT_LANGUAGE (t);
+      if (language_string == NULL
+          || strncmp (language_string, "GNU C++", 7) != 0)
+        return false;
+    }
+  return true;
+}
+
+static void /* Store the option and param values used for C++ input into OPTS; any earlier values of these fields are overwritten.  */
+override_CPP_optimize_options (struct gcc_options *opts)
+{
+ opts->x_flag_finite_loops = 1;
+ opts->x_flag_omit_frame_pointer = 1;
+ opts->x_flag_sized_deallocation = 0; /* The only flag this set clears.  */
+ opts->x_flag_loop_elim = 1;
+ opts->x_flag_convert_minmax = 1;
+ opts->x_param_early_inlining_insns = 256;
+ opts->x_param_max_inline_insns_auto = 128; /* The C set uses 64.  */
+ opts->x_param_inline_unit_growth = 256; /* The C set uses 96.  */
+ opts->x_flag_cmlt_arith = 1;
+}
+
+static void /* Store the language-independent values into OPTS; reset_machine_option calls this before choosing a language-specific set.  */
+override_optimize_options_1 (struct gcc_options *opts)
+{
+ opts->x_flag_split_ldp_stp = 1;
+ opts->x_flag_if_conversion_gimple = 1;
+ opts->x_param_tree_forwprop_perm = 1;
+ opts->x_flag_ifcvt_allow_complicated_cmps = 1;
+ opts->x_param_ifcvt_allow_register_renaming = 2;
+ opts->x_param_max_rtl_if_conversion_unpredictable_cost = 48; /* Same limit as the predictable case below.  */
+ opts->x_param_max_rtl_if_conversion_predictable_cost = 48;
+}
+
+static void /* Store the option and param values used for Fortran input into OPTS; any earlier values of these fields are overwritten.  */
+override_Fortran_optimize_options (struct gcc_options *opts)
+{
+ opts->x_flag_unroll_loops = 1;
+ opts->x_flag_unconstrained_commons = 1;
+ opts->x_param_ipa_cp_eval_threshold = 1;
+ opts->x_param_ipa_cp_unit_growth = 80;
+ opts->x_param_ipa_cp_max_recursive_depth = 8;
+ opts->x_param_large_unit_insns = 30000;
+ opts->x_flag_ira_loop_pressure = 1;
+ opts->x_flag_inline_functions_called_once = 0; /* The only flag this set clears.  */
+ opts->x_flag_ira_algorithm = IRA_ALGORITHM_PRIORITY; /* An enumerator rather than an on/off value.  */
+ opts->x_flag_delayed_branch = 1;
+ opts->x_flag_gcse_las = 1;
+ opts->x_flag_gcse_sm = 1;
+ opts->x_flag_ipa_pta = 1;
+ opts->x_flag_reorder_blocks_and_partition = 1;
+ opts->x_flag_reorder_blocks = 1;
+ opts->x_flag_crypto_accel_aes = 1;
+ opts->x_param_flexible_seg_len = 1;
+}
+
+/* Reset the optimize option.
+   Once the model result is known (AI_INFER_LEVEL is present in the
+   environment), install the more appropriate option values in OPTS.
+   Does nothing unless -Om is in effect and the tune string contains
+   "hip09".  */
+static void
+reset_machine_option (struct gcc_options *opts)
+{
+  if (!opts->x_optimize_machine)
+    return;
+
+  if (strstr (opts->x_aarch64_tune_string, "hip09") == NULL)
+    return;
+
+  /* Only the variable's presence matters; its value is not read.  */
+  if (getenv ("AI_INFER_LEVEL") == NULL)
+    return;
+
+  /* Language-independent values first, then at most one
+     language-specific set.  */
+  override_optimize_options_1 (opts);
+
+  if (lang_c_p ())
+    override_C_optimize_options (opts);
+  else if (lang_cpp_p ())
+    override_CPP_optimize_options (opts);
+  else if (lang_GNU_Fortran ())
+    override_Fortran_optimize_options (opts);
+
+}
+
/* Implement targetm.vectorize.add_stmt_cost. */
static unsigned
aarch64_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
@@ -15060,6 +15189,7 @@ aarch64_override_options_internal (struct gcc_options *opts)
if (opts->x_aarch64_tune_string == NULL)
opts->x_aarch64_tune_string = selected_tune->name;
+ reset_machine_option (opts);
aarch64_override_options_after_change_1 (opts);
}
diff --git a/gcc/ipa-hardware-detection.c b/gcc/ipa-hardware-detection.c
new file mode 100644
index 000000000..f127ebe2c
--- /dev/null
+++ b/gcc/ipa-hardware-detection.c
@@ -0,0 +1,228 @@
+/* Hardware Detection.
+ Copyright (C) 2022-2022 Free Software Foundation, Inc.
+This file is part of GCC.
+GCC is free software; you can redistribute it and/or modify it
+under the terms of the GNU General Public License as published by the
+Free Software Foundation; either version 3, or (at your option) any
+later version.
+GCC is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3.  If not see
+<http://www.gnu.org/licenses/>. */
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "target.h"
+#include "tree.h"
+#include "gimple.h"
+#include "tree-pass.h"
+#include "gimple-ssa.h"
+#include "tree-pretty-print.h"
+#include "fold-const.h"
+#include "gimplify.h"
+#include "gimple-iterator.h"
+#include "tree-ssa-loop-manip.h"
+#include "tree-ssa-loop.h"
+#include "ssa.h"
+#include "tree-into-ssa.h"
+#include "cfganal.h"
+#include "cfgloop.h"
+#include "gimple-pretty-print.h"
+#include "tree-cfg.h"
+#include "cgraph.h"
+#include "print-tree.h"
+#include "cfghooks.h"
+#include "gimple-fold.h"
+
+namespace {
+
+static basic_block /* Create a new block after LAST_BB whose only statement is a call to abort (); returns the block.  */
+create_abort_bb (basic_block last_bb)
+{
+  basic_block abort_bb = create_empty_bb (last_bb);
+  if (last_bb->loop_father != NULL)
+    {
+      add_bb_to_loop (abort_bb, last_bb->loop_father);
+      loops_state_set (LOOPS_NEED_FIXUP);
+    }
+  gimple *abort_call
+    = gimple_build_call (builtin_decl_implicit (BUILT_IN_ABORT), 0);
+  gimple_stmt_iterator gsi = gsi_last_bb (abort_bb);
+  gsi_insert_after (&gsi, abort_call, GSI_NEW_STMT);
+  return abort_bb;
+}
+
+static basic_block /* Create a block after LAST_BB that ends in a condition testing PART_BASE against the accepted range; returns the block, whose outgoing edges are left for the caller to add.  */
+create_part_bb (basic_block last_bb, tree part_base)
+{
+ basic_block bb = create_empty_bb (last_bb);
+ if (last_bb->loop_father != NULL)
+ {
+ add_bb_to_loop (bb, last_bb->loop_father);
+ loops_state_set (LOOPS_NEED_FIXUP);
+ }
+ gimple_stmt_iterator gsi = gsi_last_bb (bb);
+ gsi_insert_after (&gsi, gimple_build_nop (), GSI_NEW_STMT); /* Temporary anchor: the condition is inserted before it and it is removed below.  */
+ /* 4294963967 is 2^32 - 3329.  Assuming unsigned_type_node is 32 bits wide (TODO confirm), the addition wraps and acts as PART_BASE - 3329, so the "<= 2" test below accepts exactly 3329..3331 (0xd01..0xd03).  */
+ tree part_cond = gimplify_build2 (
+ &gsi, PLUS_EXPR, unsigned_type_node, part_base,
+ build_int_cst (unsigned_type_node, 4294963967));
+ gcond *cond = gimple_build_cond (LE_EXPR, part_cond,
+ build_int_cst (unsigned_type_node, 2),
+ NULL_TREE, NULL_TREE);
+ gimple_set_location (cond, input_location);
+ gsi_insert_before (&gsi, cond, GSI_SAME_STMT);
+ gsi_remove (&gsi, true); /* Drop the nop anchor, leaving the condition last in the block.  */
+ return bb;
+}
+
+static void /* Insert a CPU check in front of cfun's first block: read MIDR_EL1, test the implementer and part-number fields, and route mismatches through a block that calls abort ().  */
+create_detection_bb ()
+{
+ edge old_e = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)); /* Edge from ENTRY to the original first block.  */
+ basic_block ret_bb = old_e->dest; /* Original first block; the checks continue here on success.  */
+
+ basic_block detection_bb = create_empty_bb (ENTRY_BLOCK_PTR_FOR_FN (cfun));
+ if (ENTRY_BLOCK_PTR_FOR_FN (cfun)->loop_father != NULL)
+ {
+ add_bb_to_loop (detection_bb, ENTRY_BLOCK_PTR_FOR_FN (cfun)->loop_father);
+ loops_state_set (LOOPS_NEED_FIXUP);
+ }
+ tree cpuid_decl = build_decl (input_location, VAR_DECL,
+ get_identifier ("cpuid"), unsigned_type_node); /* Local variable that receives the register value.  */
+ add_local_decl (cfun, cpuid_decl);
+
+ gimple_stmt_iterator gsi = gsi_last_bb (detection_bb);
+ vec<tree, va_gc> *outputs = NULL;
+ tree purpose = build_string (strlen ("=r"), "=r"); /* Output-only register constraint.  */
+ tree output = build_tree_list (
+ build_tree_list (NULL_TREE, purpose), cpuid_decl);
+ vec_safe_push (outputs, output);
+ gasm *asm_stmt = gimple_build_asm_vec (
+ "mrs %0, MIDR_EL1", NULL, outputs, NULL, NULL); /* Only an output list is supplied; the other operand lists are NULL.  */
+ gsi_insert_after (&gsi, asm_stmt, GSI_NEW_STMT);
+ gsi_insert_after (&gsi, gimple_build_nop (), GSI_NEW_STMT); /* Temporary anchor: the condition is inserted before it and it is removed below.  */
+
+ tree implementer = gimplify_build2 (
+ &gsi, RSHIFT_EXPR, unsigned_type_node, cpuid_decl,
+ build_int_cst (unsigned_type_node, 24)); /* implementer = cpuid >> 24.  */
+ tree part_base = gimplify_build2 (
+ &gsi, RSHIFT_EXPR, unsigned_type_node, cpuid_decl,
+ build_int_cst (unsigned_type_node, 4));
+ tree part = gimplify_build2 (
+ &gsi, BIT_AND_EXPR, unsigned_type_node, part_base,
+ build_int_cst (unsigned_type_node, 4095)); /* part = (cpuid >> 4) & 0xfff, a 12-bit field.  */
+ gcond *implementer_cond = gimple_build_cond (
+ EQ_EXPR, implementer,
+ build_int_cst (unsigned_type_node, 72), /* 72 == 0x48; NOTE(review): presumably the HiSilicon implementer code - confirm.  */
+ NULL_TREE, NULL_TREE);
+ gimple_set_location (implementer_cond, input_location);
+ gsi_insert_before (&gsi, implementer_cond, GSI_SAME_STMT);
+ gsi_remove (&gsi, true); /* Drop the nop anchor, leaving the condition last in the block.  */
+
+ basic_block part_bb = create_part_bb (detection_bb, part);
+ basic_block abort_bb = create_abort_bb (part_bb);
+
+ remove_edge_raw (old_e); /* ENTRY is re-pointed at detection_bb on the next line.  */
+ make_single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun),
+ detection_bb, EDGE_FALLTHRU);
+ edge etrue = make_edge (detection_bb, part_bb, EDGE_TRUE_VALUE); /* Implementer matches: go on to the part-number test.  */
+ etrue->probability = profile_probability::likely ();
+ edge efalse = make_edge (detection_bb, abort_bb, EDGE_FALSE_VALUE);
+ efalse->probability = profile_probability::unlikely ();
+ edge part_true = make_edge (part_bb, ret_bb, EDGE_TRUE_VALUE); /* Both tests passed: continue with the original code.  */
+ part_true->probability = profile_probability::likely ();
+ edge part_false = make_edge (part_bb, abort_bb, EDGE_FALSE_VALUE);
+ part_false->probability = profile_probability::unlikely ();
+ make_single_succ_edge (abort_bb, ret_bb, EDGE_FALLTHRU); /* abort_bb is also given a successor edge to the original first block.  */
+ if (dom_info_available_p (CDI_DOMINATORS))
+ {
+ set_immediate_dominator (CDI_DOMINATORS, part_bb, detection_bb);
+ set_immediate_dominator (CDI_DOMINATORS, ret_bb, detection_bb); /* ret_bb is reachable from both part_bb and abort_bb.  */
+ set_immediate_dominator (CDI_DOMINATORS, abort_bb, detection_bb);
+ }
+}
+
+const pass_data pass_data_ipa_hardware_detection = /* Static description handed to the simple_ipa_opt_pass constructor.  */
+{
+ SIMPLE_IPA_PASS, /* type */
+ "hardware_detection", /* name */
+ OPTGROUP_NONE, /* optinfo_flags */
+ TV_IPA_HARDWARE_DETECTION, /* tv_id */
+ (PROP_cfg | PROP_ssa), /* properties_required */
+ 0, /* properties_provided */
+ 0, /* properties_destroyed */
+ 0, /* todo_flags_start */
+ (TODO_update_ssa | TODO_verify_all) /* todo_flags_finish */
+};
+
+class pass_ipa_hardware_detection : public simple_ipa_opt_pass /* Pass object built from pass_data_ipa_hardware_detection.  */
+{
+public:
+ pass_ipa_hardware_detection (gcc::context *ctxt)
+ : simple_ipa_opt_pass (pass_data_ipa_hardware_detection, ctxt)
+ {}
+
+ virtual bool gate (function *); /* Decides whether the pass runs.  */
+ virtual unsigned int execute (function *); /* Adds the CPU check to the function named main.  */
+}; // class pass_ipa_hardware_detection
+
+bool
+pass_ipa_hardware_detection::gate (function *)
+{
+  /* Requires AI_INFER_LEVEL in the environment, -Om, and an LTO or
+     whole-program compilation.  */
+  if (!getenv ("AI_INFER_LEVEL") || !(optimize_machine > 0))
+    return false;
+  return in_lto_p || flag_whole_program;
+}
+
+unsigned int
+pass_ipa_hardware_detection::execute (function *)
+{
+  /* Visit every function symbol; only a defined, non-inlined function
+     with a GIMPLE body and the program's main name is instrumented.  */
+  cgraph_node *node;
+  FOR_EACH_FUNCTION (node)
+    {
+      if (!node->real_symbol_p () || !node->definition)
+        continue;
+
+      if (!node->has_gimple_body_p () || node->inlined_to)
+        continue;
+
+      /* get_body () runs before the function struct is looked up.  */
+      node->get_body ();
+      function *fn = DECL_STRUCT_FUNCTION (node->decl);
+      if (fn == NULL)
+        continue;
+
+      tree name = DECL_NAME (node->decl);
+      if (!name || !MAIN_NAME_P (name))
+        continue;
+
+      /* create_detection_bb operates on cfun, so make FN current
+         around the call.  */
+      push_cfun (fn);
+      calculate_dominance_info (CDI_DOMINATORS);
+      create_detection_bb ();
+      cgraph_edge::rebuild_edges ();
+      free_dominance_info (CDI_DOMINATORS);
+      pop_cfun ();
+    }
+
+  /* The result is always 0.  */
+  return 0;
+}
+} // anon namespace
+
+simple_ipa_opt_pass * /* Return a newly allocated pass_ipa_hardware_detection for CTXT.  */
+make_pass_ipa_hardware_detection (gcc::context *ctxt)
+{
+ return new pass_ipa_hardware_detection (ctxt);
+}
diff --git a/gcc/opts-common.c b/gcc/opts-common.c
index bf82b05c8..52e28e2dc 100644
--- a/gcc/opts-common.c
+++ b/gcc/opts-common.c
@@ -926,6 +926,158 @@ opts_concat (const char *first, ...)
return newstr;
}
+typedef int64_t (*run_ai_model_func)(int, const char **, /* Signature of the "runONNXModelOptimizer" entry point looked up with dlsym.  */
+ const char *, int, int64_t *);
+#define PTR_UNION_TYPE(TOTYPE) union { void *_q; TOTYPE _nq; } /* Union that lets a void * be read back as TOTYPE without a cast.  */
+#define PTR_UNION_AS_VOID_PTR(NAME) (NAME._q) /* The void * member, used to store the dlsym result.  */
+#define PTR_UNION_AS_CAST_PTR(NAME) (NAME._nq) /* The TOTYPE member, used to read the pointer back.  */
+
+/* Run the AI4Compiler ONNX model and return its prediction.
+   ARGC/ARGV, MCPU_OPTION and the ARGC_HW entries of ARGV_HW are
+   passed unchanged to runONNXModelOptimizer in libONNXRunner.so.
+   Returns -1 when either shared library or the entry point cannot
+   be loaded.  When the prediction is 1, AI_INFER_LEVEL=1 is also
+   placed in the environment.  */
+static int64_t
+ai_infer_optimization (int argc, const char **argv,
+                       const char *mcpu_option,
+                       int argc_hw, int64_t *argv_hw)
+{
+  /* Load dependent AI-framework libraries.  */
+  const char *onnxruntime_lib_path = "libonnxruntime.so";
+  void *onnxruntime_lib_handle = dlopen (onnxruntime_lib_path,
+                                         RTLD_LAZY | RTLD_GLOBAL);
+  if (!onnxruntime_lib_handle)
+    return -1;
+
+  const char *ai4c_lib_path = "libONNXRunner.so";
+  void *ai4c_lib_handle = dlopen (ai4c_lib_path,
+                                  RTLD_LAZY | RTLD_GLOBAL);
+  if (!ai4c_lib_handle)
+    {
+      /* Do not leak the first library when the second is missing.  */
+      dlclose (onnxruntime_lib_handle);
+      return -1;
+    }
+
+  /* dlsym returns void *; the union converts it to the function
+     pointer type without a cast.  */
+  PTR_UNION_TYPE (run_ai_model_func) run_ai_model_func_union;
+  PTR_UNION_AS_VOID_PTR (run_ai_model_func_union)
+    = dlsym (ai4c_lib_handle, "runONNXModelOptimizer");
+  run_ai_model_func run_ai_model
+    = PTR_UNION_AS_CAST_PTR (run_ai_model_func_union);
+
+  /* Run AI4Compiler model.  A missing entry point yields the same
+     -1 result as a library that failed to load.  */
+  int64_t model_pred = -1;
+  if (run_ai_model)
+    model_pred = (*run_ai_model) (argc, argv, mcpu_option,
+                                  argc_hw, argv_hw);
+
+  /* Both handles are valid on every path that reaches this point,
+     so no NULL checks are needed.  */
+  dlclose (ai4c_lib_handle);
+  dlclose (onnxruntime_lib_handle);
+
+  if (model_pred == 1)
+    {
+      /* putenv keeps the pointer it is given, so the string must be
+         writable and outlive this call.  */
+      static char ai_infer_env[] = "AI_INFER_LEVEL=1";
+      putenv (ai_infer_env);
+    }
+
+  return model_pred;
+}
+
+/* Append LTO options to OPT_ARRAY, chosen from the basename of ARGV[0]:
+   "-flto=8" and "-flto-partition=one" when it contains "gcc", or
+   "-flto=8" alone when it contains "g++" or "gfortran".
+   NUM_DECODED_OPTIONS is the number of entries already in OPT_ARRAY,
+   ARGC sizes the reallocation and LANG_MASK is forwarded to
+   decode_cmdline_option.  Returns the number of entries appended.  */
+static int
+handle_lto_option (unsigned int lang_mask,
+                   unsigned int num_decoded_options,
+                   unsigned int argc,
+                   const char **argv,
+                   struct cl_decoded_option *&opt_array)
+{
+  /* The name is only read, so search ARGV[0] in place instead of
+     working on a heap copy.  */
+  const char *slash = strrchr (argv[0], '/');
+  const char *lan = slash != NULL ? slash + 1 : argv[0];
+
+  const bool is_gcc = strstr (lan, "gcc") != NULL;
+  if (!is_gcc
+      && strstr (lan, "g++") == NULL
+      && strstr (lan, "gfortran") == NULL)
+    return 0;
+
+  /* Resize to ARGC plus the number of entries written below.  */
+  const int ret = is_gcc ? 2 : 1;
+  opt_array = XRESIZEVEC (struct cl_decoded_option, opt_array, argc + ret);
+
+  const char *lto_flag = "-flto=8";
+  decode_cmdline_option (&lto_flag, lang_mask,
+                         &opt_array[num_decoded_options]);
+  if (is_gcc)
+    {
+      const char *ltopartition_flag = "-flto-partition=one";
+      decode_cmdline_option (&ltopartition_flag, lang_mask,
+                             &opt_array[num_decoded_options + 1]);
+    }
+
+  return ret;
+}
+
+/* Decide whether the AI model should steer this compilation.
+   When ARGV contains both "-Om" and an argument containing
+   "mcpu=hip09", and AI_INFER_LEVEL is not yet in the environment, run
+   the model; if it predicts 1, append the LTO options to OPT_ARRAY.
+   Returns the number of entries appended to OPT_ARRAY (0 if none).  */
+static int
+handle_machine_option (unsigned int lang_mask,
+                       unsigned int num_decoded_options,
+                       unsigned int argc,
+                       const char **argv,
+                       struct cl_decoded_option *&opt_array)
+{
+  bool flag_Om = false;
+  bool flag_hip09 = false;
+  for (unsigned i = 1; i < argc; i++)
+    {
+      if (strcmp (argv[i], "-Om") == 0)
+        flag_Om = true;
+      if (strstr (argv[i], "mcpu=hip09") != NULL)
+        flag_hip09 = true;
+    }
+  if (!flag_hip09 || !flag_Om)
+    return 0;
+
+  /* Skip the model when AI_INFER_LEVEL is already set.  */
+  if (getenv ("AI_INFER_LEVEL") != NULL)
+    return 0;
+
+  /* The bound must be a constant: with a plain int this would be an
+     initialized variable-length array, which ISO C and C++ reject.  */
+  const int argc_hw = 6;
+  int64_t argv_hw[argc_hw] = {
+    global_options.x_param_simultaneous_prefetches,
+    global_options.x_param_l1_cache_size,
+    global_options.x_param_l1_cache_line_size,
+    global_options.x_param_l2_cache_size,
+    global_options.x_param_llc_capacity_per_core,
+    global_options.x_param_ipa_prefetch_distance_factor};
+
+  if (ai_infer_optimization (argc, argv, "hip09", argc_hw, argv_hw) != 1)
+    return 0;
+
+  return handle_lto_option (lang_mask, num_decoded_options,
+                            argc, argv, opt_array);
+}
+
/* Decode command-line options (ARGC and ARGV being the arguments of
main) into an array, setting *DECODED_OPTIONS to a pointer to that
array and *DECODED_OPTIONS_COUNT to the number of entries in the
@@ -987,6 +1139,8 @@ decode_cmdline_options_to_array (unsigned int argc, const char **argv,
num_decoded_options++;
}
+ num_decoded_options += handle_machine_option (lang_mask, num_decoded_options,
+ argc, argv, opt_array);
*decoded_options = opt_array;
*decoded_options_count = num_decoded_options;
prune_options (decoded_options, decoded_options_count, lang_mask);
diff --git a/gcc/opts.c b/gcc/opts.c
index c0ccd0853..dc61216c0 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -632,6 +632,15 @@ default_options_optimization (struct gcc_options *opts,
opts->x_optimize_debug = 1;
break;
+ case OPT_Om:
+ /* -Om adds flags to -O3 & -Ofast. */
+ opts->x_optimize_size = 0;
+ opts->x_optimize = 3;
+ opts->x_optimize_fast = 1;
+ opts->x_optimize_machine = true;
+ opts->x_optimize_debug = 0;
+ break;
+
case OPT_fopenacc:
if (opt->value)
openacc_mode = true;
@@ -2378,6 +2387,8 @@ common_handle_option (struct gcc_options *opts,
opts->x_flag_sanitize_coverage, value, true);
break;
+ case OPT_Om:
+ break;
case OPT_O:
case OPT_Os:
case OPT_Ofast:
diff --git a/gcc/passes.def b/gcc/passes.def
index b6006de22..8898b72fc 100644
--- a/gcc/passes.def
+++ b/gcc/passes.def
@@ -177,6 +177,7 @@ along with GCC; see the file COPYING3. If not see
compiled unit. */
INSERT_PASSES_AFTER (all_late_ipa_passes)
NEXT_PASS (pass_materialize_all_clones);
+ NEXT_PASS (pass_ipa_hardware_detection);
NEXT_PASS (pass_ipa_pta);
/* FIXME: this should a normal IP pass */
NEXT_PASS (pass_ipa_struct_reorg);
diff --git a/gcc/timevar.def b/gcc/timevar.def
index 929e9e1d3..66b21f166 100644
--- a/gcc/timevar.def
+++ b/gcc/timevar.def
@@ -81,6 +81,7 @@ DEFTIMEVAR (TV_IPA_CONSTANT_PROP , "ipa cp")
DEFTIMEVAR (TV_IPA_INLINING , "ipa inlining heuristics")
DEFTIMEVAR (TV_IPA_FNSPLIT , "ipa function splitting")
DEFTIMEVAR (TV_IPA_COMDATS , "ipa comdats")
+DEFTIMEVAR (TV_IPA_HARDWARE_DETECTION, "ipa detection")
DEFTIMEVAR (TV_IPA_PREFETCH , "ipa prefetch")
DEFTIMEVAR (TV_IPA_STRUCT_REORG , "ipa struct reorg optimization")
DEFTIMEVAR (TV_IPA_EXTEND_AUTO_PROFILE, "ipa extend auto profile")
diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h
index 4d952884d..d3a41d0d5 100644
--- a/gcc/tree-pass.h
+++ b/gcc/tree-pass.h
@@ -513,6 +513,8 @@ extern ipa_opt_pass_d *make_pass_ipa_odr (gcc::context *ctxt);
extern ipa_opt_pass_d *make_pass_ipa_reference (gcc::context *ctxt);
extern ipa_opt_pass_d *make_pass_ipa_hsa (gcc::context *ctxt);
extern ipa_opt_pass_d *make_pass_ipa_pure_const (gcc::context *ctxt);
+extern simple_ipa_opt_pass *make_pass_ipa_hardware_detection (gcc::context *
+ ctxt);
extern simple_ipa_opt_pass *make_pass_ipa_prefetch (gcc::context *ctxt);
extern simple_ipa_opt_pass *make_pass_ipa_struct_reorg (gcc::context *ctxt);
extern simple_ipa_opt_pass *make_pass_ipa_extend_auto_profile (gcc::context
--
2.33.0