commit da29c21db2050a6fb3b8c428eb0fc20e63856b6c
Author: Kyrylo Tkachov <kyrylo.tkachov@arm.com>
Date: Wed Sep 30 09:29:59 2015 +0100
[AArch64] Enable autoprefetcher modelling in the scheduler
@@ -194,6 +194,23 @@ struct tune_params
int vec_reassoc_width;
int min_div_recip_mul_sf;
int min_div_recip_mul_df;
+
+/* An enum specifying how to take CPU autoprefetch capabilities into account
+ during instruction scheduling:
+ - AUTOPREFETCHER_OFF: Do not take autoprefetch capabilities into account.
+ - AUTOPREFETCHER_WEAK: Attempt to sort sequences of loads/stores in order of
+ offsets but allow the pipeline hazard recognizer to alter that order to
+ maximize multi-issue opportunities.
+ - AUTOPREFETCHER_STRONG: Attempt to sort sequences of loads/stores in order
+ of offsets and prefer this even if it restricts multi-issue opportunities. */
+
+ enum aarch64_autoprefetch_model
+ {
+ AUTOPREFETCHER_OFF,
+ AUTOPREFETCHER_WEAK,
+ AUTOPREFETCHER_STRONG
+ } autoprefetcher_model;
+
unsigned int extra_tuning_flags;
};
@@ -64,6 +64,7 @@
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimplify.h"
+#include "params.h"
#include "optabs.h"
#include "dwarf2.h"
#include "cfgloop.h"
@@ -364,6 +365,7 @@ static const struct tune_params generic_tunings =
1, /* vec_reassoc_width. */
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
+ tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
};
@@ -386,6 +388,7 @@ static const struct tune_params cortexa53_tunings =
1, /* vec_reassoc_width. */
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
+ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
};
@@ -408,6 +411,7 @@ static const struct tune_params cortexa57_tunings =
1, /* vec_reassoc_width. */
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
+ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
(AARCH64_EXTRA_TUNE_RENAME_FMA_REGS) /* tune_flags. */
};
@@ -430,6 +434,7 @@ static const struct tune_params cortexa72_tunings =
1, /* vec_reassoc_width. */
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
+ tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
};
@@ -451,6 +456,7 @@ static const struct tune_params thunderx_tunings =
1, /* vec_reassoc_width. */
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
+ tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
};
@@ -472,6 +478,7 @@ static const struct tune_params xgene1_tunings =
1, /* vec_reassoc_width. */
2, /* min_div_recip_mul_sf. */
2, /* min_div_recip_mul_df. */
+ tune_params::AUTOPREFETCHER_OFF, /* autoprefetcher_model. */
(AARCH64_EXTRA_TUNE_NONE) /* tune_flags. */
};
@@ -7024,6 +7031,19 @@ aarch64_sched_first_cycle_multipass_dfa_lookahead (void)
return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
}
+
+/* Implement TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD: return
+ autopref_multipass_dfa_lookahead_guard (INSN, READY_INDEX) from haifa-sched.c.
+ It only has an effect if PARAM_SCHED_AUTOPREF_QUEUE_DEPTH > 0. */
+
+static int
+aarch64_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn,
+ int ready_index)
+{
+ return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
+}
+
+
/* Vectorizer cost model target hooks. */
/* Implement targetm.vectorize.builtin_vectorization_cost. */
@@ -7615,6 +7635,29 @@ aarch64_override_options_internal (struct gcc_options *opts)
initialize_aarch64_code_model (opts);
initialize_aarch64_tls_size (opts);
+ int queue_depth = 0;
+ switch (aarch64_tune_params.autoprefetcher_model)
+ {
+ case tune_params::AUTOPREFETCHER_OFF:
+ queue_depth = -1;
+ break;
+ case tune_params::AUTOPREFETCHER_WEAK:
+ queue_depth = 0;
+ break;
+ case tune_params::AUTOPREFETCHER_STRONG:
+ queue_depth = max_insn_queue_index + 1;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ /* We don't mind passing in global_options_set here as we don't use
+ the *options_set structs anyway. */
+ maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
+ queue_depth,
+ opts->x_param_values,
+ global_options_set.x_param_values);
+
aarch64_override_options_after_change_1 (opts);
}
@@ -13481,6 +13524,10 @@ aarch64_promoted_type (const_tree t)
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
aarch64_sched_first_cycle_multipass_dfa_lookahead
+#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
+#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
+ aarch64_first_cycle_multipass_dfa_lookahead_guard
+
#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT aarch64_trampoline_init