| Field | Value |
|---|---|
| Message ID | CAAgBjMndb7=-CM4Jodi6+BRPkSxj9=KyeRUGCG+vsahNZoUghQ@mail.gmail.com |
| State | New |
| Headers | show |
ping https://gcc.gnu.org/ml/gcc-patches/2016-06/msg01703.html Thanks, Prathamesh On 23 June 2016 at 22:51, Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> wrote: > On 17 June 2016 at 19:52, Prathamesh Kulkarni > <prathamesh.kulkarni@linaro.org> wrote: >> On 14 June 2016 at 18:31, Prathamesh Kulkarni >> <prathamesh.kulkarni@linaro.org> wrote: >>> On 13 June 2016 at 16:13, Jan Hubicka <hubicka@ucw.cz> wrote: >>>>> diff --git a/gcc/cgraph.h b/gcc/cgraph.h >>>>> index ecafe63..41ac408 100644 >>>>> --- a/gcc/cgraph.h >>>>> +++ b/gcc/cgraph.h >>>>> @@ -1874,6 +1874,9 @@ public: >>>>> if we did not do any inter-procedural code movement. */ >>>>> unsigned used_by_single_function : 1; >>>>> >>>>> + /* Set if -fsection-anchors is set. */ >>>>> + unsigned section_anchor : 1; >>>>> + >>>>> private: >>>>> /* Assemble thunks and aliases associated to varpool node. */ >>>>> void assemble_aliases (void); >>>>> diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c >>>>> index 4bfcad7..e75d5c0 100644 >>>>> --- a/gcc/cgraphunit.c >>>>> +++ b/gcc/cgraphunit.c >>>>> @@ -800,6 +800,9 @@ varpool_node::finalize_decl (tree decl) >>>>> it is available to notice_global_symbol. */ >>>>> node->definition = true; >>>>> notice_global_symbol (decl); >>>>> + >>>>> + node->section_anchor = flag_section_anchors; >>>>> + >>>>> if (TREE_THIS_VOLATILE (decl) || DECL_PRESERVE_P (decl) >>>>> /* Traditionally we do not eliminate static variables when not >>>>> optimizing and when not doing toplevel reoder. */ >>>>> diff --git a/gcc/common.opt b/gcc/common.opt >>>>> index f0d7196..e497795 100644 >>>>> --- a/gcc/common.opt >>>>> +++ b/gcc/common.opt >>>>> @@ -1590,6 +1590,10 @@ fira-algorithm= >>>>> Common Joined RejectNegative Enum(ira_algorithm) Var(flag_ira_algorithm) Init(IRA_ALGORITHM_CB) Optimization >>>>> -fira-algorithm=[CB|priority] Set the used IRA algorithm. 
>>>>> >>>>> +fipa-increase_alignment >>>>> +Common Report Var(flag_ipa_increase_alignment) Init(0) Optimization >>>>> +Option to gate increase_alignment ipa pass. >>>>> + >>>>> Enum >>>>> Name(ira_algorithm) Type(enum ira_algorithm) UnknownError(unknown IRA algorithm %qs) >>>>> >>>>> @@ -2133,7 +2137,7 @@ Common Report Var(flag_sched_dep_count_heuristic) Init(1) Optimization >>>>> Enable the dependent count heuristic in the scheduler. >>>>> >>>>> fsection-anchors >>>>> -Common Report Var(flag_section_anchors) Optimization >>>>> +Common Report Var(flag_section_anchors) >>>>> Access data in the same section from shared anchor points. >>>>> >>>>> fsee >>>>> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c >>>>> index a0db3a4..1482566 100644 >>>>> --- a/gcc/config/aarch64/aarch64.c >>>>> +++ b/gcc/config/aarch64/aarch64.c >>>>> @@ -8252,6 +8252,8 @@ aarch64_override_options (void) >>>>> >>>>> aarch64_register_fma_steering (); >>>>> >>>>> + /* Enable increase_alignment pass. */ >>>>> + flag_ipa_increase_alignment = 1; >>>> >>>> I would rather enable it always on targets that do support anchors. >>> AFAIK aarch64 supports section anchors. >>>>> diff --git a/gcc/lto/lto-symtab.c b/gcc/lto/lto-symtab.c >>>>> index ce9e146..7f09f3a 100644 >>>>> --- a/gcc/lto/lto-symtab.c >>>>> +++ b/gcc/lto/lto-symtab.c >>>>> @@ -342,6 +342,13 @@ lto_symtab_merge (symtab_node *prevailing, symtab_node *entry) >>>>> The type compatibility checks or the completing of types has properly >>>>> dealt with most issues. */ >>>>> >>>>> + /* ??? is this assert necessary ? */ >>>>> + varpool_node *v_prevailing = dyn_cast<varpool_node *> (prevailing); >>>>> + varpool_node *v_entry = dyn_cast<varpool_node *> (entry); >>>>> + gcc_assert (v_prevailing && v_entry); >>>>> + /* section_anchor of prevailing_decl wins. */ >>>>> + v_entry->section_anchor = v_prevailing->section_anchor; >>>>> + >>>> Other flags are merged in lto_varpool_replace_node so please move this there. 
>>> Ah indeed, thanks for the pointers. >>> I wonder though if we need to set >>> prevailing_node->section_anchor = vnode->section_anchor ? >>> IIUC, the function merges flags from vnode into prevailing_node >>> and removes vnode. However we want prevailing_node->section_anchor >>> to always take precedence. >>>>> +/* Return true if alignment should be increased for this vnode. >>>>> + This is done if every function that references/referring to vnode >>>>> + has flag_tree_loop_vectorize set. */ >>>>> + >>>>> +static bool >>>>> +increase_alignment_p (varpool_node *vnode) >>>>> +{ >>>>> + ipa_ref *ref; >>>>> + >>>>> + for (int i = 0; vnode->iterate_reference (i, ref); i++) >>>>> + if (cgraph_node *cnode = dyn_cast<cgraph_node *> (ref->referred)) >>>>> + { >>>>> + struct cl_optimization *opts = opts_for_fn (cnode->decl); >>>>> + if (!opts->x_flag_tree_loop_vectorize) >>>>> + return false; >>>>> + } >>>> >>>> If you take address of function that has vectorizer enabled probably doesn't >>>> imply need to increase alignment of that var. So please drop the loop. >>>> >>>> You only want function that read/writes or takes address of the symbol. But >>>> onthe other hand, you need to walk all aliases of the symbol by >>>> call_for_symbol_and_aliases >>>>> + >>>>> + for (int i = 0; vnode->iterate_referring (i, ref); i++) >>>>> + if (cgraph_node *cnode = dyn_cast<cgraph_node *> (ref->referring)) >>>>> + { >>>>> + struct cl_optimization *opts = opts_for_fn (cnode->decl); >>>>> + if (!opts->x_flag_tree_loop_vectorize) >>>>> + return false; >>>>> + } >>>>> + >>>>> + return true; >>>>> +} >>>>> + >>>>> /* Entry point to increase_alignment pass. 
*/ >>>>> static unsigned int >>>>> increase_alignment (void) >>>>> @@ -914,9 +942,12 @@ increase_alignment (void) >>>>> tree decl = vnode->decl; >>>>> unsigned int alignment; >>>>> >>>>> - if ((decl_in_symtab_p (decl) >>>>> - && !symtab_node::get (decl)->can_increase_alignment_p ()) >>>>> - || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl)) >>>>> + if (!vnode->section_anchor >>>>> + || (decl_in_symtab_p (decl) >>>>> + && !symtab_node::get (decl)->can_increase_alignment_p ()) >>>>> + || DECL_USER_ALIGN (decl) >>>>> + || DECL_ARTIFICIAL (decl) >>>>> + || !increase_alignment_p (vnode)) >>>> >>>> Incrementally we probably should do more testing whether the variable looks like >>>> someting that can be vectorized, i.e. it contains array, has address taken or the >>>> accesses are array accesses within loop. >>>> This can be done by the analysis phase of the IPA pass inspecting the function >>>> bodies. >>> Thanks, I will try to check for array accesses are within a loop in >>> followup patch. >>> I was wondering if we could we treat a homogeneous global struct >>> (having members of one type), >>> as a global array of that type and increase it's alignment if required ? >>>> >>>> I think it is important waste to bump up everything including error messages etc. >>>> At least on i386 the effect on firefox datasegment of various alignment setting is >>>> very visible. >>> Um for a start, would it be OK to check if all functions referencing variable >>> have attribute noreturn, and in that case we skip increasing the alignment ? >>> I suppose that error functions would be having attribute noreturn set ? >>>> >>>> Looks OK to me otherwise. please send updated patch. >>> I have done the changes in the attached patch (stage-1 built). >>> I am not sure what to return from the callback function and >>> arbitrarily chose to return true. 
>> Hi, >> In this version (stage-1 built), I added read/write summaries which >> stream those variables >> which we want to increase alignment for. >> >> I have a few questions: >> >> a) To check if global var is used inside a loop, I obtained >> corresponding ipa_ref >> and checked loop_containing_stmt (ref->stmt), however it returned non-NULL >> even when ref->stmt was not inside a loop. >> for instance: >> int a[32]; >> int f() { int x = a[0]; return x; } >> Um how to check if stmt is used inside a loop ? >> >> b) Is it necessary during WPA to check if function has >> flag_tree_vectorize_set since >> during analysis phase in increase_alignment_generate_summary() I check >> if cnode has flag_tree_loop_vectorize_set ? >> >> c) In LTO_increase_alignment_section, the following is streamed: >> i) a "count" of type uwhi, to represent number of variables >> ii) decls corresponding to the variables >> The variables are then obtained during read_summay using >> symtab_node::get_create(). >> I suppose since decls for varpool_nodes would already be streamed in >> LTO_section_decls, I was wondering if I >> could somehow refer to the decls in that section to avoid duplicate >> streaming of decls ? > Hi, > In this version, the variable is streamed if it occurs within a loop > or it's address is taken. > To check if stmt is inside a loop I am using: > > struct loop *l = loop_containing_stmt (ref->stmt); > if (l != DECL_STRUCT_FUNCTION (cnode->decl)->x_current_loops->tree_root) > vars->add (vnode); > Is this correct ? > > I came across an unexpected issue in my previous patch with -ffat-lto-objects. > I am allocating vars = new hash_set<varpool_node *> () in > generate_summary() and freeing it in write_summary(). > However with -ffat-lto-objects, it appears execute() gets called after > write_summary() > during LGEN and we hit segfault in execute() at: > for (hash_set<varpool_node *>::iterator it = vars.begin (); it != > vars.end (); it++) > { ... 
} > because write_summary() has freed vars. > To work around the issue, I gated the call to free vars in write_summary on > flag_fat_lto_objects, > I am not sure if that's a good solution. > > Cross tested on arm*-*-*, aarch64*-*-*. > > Thanks, > Prathamesh >> >> Thanks, >> Prathamesh >>> >>> Thanks, >>> Prathamesh >>>> >>>> Honza
ping * 2 ping https://gcc.gnu.org/ml/gcc-patches/2016-06/msg01703.html Thanks, Prathamesh On 28 June 2016 at 14:49, Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> wrote: > ping https://gcc.gnu.org/ml/gcc-patches/2016-06/msg01703.html > > Thanks, > Prathamesh > > On 23 June 2016 at 22:51, Prathamesh Kulkarni > <prathamesh.kulkarni@linaro.org> wrote: >> On 17 June 2016 at 19:52, Prathamesh Kulkarni >> <prathamesh.kulkarni@linaro.org> wrote: >>> On 14 June 2016 at 18:31, Prathamesh Kulkarni >>> <prathamesh.kulkarni@linaro.org> wrote: >>>> On 13 June 2016 at 16:13, Jan Hubicka <hubicka@ucw.cz> wrote: >>>>>> diff --git a/gcc/cgraph.h b/gcc/cgraph.h >>>>>> index ecafe63..41ac408 100644 >>>>>> --- a/gcc/cgraph.h >>>>>> +++ b/gcc/cgraph.h >>>>>> @@ -1874,6 +1874,9 @@ public: >>>>>> if we did not do any inter-procedural code movement. */ >>>>>> unsigned used_by_single_function : 1; >>>>>> >>>>>> + /* Set if -fsection-anchors is set. */ >>>>>> + unsigned section_anchor : 1; >>>>>> + >>>>>> private: >>>>>> /* Assemble thunks and aliases associated to varpool node. */ >>>>>> void assemble_aliases (void); >>>>>> diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c >>>>>> index 4bfcad7..e75d5c0 100644 >>>>>> --- a/gcc/cgraphunit.c >>>>>> +++ b/gcc/cgraphunit.c >>>>>> @@ -800,6 +800,9 @@ varpool_node::finalize_decl (tree decl) >>>>>> it is available to notice_global_symbol. */ >>>>>> node->definition = true; >>>>>> notice_global_symbol (decl); >>>>>> + >>>>>> + node->section_anchor = flag_section_anchors; >>>>>> + >>>>>> if (TREE_THIS_VOLATILE (decl) || DECL_PRESERVE_P (decl) >>>>>> /* Traditionally we do not eliminate static variables when not >>>>>> optimizing and when not doing toplevel reoder. 
*/ >>>>>> diff --git a/gcc/common.opt b/gcc/common.opt >>>>>> index f0d7196..e497795 100644 >>>>>> --- a/gcc/common.opt >>>>>> +++ b/gcc/common.opt >>>>>> @@ -1590,6 +1590,10 @@ fira-algorithm= >>>>>> Common Joined RejectNegative Enum(ira_algorithm) Var(flag_ira_algorithm) Init(IRA_ALGORITHM_CB) Optimization >>>>>> -fira-algorithm=[CB|priority] Set the used IRA algorithm. >>>>>> >>>>>> +fipa-increase_alignment >>>>>> +Common Report Var(flag_ipa_increase_alignment) Init(0) Optimization >>>>>> +Option to gate increase_alignment ipa pass. >>>>>> + >>>>>> Enum >>>>>> Name(ira_algorithm) Type(enum ira_algorithm) UnknownError(unknown IRA algorithm %qs) >>>>>> >>>>>> @@ -2133,7 +2137,7 @@ Common Report Var(flag_sched_dep_count_heuristic) Init(1) Optimization >>>>>> Enable the dependent count heuristic in the scheduler. >>>>>> >>>>>> fsection-anchors >>>>>> -Common Report Var(flag_section_anchors) Optimization >>>>>> +Common Report Var(flag_section_anchors) >>>>>> Access data in the same section from shared anchor points. >>>>>> >>>>>> fsee >>>>>> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c >>>>>> index a0db3a4..1482566 100644 >>>>>> --- a/gcc/config/aarch64/aarch64.c >>>>>> +++ b/gcc/config/aarch64/aarch64.c >>>>>> @@ -8252,6 +8252,8 @@ aarch64_override_options (void) >>>>>> >>>>>> aarch64_register_fma_steering (); >>>>>> >>>>>> + /* Enable increase_alignment pass. */ >>>>>> + flag_ipa_increase_alignment = 1; >>>>> >>>>> I would rather enable it always on targets that do support anchors. >>>> AFAIK aarch64 supports section anchors. >>>>>> diff --git a/gcc/lto/lto-symtab.c b/gcc/lto/lto-symtab.c >>>>>> index ce9e146..7f09f3a 100644 >>>>>> --- a/gcc/lto/lto-symtab.c >>>>>> +++ b/gcc/lto/lto-symtab.c >>>>>> @@ -342,6 +342,13 @@ lto_symtab_merge (symtab_node *prevailing, symtab_node *entry) >>>>>> The type compatibility checks or the completing of types has properly >>>>>> dealt with most issues. */ >>>>>> >>>>>> + /* ??? 
is this assert necessary ? */ >>>>>> + varpool_node *v_prevailing = dyn_cast<varpool_node *> (prevailing); >>>>>> + varpool_node *v_entry = dyn_cast<varpool_node *> (entry); >>>>>> + gcc_assert (v_prevailing && v_entry); >>>>>> + /* section_anchor of prevailing_decl wins. */ >>>>>> + v_entry->section_anchor = v_prevailing->section_anchor; >>>>>> + >>>>> Other flags are merged in lto_varpool_replace_node so please move this there. >>>> Ah indeed, thanks for the pointers. >>>> I wonder though if we need to set >>>> prevailing_node->section_anchor = vnode->section_anchor ? >>>> IIUC, the function merges flags from vnode into prevailing_node >>>> and removes vnode. However we want prevailing_node->section_anchor >>>> to always take precedence. >>>>>> +/* Return true if alignment should be increased for this vnode. >>>>>> + This is done if every function that references/referring to vnode >>>>>> + has flag_tree_loop_vectorize set. */ >>>>>> + >>>>>> +static bool >>>>>> +increase_alignment_p (varpool_node *vnode) >>>>>> +{ >>>>>> + ipa_ref *ref; >>>>>> + >>>>>> + for (int i = 0; vnode->iterate_reference (i, ref); i++) >>>>>> + if (cgraph_node *cnode = dyn_cast<cgraph_node *> (ref->referred)) >>>>>> + { >>>>>> + struct cl_optimization *opts = opts_for_fn (cnode->decl); >>>>>> + if (!opts->x_flag_tree_loop_vectorize) >>>>>> + return false; >>>>>> + } >>>>> >>>>> If you take address of function that has vectorizer enabled probably doesn't >>>>> imply need to increase alignment of that var. So please drop the loop. >>>>> >>>>> You only want function that read/writes or takes address of the symbol. 
But >>>>> onthe other hand, you need to walk all aliases of the symbol by >>>>> call_for_symbol_and_aliases >>>>>> + >>>>>> + for (int i = 0; vnode->iterate_referring (i, ref); i++) >>>>>> + if (cgraph_node *cnode = dyn_cast<cgraph_node *> (ref->referring)) >>>>>> + { >>>>>> + struct cl_optimization *opts = opts_for_fn (cnode->decl); >>>>>> + if (!opts->x_flag_tree_loop_vectorize) >>>>>> + return false; >>>>>> + } >>>>>> + >>>>>> + return true; >>>>>> +} >>>>>> + >>>>>> /* Entry point to increase_alignment pass. */ >>>>>> static unsigned int >>>>>> increase_alignment (void) >>>>>> @@ -914,9 +942,12 @@ increase_alignment (void) >>>>>> tree decl = vnode->decl; >>>>>> unsigned int alignment; >>>>>> >>>>>> - if ((decl_in_symtab_p (decl) >>>>>> - && !symtab_node::get (decl)->can_increase_alignment_p ()) >>>>>> - || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl)) >>>>>> + if (!vnode->section_anchor >>>>>> + || (decl_in_symtab_p (decl) >>>>>> + && !symtab_node::get (decl)->can_increase_alignment_p ()) >>>>>> + || DECL_USER_ALIGN (decl) >>>>>> + || DECL_ARTIFICIAL (decl) >>>>>> + || !increase_alignment_p (vnode)) >>>>> >>>>> Incrementally we probably should do more testing whether the variable looks like >>>>> someting that can be vectorized, i.e. it contains array, has address taken or the >>>>> accesses are array accesses within loop. >>>>> This can be done by the analysis phase of the IPA pass inspecting the function >>>>> bodies. >>>> Thanks, I will try to check for array accesses are within a loop in >>>> followup patch. >>>> I was wondering if we could we treat a homogeneous global struct >>>> (having members of one type), >>>> as a global array of that type and increase it's alignment if required ? >>>>> >>>>> I think it is important waste to bump up everything including error messages etc. >>>>> At least on i386 the effect on firefox datasegment of various alignment setting is >>>>> very visible. 
>>>> Um for a start, would it be OK to check if all functions referencing variable >>>> have attribute noreturn, and in that case we skip increasing the alignment ? >>>> I suppose that error functions would be having attribute noreturn set ? >>>>> >>>>> Looks OK to me otherwise. please send updated patch. >>>> I have done the changes in the attached patch (stage-1 built). >>>> I am not sure what to return from the callback function and >>>> arbitrarily chose to return true. >>> Hi, >>> In this version (stage-1 built), I added read/write summaries which >>> stream those variables >>> which we want to increase alignment for. >>> >>> I have a few questions: >>> >>> a) To check if global var is used inside a loop, I obtained >>> corresponding ipa_ref >>> and checked loop_containing_stmt (ref->stmt), however it returned non-NULL >>> even when ref->stmt was not inside a loop. >>> for instance: >>> int a[32]; >>> int f() { int x = a[0]; return x; } >>> Um how to check if stmt is used inside a loop ? >>> >>> b) Is it necessary during WPA to check if function has >>> flag_tree_vectorize_set since >>> during analysis phase in increase_alignment_generate_summary() I check >>> if cnode has flag_tree_loop_vectorize_set ? >>> >>> c) In LTO_increase_alignment_section, the following is streamed: >>> i) a "count" of type uwhi, to represent number of variables >>> ii) decls corresponding to the variables >>> The variables are then obtained during read_summay using >>> symtab_node::get_create(). >>> I suppose since decls for varpool_nodes would already be streamed in >>> LTO_section_decls, I was wondering if I >>> could somehow refer to the decls in that section to avoid duplicate >>> streaming of decls ? >> Hi, >> In this version, the variable is streamed if it occurs within a loop >> or it's address is taken. 
>> To check if stmt is inside a loop I am using: >> >> struct loop *l = loop_containing_stmt (ref->stmt); >> if (l != DECL_STRUCT_FUNCTION (cnode->decl)->x_current_loops->tree_root) >> vars->add (vnode); >> Is this correct ? >> >> I came across an unexpected issue in my previous patch with -ffat-lto-objects. >> I am allocating vars = new hash_set<varpool_node *> () in >> generate_summary() and freeing it in write_summary(). >> However with -ffat-lto-objects, it appears execute() gets called after >> write_summary() >> during LGEN and we hit segfault in execute() at: >> for (hash_set<varpool_node *>::iterator it = vars.begin (); it != >> vars.end (); it++) >> { ... } >> because write_summary() has freed vars. >> To workaround the issue, I gated call to free vars in write_summary on >> flag_fat_lto_objects, >> I am not sure if that's a good solution. >> >> Cross tested on arm*-*-*, aarch64*-*-*. >> >> Thanks, >> Prathamesh >>> >>> Thanks, >>> Prathamesh >>>> >>>> Thanks, >>>> Prathamesh >>>>> >>>>> Honza
ping * 3 https://gcc.gnu.org/ml/gcc-patches/2016-06/msg01703.html Thanks, Prathamesh On 5 July 2016 at 10:53, Prathamesh Kulkarni <prathamesh.kulkarni@linaro.org> wrote: > ping * 2 ping https://gcc.gnu.org/ml/gcc-patches/2016-06/msg01703.html > > Thanks, > Prathamesh > > On 28 June 2016 at 14:49, Prathamesh Kulkarni > <prathamesh.kulkarni@linaro.org> wrote: >> ping https://gcc.gnu.org/ml/gcc-patches/2016-06/msg01703.html >> >> Thanks, >> Prathamesh >> >> On 23 June 2016 at 22:51, Prathamesh Kulkarni >> <prathamesh.kulkarni@linaro.org> wrote: >>> On 17 June 2016 at 19:52, Prathamesh Kulkarni >>> <prathamesh.kulkarni@linaro.org> wrote: >>>> On 14 June 2016 at 18:31, Prathamesh Kulkarni >>>> <prathamesh.kulkarni@linaro.org> wrote: >>>>> On 13 June 2016 at 16:13, Jan Hubicka <hubicka@ucw.cz> wrote: >>>>>>> diff --git a/gcc/cgraph.h b/gcc/cgraph.h >>>>>>> index ecafe63..41ac408 100644 >>>>>>> --- a/gcc/cgraph.h >>>>>>> +++ b/gcc/cgraph.h >>>>>>> @@ -1874,6 +1874,9 @@ public: >>>>>>> if we did not do any inter-procedural code movement. */ >>>>>>> unsigned used_by_single_function : 1; >>>>>>> >>>>>>> + /* Set if -fsection-anchors is set. */ >>>>>>> + unsigned section_anchor : 1; >>>>>>> + >>>>>>> private: >>>>>>> /* Assemble thunks and aliases associated to varpool node. */ >>>>>>> void assemble_aliases (void); >>>>>>> diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c >>>>>>> index 4bfcad7..e75d5c0 100644 >>>>>>> --- a/gcc/cgraphunit.c >>>>>>> +++ b/gcc/cgraphunit.c >>>>>>> @@ -800,6 +800,9 @@ varpool_node::finalize_decl (tree decl) >>>>>>> it is available to notice_global_symbol. */ >>>>>>> node->definition = true; >>>>>>> notice_global_symbol (decl); >>>>>>> + >>>>>>> + node->section_anchor = flag_section_anchors; >>>>>>> + >>>>>>> if (TREE_THIS_VOLATILE (decl) || DECL_PRESERVE_P (decl) >>>>>>> /* Traditionally we do not eliminate static variables when not >>>>>>> optimizing and when not doing toplevel reoder. 
*/ >>>>>>> diff --git a/gcc/common.opt b/gcc/common.opt >>>>>>> index f0d7196..e497795 100644 >>>>>>> --- a/gcc/common.opt >>>>>>> +++ b/gcc/common.opt >>>>>>> @@ -1590,6 +1590,10 @@ fira-algorithm= >>>>>>> Common Joined RejectNegative Enum(ira_algorithm) Var(flag_ira_algorithm) Init(IRA_ALGORITHM_CB) Optimization >>>>>>> -fira-algorithm=[CB|priority] Set the used IRA algorithm. >>>>>>> >>>>>>> +fipa-increase_alignment >>>>>>> +Common Report Var(flag_ipa_increase_alignment) Init(0) Optimization >>>>>>> +Option to gate increase_alignment ipa pass. >>>>>>> + >>>>>>> Enum >>>>>>> Name(ira_algorithm) Type(enum ira_algorithm) UnknownError(unknown IRA algorithm %qs) >>>>>>> >>>>>>> @@ -2133,7 +2137,7 @@ Common Report Var(flag_sched_dep_count_heuristic) Init(1) Optimization >>>>>>> Enable the dependent count heuristic in the scheduler. >>>>>>> >>>>>>> fsection-anchors >>>>>>> -Common Report Var(flag_section_anchors) Optimization >>>>>>> +Common Report Var(flag_section_anchors) >>>>>>> Access data in the same section from shared anchor points. >>>>>>> >>>>>>> fsee >>>>>>> diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c >>>>>>> index a0db3a4..1482566 100644 >>>>>>> --- a/gcc/config/aarch64/aarch64.c >>>>>>> +++ b/gcc/config/aarch64/aarch64.c >>>>>>> @@ -8252,6 +8252,8 @@ aarch64_override_options (void) >>>>>>> >>>>>>> aarch64_register_fma_steering (); >>>>>>> >>>>>>> + /* Enable increase_alignment pass. */ >>>>>>> + flag_ipa_increase_alignment = 1; >>>>>> >>>>>> I would rather enable it always on targets that do support anchors. >>>>> AFAIK aarch64 supports section anchors. >>>>>>> diff --git a/gcc/lto/lto-symtab.c b/gcc/lto/lto-symtab.c >>>>>>> index ce9e146..7f09f3a 100644 >>>>>>> --- a/gcc/lto/lto-symtab.c >>>>>>> +++ b/gcc/lto/lto-symtab.c >>>>>>> @@ -342,6 +342,13 @@ lto_symtab_merge (symtab_node *prevailing, symtab_node *entry) >>>>>>> The type compatibility checks or the completing of types has properly >>>>>>> dealt with most issues. 
*/ >>>>>>> >>>>>>> + /* ??? is this assert necessary ? */ >>>>>>> + varpool_node *v_prevailing = dyn_cast<varpool_node *> (prevailing); >>>>>>> + varpool_node *v_entry = dyn_cast<varpool_node *> (entry); >>>>>>> + gcc_assert (v_prevailing && v_entry); >>>>>>> + /* section_anchor of prevailing_decl wins. */ >>>>>>> + v_entry->section_anchor = v_prevailing->section_anchor; >>>>>>> + >>>>>> Other flags are merged in lto_varpool_replace_node so please move this there. >>>>> Ah indeed, thanks for the pointers. >>>>> I wonder though if we need to set >>>>> prevailing_node->section_anchor = vnode->section_anchor ? >>>>> IIUC, the function merges flags from vnode into prevailing_node >>>>> and removes vnode. However we want prevailing_node->section_anchor >>>>> to always take precedence. >>>>>>> +/* Return true if alignment should be increased for this vnode. >>>>>>> + This is done if every function that references/referring to vnode >>>>>>> + has flag_tree_loop_vectorize set. */ >>>>>>> + >>>>>>> +static bool >>>>>>> +increase_alignment_p (varpool_node *vnode) >>>>>>> +{ >>>>>>> + ipa_ref *ref; >>>>>>> + >>>>>>> + for (int i = 0; vnode->iterate_reference (i, ref); i++) >>>>>>> + if (cgraph_node *cnode = dyn_cast<cgraph_node *> (ref->referred)) >>>>>>> + { >>>>>>> + struct cl_optimization *opts = opts_for_fn (cnode->decl); >>>>>>> + if (!opts->x_flag_tree_loop_vectorize) >>>>>>> + return false; >>>>>>> + } >>>>>> >>>>>> If you take address of function that has vectorizer enabled probably doesn't >>>>>> imply need to increase alignment of that var. So please drop the loop. >>>>>> >>>>>> You only want function that read/writes or takes address of the symbol. 
But >>>>>> onthe other hand, you need to walk all aliases of the symbol by >>>>>> call_for_symbol_and_aliases >>>>>>> + >>>>>>> + for (int i = 0; vnode->iterate_referring (i, ref); i++) >>>>>>> + if (cgraph_node *cnode = dyn_cast<cgraph_node *> (ref->referring)) >>>>>>> + { >>>>>>> + struct cl_optimization *opts = opts_for_fn (cnode->decl); >>>>>>> + if (!opts->x_flag_tree_loop_vectorize) >>>>>>> + return false; >>>>>>> + } >>>>>>> + >>>>>>> + return true; >>>>>>> +} >>>>>>> + >>>>>>> /* Entry point to increase_alignment pass. */ >>>>>>> static unsigned int >>>>>>> increase_alignment (void) >>>>>>> @@ -914,9 +942,12 @@ increase_alignment (void) >>>>>>> tree decl = vnode->decl; >>>>>>> unsigned int alignment; >>>>>>> >>>>>>> - if ((decl_in_symtab_p (decl) >>>>>>> - && !symtab_node::get (decl)->can_increase_alignment_p ()) >>>>>>> - || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl)) >>>>>>> + if (!vnode->section_anchor >>>>>>> + || (decl_in_symtab_p (decl) >>>>>>> + && !symtab_node::get (decl)->can_increase_alignment_p ()) >>>>>>> + || DECL_USER_ALIGN (decl) >>>>>>> + || DECL_ARTIFICIAL (decl) >>>>>>> + || !increase_alignment_p (vnode)) >>>>>> >>>>>> Incrementally we probably should do more testing whether the variable looks like >>>>>> someting that can be vectorized, i.e. it contains array, has address taken or the >>>>>> accesses are array accesses within loop. >>>>>> This can be done by the analysis phase of the IPA pass inspecting the function >>>>>> bodies. >>>>> Thanks, I will try to check for array accesses are within a loop in >>>>> followup patch. >>>>> I was wondering if we could we treat a homogeneous global struct >>>>> (having members of one type), >>>>> as a global array of that type and increase it's alignment if required ? >>>>>> >>>>>> I think it is important waste to bump up everything including error messages etc. >>>>>> At least on i386 the effect on firefox datasegment of various alignment setting is >>>>>> very visible. 
>>>>> Um for a start, would it be OK to check if all functions referencing variable >>>>> have attribute noreturn, and in that case we skip increasing the alignment ? >>>>> I suppose that error functions would be having attribute noreturn set ? >>>>>> >>>>>> Looks OK to me otherwise. please send updated patch. >>>>> I have done the changes in the attached patch (stage-1 built). >>>>> I am not sure what to return from the callback function and >>>>> arbitrarily chose to return true. >>>> Hi, >>>> In this version (stage-1 built), I added read/write summaries which >>>> stream those variables >>>> which we want to increase alignment for. >>>> >>>> I have a few questions: >>>> >>>> a) To check if global var is used inside a loop, I obtained >>>> corresponding ipa_ref >>>> and checked loop_containing_stmt (ref->stmt), however it returned non-NULL >>>> even when ref->stmt was not inside a loop. >>>> for instance: >>>> int a[32]; >>>> int f() { int x = a[0]; return x; } >>>> Um how to check if stmt is used inside a loop ? >>>> >>>> b) Is it necessary during WPA to check if function has >>>> flag_tree_vectorize_set since >>>> during analysis phase in increase_alignment_generate_summary() I check >>>> if cnode has flag_tree_loop_vectorize_set ? >>>> >>>> c) In LTO_increase_alignment_section, the following is streamed: >>>> i) a "count" of type uwhi, to represent number of variables >>>> ii) decls corresponding to the variables >>>> The variables are then obtained during read_summay using >>>> symtab_node::get_create(). >>>> I suppose since decls for varpool_nodes would already be streamed in >>>> LTO_section_decls, I was wondering if I >>>> could somehow refer to the decls in that section to avoid duplicate >>>> streaming of decls ? >>> Hi, >>> In this version, the variable is streamed if it occurs within a loop >>> or it's address is taken. 
>>> To check if stmt is inside a loop I am using: >>> >>> struct loop *l = loop_containing_stmt (ref->stmt); >>> if (l != DECL_STRUCT_FUNCTION (cnode->decl)->x_current_loops->tree_root) >>> vars->add (vnode); >>> Is this correct ? >>> >>> I came across an unexpected issue in my previous patch with -ffat-lto-objects. >>> I am allocating vars = new hash_set<varpool_node *> () in >>> generate_summary() and freeing it in write_summary(). >>> However with -ffat-lto-objects, it appears execute() gets called after >>> write_summary() >>> during LGEN and we hit segfault in execute() at: >>> for (hash_set<varpool_node *>::iterator it = vars.begin (); it != >>> vars.end (); it++) >>> { ... } >>> because write_summary() has freed vars. >>> To workaround the issue, I gated call to free vars in write_summary on >>> flag_fat_lto_objects, >>> I am not sure if that's a good solution. >>> >>> Cross tested on arm*-*-*, aarch64*-*-*. >>> >>> Thanks, >>> Prathamesh >>>> >>>> Thanks, >>>> Prathamesh >>>>> >>>>> Thanks, >>>>> Prathamesh >>>>>> >>>>>> Honza
diff --git a/gcc/cgraph.h b/gcc/cgraph.h index ecafe63..41ac408 100644 --- a/gcc/cgraph.h +++ b/gcc/cgraph.h @@ -1874,6 +1874,9 @@ public: if we did not do any inter-procedural code movement. */ unsigned used_by_single_function : 1; + /* Set if -fsection-anchors is set. */ + unsigned section_anchor : 1; + private: /* Assemble thunks and aliases associated to varpool node. */ void assemble_aliases (void); diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c index 4bfcad7..e75d5c0 100644 --- a/gcc/cgraphunit.c +++ b/gcc/cgraphunit.c @@ -800,6 +800,9 @@ varpool_node::finalize_decl (tree decl) it is available to notice_global_symbol. */ node->definition = true; notice_global_symbol (decl); + + node->section_anchor = flag_section_anchors; + if (TREE_THIS_VOLATILE (decl) || DECL_PRESERVE_P (decl) /* Traditionally we do not eliminate static variables when not optimizing and when not doing toplevel reoder. */ diff --git a/gcc/common.opt b/gcc/common.opt index f0d7196..e497795 100644 --- a/gcc/common.opt +++ b/gcc/common.opt @@ -1590,6 +1590,10 @@ fira-algorithm= Common Joined RejectNegative Enum(ira_algorithm) Var(flag_ira_algorithm) Init(IRA_ALGORITHM_CB) Optimization -fira-algorithm=[CB|priority] Set the used IRA algorithm. +fipa-increase_alignment +Common Report Var(flag_ipa_increase_alignment) Init(0) Optimization +Option to gate increase_alignment ipa pass. + Enum Name(ira_algorithm) Type(enum ira_algorithm) UnknownError(unknown IRA algorithm %qs) @@ -2133,7 +2137,7 @@ Common Report Var(flag_sched_dep_count_heuristic) Init(1) Optimization Enable the dependent count heuristic in the scheduler. fsection-anchors -Common Report Var(flag_section_anchors) Optimization +Common Report Var(flag_section_anchors) Access data in the same section from shared anchor points. 
fsee diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index a0db3a4..1482566 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -8252,6 +8252,8 @@ aarch64_override_options (void) aarch64_register_fma_steering (); + /* Enable increase_alignment pass. */ + flag_ipa_increase_alignment = 1; } /* Implement targetm.override_options_after_change. */ diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index 3503c15..b7f448e 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -3458,6 +3458,9 @@ arm_option_override (void) /* Init initial mode for testing. */ thumb_flipper = TARGET_THUMB; + + /* Enable increase_alignment pass. */ + flag_ipa_increase_alignment = 1; } static void diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 2d7df6b..ed59068 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -5011,6 +5011,9 @@ rs6000_option_override (void) = { pass_analyze_swaps, "cse1", 1, PASS_POS_INSERT_BEFORE }; register_pass (&analyze_swaps_info); + + /* Enable increase_alignment pass. 
*/ + flag_ipa_increase_alignment = 1; } diff --git a/gcc/lto-cgraph.c b/gcc/lto-cgraph.c index 5cef2ba..289d9c3 100644 --- a/gcc/lto-cgraph.c +++ b/gcc/lto-cgraph.c @@ -627,6 +627,7 @@ lto_output_varpool_node (struct lto_simple_output_block *ob, varpool_node *node, bp_pack_value (&bp, node->tls_model, 3); bp_pack_value (&bp, node->used_by_single_function, 1); bp_pack_value (&bp, node->need_bounds_init, 1); + bp_pack_value (&bp, node->section_anchor, 1); streamer_write_bitpack (&bp); group = node->get_comdat_group (); @@ -1401,6 +1402,7 @@ input_varpool_node (struct lto_file_decl_data *file_data, node->tls_model = (enum tls_model)bp_unpack_value (&bp, 3); node->used_by_single_function = (enum tls_model)bp_unpack_value (&bp, 1); node->need_bounds_init = bp_unpack_value (&bp, 1); + node->section_anchor = bp_unpack_value (&bp, 1); group = read_identifier (ib); if (group) { diff --git a/gcc/lto-section-in.c b/gcc/lto-section-in.c index d8e74d7..d56f9d4 100644 --- a/gcc/lto-section-in.c +++ b/gcc/lto-section-in.c @@ -52,7 +52,8 @@ const char *lto_section_name[LTO_N_SECTION_TYPES] = "icf", "offload_table", "mode_table", - "hsa" + "hsa", + "increase_alignment" }; diff --git a/gcc/lto-streamer.h b/gcc/lto-streamer.h index 92efdb8..725d0dc 100644 --- a/gcc/lto-streamer.h +++ b/gcc/lto-streamer.h @@ -245,6 +245,7 @@ enum lto_section_type LTO_section_offload_table, LTO_section_mode_table, LTO_section_ipa_hsa, + LTO_section_increase_alignment, LTO_N_SECTION_TYPES /* Must be last. */ }; diff --git a/gcc/passes.def b/gcc/passes.def index 3647e90..3a8063c 100644 --- a/gcc/passes.def +++ b/gcc/passes.def @@ -138,12 +138,12 @@ along with GCC; see the file COPYING3. 
If not see PUSH_INSERT_PASSES_WITHIN (pass_ipa_tree_profile) NEXT_PASS (pass_feedback_split_functions); POP_INSERT_PASSES () - NEXT_PASS (pass_ipa_increase_alignment); NEXT_PASS (pass_ipa_tm); NEXT_PASS (pass_ipa_lower_emutls); TERMINATE_PASS_LIST (all_small_ipa_passes) INSERT_PASSES_AFTER (all_regular_ipa_passes) + NEXT_PASS (pass_ipa_increase_alignment); NEXT_PASS (pass_ipa_whole_program_visibility); NEXT_PASS (pass_ipa_profile); NEXT_PASS (pass_ipa_icf); diff --git a/gcc/testsuite/gcc.dg/vect/aligned-section-anchors-vect-73.c b/gcc/testsuite/gcc.dg/vect/aligned-section-anchors-vect-73.c new file mode 100644 index 0000000..74eaed8 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/aligned-section-anchors-vect-73.c @@ -0,0 +1,25 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target section_anchors } */ +/* { dg-require-effective-target vect_int } */ + +#define N 32 + +/* Clone of section-anchors-vect-70.c with foo() having -fno-tree-loop-vectorize. */ + +static struct A { + int p1, p2; + int e[N]; +} a, b, c; + +__attribute__((optimize("-fno-tree-loop-vectorize"))) +int foo(void) +{ + for (int i = 0; i < N; i++) + a.e[i] = b.e[i] + c.e[i]; + + return a.e[0]; +} + +/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0 "increase_alignment" { target aarch64*-*-* } } } */ +/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0 "increase_alignment" { target powerpc64*-*-* } } } */ +/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0 "increase_alignment" { target arm*-*-* } } } */ diff --git a/gcc/testsuite/gcc.dg/vect/aligned-section-anchors-vect-74.c b/gcc/testsuite/gcc.dg/vect/aligned-section-anchors-vect-74.c new file mode 100644 index 0000000..574d917 --- /dev/null +++ b/gcc/testsuite/gcc.dg/vect/aligned-section-anchors-vect-74.c @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-require-effective-target section_anchors } */ +/* { dg-require-effective-target vect_int } */ + +#define N 32 + +/* Do not increase 
alignment of a since it does not have its + address taken and doesn't occur inside a loop. */ + +static int a[N]; + +int foo(void) +{ + return a[0]; +} + +/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0 "increase_alignment" { target aarch64*-*-* } } } */ +/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0 "increase_alignment" { target powerpc64*-*-* } } } */ +/* { dg-final { scan-ipa-dump-times "Increasing alignment of decl" 0 "increase_alignment" { target arm*-*-* } } } */ diff --git a/gcc/tree-pass.h b/gcc/tree-pass.h index 36299a6..d36aa1d 100644 --- a/gcc/tree-pass.h +++ b/gcc/tree-pass.h @@ -483,7 +483,7 @@ extern simple_ipa_opt_pass *make_pass_local_optimization_passes (gcc::context *c extern ipa_opt_pass_d *make_pass_ipa_whole_program_visibility (gcc::context *ctxt); -extern simple_ipa_opt_pass *make_pass_ipa_increase_alignment (gcc::context +extern ipa_opt_pass_d *make_pass_ipa_increase_alignment (gcc::context *ctxt); extern ipa_opt_pass_d *make_pass_ipa_inline (gcc::context *ctxt); extern simple_ipa_opt_pass *make_pass_ipa_free_lang_data (gcc::context *ctxt); diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c index 2669813..f240747 100644 --- a/gcc/tree-vectorizer.c +++ b/gcc/tree-vectorizer.c @@ -75,7 +75,10 @@ along with GCC; see the file COPYING3. If not see #include "tree-ssa-propagate.h" #include "dbgcnt.h" #include "tree-scalar-evolution.h" - +#include "tree-ssa-loop.h" +#include "tree-streamer.h" +#include "stringpool.h" +#include "gimple-pretty-print.h" /* Loop or bb location. */ source_location vect_location; @@ -795,7 +798,8 @@ make_pass_slp_vectorize (gcc::context *ctxt) array padding. */ static unsigned get_vec_alignment_for_type (tree); -static hash_map<tree, unsigned> *type_align_map; +static hash_map<tree, unsigned> *type_align_map = NULL; +static hash_set<varpool_node *> *vars = NULL; /* Return alignment of array's vector type corresponding to scalar type. 0 if no vector type exists. 
*/ @@ -899,46 +903,199 @@ get_vec_alignment_for_type (tree type) return (alignment > TYPE_ALIGN (type)) ? alignment : 0; } +/* Return true if alignment should be increased for this vnode. + This is done if every function referring to vnode + has flag_tree_loop_vectorize set. */ + +static bool +increase_alignment_p (varpool_node *vnode) +{ + ipa_ref *ref; + cgraph_node *cnode; + + /* FIXME: Is this really necessary, since we check if cnode has + flag_tree_loop_vectorize_set during analysis phase in + increase_alignment_analyze_function ? */ + + for (int i = 0; vnode->iterate_referring (i, ref); i++) + /* Walk those functions that read/write/take address of vnode. */ + if ((cnode = dyn_cast<cgraph_node *> (ref->referring))) + if (!opt_for_fn (cnode->decl, flag_tree_loop_vectorize)) + return false; + + return true; +} + +static bool +increase_alignment_callback (varpool_node *vnode, void *data ATTRIBUTE_UNUSED) +{ + tree decl = vnode->decl; + + if (vnode->section_anchor + && decl_in_symtab_p (decl) + && symtab_node::get (decl)->can_increase_alignment_p () + && !DECL_USER_ALIGN (decl) + && increase_alignment_p (vnode)) + { + unsigned alignment = get_vec_alignment_for_type (TREE_TYPE (decl)); + if (alignment && vect_can_force_dr_alignment_p (decl, alignment)) + { + vnode->increase_alignment (alignment); + dump_printf (MSG_NOTE, "Increasing alignment of decl: "); + dump_generic_expr (MSG_NOTE, TDF_SLIM, decl); + dump_printf (MSG_NOTE, "\n"); + } + } + + return true; +} + /* Entry point to increase_alignment pass. */ static unsigned int increase_alignment (void) { - varpool_node *vnode; - vect_location = UNKNOWN_LOCATION; type_align_map = new hash_map<tree, unsigned>; /* Increase the alignment of all global arrays for vectorization. 
*/ - FOR_EACH_DEFINED_VARIABLE (vnode) + for (hash_set<varpool_node *>::iterator it = vars->begin (); it != vars->end (); ++it) { - tree decl = vnode->decl; - unsigned int alignment; - - if ((decl_in_symtab_p (decl) - && !symtab_node::get (decl)->can_increase_alignment_p ()) - || DECL_USER_ALIGN (decl) || DECL_ARTIFICIAL (decl)) - continue; - - alignment = get_vec_alignment_for_type (TREE_TYPE (decl)); - if (alignment && vect_can_force_dr_alignment_p (decl, alignment)) - { - vnode->increase_alignment (alignment); - dump_printf (MSG_NOTE, "Increasing alignment of decl: "); - dump_generic_expr (MSG_NOTE, TDF_SLIM, decl); - dump_printf (MSG_NOTE, "\n"); - } + varpool_node *vnode = *it; + vnode->call_for_symbol_and_aliases (increase_alignment_callback, NULL, true); } delete type_align_map; + delete vars; return 0; } +static void +increase_alignment_analyze_function (cgraph_node *cnode) +{ + ipa_ref *ref; + + for (int i = 0; cnode->iterate_reference (i, ref); i++) + if (varpool_node *vnode = dyn_cast<varpool_node *> (ref->referred)) + { + if (vars->contains (vnode)) + return; + + tree decl = vnode->decl; + if (! (TREE_CODE (TREE_TYPE (decl)) == ARRAY_TYPE + || TREE_CODE (TREE_TYPE (decl)) == RECORD_TYPE) + && !DECL_ARTIFICIAL (decl)) + continue; + + if (ref->use == IPA_REF_ADDR) + vars->add (vnode); + else + { + struct loop *l = loop_containing_stmt (ref->stmt); + if (l != DECL_STRUCT_FUNCTION (cnode->decl)->x_current_loops->tree_root) + vars->add (vnode); + } + } +} + +static void +increase_alignment_generate_summary (void) +{ + vars = new hash_set<varpool_node *> (); + cgraph_node *cnode; + + FOR_EACH_DEFINED_FUNCTION (cnode) + if (opt_for_fn (cnode->decl, flag_tree_loop_optimize)) + increase_alignment_analyze_function (cnode); +} + +static void +increase_alignment_write_summary (void) +{ + /* Following is streamed to the section: + a) count of type uhwi, representing number of decls. + b) the decls. 
*/ + + struct output_block *ob = create_output_block (LTO_section_increase_alignment); + streamer_write_uhwi (ob, vars->elements ()); + + for (hash_set<varpool_node *>::iterator it = vars->begin (); it != vars->end (); ++it) + { + varpool_node *vnode = *it; + stream_write_tree (ob, vnode->decl, true); + } + + produce_asm (ob, NULL); + destroy_output_block (ob); + + /* FIXME: This is a workaround for segfault when flag_fat_lto_objects gets + enabled either directly or by -fno-use-linker-plugin. + With flag_fat_lto_objects, execute() is called also during analysis + stage, after write_summary(). So if we delete vars here, we segfault + during execute() since it requires vars. I am not sure what's a good + solution to workaround the issue. */ + + if (!global_options.x_flag_fat_lto_objects) + delete vars; +} + +static void +increase_alignment_read_section (struct lto_file_decl_data *file_data, + const char *data, size_t len) +{ + const struct lto_function_header *header = + (const struct lto_function_header *) data; + + const int cfg_offset = sizeof (struct lto_function_header); + const int main_offset = cfg_offset + header->cfg_size; + const int string_offset = main_offset + header->main_size; + + lto_input_block ib (data + main_offset, header->main_size, + file_data->mode_table); + + struct data_in *data_in = + lto_data_in_create (file_data, data + string_offset, + header->string_size, vNULL); + + unsigned HOST_WIDE_INT count = streamer_read_uhwi (&ib); + + for (unsigned HOST_WIDE_INT i = 0; i < count; i++) + { + tree decl = stream_read_tree (&ib, data_in); + varpool_node *vnode = varpool_node::get_create (decl); + if (vnode) + vars->add (vnode); + } + + lto_free_section_data (file_data, LTO_section_increase_alignment, NULL, + data, len); + + lto_data_in_delete (data_in); +} + +static void +increase_alignment_read_summary (void) +{ + struct lto_file_decl_data **file_data_vec = lto_get_file_decl_data (); + struct lto_file_decl_data *file_data; + unsigned i = 0; + + 
vars = new hash_set<varpool_node *> (); + + while ((file_data = file_data_vec[i++])) + { + size_t len; + const char *data = lto_get_section_data (file_data, LTO_section_increase_alignment, NULL, &len); + + if (data) + increase_alignment_read_section (file_data, data, len); + } +} namespace { const pass_data pass_data_ipa_increase_alignment = { - SIMPLE_IPA_PASS, /* type */ + IPA_PASS, /* type */ "increase_alignment", /* name */ OPTGROUP_LOOP | OPTGROUP_VEC, /* optinfo_flags */ TV_IPA_OPT, /* tv_id */ @@ -949,17 +1106,26 @@ const pass_data pass_data_ipa_increase_alignment = 0, /* todo_flags_finish */ }; -class pass_ipa_increase_alignment : public simple_ipa_opt_pass +class pass_ipa_increase_alignment : public ipa_opt_pass_d { public: pass_ipa_increase_alignment (gcc::context *ctxt) - : simple_ipa_opt_pass (pass_data_ipa_increase_alignment, ctxt) + : ipa_opt_pass_d (pass_data_ipa_increase_alignment, ctxt, + increase_alignment_generate_summary, + increase_alignment_write_summary, + increase_alignment_read_summary, + NULL, /* write optimization summary */ + NULL, /* read optimization summary */ + NULL, /* stmt fixup */ + 0, /* function_transform_todo_flags_start */ + NULL, /* transform function */ + NULL )/* variable transform */ {} /* opt_pass methods: */ virtual bool gate (function *) { - return flag_section_anchors && flag_tree_loop_vectorize; + return flag_ipa_increase_alignment != 0; } virtual unsigned int execute (function *) { return increase_alignment (); } @@ -968,7 +1133,7 @@ public: } // anon namespace -simple_ipa_opt_pass * +ipa_opt_pass_d * make_pass_ipa_increase_alignment (gcc::context *ctxt) { return new pass_ipa_increase_alignment (ctxt);