diff mbox

[RFC] introduce --param max-lto-partition for having an upper bound on partition size

Message ID CAAgBjMnGYAHhdsEsyHuEExwsjeF+qN6o=A_fP042198F9H6_DA@mail.gmail.com
State New
Headers show

Commit Message

Prathamesh Kulkarni April 25, 2016, 11:58 a.m. UTC
On 6 April 2016 at 14:54, Richard Biener <rguenther@suse.de> wrote:
> On Wed, 6 Apr 2016, Richard Biener wrote:

>

>> On Wed, 6 Apr 2016, Prathamesh Kulkarni wrote:

>>

>> > On 6 April 2016 at 13:44, Richard Biener <rguenther@suse.de> wrote:

>> > > On Wed, 6 Apr 2016, Prathamesh Kulkarni wrote:

>> > >

>> > >> On 5 April 2016 at 18:28, Richard Biener <rguenther@suse.de> wrote:

>> > >> > On Tue, 5 Apr 2016, Prathamesh Kulkarni wrote:

>> > >> >

>> > >> >> On 5 April 2016 at 16:58, Richard Biener <rguenther@suse.de> wrote:

>> > >> >> > On Tue, 5 Apr 2016, Prathamesh Kulkarni wrote:

>> > >> >> >

>> > >> >> >> On 4 April 2016 at 19:44, Jan Hubicka <hubicka@ucw.cz> wrote:

>> > >> >> >> >

>> > >> >> >> >> diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c

>> > >> >> >> >> index 9eb63c2..bc0c612 100644

>> > >> >> >> >> --- a/gcc/lto/lto-partition.c

>> > >> >> >> >> +++ b/gcc/lto/lto-partition.c

>> > >> >> >> >> @@ -511,9 +511,20 @@ lto_balanced_map (int n_lto_partitions)

>> > >> >> >> >>    varpool_order.qsort (varpool_node_cmp);

>> > >> >> >> >>

>> > >> >> >> >>    /* Compute partition size and create the first partition.  */

>> > >> >> >> >> +  if (PARAM_VALUE (MIN_PARTITION_SIZE) > PARAM_VALUE (MAX_PARTITION_SIZE))

>> > >> >> >> >> +    fatal_error (input_location, "min partition size cannot be greater than max partition size");

>> > >> >> >> >> +

>> > >> >> >> >>    partition_size = total_size / n_lto_partitions;

>> > >> >> >> >>    if (partition_size < PARAM_VALUE (MIN_PARTITION_SIZE))

>> > >> >> >> >>      partition_size = PARAM_VALUE (MIN_PARTITION_SIZE);

>> > >> >> >> >> +  else if (partition_size > PARAM_VALUE (MAX_PARTITION_SIZE))

>> > >> >> >> >> +    {

>> > >> >> >> >> +      n_lto_partitions = total_size / PARAM_VALUE (MAX_PARTITION_SIZE);

>> > >> >> >> >> +      if (total_size % PARAM_VALUE (MAX_PARTITION_SIZE))

>> > >> >> >> >> +     n_lto_partitions++;

>> > >> >> >> >> +      partition_size = total_size / n_lto_partitions;

>> > >> >> >> >> +    }

>> > >> >> >> >

>> > >> >> >> > lto_balanced_map actually works in a way that looks for cheapest cutpoint in range

>> > >> >> >> > 3/4*partition_size to 2*partition_size and picks the cheapest range.

>> > >> >> >> > Setting partition_size to this value will thus not cause partitioner to produce smaller

>> > >> >> >> > partitions only.  I suppose modify the conditional:

>> > >> >> >> >

>> > >> >> >> >       /* Partition is too large, unwind into step when best cost was reached and

>> > >> >> >> >          start new partition.  */

>> > >> >> >> >       if (partition->insns > 2 * partition_size)

>> > >> >> >> >

>> > >> >> >> > and/or in the code above set the partition_size to half of total_size/max_size.

>> > >> >> >> >

>> > >> >> >> > I know this is somewhat sloppy.  This was really just first cut implementation

>> > >> >> >> > many years ago. I expected to reimplement it smarter soon, but then there was

>> > >> >> >> > never really a need for it (I am trying to avoid late IPA optimizations so the

>> > >> >> >> > partitioning decisions should mostly affect compile time performance only).

>> > >> >> >> > If ARM is more sensitive to partitioning, perhaps it would make sense to try to

>> > >> >> >> > look for something smarter.

>> > >> >> >> >

>> > >> >> >> >> +

>> > >> >> >> >>    npartitions = 1;

>> > >> >> >> >>    partition = new_partition ("");

>> > >> >> >> >>    if (symtab->dump_file)

>> > >> >> >> >> diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c

>> > >> >> >> >> index 9dd513f..294b8a4 100644

>> > >> >> >> >> --- a/gcc/lto/lto.c

>> > >> >> >> >> +++ b/gcc/lto/lto.c

>> > >> >> >> >> @@ -3112,6 +3112,12 @@ do_whole_program_analysis (void)

>> > >> >> >> >>    timevar_pop (TV_WHOPR_WPA);

>> > >> >> >> >>

>> > >> >> >> >>    timevar_push (TV_WHOPR_PARTITIONING);

>> > >> >> >> >> +

>> > >> >> >> >> +  if (flag_lto_partition != LTO_PARTITION_BALANCED

>> > >> >> >> >> +      && PARAM_VALUE (MAX_PARTITION_SIZE) != INT_MAX)

>> > >> >> >> >> +    fatal_error (input_location, "--param max-lto-partition should only"

>> > >> >> >> >> +              " be used with balanced partitioning\n");

>> > >> >> >> >> +

>> > >> >> >> >

>> > >> >> >> > I think we should wire in a reasonable MAX_PARTITION_SIZE default.  The value you

>> > >> >> >> > found experimentally may be a good start. For that reason we can't really

>> > >> >> >> > refuse a value when !LTO_PARTITION_BALANCED.  Just document it as parameter for

>> > >> >> >> > balanced partitioning only and add a parameter to lto_balanced_map specifying whether

>> > >> >> >> > this param should be honored (because the same path is used for partitioning to one partition)

>> > >> >> >> >

>> > >> >> >> > Otherwise the patch looks good to me modulo missing documentation.

>> > >> >> >> Thanks for the review. I have updated the patch.

>> > >> >> >> Does this version look OK ?

>> > >> >> >> I had randomly chosen 10000, not sure if that's an appropriate value

>> > >> >> >> for default.

>> > >> >> >

>> > >> >> > I think it's way too small.  This is roughly the number of GIMPLE stmts

>> > >> >> > (thus roughly the number of instructions).  So with say a 8 byte

>> > >> >> > instruction format it is on the order of 80kB.  You'd want to have a

>> > >> >> > default of at least several ten times of large-unit-insns (also 10000).

>> > >> >> > I'd choose sth like 1000000 (one million).  I find the lto-min-partition

>> > >> >> > number quite small as well (and up it by a factor of 10).

>> > >> >> Done in this version.

>> > >> >

>> > >> > I'd do that separately.

>> > >> >

>> > >> > Please no default parameter for lto_balanced_map (), instead change

>> > >> > all callers.

>> > >> >

>> > >> >> Is it OK after bootstrap+test ?

>> > >> >

>> > >> > Note that this is for stage1 only.  I'll leave approval to Honza

>> > >> > (also verification of the default max param - not sure if for example

>> > >> > chromium or firefox should/will be split to more than 32 partitions

>> > >> > with the patch)

>> > >> Removed default parameter in this version. I verified with the patch

>> > >> for chromium LTO build:

>> > >> n_lto_partitions == 32, ltrans_partitions.length() == 31

>> > >

>> > > Just noticed that lto_balanced_map already gets PARAM_LTO_PARTITIONS,

>> > > so why not pass it PARAM_MAX_PARTITION_SIZE or 0 (as magic value for

>> > > unlimited) instead of a bool parameter?

>> > Indeed.  Instead of 0, would it be OK to pass INT_MAX as 2nd parameter in case

>> > of single partition, since in that case partition->insns >

>> > max_partition_size will never

>> > be true, which would effectively ignore max_partition_size.

>>

>> You mean we are limited to INT_MAX partition size anyway, even on 64bit

>> systems? ...  (but yes, using a suitable large number works as well)

>

> Ah, even 'total_size' is an int ... I wonder what this means for LTOing

> a -mcmodel=large app (that really needs the large model).

Hi,
Is the attached patch OK for trunk now ?
Bootstrapped and tested on x86_64-unknown-linux-gnu.
Cross tested on arm*-*-* and aarch64*-*-*.

Thanks,
Prathamesh
>

> Richard.

Comments

Prathamesh Kulkarni April 26, 2016, 8:44 p.m. UTC | #1
On 26 April 2016 at 16:31, Richard Biener <rguenther@suse.de> wrote:
> On Mon, 25 Apr 2016, Prathamesh Kulkarni wrote:

>

>> On 6 April 2016 at 14:54, Richard Biener <rguenther@suse.de> wrote:

>> > On Wed, 6 Apr 2016, Richard Biener wrote:

>> >

>> >> On Wed, 6 Apr 2016, Prathamesh Kulkarni wrote:

>> >>

>> >> > On 6 April 2016 at 13:44, Richard Biener <rguenther@suse.de> wrote:

>> >> > > On Wed, 6 Apr 2016, Prathamesh Kulkarni wrote:

>> >> > >

>> >> > >> On 5 April 2016 at 18:28, Richard Biener <rguenther@suse.de> wrote:

>> >> > >> > On Tue, 5 Apr 2016, Prathamesh Kulkarni wrote:

>> >> > >> >

>> >> > >> >> On 5 April 2016 at 16:58, Richard Biener <rguenther@suse.de> wrote:

>> >> > >> >> > On Tue, 5 Apr 2016, Prathamesh Kulkarni wrote:

>> >> > >> >> >

>> >> > >> >> >> On 4 April 2016 at 19:44, Jan Hubicka <hubicka@ucw.cz> wrote:

>> >> > >> >> >> >

>> >> > >> >> >> >> diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c

>> >> > >> >> >> >> index 9eb63c2..bc0c612 100644

>> >> > >> >> >> >> --- a/gcc/lto/lto-partition.c

>> >> > >> >> >> >> +++ b/gcc/lto/lto-partition.c

>> >> > >> >> >> >> @@ -511,9 +511,20 @@ lto_balanced_map (int n_lto_partitions)

>> >> > >> >> >> >>    varpool_order.qsort (varpool_node_cmp);

>> >> > >> >> >> >>

>> >> > >> >> >> >>    /* Compute partition size and create the first partition.  */

>> >> > >> >> >> >> +  if (PARAM_VALUE (MIN_PARTITION_SIZE) > PARAM_VALUE (MAX_PARTITION_SIZE))

>> >> > >> >> >> >> +    fatal_error (input_location, "min partition size cannot be greater than max partition size");

>> >> > >> >> >> >> +

>> >> > >> >> >> >>    partition_size = total_size / n_lto_partitions;

>> >> > >> >> >> >>    if (partition_size < PARAM_VALUE (MIN_PARTITION_SIZE))

>> >> > >> >> >> >>      partition_size = PARAM_VALUE (MIN_PARTITION_SIZE);

>> >> > >> >> >> >> +  else if (partition_size > PARAM_VALUE (MAX_PARTITION_SIZE))

>> >> > >> >> >> >> +    {

>> >> > >> >> >> >> +      n_lto_partitions = total_size / PARAM_VALUE (MAX_PARTITION_SIZE);

>> >> > >> >> >> >> +      if (total_size % PARAM_VALUE (MAX_PARTITION_SIZE))

>> >> > >> >> >> >> +     n_lto_partitions++;

>> >> > >> >> >> >> +      partition_size = total_size / n_lto_partitions;

>> >> > >> >> >> >> +    }

>> >> > >> >> >> >

>> >> > >> >> >> > lto_balanced_map actually works in a way that looks for cheapest cutpoint in range

>> >> > >> >> >> > 3/4*partition_size to 2*partition_size and picks the cheapest range.

>> >> > >> >> >> > Setting partition_size to this value will thus not cause partitioner to produce smaller

>> >> > >> >> >> > partitions only.  I suppose modify the conditional:

>> >> > >> >> >> >

>> >> > >> >> >> >       /* Partition is too large, unwind into step when best cost was reached and

>> >> > >> >> >> >          start new partition.  */

>> >> > >> >> >> >       if (partition->insns > 2 * partition_size)

>> >> > >> >> >> >

>> >> > >> >> >> > and/or in the code above set the partition_size to half of total_size/max_size.

>> >> > >> >> >> >

>> >> > >> >> >> > I know this is somewhat sloppy.  This was really just first cut implementation

>> >> > >> >> >> > many years ago. I expected to reimplement it smarter soon, but then there was

>> >> > >> >> >> > never really a need for it (I am trying to avoid late IPA optimizations so the

>> >> > >> >> >> > partitioning decisions should mostly affect compile time performance only).

>> >> > >> >> >> > If ARM is more sensitive to partitioning, perhaps it would make sense to try to

>> >> > >> >> >> > look for something smarter.

>> >> > >> >> >> >

>> >> > >> >> >> >> +

>> >> > >> >> >> >>    npartitions = 1;

>> >> > >> >> >> >>    partition = new_partition ("");

>> >> > >> >> >> >>    if (symtab->dump_file)

>> >> > >> >> >> >> diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c

>> >> > >> >> >> >> index 9dd513f..294b8a4 100644

>> >> > >> >> >> >> --- a/gcc/lto/lto.c

>> >> > >> >> >> >> +++ b/gcc/lto/lto.c

>> >> > >> >> >> >> @@ -3112,6 +3112,12 @@ do_whole_program_analysis (void)

>> >> > >> >> >> >>    timevar_pop (TV_WHOPR_WPA);

>> >> > >> >> >> >>

>> >> > >> >> >> >>    timevar_push (TV_WHOPR_PARTITIONING);

>> >> > >> >> >> >> +

>> >> > >> >> >> >> +  if (flag_lto_partition != LTO_PARTITION_BALANCED

>> >> > >> >> >> >> +      && PARAM_VALUE (MAX_PARTITION_SIZE) != INT_MAX)

>> >> > >> >> >> >> +    fatal_error (input_location, "--param max-lto-partition should only"

>> >> > >> >> >> >> +              " be used with balanced partitioning\n");

>> >> > >> >> >> >> +

>> >> > >> >> >> >

>> >> > >> >> >> > I think we should wire in a reasonable MAX_PARTITION_SIZE default.  The value you

>> >> > >> >> >> > found experimentally may be a good start. For that reason we can't really

>> >> > >> >> >> > refuse a value when !LTO_PARTITION_BALANCED.  Just document it as parameter for

>> >> > >> >> >> > balanced partitioning only and add a parameter to lto_balanced_map specifying whether

>> >> > >> >> >> > this param should be honored (because the same path is used for partitioning to one partition)

>> >> > >> >> >> >

>> >> > >> >> >> > Otherwise the patch looks good to me modulo missing documentation.

>> >> > >> >> >> Thanks for the review. I have updated the patch.

>> >> > >> >> >> Does this version look OK ?

>> >> > >> >> >> I had randomly chosen 10000, not sure if that's an appropriate value

>> >> > >> >> >> for default.

>> >> > >> >> >

>> >> > >> >> > I think it's way too small.  This is roughly the number of GIMPLE stmts

>> >> > >> >> > (thus roughly the number of instructions).  So with say a 8 byte

>> >> > >> >> > instruction format it is on the order of 80kB.  You'd want to have a

>> >> > >> >> > default of at least several ten times of large-unit-insns (also 10000).

>> >> > >> >> > I'd choose sth like 1000000 (one million).  I find the lto-min-partition

>> >> > >> >> > number quite small as well (and up it by a factor of 10).

>> >> > >> >> Done in this version.

>> >> > >> >

>> >> > >> > I'd do that separately.

>> >> > >> >

>> >> > >> > Please no default parameter for lto_balanced_map (), instead change

>> >> > >> > all callers.

>> >> > >> >

>> >> > >> >> Is it OK after bootstrap+test ?

>> >> > >> >

>> >> > >> > Note that this is for stage1 only.  I'll leave approval to Honza

>> >> > >> > (also verification of the default max param - not sure if for example

>> >> > >> > chromium or firefox should/will be split to more than 32 partitions

>> >> > >> > with the patch)

>> >> > >> Removed default parameter in this version. I verified with the patch

>> >> > >> for chromium LTO build:

>> >> > >> n_lto_partitions == 32, ltrans_partitions.length() == 31

>> >> > >

>> >> > > Just noticed that lto_balanced_map already gets PARAM_LTO_PARTITIONS,

>> >> > > so why not pass it PARAM_MAX_PARTITION_SIZE or 0 (as magic value for

>> >> > > unlimited) instead of a bool parameter?

>> >> > Indeed.  Instead of 0, would it be OK to pass INT_MAX as 2nd parameter in case

>> >> > of single partition, since in that case partition->insns >

>> >> > max_partition_size will never

>> >> > be true, which would effectively ignore max_partition_size.

>> >>

>> >> You mean we are limited to INT_MAX partition size anyway, even on 64bit

>> >> systems? ...  (but yes, using a suitable large number works as well)

>> >

>> > Ah, even 'total_size' is an int ... I wonder what this means for LTOing

>> > a -mcmodel=large app (that really needs the large model).

>> Hi,

>> Is the attached patch OK for trunk now ?

>> Bootstrapped and tested on x86_64-unknown-linux-gnu.

>> Cross tested on arm*-*-* and aarch64*-*-*.

>

> Ok.  How many partitions do we generate for linking cc1 with

> bootstrap-lto now?

No difference with patch in number of partitions:
ltrans_partitions.length() == 31, n_lto_partitions == 32.
Should I commit it ?

Thanks,
Prathamesh
>

> Thanks,

> Richard.
diff mbox

Patch

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 821f8fd..4afa32c 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -9477,6 +9477,11 @@  Size of minimal partition for WHOPR (in estimated instructions).
 This prevents expenses of splitting very small programs into too many
 partitions.
 
+@item lto-max-partition
+Maximal size of a partition for WHOPR (in estimated instructions).
+This provides an upper bound on the size of an individual partition.
+Meant to be used only with balanced partitioning.
+
 @item cxx-max-namespaces-for-diagnostic-help
 The maximum number of namespaces to consult for suggestions when C++
 name lookup fails for an identifier.  The default is 1000.
diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c
index 9eb63c2..c191d24 100644
--- a/gcc/lto/lto-partition.c
+++ b/gcc/lto/lto-partition.c
@@ -447,7 +447,7 @@  add_sorted_nodes (vec<symtab_node *> &next_nodes, ltrans_partition partition)
    and in-partition calls was reached.  */
 
 void
-lto_balanced_map (int n_lto_partitions)
+lto_balanced_map (int n_lto_partitions, int max_partition_size)
 {
   int n_nodes = 0;
   int n_varpool_nodes = 0, varpool_pos = 0, best_varpool_pos = 0;
@@ -511,6 +511,9 @@  lto_balanced_map (int n_lto_partitions)
   varpool_order.qsort (varpool_node_cmp);
 
   /* Compute partition size and create the first partition.  */
+  if (PARAM_VALUE (MIN_PARTITION_SIZE) > max_partition_size)
+    fatal_error (input_location, "min partition size cannot be greater than max partition size");
+
   partition_size = total_size / n_lto_partitions;
   if (partition_size < PARAM_VALUE (MIN_PARTITION_SIZE))
     partition_size = PARAM_VALUE (MIN_PARTITION_SIZE);
@@ -719,7 +722,8 @@  lto_balanced_map (int n_lto_partitions)
 		 best_cost, best_internal, best_i);
       /* Partition is too large, unwind into step when best cost was reached and
 	 start new partition.  */
-      if (partition->insns > 2 * partition_size)
+      if (partition->insns > 2 * partition_size
+	  || partition->insns > max_partition_size)
 	{
 	  if (best_i != i)
 	    {
diff --git a/gcc/lto/lto-partition.h b/gcc/lto/lto-partition.h
index 31e3764..f7abe62 100644
--- a/gcc/lto/lto-partition.h
+++ b/gcc/lto/lto-partition.h
@@ -35,7 +35,7 @@  extern vec<ltrans_partition> ltrans_partitions;
 
 void lto_1_to_1_map (void);
 void lto_max_map (void);
-void lto_balanced_map (int);
+void lto_balanced_map (int, int);
 void lto_promote_cross_file_statics (void);
 void free_ltrans_partitions (void);
 void lto_promote_statics_nonwpa (void);
diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c
index 9dd513f..af735cb 100644
--- a/gcc/lto/lto.c
+++ b/gcc/lto/lto.c
@@ -3117,9 +3117,10 @@  do_whole_program_analysis (void)
   else if (flag_lto_partition == LTO_PARTITION_MAX)
     lto_max_map ();
   else if (flag_lto_partition == LTO_PARTITION_ONE)
-    lto_balanced_map (1);
+    lto_balanced_map (1, INT_MAX);
   else if (flag_lto_partition == LTO_PARTITION_BALANCED)
-    lto_balanced_map (PARAM_VALUE (PARAM_LTO_PARTITIONS));
+    lto_balanced_map (PARAM_VALUE (PARAM_LTO_PARTITIONS),
+		      PARAM_VALUE (MAX_PARTITION_SIZE));
   else
     gcc_unreachable ();
 
diff --git a/gcc/params.def b/gcc/params.def
index dbff305..eceee32 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -1029,6 +1029,11 @@  DEFPARAM (MIN_PARTITION_SIZE,
 	  "Minimal size of a partition for LTO (in estimated instructions).",
 	  1000, 0, 0)
 
+DEFPARAM (MAX_PARTITION_SIZE,
+	  "lto-max-partition",
+	  "Maximal size of a partition for LTO (in estimated instructions).",
+	  1000000, 0, INT_MAX)
+
 /* Diagnostic parameters.  */
 
 DEFPARAM (CXX_MAX_NAMESPACES_FOR_DIAGNOSTIC_HELP,