diff mbox

[RFC] introduce --param max-lto-partition for having an upper bound on partition size

Message ID CAAgBjMn7TX0ZkPrvc78qX2doNNivwLFsC8ubqH=TkTz4+6fnRg@mail.gmail.com
State Superseded
Headers show

Commit Message

Prathamesh Kulkarni April 5, 2016, 11:11 a.m. UTC
On 4 April 2016 at 19:44, Jan Hubicka <hubicka@ucw.cz> wrote:
>

>> diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c

>> index 9eb63c2..bc0c612 100644

>> --- a/gcc/lto/lto-partition.c

>> +++ b/gcc/lto/lto-partition.c

>> @@ -511,9 +511,20 @@ lto_balanced_map (int n_lto_partitions)

>>    varpool_order.qsort (varpool_node_cmp);

>>

>>    /* Compute partition size and create the first partition.  */

>> +  if (PARAM_VALUE (MIN_PARTITION_SIZE) > PARAM_VALUE (MAX_PARTITION_SIZE))

>> +    fatal_error (input_location, "min partition size cannot be greater than max partition size");

>> +

>>    partition_size = total_size / n_lto_partitions;

>>    if (partition_size < PARAM_VALUE (MIN_PARTITION_SIZE))

>>      partition_size = PARAM_VALUE (MIN_PARTITION_SIZE);

>> +  else if (partition_size > PARAM_VALUE (MAX_PARTITION_SIZE))

>> +    {

>> +      n_lto_partitions = total_size / PARAM_VALUE (MAX_PARTITION_SIZE);

>> +      if (total_size % PARAM_VALUE (MAX_PARTITION_SIZE))

>> +     n_lto_partitions++;

>> +      partition_size = total_size / n_lto_partitions;

>> +    }

>

> lto_balanced_map actually works in a way that looks for cheapest cutpoint in range

> 3/4*partition_size to 2*partition_size and picks the cheapest range.

> Setting partition_size to this value will thus not cause partitioner to produce smaller

> partitions only.  I suppose you could modify the conditional:

>

>       /* Partition is too large, unwind into step when best cost was reached and

>          start new partition.  */

>       if (partition->insns > 2 * partition_size)

>

> and/or in the code above set the partition_size to half of total_size/max_size.

>

> I know this is somewhat sloppy.  This was really just first cut implementation

> many years ago. I expected to reimplement it smarter soon, but then there was

> never really a need for it (I am trying to avoid late IPA optimizations so the

> partitioning decisions should mostly affect compile time performance only).

> If ARM is more sensitive to partitioning, perhaps it would make sense to try to

> look for something smarter.

>

>> +

>>    npartitions = 1;

>>    partition = new_partition ("");

>>    if (symtab->dump_file)

>> diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c

>> index 9dd513f..294b8a4 100644

>> --- a/gcc/lto/lto.c

>> +++ b/gcc/lto/lto.c

>> @@ -3112,6 +3112,12 @@ do_whole_program_analysis (void)

>>    timevar_pop (TV_WHOPR_WPA);

>>

>>    timevar_push (TV_WHOPR_PARTITIONING);

>> +

>> +  if (flag_lto_partition != LTO_PARTITION_BALANCED

>> +      && PARAM_VALUE (MAX_PARTITION_SIZE) != INT_MAX)

>> +    fatal_error (input_location, "--param max-lto-partition should only"

>> +              " be used with balanced partitioning\n");

>> +

>

> I think we should wire in a reasonable MAX_PARTITION_SIZE default.  The value you

> found experimentally may be a good start. For that reason we can't really

> refuse a value when !LTO_PARTITION_BALANCED.  Just document it as parameter for

> balanced partitioning only and add a parameter to lto_balanced_map specifying whether

> this param should be honored (because the same path is used for partitioning to one partition)

>

> Otherwise the patch looks good to me modulo missing documentation.

Thanks for the review. I have updated the patch.
Does this version look OK ?
I had randomly chosen 10000, not sure if that's an appropriate value
for default.

I have a silly question about partitioning: Does it hamper
transformations on ipa optimizations if caller and
callee get placed in separate partitions ? For instance if callee is
supposed to be inlined
into caller, would inlining still take place if callee and caller get
placed in separate partitions ?
I tried with a trivial example with -flto-partition=max
which created 3 partitions for 3 functions (bar, foo and main), and it was
able to inline bar into foo and foo into main.  I am not sure how that happens.
I thought ltrans can perform transformations on functions only within
a single partition
and not across partitions ?

Thanks,
Prathamesh
>

> Honza
diff mbox

Patch

diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 9e54bb7..f0de7ec 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -9477,6 +9477,11 @@  Size of minimal partition for WHOPR (in estimated instructions).
 This prevents expenses of splitting very small programs into too many
 partitions.
 
+@item lto-max-partition
+Size of maximal partition for WHOPR (in estimated instructions).
+This provides an upper bound on the size of an individual partition.
+Meant to be used only with balanced partitioning.
+
 @item cxx-max-namespaces-for-diagnostic-help
 The maximum number of namespaces to consult for suggestions when C++
 name lookup fails for an identifier.  The default is 1000.
diff --git a/gcc/lto/lto-partition.c b/gcc/lto/lto-partition.c
index 9eb63c2..d385dd9 100644
--- a/gcc/lto/lto-partition.c
+++ b/gcc/lto/lto-partition.c
@@ -447,7 +447,7 @@  add_sorted_nodes (vec<symtab_node *> &next_nodes, ltrans_partition partition)
    and in-partition calls was reached.  */
 
 void
-lto_balanced_map (int n_lto_partitions)
+lto_balanced_map (int n_lto_partitions, bool honor_max_partition)
 {
   int n_nodes = 0;
   int n_varpool_nodes = 0, varpool_pos = 0, best_varpool_pos = 0;
@@ -511,6 +511,9 @@  lto_balanced_map (int n_lto_partitions)
   varpool_order.qsort (varpool_node_cmp);
 
   /* Compute partition size and create the first partition.  */
+  if (PARAM_VALUE (MIN_PARTITION_SIZE) > PARAM_VALUE (MAX_PARTITION_SIZE))
+    fatal_error (input_location, "min partition size cannot be greater than max partition size");
+
   partition_size = total_size / n_lto_partitions;
   if (partition_size < PARAM_VALUE (MIN_PARTITION_SIZE))
     partition_size = PARAM_VALUE (MIN_PARTITION_SIZE);
@@ -719,7 +723,9 @@  lto_balanced_map (int n_lto_partitions)
 		 best_cost, best_internal, best_i);
       /* Partition is too large, unwind into step when best cost was reached and
 	 start new partition.  */
-      if (partition->insns > 2 * partition_size)
+      if (partition->insns > 2 * partition_size
+	  || (honor_max_partition
+	      && partition->insns > PARAM_VALUE (MAX_PARTITION_SIZE)))
 	{
 	  if (best_i != i)
 	    {
diff --git a/gcc/lto/lto-partition.h b/gcc/lto/lto-partition.h
index 31e3764..2992bee 100644
--- a/gcc/lto/lto-partition.h
+++ b/gcc/lto/lto-partition.h
@@ -35,7 +35,7 @@  extern vec<ltrans_partition> ltrans_partitions;
 
 void lto_1_to_1_map (void);
 void lto_max_map (void);
-void lto_balanced_map (int);
+void lto_balanced_map (int, bool honor_max_partition = true);
 void lto_promote_cross_file_statics (void);
 void free_ltrans_partitions (void);
 void lto_promote_statics_nonwpa (void);
diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c
index 9dd513f..82bd9b3 100644
--- a/gcc/lto/lto.c
+++ b/gcc/lto/lto.c
@@ -3117,7 +3118,7 @@  do_whole_program_analysis (void)
   else if (flag_lto_partition == LTO_PARTITION_MAX)
     lto_max_map ();
   else if (flag_lto_partition == LTO_PARTITION_ONE)
-    lto_balanced_map (1);
+    lto_balanced_map (1, false);
   else if (flag_lto_partition == LTO_PARTITION_BALANCED)
     lto_balanced_map (PARAM_VALUE (PARAM_LTO_PARTITIONS));
   else
diff --git a/gcc/params.def b/gcc/params.def
index 9362c15..9f8a648 100644
--- a/gcc/params.def
+++ b/gcc/params.def
@@ -1029,6 +1029,11 @@  DEFPARAM (MIN_PARTITION_SIZE,
 	  "Minimal size of a partition for LTO (in estimated instructions).",
 	  1000, 0, 0)
 
+DEFPARAM (MAX_PARTITION_SIZE,
+	  "lto-max-partition",
+	  "Maximal size of a partition for LTO (in estimated instructions).",
+	  10000, 0, INT_MAX)
+
 /* Diagnostic parameters.  */
 
 DEFPARAM (CXX_MAX_NAMESPACES_FOR_DIAGNOSTIC_HELP,