[RFC,4/6] sched: powerpc: create a dedicated topology table

Message ID 1394003906-11630-5-git-send-email-vincent.guittot@linaro.org
State New
Headers show

Commit Message

Vincent Guittot March 5, 2014, 7:18 a.m.
Create a dedicated topology table for handling asymmetric features.
The current proposal creates a new level which describes which groups of CPUs
take advantage of SD_ASYM_PACKING. The useless level will be removed during the
build of the sched_domain topology.

Another solution would be to set SD_ASYM_PACKING in the sd_flags of SMT level
during the boot sequence and before the build of the sched_domain topology.

Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
---
 arch/powerpc/kernel/smp.c |   35 +++++++++++++++++++++++++++--------
 kernel/sched/core.c       |    6 ------
 2 files changed, 27 insertions(+), 14 deletions(-)

Comments

Preeti U Murthy March 11, 2014, 10:08 a.m. | #1
Hi Vincent,

On 03/05/2014 12:48 PM, Vincent Guittot wrote:
> Create a dedicated topology table for handling asymetric feature.
> The current proposal creates a new level which describes which groups of CPUs
> take adavantge of SD_ASYM_PACKING. The useless level will be removed during the
> build of the sched_domain topology.
> 
> Another solution would be to set SD_ASYM_PACKING in the sd_flags of SMT level
> during the boot sequence and before the build of the sched_domain topology.

Is the below what you mean as the other solution? If it is so, I would
strongly recommend this approach rather than adding another level to the
topology level to represent the asymmetric behaviour.

+static struct sched_domain_topology_level powerpc_topology[] = {
+#ifdef CONFIG_SCHED_SMT
+	{ cpu_smt_mask, SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES,
SD_INIT_NAME(SMT) | arch_sd_sibling_asym_packing() },
+#endif
+	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
+	{ NULL, },
+};

Regards
Preeti U Murthy
> 
> Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
> ---
>  arch/powerpc/kernel/smp.c |   35 +++++++++++++++++++++++++++--------
>  kernel/sched/core.c       |    6 ------
>  2 files changed, 27 insertions(+), 14 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
> index ac2621a..75da054 100644
> --- a/arch/powerpc/kernel/smp.c
> +++ b/arch/powerpc/kernel/smp.c
> @@ -755,6 +755,32 @@ int setup_profiling_timer(unsigned int multiplier)
>  	return 0;
>  }
> 
> +#ifdef CONFIG_SCHED_SMT
> +/* cpumask of CPUs with asymetric SMT dependancy */
> +static const struct cpumask *cpu_asmt_mask(int cpu)
> +{
> +	if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
> +		printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
> +		return topology_thread_cpumask(cpu);
> +	}
> +	return cpumask_of(cpu);
> +}
> +#endif
> +
> +static struct sched_domain_topology_level powerpc_topology[] = {
> +#ifdef CONFIG_SCHED_SMT
> +	{ cpu_asmt_mask, SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES | SD_ASYM_PACKING, SD_INIT_NAME(ASMT) },
> +	{ cpu_smt_mask, SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES, SD_INIT_NAME(SMT) },
> +#endif
> +	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
> +	{ NULL, },
> +};
> +
> +static void __init set_sched_topology(void)
> +{
> +	sched_domain_topology = powerpc_topology;
> +}
> +
>  void __init smp_cpus_done(unsigned int max_cpus)
>  {
>  	cpumask_var_t old_mask;
> @@ -779,15 +805,8 @@ void __init smp_cpus_done(unsigned int max_cpus)
> 
>  	dump_numa_cpu_topology();
> 
> -}
> +	set_sched_topology();
> 
> -int arch_sd_sibling_asym_packing(void)
> -{
> -	if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
> -		printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
> -		return SD_ASYM_PACKING;
> -	}
> -	return 0;
>  }
> 
>  #ifdef CONFIG_HOTPLUG_CPU
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 3479467..7606de0 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -5818,11 +5818,6 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
>  	atomic_set(&sg->sgp->nr_busy_cpus, sg->group_weight);
>  }
> 
> -int __weak arch_sd_sibling_asym_packing(void)
> -{
> -       return 0*SD_ASYM_PACKING;
> -}
> -
>  /*
>   * Initializers for schedule domains
>   * Non-inlined to reduce accumulated stack pressure in build_sched_domains()
> @@ -6000,7 +5995,6 @@ sd_init(struct sched_domain_topology_level *tl, int cpu)
>  	if (sd->flags & SD_SHARE_CPUPOWER) {
>  		sd->imbalance_pct = 110;
>  		sd->smt_gain = 1178; /* ~15% */
> -		sd->flags |= arch_sd_sibling_asym_packing();
> 
>  	} else if (sd->flags & SD_SHARE_PKG_RESOURCES) {
>  		sd->imbalance_pct = 117;
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Vincent Guittot March 11, 2014, 1:18 p.m. | #2
On 11 March 2014 11:08, Preeti U Murthy <preeti@linux.vnet.ibm.com> wrote:
> Hi Vincent,
>
> On 03/05/2014 12:48 PM, Vincent Guittot wrote:
>> Create a dedicated topology table for handling asymetric feature.
>> The current proposal creates a new level which describes which groups of CPUs
>> take adavantge of SD_ASYM_PACKING. The useless level will be removed during the
>> build of the sched_domain topology.
>>
>> Another solution would be to set SD_ASYM_PACKING in the sd_flags of SMT level
>> during the boot sequence and before the build of the sched_domain topology.
>
> Is the below what you mean as the other solution? If it is so, I would
> strongly recommend this approach rather than adding another level to the
> topology level to represent the asymmetric behaviour.
>
> +static struct sched_domain_topology_level powerpc_topology[] = {
> +#ifdef CONFIG_SCHED_SMT
> +       { cpu_smt_mask, SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES,
> SD_INIT_NAME(SMT) | arch_sd_sibling_asym_packing() },
> +#endif
> +       { cpu_cpu_mask, SD_INIT_NAME(DIE) },
> +       { NULL, },
> +};

Not exactly like that but something like below

+static struct sched_domain_topology_level powerpc_topology[] = {
+#ifdef CONFIG_SCHED_SMT
+ { cpu_smt_mask, SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES,
SD_INIT_NAME(SMT) },
+#endif
+ { cpu_cpu_mask, SD_INIT_NAME(DIE) },
+ { NULL, },
+};
+
+static void __init set_sched_topology(void)
+{
+#ifdef CONFIG_SCHED_SMT
+ powerpc_topology[0].sd_flags |= arch_sd_sibling_asym_packing();
+#endif
+ sched_domain_topology = powerpc_topology;
+}

>
> Regards
> Preeti U Murthy
>>
>> Signed-off-by: Vincent Guittot <vincent.guittot@linaro.org>
>> ---
>>  arch/powerpc/kernel/smp.c |   35 +++++++++++++++++++++++++++--------
>>  kernel/sched/core.c       |    6 ------
>>  2 files changed, 27 insertions(+), 14 deletions(-)
>>
>> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
>> index ac2621a..75da054 100644
>> --- a/arch/powerpc/kernel/smp.c
>> +++ b/arch/powerpc/kernel/smp.c
>> @@ -755,6 +755,32 @@ int setup_profiling_timer(unsigned int multiplier)
>>       return 0;
>>  }
>>
>> +#ifdef CONFIG_SCHED_SMT
>> +/* cpumask of CPUs with asymetric SMT dependancy */
>> +static const struct cpumask *cpu_asmt_mask(int cpu)
>> +{
>> +     if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
>> +             printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
>> +             return topology_thread_cpumask(cpu);
>> +     }
>> +     return cpumask_of(cpu);
>> +}
>> +#endif
>> +
>> +static struct sched_domain_topology_level powerpc_topology[] = {
>> +#ifdef CONFIG_SCHED_SMT
>> +     { cpu_asmt_mask, SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES | SD_ASYM_PACKING, SD_INIT_NAME(ASMT) },
>> +     { cpu_smt_mask, SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES, SD_INIT_NAME(SMT) },
>> +#endif
>> +     { cpu_cpu_mask, SD_INIT_NAME(DIE) },
>> +     { NULL, },
>> +};
>> +
>> +static void __init set_sched_topology(void)
>> +{
>> +     sched_domain_topology = powerpc_topology;
>> +}
>> +
>>  void __init smp_cpus_done(unsigned int max_cpus)
>>  {
>>       cpumask_var_t old_mask;
>> @@ -779,15 +805,8 @@ void __init smp_cpus_done(unsigned int max_cpus)
>>
>>       dump_numa_cpu_topology();
>>
>> -}
>> +     set_sched_topology();
>>
>> -int arch_sd_sibling_asym_packing(void)
>> -{
>> -     if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
>> -             printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
>> -             return SD_ASYM_PACKING;
>> -     }
>> -     return 0;
>>  }
>>
>>  #ifdef CONFIG_HOTPLUG_CPU
>> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
>> index 3479467..7606de0 100644
>> --- a/kernel/sched/core.c
>> +++ b/kernel/sched/core.c
>> @@ -5818,11 +5818,6 @@ static void init_sched_groups_power(int cpu, struct sched_domain *sd)
>>       atomic_set(&sg->sgp->nr_busy_cpus, sg->group_weight);
>>  }
>>
>> -int __weak arch_sd_sibling_asym_packing(void)
>> -{
>> -       return 0*SD_ASYM_PACKING;
>> -}
>> -
>>  /*
>>   * Initializers for schedule domains
>>   * Non-inlined to reduce accumulated stack pressure in build_sched_domains()
>> @@ -6000,7 +5995,6 @@ sd_init(struct sched_domain_topology_level *tl, int cpu)
>>       if (sd->flags & SD_SHARE_CPUPOWER) {
>>               sd->imbalance_pct = 110;
>>               sd->smt_gain = 1178; /* ~15% */
>> -             sd->flags |= arch_sd_sibling_asym_packing();
>>
>>       } else if (sd->flags & SD_SHARE_PKG_RESOURCES) {
>>               sd->imbalance_pct = 117;
>>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Preeti U Murthy March 12, 2014, 4:42 a.m. | #3
On 03/11/2014 06:48 PM, Vincent Guittot wrote:
> On 11 March 2014 11:08, Preeti U Murthy <preeti@linux.vnet.ibm.com> wrote:
>> Hi Vincent,
>>
>> On 03/05/2014 12:48 PM, Vincent Guittot wrote:
>>> Create a dedicated topology table for handling asymetric feature.
>>> The current proposal creates a new level which describes which groups of CPUs
>>> take adavantge of SD_ASYM_PACKING. The useless level will be removed during the
>>> build of the sched_domain topology.
>>>
>>> Another solution would be to set SD_ASYM_PACKING in the sd_flags of SMT level
>>> during the boot sequence and before the build of the sched_domain topology.
>>
>> Is the below what you mean as the other solution? If it is so, I would
>> strongly recommend this approach rather than adding another level to the
>> topology level to represent the asymmetric behaviour.
>>
>> +static struct sched_domain_topology_level powerpc_topology[] = {
>> +#ifdef CONFIG_SCHED_SMT
>> +       { cpu_smt_mask, SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES,
>> SD_INIT_NAME(SMT) | arch_sd_sibling_asym_packing() },
>> +#endif
>> +       { cpu_cpu_mask, SD_INIT_NAME(DIE) },
>> +       { NULL, },
>> +};
> 
> Not exactly like that but something like below
> 
> +static struct sched_domain_topology_level powerpc_topology[] = {
> +#ifdef CONFIG_SCHED_SMT
> + { cpu_smt_mask, SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES,
> SD_INIT_NAME(SMT) },
> +#endif
> + { cpu_cpu_mask, SD_INIT_NAME(DIE) },
> + { NULL, },
> +};
> +
> +static void __init set_sched_topology(void)
> +{
> +#ifdef CONFIG_SCHED_SMT
> + powerpc_topology[0].sd_flags |= arch_sd_sibling_asym_packing();
> +#endif
> + sched_domain_topology = powerpc_topology;
> +}

I think we can set it in powerpc_topology[] and not bother about setting
additional flags outside of this array. It is clearer this way.

On an additional note, on powerpc we would want to clear the
SD_SHARE_POWERDOMAIN flag at the CPU domain. On Power8, considering we
have 8 threads per core, we would want to consolidate tasks atleast upto
4 threads without significant performance impact before spilling over to
the other cores. By doing so, besides making use of the higher power of
the core we could do cpuidle management at the core level for the
remaining idle cores as a result of this consolidation.

So the powerpc_topology[] would be something like the below:

+static struct sched_domain_topology_level powerpc_topology[] = {
+#ifdef CONFIG_SCHED_SMT
+       { cpu_smt_mask, SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES,
SD_INIT_NAME(SMT) | arch_sd_sibling_asym_packing() | SD_SHARE_POWERDOMAIN },
+#endif
+       { cpu_cpu_mask, SD_INIT_NAME(DIE) },
+       { NULL, },
+};

The amount of consolidation to the threads in a core, we will probably
take care of it in cpu capacity or a similar parameter, but the above
topology level would be required to request the scheduler to try
consolidating tasks to cores till the cpu capacity(3/4/5 threads) has
exceeded.

Regards
Preeti U Murthy


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Vincent Guittot March 12, 2014, 7:44 a.m. | #4
On 12 March 2014 05:42, Preeti U Murthy <preeti@linux.vnet.ibm.com> wrote:
> On 03/11/2014 06:48 PM, Vincent Guittot wrote:
>> On 11 March 2014 11:08, Preeti U Murthy <preeti@linux.vnet.ibm.com> wrote:
>>> Hi Vincent,
>>>
>>> On 03/05/2014 12:48 PM, Vincent Guittot wrote:
>>>> Create a dedicated topology table for handling asymetric feature.
>>>> The current proposal creates a new level which describes which groups of CPUs
>>>> take adavantge of SD_ASYM_PACKING. The useless level will be removed during the
>>>> build of the sched_domain topology.
>>>>
>>>> Another solution would be to set SD_ASYM_PACKING in the sd_flags of SMT level
>>>> during the boot sequence and before the build of the sched_domain topology.
>>>
>>> Is the below what you mean as the other solution? If it is so, I would
>>> strongly recommend this approach rather than adding another level to the
>>> topology level to represent the asymmetric behaviour.
>>>
>>> +static struct sched_domain_topology_level powerpc_topology[] = {
>>> +#ifdef CONFIG_SCHED_SMT
>>> +       { cpu_smt_mask, SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES,
>>> SD_INIT_NAME(SMT) | arch_sd_sibling_asym_packing() },
>>> +#endif
>>> +       { cpu_cpu_mask, SD_INIT_NAME(DIE) },
>>> +       { NULL, },
>>> +};
>>
>> Not exactly like that but something like below
>>
>> +static struct sched_domain_topology_level powerpc_topology[] = {
>> +#ifdef CONFIG_SCHED_SMT
>> + { cpu_smt_mask, SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES,
>> SD_INIT_NAME(SMT) },
>> +#endif
>> + { cpu_cpu_mask, SD_INIT_NAME(DIE) },
>> + { NULL, },
>> +};
>> +
>> +static void __init set_sched_topology(void)
>> +{
>> +#ifdef CONFIG_SCHED_SMT
>> + powerpc_topology[0].sd_flags |= arch_sd_sibling_asym_packing();
>> +#endif
>> + sched_domain_topology = powerpc_topology;
>> +}
>
> I think we can set it in powerpc_topology[] and not bother about setting
> additional flags outside of this array. It is clearer this way.

IIRC, the arch_sd_sibling_asym_packing of powerpc can be set at
runtime which prevents it from being put directly in the table. Or it
means that we should use a function pointer in the table for setting
flags instead of a static value like the current proposal.

>
> On an additional note, on powerpc we would want to clear the
> SD_SHARE_POWERDOMAIN flag at the CPU domain. On Power8, considering we
> have 8 threads per core, we would want to consolidate tasks atleast upto
> 4 threads without significant performance impact before spilling over to
> the other cores. By doing so, besides making use of the higher power of
> the core we could do cpuidle management at the core level for the
> remaining idle cores as a result of this consolidation.

OK. i will add the SD_SHARE_POWERDOMAIN like below

Thanks
Vincent

>
> So the powerpc_topology[] would be something like the below:
>
> +static struct sched_domain_topology_level powerpc_topology[] = {
> +#ifdef CONFIG_SCHED_SMT
> +       { cpu_smt_mask, SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES,
> SD_INIT_NAME(SMT) | arch_sd_sibling_asym_packing() | SD_SHARE_POWERDOMAIN },
> +#endif
> +       { cpu_cpu_mask, SD_INIT_NAME(DIE) },
> +       { NULL, },
> +};
>
> The amount of consolidation to the threads in a core, we will probably
> take care of it in cpu capacity or a similar parameter, but the above
> topology level would be required to request the scheduler to try
> consolidating tasks to cores till the cpu capacity(3/4/5 threads) has
> exceeded.
>
> Regards
> Preeti U Murthy
>
>
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Dietmar Eggemann March 12, 2014, 11:04 a.m. | #5
On 12/03/14 07:44, Vincent Guittot wrote:
> On 12 March 2014 05:42, Preeti U Murthy <preeti@linux.vnet.ibm.com> wrote:
>> On 03/11/2014 06:48 PM, Vincent Guittot wrote:
>>> On 11 March 2014 11:08, Preeti U Murthy <preeti@linux.vnet.ibm.com> wrote:
>>>> Hi Vincent,
>>>>
>>>> On 03/05/2014 12:48 PM, Vincent Guittot wrote:
>>>>> Create a dedicated topology table for handling asymetric feature.
>>>>> The current proposal creates a new level which describes which groups of CPUs
>>>>> take adavantge of SD_ASYM_PACKING. The useless level will be removed during the
>>>>> build of the sched_domain topology.
>>>>>
>>>>> Another solution would be to set SD_ASYM_PACKING in the sd_flags of SMT level
>>>>> during the boot sequence and before the build of the sched_domain topology.
>>>>
>>>> Is the below what you mean as the other solution? If it is so, I would
>>>> strongly recommend this approach rather than adding another level to the
>>>> topology level to represent the asymmetric behaviour.
>>>>
>>>> +static struct sched_domain_topology_level powerpc_topology[] = {
>>>> +#ifdef CONFIG_SCHED_SMT
>>>> +       { cpu_smt_mask, SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES,
>>>> SD_INIT_NAME(SMT) | arch_sd_sibling_asym_packing() },
>>>> +#endif
>>>> +       { cpu_cpu_mask, SD_INIT_NAME(DIE) },
>>>> +       { NULL, },
>>>> +};
>>>
>>> Not exactly like that but something like below
>>>
>>> +static struct sched_domain_topology_level powerpc_topology[] = {
>>> +#ifdef CONFIG_SCHED_SMT
>>> + { cpu_smt_mask, SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES,
>>> SD_INIT_NAME(SMT) },
>>> +#endif
>>> + { cpu_cpu_mask, SD_INIT_NAME(DIE) },
>>> + { NULL, },
>>> +};
>>> +
>>> +static void __init set_sched_topology(void)
>>> +{
>>> +#ifdef CONFIG_SCHED_SMT
>>> + powerpc_topology[0].sd_flags |= arch_sd_sibling_asym_packing();
>>> +#endif
>>> + sched_domain_topology = powerpc_topology;
>>> +}
>>
>> I think we can set it in powerpc_topology[] and not bother about setting
>> additional flags outside of this array. It is clearer this way.
> 
> IIRC, the arch_sd_sibling_asym_packing of powerpc can be set at
> runtime which prevents it from being put directly in the table. Or it
> means that we should use a function pointer in the table for setting
> flags instead of a static value like the current proposal.

Right,

static struct sched_domain_topology_level powerpc_topology[] = {
#ifdef CONFIG_SCHED_SMT
        { cpu_asmt_mask, SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES |
SD_ASYM_PACKING, SD_INIT_NAME(ASMT) },
        { cpu_smt_mask, SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES |
arch_sd_sibling_asym_packing() | SD_SHARE_POWERDOMAIN, SD_INIT_NAME(SMT) },
#endif
        { cpu_cpu_mask, SD_INIT_NAME(DIE) },
        { NULL, },
};

is not compiling:

  CC      arch/powerpc/kernel/smp.o
arch/powerpc/kernel/smp.c:787:2: error: initializer element is not constant
arch/powerpc/kernel/smp.c:787:2: error: (near initialization for
'powerpc_topology[1].sd_flags')

So I'm in favour of a function pointer, even w/o the 'int cpu' parameter
to circumvent the issue that it is too easy to create broken sd setups.

-- Dietmar

> 
>>
>> On an additional note, on powerpc we would want to clear the
>> SD_SHARE_POWERDOMAIN flag at the CPU domain. On Power8, considering we
>> have 8 threads per core, we would want to consolidate tasks atleast upto
>> 4 threads without significant performance impact before spilling over to
>> the other cores. By doing so, besides making use of the higher power of
>> the core we could do cpuidle management at the core level for the
>> remaining idle cores as a result of this consolidation.
> 
> OK. i will add the SD_SHARE_POWERDOMAIN like below
> 
> Thanks
> Vincent
> 
>>
>> So the powerpc_topology[] would be something like the below:
>>
>> +static struct sched_domain_topology_level powerpc_topology[] = {
>> +#ifdef CONFIG_SCHED_SMT
>> +       { cpu_smt_mask, SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES,
>> SD_INIT_NAME(SMT) | arch_sd_sibling_asym_packing() | SD_SHARE_POWERDOMAIN },
>> +#endif
>> +       { cpu_cpu_mask, SD_INIT_NAME(DIE) },
>> +       { NULL, },
>> +};
>>
>> The amount of consolidation to the threads in a core, we will probably
>> take care of it in cpu capacity or a similar parameter, but the above
>> topology level would be required to request the scheduler to try
>> consolidating tasks to cores till the cpu capacity(3/4/5 threads) has
>> exceeded.
>>
>> Regards
>> Preeti U Murthy
>>
>>
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Preeti U Murthy March 14, 2014, 2:14 a.m. | #6
On 03/12/2014 01:14 PM, Vincent Guittot wrote:
> On 12 March 2014 05:42, Preeti U Murthy <preeti@linux.vnet.ibm.com> wrote:
>> On 03/11/2014 06:48 PM, Vincent Guittot wrote:
>>> On 11 March 2014 11:08, Preeti U Murthy <preeti@linux.vnet.ibm.com> wrote:
>>>> Hi Vincent,
>>>>
>>>> On 03/05/2014 12:48 PM, Vincent Guittot wrote:
>>>>> Create a dedicated topology table for handling asymetric feature.
>>>>> The current proposal creates a new level which describes which groups of CPUs
>>>>> take adavantge of SD_ASYM_PACKING. The useless level will be removed during the
>>>>> build of the sched_domain topology.
>>>>>
>>>>> Another solution would be to set SD_ASYM_PACKING in the sd_flags of SMT level
>>>>> during the boot sequence and before the build of the sched_domain topology.
>>>>
>>>> Is the below what you mean as the other solution? If it is so, I would
>>>> strongly recommend this approach rather than adding another level to the
>>>> topology level to represent the asymmetric behaviour.
>>>>
>>>> +static struct sched_domain_topology_level powerpc_topology[] = {
>>>> +#ifdef CONFIG_SCHED_SMT
>>>> +       { cpu_smt_mask, SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES,
>>>> SD_INIT_NAME(SMT) | arch_sd_sibling_asym_packing() },
>>>> +#endif
>>>> +       { cpu_cpu_mask, SD_INIT_NAME(DIE) },
>>>> +       { NULL, },
>>>> +};
>>>
>>> Not exactly like that but something like below
>>>
>>> +static struct sched_domain_topology_level powerpc_topology[] = {
>>> +#ifdef CONFIG_SCHED_SMT
>>> + { cpu_smt_mask, SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES,
>>> SD_INIT_NAME(SMT) },
>>> +#endif
>>> + { cpu_cpu_mask, SD_INIT_NAME(DIE) },
>>> + { NULL, },
>>> +};
>>> +
>>> +static void __init set_sched_topology(void)
>>> +{
>>> +#ifdef CONFIG_SCHED_SMT
>>> + powerpc_topology[0].sd_flags |= arch_sd_sibling_asym_packing();
>>> +#endif
>>> + sched_domain_topology = powerpc_topology;
>>> +}
>>
>> I think we can set it in powerpc_topology[] and not bother about setting
>> additional flags outside of this array. It is clearer this way.
> 
> IIRC, the arch_sd_sibling_asym_packing of powerpc can be set at
> runtime which prevents it from being put directly in the table. Or it
> means that we should use a function pointer in the table for setting
> flags instead of a static value like the current proposal.

Oh yes you are right. Then the above looks good to me. So we define the
table as usual and set the asymmetric flag in set_sched_topology().
> 
>>
>> On an additional note, on powerpc we would want to clear the
>> SD_SHARE_POWERDOMAIN flag at the CPU domain. On Power8, considering we
>> have 8 threads per core, we would want to consolidate tasks atleast upto
>> 4 threads without significant performance impact before spilling over to
>> the other cores. By doing so, besides making use of the higher power of
>> the core we could do cpuidle management at the core level for the
>> remaining idle cores as a result of this consolidation.
> 
> OK. i will add the SD_SHARE_POWERDOMAIN like below

Thanks!

Regards
Preeti U Murthy
> 
> Thanks
> Vincent
> 
>>
>> So the powerpc_topology[] would be something like the below:
>>
>> +static struct sched_domain_topology_level powerpc_topology[] = {
>> +#ifdef CONFIG_SCHED_SMT
>> +       { cpu_smt_mask, SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES,
>> SD_INIT_NAME(SMT) | arch_sd_sibling_asym_packing() | SD_SHARE_POWERDOMAIN },
>> +#endif
>> +       { cpu_cpu_mask, SD_INIT_NAME(DIE) },
>> +       { NULL, },
>> +};
>>
>> The amount of consolidation to the threads in a core, we will probably
>> take care of it in cpu capacity or a similar parameter, but the above
>> topology level would be required to request the scheduler to try
>> consolidating tasks to cores till the cpu capacity(3/4/5 threads) has
>> exceeded.
>>
>> Regards
>> Preeti U Murthy
>>
>>
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/
Preeti U Murthy March 14, 2014, 2:30 a.m. | #7
On 03/12/2014 04:34 PM, Dietmar Eggemann wrote:
> On 12/03/14 07:44, Vincent Guittot wrote:
>> On 12 March 2014 05:42, Preeti U Murthy <preeti@linux.vnet.ibm.com> wrote:
>>> On 03/11/2014 06:48 PM, Vincent Guittot wrote:
>>>> On 11 March 2014 11:08, Preeti U Murthy <preeti@linux.vnet.ibm.com> wrote:
>>>>> Hi Vincent,
>>>>>
>>>>> On 03/05/2014 12:48 PM, Vincent Guittot wrote:
>>>>>> Create a dedicated topology table for handling asymetric feature.
>>>>>> The current proposal creates a new level which describes which groups of CPUs
>>>>>> take adavantge of SD_ASYM_PACKING. The useless level will be removed during the
>>>>>> build of the sched_domain topology.
>>>>>>
>>>>>> Another solution would be to set SD_ASYM_PACKING in the sd_flags of SMT level
>>>>>> during the boot sequence and before the build of the sched_domain topology.
>>>>>
>>>>> Is the below what you mean as the other solution? If it is so, I would
>>>>> strongly recommend this approach rather than adding another level to the
>>>>> topology level to represent the asymmetric behaviour.
>>>>>
>>>>> +static struct sched_domain_topology_level powerpc_topology[] = {
>>>>> +#ifdef CONFIG_SCHED_SMT
>>>>> +       { cpu_smt_mask, SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES,
>>>>> SD_INIT_NAME(SMT) | arch_sd_sibling_asym_packing() },
>>>>> +#endif
>>>>> +       { cpu_cpu_mask, SD_INIT_NAME(DIE) },
>>>>> +       { NULL, },
>>>>> +};
>>>>
>>>> Not exactly like that but something like below
>>>>
>>>> +static struct sched_domain_topology_level powerpc_topology[] = {
>>>> +#ifdef CONFIG_SCHED_SMT
>>>> + { cpu_smt_mask, SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES,
>>>> SD_INIT_NAME(SMT) },
>>>> +#endif
>>>> + { cpu_cpu_mask, SD_INIT_NAME(DIE) },
>>>> + { NULL, },
>>>> +};
>>>> +
>>>> +static void __init set_sched_topology(void)
>>>> +{
>>>> +#ifdef CONFIG_SCHED_SMT
>>>> + powerpc_topology[0].sd_flags |= arch_sd_sibling_asym_packing();
>>>> +#endif
>>>> + sched_domain_topology = powerpc_topology;
>>>> +}
>>>
>>> I think we can set it in powerpc_topology[] and not bother about setting
>>> additional flags outside of this array. It is clearer this way.
>>
>> IIRC, the arch_sd_sibling_asym_packing of powerpc can be set at
>> runtime which prevents it from being put directly in the table. Or it
>> means that we should use a function pointer in the table for setting
>> flags instead of a static value like the current proposal.
> 
> Right,
> 
> static struct sched_domain_topology_level powerpc_topology[] = {
> #ifdef CONFIG_SCHED_SMT
>         { cpu_asmt_mask, SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES |
> SD_ASYM_PACKING, SD_INIT_NAME(ASMT) },
>         { cpu_smt_mask, SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES |
> arch_sd_sibling_asym_packing() | SD_SHARE_POWERDOMAIN, SD_INIT_NAME(SMT) },
> #endif
>         { cpu_cpu_mask, SD_INIT_NAME(DIE) },
>         { NULL, },
> };
> 
> is not compiling:
> 
>   CC      arch/powerpc/kernel/smp.o
> arch/powerpc/kernel/smp.c:787:2: error: initializer element is not constant
> arch/powerpc/kernel/smp.c:787:2: error: (near initialization for
> 'powerpc_topology[1].sd_flags')
> 
> So I'm in favour of a function pointer, even w/o the 'int cpu' parameter
> to circumvent the issue that it is too easy to create broken sd setups.

Alright, this looks fine to me. You could use the function pointer to
retrieve flags and have all initializations of sched domain features
consolidated in the table.

Regards
Preeti U Murthy
> 
> -- Dietmar
> 
>>
>>>
>>> On an additional note, on powerpc we would want to clear the
>>> SD_SHARE_POWERDOMAIN flag at the CPU domain. On Power8, considering we
>>> have 8 threads per core, we would want to consolidate tasks atleast upto
>>> 4 threads without significant performance impact before spilling over to
>>> the other cores. By doing so, besides making use of the higher power of
>>> the core we could do cpuidle management at the core level for the
>>> remaining idle cores as a result of this consolidation.
>>
>> OK. i will add the SD_SHARE_POWERDOMAIN like below
>>
>> Thanks
>> Vincent
>>
>>>
>>> So the powerpc_topology[] would be something like the below:
>>>
>>> +static struct sched_domain_topology_level powerpc_topology[] = {
>>> +#ifdef CONFIG_SCHED_SMT
>>> +       { cpu_smt_mask, SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES,
>>> SD_INIT_NAME(SMT) | arch_sd_sibling_asym_packing() | SD_SHARE_POWERDOMAIN },
>>> +#endif
>>> +       { cpu_cpu_mask, SD_INIT_NAME(DIE) },
>>> +       { NULL, },
>>> +};
>>>
>>> The amount of consolidation to the threads in a core, we will probably
>>> take care of it in cpu capacity or a similar parameter, but the above
>>> topology level would be required to request the scheduler to try
>>> consolidating tasks to cores till the cpu capacity(3/4/5 threads) has
>>> exceeded.
>>>
>>> Regards
>>> Preeti U Murthy
>>>
>>>
>>
> 
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Patch

diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index ac2621a..75da054 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -755,6 +755,32 @@  int setup_profiling_timer(unsigned int multiplier)
 	return 0;
 }
 
+#ifdef CONFIG_SCHED_SMT
+/* cpumask of CPUs with asymmetric SMT dependency */
+static const struct cpumask *cpu_asmt_mask(int cpu)
+{
+	if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
+		printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
+		return topology_thread_cpumask(cpu);
+	}
+	return cpumask_of(cpu);
+}
+#endif
+
+static struct sched_domain_topology_level powerpc_topology[] = {
+#ifdef CONFIG_SCHED_SMT
+	{ cpu_asmt_mask, SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES | SD_ASYM_PACKING, SD_INIT_NAME(ASMT) },
+	{ cpu_smt_mask, SD_SHARE_CPUPOWER | SD_SHARE_PKG_RESOURCES, SD_INIT_NAME(SMT) },
+#endif
+	{ cpu_cpu_mask, SD_INIT_NAME(DIE) },
+	{ NULL, },
+};
+
+static void __init set_sched_topology(void)
+{
+	sched_domain_topology = powerpc_topology;
+}
+
 void __init smp_cpus_done(unsigned int max_cpus)
 {
 	cpumask_var_t old_mask;
@@ -779,15 +805,8 @@  void __init smp_cpus_done(unsigned int max_cpus)
 
 	dump_numa_cpu_topology();
 
-}
+	set_sched_topology();
 
-int arch_sd_sibling_asym_packing(void)
-{
-	if (cpu_has_feature(CPU_FTR_ASYM_SMT)) {
-		printk_once(KERN_INFO "Enabling Asymmetric SMT scheduling\n");
-		return SD_ASYM_PACKING;
-	}
-	return 0;
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3479467..7606de0 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5818,11 +5818,6 @@  static void init_sched_groups_power(int cpu, struct sched_domain *sd)
 	atomic_set(&sg->sgp->nr_busy_cpus, sg->group_weight);
 }
 
-int __weak arch_sd_sibling_asym_packing(void)
-{
-       return 0*SD_ASYM_PACKING;
-}
-
 /*
  * Initializers for schedule domains
  * Non-inlined to reduce accumulated stack pressure in build_sched_domains()
@@ -6000,7 +5995,6 @@  sd_init(struct sched_domain_topology_level *tl, int cpu)
 	if (sd->flags & SD_SHARE_CPUPOWER) {
 		sd->imbalance_pct = 110;
 		sd->smt_gain = 1178; /* ~15% */
-		sd->flags |= arch_sd_sibling_asym_packing();
 
 	} else if (sd->flags & SD_SHARE_PKG_RESOURCES) {
 		sd->imbalance_pct = 117;