diff mbox

Problems booting exynos5420 with >1 CPU

Message ID alpine.LFD.2.11.1406062340480.25775@knanqh.ubzr
State New
Headers show

Commit Message

Nicolas Pitre June 7, 2014, 4:10 p.m. UTC
On Sat, 7 Jun 2014, Abhilash Kesavan wrote:

> Hi Nicolas,
> 
> The first man of the incoming cluster enables its snoops via the
> power_up_setup function. During secondary boot-up, this does not occur
> for the boot cluster. Hence, I enable the snoops for the boot cluster
> as a one-time setup from the u-boot prompt. After secondary boot-up
> there is no modification that I do.

OK that's good.

> Where should this be ideally done ?

If I remember correctly, the CCI can be safely activated only when the 
cache is disabled.  So that means the CCI should ideally be turned on 
for the boot cluster (and *only* for the boot CPU) by the bootloader.

Now... If you _really_ prefer to do it from the kernel to avoid 
difficulties with bootloader updates, then it should be possible to do 
it from the kernel by temporarily turning the cache off.  This is not a 
small thing but the MCPM infrastructure can be leveraged.  Here's what I 
tried on a TC2 which might just work for you as well:


Of course it is not because I'm helping you sidestepping firmware 
problems that I'll stop shaming those who came up with that firmware 
design.  ;-)


Nicolas
--
To unsubscribe from this list: send the line "unsubscribe linux-samsung-soc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Comments

Lorenzo Pieralisi June 7, 2014, 5:56 p.m. UTC | #1
On Sat, Jun 07, 2014 at 05:10:27PM +0100, Nicolas Pitre wrote:
> On Sat, 7 Jun 2014, Abhilash Kesavan wrote:
> 
> > Hi Nicolas,
> > 
> > The first man of the incoming cluster enables its snoops via the
> > power_up_setup function. During secondary boot-up, this does not occur
> > for the boot cluster. Hence, I enable the snoops for the boot cluster
> > as a one-time setup from the u-boot prompt. After secondary boot-up
> > there is no modification that I do.
> 
> OK that's good.
> 
> > Where should this be ideally done ?
> 
> If I remember correctly, the CCI can be safely activated only when the 
> cache is disabled.  So that means the CCI should ideally be turned on 
> for the boot cluster (and *only* for the boot CPU) by the bootloader.

CCI ports are enabled per-cluster, so the boot loader must turn on
CCI for all clusters before the respective CPUs have a chance to
turn on their caches. It is a secure operation, this can be overriden
and probably that's what has been done, wrongly.

True, TC2 on warm-boot reenables CCI, and that's because it runs the
kernel in secure world, and again that's _wrong_.

It must be done in firmware, and I am totally against any attempt at
papering over what looks like a messed up firmware implementation with
a bunch of hacks in the kernel, because that's what the patch below is
(soft restarting a CPU to enable CCI ? and adding generic code for that ?
what's next ?)

I understand there is an issue and lots at stake here, but I do not want the
patch below to be merged in the kernel, I am sorry, it is a tad too much.

Lorenzo

> 
> Now... If you _really_ prefer to do it from the kernel to avoid 
> difficulties with bootloader updates, then it should be possible to do 
> it from the kernel by temporarily turning the cache off.  This is not a 
> small thing but the MCPM infrastructure can be leveraged.  Here's what I 
> tried on a TC2 which might just work for you as well:
> 
> diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c
> index 86fd60fefb..1cc49de308 100644
> --- a/arch/arm/common/mcpm_entry.c
> +++ b/arch/arm/common/mcpm_entry.c
> @@ -12,11 +12,13 @@
>  #include <linux/kernel.h>
>  #include <linux/init.h>
>  #include <linux/irqflags.h>
> +#include <linux/cpu_pm.h>
>  
>  #include <asm/mcpm.h>
>  #include <asm/cacheflush.h>
>  #include <asm/idmap.h>
>  #include <asm/cputype.h>
> +#include <asm/suspend.h>
>  
>  extern unsigned long mcpm_entry_vectors[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER];
>  
> @@ -146,6 +148,44 @@ int mcpm_cpu_powered_up(void)
>  	return 0;
>  }
>  
> +static int go_down(unsigned long _arg)
> +{
> +	void (*cache_disable)(void) = (void *)_arg;
> +	unsigned int mpidr = read_cpuid_mpidr();
> +	unsigned int cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
> +	unsigned int cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
> +	phys_reset_t phys_reset;
> +
> +	mcpm_set_entry_vector(cpu, cluster, cpu_resume);
> +	setup_mm_for_reboot();
> +
> +	__mcpm_cpu_going_down(cpu, cluster);
> +	BUG_ON(!__mcpm_outbound_enter_critical(cpu, cluster));
> +	cache_disable();
> +	__mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
> +	__mcpm_cpu_down(cpu, cluster);
> +
> +	phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset);
> +	phys_reset(virt_to_phys(mcpm_entry_point));
> +	BUG();
> +}
> +	
> +int mcpm_loopback(void (*cache_disable)(void))
> +{
> +	int ret;
> +
> +	local_irq_disable();
> +	local_fiq_disable();
> +	ret = cpu_pm_enter();
> +	if (!ret) {
> +		ret = cpu_suspend((unsigned long)cache_disable, go_down);
> +		cpu_pm_exit();
> +	}
> +	local_fiq_enable();
> +	local_irq_enable();
> +	return ret;
> +}
> +
>  struct sync_struct mcpm_sync;
>  
>  /*
> diff --git a/arch/arm/mach-vexpress/tc2_pm.c b/arch/arm/mach-vexpress/tc2_pm.c
> index 29e7785a54..abecdd734f 100644
> --- a/arch/arm/mach-vexpress/tc2_pm.c
> +++ b/arch/arm/mach-vexpress/tc2_pm.c
> @@ -323,6 +323,26 @@ static void __naked tc2_pm_power_up_setup(unsigned int affinity_level)
>  "	b	cci_enable_port_for_self ");
>  }
>  
> +int mcpm_loopback(void (*cache_disable)(void));
> +
> +static void tc2_cache_down(void)
> +{
> +	pr_warn("TC2: disabling cache\n");
> +	if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A15) {
> +		/*
> +		 * On the Cortex-A15 we need to disable
> +		 * L2 prefetching before flushing the cache.
> +		 */
> +		asm volatile(
> +		"mcr	p15, 1, %0, c15, c0, 3 \n\t"
> +		"isb	\n\t"
> +		"dsb	"
> +		: : "r" (0x400) );
> +	}
> +	v7_exit_coherency_flush(all);
> +	cci_disable_port_by_cpu(read_cpuid_mpidr());
> +}
> +
>  static int __init tc2_pm_init(void)
>  {
>  	int ret, irq;
> @@ -370,6 +390,7 @@ static int __init tc2_pm_init(void)
>  	ret = mcpm_platform_register(&tc2_pm_power_ops);
>  	if (!ret) {
>  		mcpm_sync_init(tc2_pm_power_up_setup);
> +		BUG_ON(mcpm_loopback(tc2_cache_down) != 0);
>  		pr_info("TC2 power management initialized\n");
>  	}
>  	return ret;
> 
> Of course it is not because I'm helping you sidestepping firmware 
> problems that I'll stop shaming those who came up with that firmware 
> design.  ;-)
> 
> 
> Nicolas
> 
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
> 

--
To unsubscribe from this list: send the line "unsubscribe linux-samsung-soc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nicolas Pitre June 7, 2014, 8:06 p.m. UTC | #2
On Sat, 7 Jun 2014, Lorenzo Pieralisi wrote:

> On Sat, Jun 07, 2014 at 05:10:27PM +0100, Nicolas Pitre wrote:
> > On Sat, 7 Jun 2014, Abhilash Kesavan wrote:
> > 
> > > Hi Nicolas,
> > > 
> > > The first man of the incoming cluster enables its snoops via the
> > > power_up_setup function. During secondary boot-up, this does not occur
> > > for the boot cluster. Hence, I enable the snoops for the boot cluster
> > > as a one-time setup from the u-boot prompt. After secondary boot-up
> > > there is no modification that I do.
> > 
> > OK that's good.
> > 
> > > Where should this be ideally done ?
> > 
> > If I remember correctly, the CCI can be safely activated only when the 
> > cache is disabled.  So that means the CCI should ideally be turned on 
> > for the boot cluster (and *only* for the boot CPU) by the bootloader.
> 
> CCI ports are enabled per-cluster, so the boot loader must turn on
> CCI for all clusters before the respective CPUs have a chance to
> turn on their caches. It is a secure operation, this can be overriden
> and probably that's what has been done, wrongly.

Careful.  By saying "for all clusters" you might be interpreted as 
saying that the CCI must be turned on even for CPUs that are not powered 
up.

> True, TC2 on warm-boot reenables CCI, and that's because it runs the
> kernel in secure world, and again that's _wrong_.

Let me respectfully disagree.

> It must be done in firmware, and I am totally against any attempt at
> papering over what looks like a messed up firmware implementation with
> a bunch of hacks in the kernel, because that's what the patch below is
> (soft restarting a CPU to enable CCI ? and adding generic code for that ?
> what's next ?)

Are you promoting for the removal of drivers/bus/arm-cci.c ?

You do realize that the fundamental raison d'ĂȘtre for MCPM is actually 
to manage the race free enabling of the cache and CCI ?

> I understand there is an issue and lots at stake here, but I do not want the
> patch below to be merged in the kernel, I am sorry, it is a tad too much.

Lorenzo: Don't get me wrong.  The Chromebooks, and possibly to some 
extent some people at Samsung, were simply too confident in their 
ability to create absolutely bug-free firmware code to the point of not 
making its update easy in the field.  This is completely outrageous in 
my point of view.  Yet one of the reactions was to call upstream kernel 
people as purists because the kernel is so much easier to modify in 
order to cover their mess and yet that might not be accepted.  Like I 
said I won't stop shaming them publicly for their own "incompetence" 
just yet (no pun intended), but being excessively purist does not 
benefit anyone either, and for that they have a point.

*HOWEVER* I have no choice but to say that many people at ARM, including 
a couple individuals for whom I nevertheless have a lot of admiration, 
also have an incredible faith in their ability to convince themselves, 
and then turn around to preach to the world, that *more firmware* is 
going to be so much purer and solve so many more problems than it 
creates and become such a magical success that we should immediately 
dedicate our soul to the cause with no hint of a doubt.

I'm sorry to rain on your parade, but I don't believe in this one iota.

Let me repeat the MCPM story again: it took 3 people, including 2 from 
ARM, over *six* months to get everything right and stable on TC2. I 
think you also contributed to that effort as well. Subsequent MCPM 
backend contributions (yes, just the backend and not the core code) took 
at least *five* rounds of reviews in one case, and after *seven* rounds 
in another case it is still not right, despite the publicly available 
TC2 implementation to serve as a reference.

I'm sure each time a new patch set was posted, their authors honestly 
believed their code was correct.  Otherwise why would they post buggy 
code?

Now you are telling me that they should have put that code into firmware 
instead?  Can you realize what a catastrophe this would have been? Are 
you _seriously_ believing that they would be up to their 5th firmware 
revision by now?  And that updating their firmware six months after 
product launch would be as easy as updating the kernel?

Software ALWAYS has bugs, whether it is user apps, the kernel, firmware 
or boot ROM. The bigger one of those parts is, the more bugs it will 
have. And the cost to vendors for fixing those bugs grow exponentially 
down each level. For proof, we're now considering possible workarounds 
in the kernel to sidestep the difficulty with updating the firmware on a 
Chromebook.

Yet you're saying that firmware should grow code with the same 
complexity as the MCPM core, plus a machine specific backend that 
experience has proven multiple times is evidently hard to get right, 
into firmware because running Linux in secure mode is wrong?  If so we 
don't live in the same world indeed.

The day I see a firmware architecture that allows for 1) the same level 
of peer review as what we enjoy with the Linux kernel code and 2) the 
same ability to perform updates in the field as the kernel, then maybe I 
could be sold on the many advantages having generic firmware might have.  
In the meantime I consider complex firmware as a very suboptimal 
architecture with no bearing on the reality of actual short-cycled 
products, and if they prevail we'd better be ready to pile more of those 
ugly hacks in the kernel.


Nicolas
Lorenzo Pieralisi June 7, 2014, 10:36 p.m. UTC | #3
On Sat, Jun 07, 2014 at 09:06:36PM +0100, Nicolas Pitre wrote:
> On Sat, 7 Jun 2014, Lorenzo Pieralisi wrote:
> 
> > On Sat, Jun 07, 2014 at 05:10:27PM +0100, Nicolas Pitre wrote:
> > > On Sat, 7 Jun 2014, Abhilash Kesavan wrote:
> > > 
> > > > Hi Nicolas,
> > > > 
> > > > The first man of the incoming cluster enables its snoops via the
> > > > power_up_setup function. During secondary boot-up, this does not occur
> > > > for the boot cluster. Hence, I enable the snoops for the boot cluster
> > > > as a one-time setup from the u-boot prompt. After secondary boot-up
> > > > there is no modification that I do.
> > > 
> > > OK that's good.
> > > 
> > > > Where should this be ideally done ?
> > > 
> > > If I remember correctly, the CCI can be safely activated only when the 
> > > cache is disabled.  So that means the CCI should ideally be turned on 
> > > for the boot cluster (and *only* for the boot CPU) by the bootloader.
> > 
> > CCI ports are enabled per-cluster, so the boot loader must turn on
> > CCI for all clusters before the respective CPUs have a chance to
> > turn on their caches. It is a secure operation, this can be overriden
> > and probably that's what has been done, wrongly.
> 
> Careful.  By saying "for all clusters" you might be interpreted as 
> saying that the CCI must be turned on even for CPUs that are not powered 
> up.

Right, CCI snoops and DVMs for a cluster must be enabled by the first man
running in that cluster upon cluster power up with caches off, where the
first man must be arbitrated if multiple CPUs are racing for enabling CCI.
This is not an issue on cold boot, it is on resuming from CPUidle.

> > True, TC2 on warm-boot reenables CCI, and that's because it runs the
> > kernel in secure world, and again that's _wrong_.
> 
> Let me respectfully disagree.

You are welcome =)

> > It must be done in firmware, and I am totally against any attempt at
> > papering over what looks like a messed up firmware implementation with
> > a bunch of hacks in the kernel, because that's what the patch below is
> > (soft restarting a CPU to enable CCI ? and adding generic code for that ?
> > what's next ?)
> 
> Are you promoting for the removal of drivers/bus/arm-cci.c ?

I really do not want to go there, but I might (apart from CCI PMUs).

> You do realize that the fundamental raison d'?tre for MCPM is actually 
> to manage the race free enabling of the cache and CCI ?

Yes I do and I was willing to help implement it for TC2 to provide people
with an example on how to do it _properly_ (in secure world though, and that
was a mistake IMHO). If what we get is a workaround for every platform
going upstream, we are in a bind, seriously.

Whatever the outcome of this thread, a booting protocol update for CCI
is in order, even if we have to debate it for 6 months or more to get
an agreement.

> > I understand there is an issue and lots at stake here, but I do not want the
> > patch below to be merged in the kernel, I am sorry, it is a tad too much.
> 
> Lorenzo: Don't get me wrong.  The Chromebooks, and possibly to some 
> extent some people at Samsung, were simply too confident in their 
> ability to create absolutely bug-free firmware code to the point of not 
> making its update easy in the field.  This is completely outrageous in 
> my point of view.  Yet one of the reactions was to call upstream kernel 
> people as purists because the kernel is so much easier to modify in 
> order to cover their mess and yet that might not be accepted.  Like I 
> said I won't stop shaming them publicly for their own "incompetence" 
> just yet (no pun intended), but being excessively purist does not 
> benefit anyone either, and for that they have a point.

I do not think they do. The kernel should not become a place where firmware
bugs are fixed, if you refuse to fix the bug and this code does not get
upstream I am pretty sure next time more attention will be paid.

Booting, powering up and down cores are standard procedures, why can't we
share the same code for all v7 platforms ? Why ?

And we are not talking about a race condition hit every 10 gazillions
cycles here.

> *HOWEVER* I have no choice but to say that many people at ARM, including 
> a couple individuals for whom I nevertheless have a lot of admiration, 
> also have an incredible faith in their ability to convince themselves, 
> and then turn around to preach to the world, that *more firmware* is 
> going to be so much purer and solve so many more problems than it 
> creates and become such a magical success that we should immediately 
> dedicate our soul to the cause with no hint of a doubt.
> 
> I'm sorry to rain on your parade, but I don't believe in this one iota.
> 
> Let me repeat the MCPM story again: it took 3 people, including 2 from 
> ARM, over *six* months to get everything right and stable on TC2. I 
> think you also contributed to that effort as well. Subsequent MCPM 
> backend contributions (yes, just the backend and not the core code) took 
> at least *five* rounds of reviews in one case, and after *seven* rounds 
> in another case it is still not right, despite the publicly available 
> TC2 implementation to serve as a reference.
> 
> I'm sure each time a new patch set was posted, their authors honestly 
> believed their code was correct.  Otherwise why would they post buggy 
> code?
> 
> Now you are telling me that they should have put that code into firmware 
> instead?  Can you realize what a catastrophe this would have been? Are 
> you _seriously_ believing that they would be up to their 5th firmware 
> revision by now?  And that updating their firmware six months after 
> product launch would be as easy as updating the kernel?

If firmware messes up the DMC controller configuration, would you fix
it in the Linux kernel ? It is an unrealistic (well..) example but you should
catch my drift.

Where do we draw the line, that's my point.

> Software ALWAYS has bugs, whether it is user apps, the kernel, firmware 
> or boot ROM. The bigger one of those parts is, the more bugs it will 
> have. And the cost to vendors for fixing those bugs grow exponentially 
> down each level. For proof, we're now considering possible workarounds 
> in the kernel to sidestep the difficulty with updating the firmware on a 
> Chromebook.
> 
> Yet you're saying that firmware should grow code with the same 
> complexity as the MCPM core, plus a machine specific backend that 
> experience has proven multiple times is evidently hard to get right, 
> into firmware because running Linux in secure mode is wrong?  If so we 
> don't live in the same world indeed.
> 
> The day I see a firmware architecture that allows for 1) the same level 
> of peer review as what we enjoy with the Linux kernel code and 2) the 
> same ability to perform updates in the field as the kernel, then maybe I 
> could be sold on the many advantages having generic firmware might have.  
> In the meantime I consider complex firmware as a very suboptimal 
> architecture with no bearing on the reality of actual short-cycled 
> products, and if they prevail we'd better be ready to pile more of those 
> ugly hacks in the kernel.

Nicolas: it is not a matter of PSCI vs. MCPM, firmware vs. the kernel,
that's a debate worth having, not now.

There is a bug to solve, and you did. What I am telling you is that
I am fed up to my back teeth of having code in the kernel solving
issues that should not be solved in the kernel, if we keep doing that
hacks will become the rule not the exception. Time to draw a line,
firmware is broken, so is the platform. (BTW, if anyone can explain to
me how CPUidle works in this platform, and that's the main reason of
having MCPM in there, I am all ears).

Adding these hacks has serious maintainance consequences (eg CPUidle
code) and that's the main reason I jumped into this discussion.

Let me reiterate my point: it is not a kernel vs firmware debate, it
is about clean and maintainable code vs hackish and unmaintainable
code in the kernel.

And of top of that, people must test their code, we are not talking
about a rare race condition here, this is a blatant bug.

I have lots to add concerning the firmware implementations of power
down procedures, but let's take it offline.

I am not happy with merging your patch, I am sorry, for the reasons I've
just explained.

Lorenzo

--
To unsubscribe from this list: send the line "unsubscribe linux-samsung-soc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Olof Johansson June 7, 2014, 11:53 p.m. UTC | #4
Lorenzo,

Since you're emailing from @arm.com, some of this is to the wider
recipient and maybe not directly to you:

On Sat, Jun 7, 2014 at 3:36 PM, Lorenzo Pieralisi
<lorenzo.pieralisi@arm.com> wrote:
> On Sat, Jun 07, 2014 at 09:06:36PM +0100, Nicolas Pitre wrote:
>> On Sat, 7 Jun 2014, Lorenzo Pieralisi wrote:
>>
>> > On Sat, Jun 07, 2014 at 05:10:27PM +0100, Nicolas Pitre wrote:
>> > > On Sat, 7 Jun 2014, Abhilash Kesavan wrote:
>> > >
>> > > > Hi Nicolas,
>> > > >
>> > > > The first man of the incoming cluster enables its snoops via the
>> > > > power_up_setup function. During secondary boot-up, this does not occur
>> > > > for the boot cluster. Hence, I enable the snoops for the boot cluster
>> > > > as a one-time setup from the u-boot prompt. After secondary boot-up
>> > > > there is no modification that I do.
>> > >
>> > > OK that's good.
>> > >
>> > > > Where should this be ideally done ?
>> > >
>> > > If I remember correctly, the CCI can be safely activated only when the
>> > > cache is disabled.  So that means the CCI should ideally be turned on
>> > > for the boot cluster (and *only* for the boot CPU) by the bootloader.
>> >
>> > CCI ports are enabled per-cluster, so the boot loader must turn on
>> > CCI for all clusters before the respective CPUs have a chance to
>> > turn on their caches. It is a secure operation, this can be overriden
>> > and probably that's what has been done, wrongly.
>>
>> Careful.  By saying "for all clusters" you might be interpreted as
>> saying that the CCI must be turned on even for CPUs that are not powered
>> up.
>
> Right, CCI snoops and DVMs for a cluster must be enabled by the first man
> running in that cluster upon cluster power up with caches off, where the
> first man must be arbitrated if multiple CPUs are racing for enabling CCI.
> This is not an issue on cold boot, it is on resuming from CPUidle.

That's already handled by the MCPM backend on exynos:

/*
 * Enable cluster-level coherency, in preparation for turning on the MMU.
 */
static void __naked exynos_pm_power_up_setup(unsigned int affinity_level)
{
        asm volatile ("\n"
        "cmp    r0, #1\n"
        "bxne   lr\n"
        "b      cci_enable_port_for_self");
}

>> > True, TC2 on warm-boot reenables CCI, and that's because it runs the
>> > kernel in secure world, and again that's _wrong_.
>>
>> Let me respectfully disagree.
>
> You are welcome =)
>
>> > It must be done in firmware, and I am totally against any attempt at
>> > papering over what looks like a messed up firmware implementation with
>> > a bunch of hacks in the kernel, because that's what the patch below is
>> > (soft restarting a CPU to enable CCI ? and adding generic code for that ?
>> > what's next ?)
>>
>> Are you promoting for the removal of drivers/bus/arm-cci.c ?
>
> I really do not want to go there, but I might (apart from CCI PMUs).
>
>> You do realize that the fundamental raison d'?tre for MCPM is actually
>> to manage the race free enabling of the cache and CCI ?
>
> Yes I do and I was willing to help implement it for TC2 to provide people
> with an example on how to do it _properly_ (in secure world though, and that
> was a mistake IMHO). If what we get is a workaround for every platform
> going upstream, we are in a bind, seriously.

Real life is messy. Bugs happen. Having a stance that the kernel has
to be a puritan implementation that can be done in a vacuum where bugs
in hardware and firmware don't exist (and are fixable in the right
place every single case) is not realistic. Linux is a useless piece of
software to us if that is the case.

I've seen this from several other ARM developers in the past. It's
almost like they were a couple of degrees removed from actually
working on shipping real products and dealing with real users.

> Whatever the outcome of this thread, a booting protocol update for CCI
> is in order, even if we have to debate it for 6 months or more to get
> an agreement.

Ding ding ding. Current documentation is in some very complex C
frameworks (MCPM), and some device tree bindings that obviously don't
cover this. Real documentation is obviously needed, but more than that
(see below).

I'd actually argue that you don't have a leg to stand on in your
complaints at all because:

1) There's no documentation of the requirements
2) The only existing golden platform (TC2) manages CCI similar to how
exynos does.

>> > I understand there is an issue and lots at stake here, but I do not want the
>> > patch below to be merged in the kernel, I am sorry, it is a tad too much.
>>
>> Lorenzo: Don't get me wrong.  The Chromebooks, and possibly to some
>> extent some people at Samsung, were simply too confident in their
>> ability to create absolutely bug-free firmware code to the point of not
>> making its update easy in the field.  This is completely outrageous in
>> my point of view.

I would like to clarify that firmware is updateable today. But
Chromebooks are in many ways vertically integrated products, so if a
problem is found, there's always going to be a trade-off between the
places to fix it for our own product.

I have successfully argued for fixes in firmware (or EC) for some bugs
in the past. For others we have to go with the cheaper fix. And at the
end of the day, pushing out a new firmware is a massive undertaking
compared to a kernel workaround, and doing so only because upstream
maintainers aren't happy with the way we and Samsung solved something
in our firmware+kernel combo is an argument that is hard to win -- it
doesn't affect the product at all, only those wanting to run an
upstream kernel on it.

I'm a very strong proponent of enabling upstream support for our
platform (for several reasons -- most of these are actually business
reasons for us, but also because it's the right thing to do). Finding
the trade-off for what workarounds are still reasonable to do in the
kernel for that situation is obviously hard and we're disagreeing. But
the scope for these workarounds is not large.

Personally, I'm of the opinion that if we can add a few hooks to take
care of something, or keep something contained in a piece of platform
code, then it's not a given that we have to reject the workaround. If
we have to modify core code (or substantially impact core ARM code)
then it's time to consider fixing in firmware or wherever else.

In this case, the change we're looking at is enabling the CCI port for
the boot cpu. It's perfectly containable in exynos-only code, and we
can surround it by however many comments of never ever using it as an
example for how to do it as you'll want.

>> Yet one of the reactions was to call upstream kernel
>> people as purists because the kernel is so much easier to modify in
>> order to cover their mess and yet that might not be accepted.  Like I
>> said I won't stop shaming them publicly for their own "incompetence"
>> just yet (no pun intended), but being excessively purist does not
>> benefit anyone either, and for that they have a point.
>
> I do not think they do. The kernel should not become a place where firmware
> bugs are fixed, if you refuse to fix the bug and this code does not get
> upstream I am pretty sure next time more attention will be paid.

You do realize that you have absolutely zero leverage over us on this,
right? Our product is already shipped with kernel code that fixes
this. The only parties you hurt by doing this is community developers
that want to run an upstream kernel on the platform. Apparently
several of them want to work on sorting out energy aware scheduling on
a platform that is easily available -- mostly people at Linaro and
ARM. I think it makes sense to do what we have to do to enable that
without having people tear their machines open and voiding warranties.

> Booting, powering up and down cores are standard procedures, why can't we
> share the same code for all v7 platforms ? Why ?

That's something you should ask your colleagues. ARM has historically
refused to require standards for these things, and it causes the mess
we see now. Arbitrarily changing the requirements from your part of
kernel isn't going to change the marketplace.

> And we are not talking about a race condition hit every 10 gazillions
> cycles here.
>
>> *HOWEVER* I have no choice but to say that many people at ARM, including
>> a couple individuals for whom I nevertheless have a lot of admiration,
>> also have an incredible faith in their ability to convince themselves,
>> and then turn around to preach to the world, that *more firmware* is
>> going to be so much purer and solve so many more problems than it
>> creates and become such a magical success that we should immediately
>> dedicate our soul to the cause with no hint of a doubt.
>>
>> I'm sorry to rain on your parade, but I don't believe in this one iota.
>>
>> Let me repeat the MCPM story again: it took 3 people, including 2 from
>> ARM, over *six* months to get everything right and stable on TC2. I
>> think you also contributed to that effort as well. Subsequent MCPM
>> backend contributions (yes, just the backend and not the core code) took
>> at least *five* rounds of reviews in one case, and after *seven* rounds
>> in another case it is still not right, despite the publicly available
>> TC2 implementation to serve as a reference.
>>
>> I'm sure each time a new patch set was posted, their authors honestly
>> believed their code was correct.  Otherwise why would they post buggy
>> code?
>>
>> Now you are telling me that they should have put that code into firmware
>> instead?  Can you realize what a catastrophe this would have been? Are
>> you _seriously_ believing that they would be up to their 5th firmware
>> revision by now?  And that updating their firmware six months after
>> product launch would be as easy as updating the kernel?
>
> If firmware messes up the DMC controller configuration, would you fix
> it in the Linux kernel ? It is an unrealistic (well..) example but you should
> catch my drift.

It's actually perfectly realistic, and we did exactly this in the
first ARM Chromebook. We never upstreamed the code because it only
affected a smaller number of models (it's something we did update the
RO firmware for in production).

> Where do we draw the line, that's my point.

You draw the line by giving vendors a place to do the nasty stuff that
needs to be done in a place that doesn't impact others, and where
others don't have to look. Quirk tables, fixup functions, or function
pointers that can be replaced on a specific platform if needed. When
it affects core code, you sort it out in a different way if you have
to.

There'll always be some ugly code that needs to go somewhere. This
isn't rocket science. It's something we've done in the kernel since
pretty much forever. In some cases, we can even share the quirks if
several vendors have made the same mistakes.

Maybe it's just me, but I didn't use to see this disconnected puritan
world view from people until DT came along. I don't think it's DTs
fault, but I think the requirements of DT-as-ABI has tainted the
mindset of many developers in a way that they treat everything as
needing to be polished to a perfect shine in all aspects, all the
time.

>> Software ALWAYS has bugs, whether it is user apps, the kernel, firmware
>> or boot ROM. The bigger one of those parts is, the more bugs it will
>> have. And the cost to vendors for fixing those bugs grow exponentially
>> down each level. For proof, we're now considering possible workarounds
>> in the kernel to sidestep the difficulty with updating the firmware on a
>> Chromebook.
>>
>> Yet you're saying that firmware should grow code with the same
>> complexity as the MCPM core, plus a machine specific backend that
>> experience has proven multiple times is evidently hard to get right,
>> into firmware because running Linux in secure mode is wrong?  If so we
>> don't live in the same world indeed.
>>
>> The day I see a firmware architecture that allows for 1) the same level
>> of peer review as what we enjoy with the Linux kernel code and 2) the
>> same ability to perform updates in the field as the kernel, then maybe I
>> could be sold on the many advantages having generic firmware might have.
>> In the meantime I consider complex firmware as a very suboptimal
>> architecture with no bearing on the reality of actual short-cycled
>> products, and if they prevail we'd better be ready to pile more of those
>> ugly hacks in the kernel.

I couldn't agree more. MCPM and the b.L stuff is also a brand new
concept, that the reference code was kept secret for longer than it
should have been, and few people have been through a full product
cycle of it and learned lessons. We're obviously learning several
right now on our first one.

Expecting things to be perfect from day one is not realistic.

> Nicolas: it is not a matter of PSCI vs. MCPM, firmware vs. the kernel,
> that's a debate worth having, not now.
>
> There is a bug to solve, and you did. What I am telling you is that
> I am fed up to my back teeth of having code in the kernel solving
> issues that should not be solved in the kernel, if we keep doing that
> hacks will become the rule not the exception. Time to draw a line,
> firmware is broken, so is the platform. (BTW, if anyone can explain to
> me how CPUidle works in this platform, and that's the main reason of
> having MCPM in there, I am all ears).

Punting all this to software means that there will be bugs to fix in
software. Period. In the real world, fixing those bugs will not be
practical to do in the best possible place. Life sucks.

You do know that we're arguing over a one-time setup of the boot cpu
port on the CCI and coming in and out of system suspend/resume here,
right? Everything else should already be covered by the runtime
MCPM/CCI code from Nico (together with the exynos backend). So this is
just the first CCI setup on boot as well as after S3 suspend/resume.
One-time code paths, not critical path and definitely containable
within the platform code.

> Adding these hacks has serious maintainance consequences (eg CPUidle
> code) and that's the main reason I jumped into this discussion.

> Let me reiterate my point: it is not a kernel vs firmware debate, it
> is about clean and maintainable code vs hackish and unmaintainable
> code in the kernel.

No, it's about having code that runs in the real world, versus some
random framework that doesn't actually fill a useful purpose since
nobody can make use of it without a bunch of out-of-tree code.

> And of top of that, people must test their code, we are not talking
> about a rare race condition here, this is a blatant bug.

Where's the test suite that would have caught this?

Or hell, I'll be happy to see a document that declares the requirement
of firmware to enable the CCI port on the boot cluster. You admit
above that there is none.

Samsung chose to do that in the kernel instead of firmware, nobody
involved knew better at the time and we now have to deal with it. It's
unlikely to go uncaught on the next product iteration, but things like
this happen.

> I have lots to add concerning the firmware implementations of power
> down procedures, but let's take it offline.
>
> I am not happy with merging your patch, I am sorry, for the reasons I've
> just explained.

Wow, you're going to be really, really frustrated over how the world
will start to look with all the "standardized" closed firmware
platforms and their quirks and bug workarounds we'll have to add in
the kernel.


-Olof
--
To unsubscribe from this list: send the line "unsubscribe linux-samsung-soc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Russell King - ARM Linux June 8, 2014, 12:19 a.m. UTC | #5
On Sat, Jun 07, 2014 at 04:53:34PM -0700, Olof Johansson wrote:
> You do realize that you have absolutely zero leverage over us on this,
> right? Our product is already shipped with kernel code that fixes
> this.

That is never a justification for forcing /any/ code into the kernel.

We've been here before with the iPAQs, where there were all sorts of
horrid hacks that were in the code for that device, and we said no to
it, and we kept it out of the mainline kernel, and stopped those hacks
polluting elsewhere (because people got to know on the whole that if
they used those hacks, it would bar them from mainline participation.)

There's many other instances.  Think about it - if we allowed this as
an acceptance criteria, then all that vendors have to do to get their
code into the kernel is change their development cycle: develop
product, ship product, force code into mainline as a done deal not
accepting any review comments back.
Olof Johansson June 8, 2014, 2:52 a.m. UTC | #6
On Sat, Jun 7, 2014 at 5:19 PM, Russell King - ARM Linux
<linux@arm.linux.org.uk> wrote:
> On Sat, Jun 07, 2014 at 04:53:34PM -0700, Olof Johansson wrote:
>> You do realize that you have absolutely zero leverage over us on this,
>> right? Our product is already shipped with kernel code that fixes
>> this.
>
> That is never a justification for forcing /any/ code into the kernel.

I'm not looking to force code in, I'm just making it clear that we
have limited chance to motivate rework of this since the primary
objective of the platform has already been met: We've shipped a
product with it.

I also don't think it's really in anyone's best interest to have to
open up their device, remove write protect, download and build a
mainline u-boot and try flashing that onto the system to get it to a
state where they can use a mainline kernel. There's too much risk of
bricking your hardware if you get it wrong, and you void your
warranty.

> We've been here before with the iPAQs, where there were all sorts of
> horrid hacks that were in the code for that device, and we said no to
> it, and we kept it out of the mainline kernel, and stopped those hacks
> polluting elsewhere (because people got to know on the whole that if
> they used those hacks, it would bar them from mainline participation.)

Unfortunately, very few companies actually care about mainline
participation today to the point where I don't think they care much
any set examples. :(

> There's many other instances.  Think about it - if we allowed this as
> an acceptance criteria, then all that vendors have to do to get their
> code into the kernel is change their development cycle: develop
> product, ship product, force code into mainline as a done deal not
> accepting any review comments back.

That is of course not a suitable way of working either. Unfortunately
what is mostly happening today is that vendors are doing this: develop
product, ship product. The last step never happens. Finding a middle
ground where we can pick up some of the platform quirks without making
the kernel a big ball of hacks is of course the tricky part.

In this specific case, we're not ignoring review feedback, and the
comments we've gotten we will absolutely make sure are fixed in the
next generation of product (if/when we do another big.LITTLE platform,
etc). There's just no room to fix it for the current generation. In
hindsight, of course what should have happened is that we should have
pushed the vendor to upstream the code sooner, and we're working on
making that better in the future too.


Since we're talking about upstreaming of platform support, this is
something I'm quite passionate about so I'll rant a bit:

Looking at the general case more than just this specific instance of
Samsung Chromebooks, I think we should in general work hard (where
vendors are willing to cooperate) to make sure that we can fully
enable support for platforms to the point where they can just run
unmodified upstream. Too many of the products today aren't shipping
kernels anywhere near what's mainline: It's usually a kernel that is a
couple of years old with a few thousand patches on top. It means that
bugfixes for the platforms (and much other useful code) doesn't find
its way into the kernel, and we have a long (or nonexistent) cycle of
feedback due to it. Drivers are in some cases completely broken
because nobody actually uses the upstream versions, people post
building-but-broken code because they can't actually run and test the
patch on any existing hardware with mainline so they just forward-port
what they have in the product tree and hope for the best. Etc.

I would really like to reach a state where we have several substantial
and popular platforms that work well enough with mainline that people
can use some of the mainline-near distros to run on the machine.
Cubox-i is a great example, even though there are still some pieces
left there as well. The new Chromebooks have hardware specs that are
quite suitable to use as native development platforms (good CPUs, 4GB
ram, and micro-SD card to expand beyond the basic 16GB eMMC), so I
think it makes sense to try to enable them and pick up a minimal set
of the less than ideal code snippets for that. We'll end up with a
better supported and more solid kernel if we can get distros such as
Fedora and Debian to ship with these upstream kernels instead of the
vendor tree (which is 3.8 based in this case, and won't ever move
forward).


-Olof
--
To unsubscribe from this list: send the line "unsubscribe linux-samsung-soc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Lorenzo Pieralisi June 8, 2014, 12:45 p.m. UTC | #7
On Sun, Jun 08, 2014 at 12:53:34AM +0100, Olof Johansson wrote:
> Lorenzo,
> 
> Since you're emailing from @arm.com, some of this is to the wider
> recipient and maybe not directly to you:

I am glad to reply and take blame since this is a debate definitely worth
having.

[...]

> > Right, CCI snoops and DVMs for a cluster must be enabled by the first man
> > running in that cluster upon cluster power up with caches off, where the
> > first man must be arbitrated if multiple CPUs are racing for enabling CCI.
> > This is not an issue on cold boot, it is on resuming from CPUidle.
> 
> That's already handled by the MCPM backend on exynos:
> 
> /*
>  * Enable cluster-level coherency, in preparation for turning on the MMU.
>  */
> static void __naked exynos_pm_power_up_setup(unsigned int affinity_level)
> {
>         asm volatile ("\n"
>         "cmp    r0, #1\n"
>         "bxne   lr\n"
>         "b      cci_enable_port_for_self");
> }
> 
> >> > True, TC2 on warm-boot reenables CCI, and that's because it runs the
> >> > kernel in secure world, and again that's _wrong_.
> >>
> >> Let me respectfully disagree.
> >
> > You are welcome =)
> >
> >> > It must be done in firmware, and I am totally against any attempt at
> >> > papering over what looks like a messed up firmware implementation with
> >> > a bunch of hacks in the kernel, because that's what the patch below is
> >> > (soft restarting a CPU to enable CCI ? and adding generic code for that ?
> >> > what's next ?)
> >>
> >> Are you promoting for the removal of drivers/bus/arm-cci.c ?
> >
> > I really do not want to go there, but I might (apart from CCI PMUs).
> >
> >> You do realize that the fundamental raison d'?tre for MCPM is actually
> >> to manage the race free enabling of the cache and CCI ?
> >
> > Yes I do and I was willing to help implement it for TC2 to provide people
> > with an example on how to do it _properly_ (in secure world though, and that
> > was a mistake IMHO). If what we get is a workaround for every platform
> > going upstream, we are in a bind, seriously.
> 
> Real life is messy. Bugs happen. Having a stance that the kernel has
> to be a puritan implementation that can be done in a vacuum where bugs
> in hardware and firmware don't exist (and are fixable in the right
> place every single case) is not realistic. Linux is a useless piece of
> software to us if that is the case.
> 
> I've seen this from several other ARM developers in the past. It's
> almost like they were a couple of degrees removed from actually
> working on shipping real products and dealing with real users.

Guys, do not get me wrong here. There are fixes that can be deemed
acceptable in an OS, there are fixes that can't. I just can't help thinking
that Nicolas' patch is a nasty hack (and I am far, really really far from
blaming him for that, because that's the only patch that can fix that
issue in the kernel), and he perfectly knows that.

> > Whatever the outcome of this thread, a booting protocol update for CCI
> > is in order, even if we have to debate it for 6 months or more to get
> > an agreement.
> 
> Ding ding ding. Current documentation is in some very complex C
> frameworks (MCPM), and some device tree bindings that obviously don't
> cover this. Real documentation is obviously needed, but more than that
> (see below).
> 
> I'd actually argue that you don't have a leg to stand on in your
> complaints at all because:
> 
> 1) There's no documentation of the requirements
> 2) The only existing golden platform (TC2) manages CCI similar to how
> exynos does.

1) Blame taken, nothing to say. That's why I mentioned that CCI enablement
   must be part of a boot protocol document to prevent this from happening
   again.

2) Apart from the boot cluster, but that's related to (1).

> >> > I understand there is an issue and lots at stake here, but I do not want the
> >> > patch below to be merged in the kernel, I am sorry, it is a tad too much.
> >>
> >> Lorenzo: Don't get me wrong.  The Chromebooks, and possibly to some
> >> extent some people at Samsung, were simply too confident in their
> >> ability to create absolutely bug-free firmware code to the point of not
> >> making its update easy in the field.  This is completely outrageous in
> >> my point of view.
> 
> I would like to clarify that firmware is updateable today. But
> Chromebooks are in many ways vertically integrated products, so if a
> problem is found, there's always going to be a trade-off between the
> places to fix it for our own product.
> 
> I have successfully argued for fixes in firmware (or EC) for some bugs
> in the past. For others we have to go with the cheaper fix. And at the
> end of the day, pushing out a new firmware is a massive undertaking
> compared to a kernel workaround, and doing so only because upstream
> maintainers aren't happy with the way we and Samsung solved something
> in our firmware+kernel combo is an argument that is hard to win -- it
> doesn't affect the product at all, only those wanting to run an
> upstream kernel on it.
> 
> I'm a very strong proponent of enabling upstream support for our
> platform (for several reasons -- most of these are actually business
> reasons for us, but also because it's the right thing to do). Finding
> the trade-off for what workarounds are still reasonable to do in the
> kernel for that situation is obviously hard and we're disagreeing. But
> the scope for these workarounds is not large.
> 
> Personally, I'm of the opinion that if we can add a few hooks to take
> care of something, or keep something contained in a piece of platform
> code, then it's not a given that we have to reject the workaround. If
> we have to modify core code (or substantially impact core ARM code)
> then it's time to consider fixing in firmware or wherever else.
> 
> In this case, the change we're looking at is enabling the CCI port for
> the boot cpu. It's perfectly containable in exynos-only code, and we
> can surround it by however many comments of never ever using it as an
> example for how to do it as you'll want.

I agree with what you are saying, but if for any reason someone will
copy that code to paper over yet another firmware quirk and think that's
the right thing to do, that would be grave IMHO.

So:

1) CCI requirements added to boot document
2) If Nico's patch get in we must stick a massive disclaimer summing up
   this thread in there
3) Next time the answer will be: NAK.

Do we agree ? I still have very strong feelings against (2) and I would
be very glad to avoid merging that patch.

> >> Yet one of the reactions was to call upstream kernel
> >> people as purists because the kernel is so much easier to modify in
> >> order to cover their mess and yet that might not be accepted.  Like I
> >> said I won't stop shaming them publicly for their own "incompetence"
> >> just yet (no pun intended), but being excessively purist does not
> >> benefit anyone either, and for that they have a point.
> >
> > I do not think they do. The kernel should not become a place where firmware
> > bugs are fixed, if you refuse to fix the bug and this code does not get
> > upstream I am pretty sure next time more attention will be paid.
> 
> You do realize that you have absolutely zero leverage over us on this,
> right? Our product is already shipped with kernel code that fixes
> this. The only parties you hurt by doing this is community developers
> that want to run an upstream kernel on the platform. Apparently
> several of them want to work on sorting out energy aware scheduling on
> a platform that is easily available -- mostly people at Linaro and
> ARM. I think it makes sense to do what we have to do to enable that
> without having people tear their machines open and voiding warranties.

I have zero leverage, but still the right to say what I think.
The patch fixing CCI enablement for the boot CPU is a nasty hack
and that's certainly not Nicolas' fault.

I understand your point, and I do not want to stop people from using
this platform with upstream code, actually I am the first who is happy
to see power management code getting in the mainline, but not at all costs,
because this has consequences for US.

> > Booting, powering up and down cores are standard procedures, why can't we
> > share the same code for all v7 platforms ? Why ?
> 
> That's something you should ask your colleagues. ARM has historically
> refused to require standards for these things, and it causes the mess
> we see now. Arbitrarily changing the requirements from your part of
> kernel isn't going to change the marketplace.

ARM are pushing for open trusted firmware, ARM TRMs are available to
partners with those sequences described, and I have always been willing
to support developers.

We should do more, but that does not justify these bugs, really.

> > And we are not talking about a race condition hit every 10 gazillions
> > cycles here.
> >
> >> *HOWEVER* I have no choice but to say that many people at ARM, including
> >> a couple individuals for whom I nevertheless have a lot of admiration,
> >> also have an incredible faith in their ability to convince themselves,
> >> and then turn around to preach to the world, that *more firmware* is
> >> going to be so much purer and solve so many more problems than it
> >> creates and become such a magical success that we should immediately
> >> dedicate our soul to the cause with no hint of a doubt.
> >>
> >> I'm sorry to rain on your parade, but I don't believe in this one iota.
> >>
> >> Let me repeat the MCPM story again: it took 3 people, including 2 from
> >> ARM, over *six* months to get everything right and stable on TC2. I
> >> think you also contributed to that effort as well. Subsequent MCPM
> >> backend contributions (yes, just the backend and not the core code) took
> >> at least *five* rounds of reviews in one case, and after *seven* rounds
> >> in another case it is still not right, despite the publicly available
> >> TC2 implementation to serve as a reference.
> >>
> >> I'm sure each time a new patch set was posted, their authors honestly
> >> believed their code was correct.  Otherwise why would they post buggy
> >> code?
> >>
> >> Now you are telling me that they should have put that code into firmware
> >> instead?  Can you realize what a catastrophe this would have been? Are
> >> you _seriously_ believing that they would be up to their 5th firmware
> >> revision by now?  And that updating their firmware six months after
> >> product launch would be as easy as updating the kernel?
> >
> > If firmware messes up the DMC controller configuration, would you fix
> > it in the Linux kernel ? It is an unrealistic (well..) example but you should
> > catch my drift.
> 
> It's actually perfectly realistic, and we did exactly this in the
> first ARM Chromebook. We never upstreamed the code because it only
> affected a smaller number of models (it's something we did update the
> RO firmware for in production).
> 
> > Where do we draw the line, that's my point.
> 
> You draw the line by giving vendors a place to do the nasty stuff that
> needs to be done in a place that doesn't impact others, and where
> others don't have to look. Quirk tables, fixup functions, or function
> pointers that can be replaced on a specific platform if needed. When
> it affects core code, you sort it out in a different way if you have
> to.
> 
> There'll always be some ugly code that needs to go somewhere. This
> isn't rocket science. It's something we've done in the kernel since
> pretty much forever. In some cases, we can even share the quirks if
> several vendors have made the same mistakes.
> 
> Maybe it's just me, but I didn't use to see this disconnected puritan
> world view from people until DT came along. I don't think it's DTs
> fault, but I think the requirements of DT-as-ABI has tainted the
> mindset of many developers in a way that they treat everything as
> needing to be polished to a perfect shine in all aspects, all the
> time.

Olof, it is not puritanism, it is all about upstreaming code. If we
keep accepting these hacks and we end up with mach code full of them
we have a problem, do you agree ?

You, Russell and Nico know what to do, I wanted to get my opinion
across and I think you got my point.

> >> Software ALWAYS has bugs, whether it is user apps, the kernel, firmware
> >> or boot ROM. The bigger one of those parts is, the more bugs it will
> >> have. And the cost to vendors for fixing those bugs grow exponentially
> >> down each level. For proof, we're now considering possible workarounds
> >> in the kernel to sidestep the difficulty with updating the firmware on a
> >> Chromebook.
> >>
> >> Yet you're saying that firmware should grow code with the same
> >> complexity as the MCPM core, plus a machine specific backend that
> >> experience has proven multiple times is evidently hard to get right,
> >> into firmware because running Linux in secure mode is wrong?  If so we
> >> don't live in the same world indeed.
> >>
> >> The day I see a firmware architecture that allows for 1) the same level
> >> of peer review as what we enjoy with the Linux kernel code and 2) the
> >> same ability to perform updates in the field as the kernel, then maybe I
> >> could be sold on the many advantages having generic firmware might have.
> >> In the meantime I consider complex firmware as a very suboptimal
> >> architecture with no bearing on the reality of actual short-cycled
> >> products, and if they prevail we'd better be ready to pile more of those
> >> ugly hacks in the kernel.
> 
> I couldn't agree more. MCPM and the b.L stuff is also a brand new
> concept, that the reference code was kept secret for longer than it
> should have been, and few people have been through a full product
> cycle of it and learned lessons. We're obviously learning several
> right now on our first one.
> 
> Expecting things to be perfect from day one is not realistic.

I do not buy this I am sorry. Fair enough, CCI is a new concept, but
SMP power management has been implemented in older platforms with
the same requirements, nothing new and still people are getting this
wrong.

> > Nicolas: it is not a matter of PSCI vs. MCPM, firmware vs. the kernel,
> > that's a debate worth having, not now.
> >
> > There is a bug to solve, and you did. What I am telling you is that
> > I am fed up to my back teeth of having code in the kernel solving
> > issues that should not be solved in the kernel, if we keep doing that
> > hacks will become the rule not the exception. Time to draw a line,
> > firmware is broken, so is the platform. (BTW, if anyone can explain to
> > me how CPUidle works in this platform, and that's the main reason of
> > having MCPM in there, I am all ears).
> 
> Punting all this to software means that there will be bugs to fix in
> software. Period. In the real world, fixing those bugs will not be
> practical to do in the best possible place. Life sucks.
> 
> You do know that we're arguing over a one-time setup of the boot cpu
> port on the CCI and coming in and out of system suspend/resume here,
> right? Everything else should already be covered by the runtime
> MCPM/CCI code from Nico (together with the exynos backend). So this is
> just the first CCI setup on boot as well as after S3 suspend/resume.
> One-time code paths, not critical path and definitely containable
> within the platform code.

See above. And let's hope MCPM is useful at all on this platform, which
means CPUidle can be enabled and working.

> > Adding these hacks has serious maintainance consequences (eg CPUidle
> > code) and that's the main reason I jumped into this discussion.
> 
> > Let me reiterate my point: it is not a kernel vs firmware debate, it
> > is about clean and maintainable code vs hackish and unmaintainable
> > code in the kernel.
> 
> No, it's about having code that runs in the real world, versus some
> random framework that doesn't actually fill a useful purpose since
> nobody can make use of it without a bunch of out-of-tree code.

PSCI is not a random framework, it is a standard and it runs in real
world platforms and would hide all these HW quirks where they belong.

It won't be perfect, but hey, neither is this code.

> > And of top of that, people must test their code, we are not talking
> > about a rare race condition here, this is a blatant bug.
> 
> Where's the test suite that would have caught this?
> 
> Or hell, I'll be happy to see a document that declares the requirement
> of firmware to enable the CCI port on the boot cluster. You admit
> above that there is none.

See above, blame taken but FW/HW engineers should know that a multicluster
coherent system should have coherent caches to be properly booted.

> Samsung chose to do that in the kernel instead of firmware, nobody
> involved knew better at the time and we now have to deal with it. It's
> unlikely to go uncaught on the next product iteration, but things like
> this happen.

Fair enough, provided it is the last time this happens.

> > I have lots to add concerning the firmware implementations of power
> > down procedures, but let's take it offline.
> >
> > I am not happy with merging your patch, I am sorry, for the reasons I've
> > just explained.
> 
> Wow, you're going to be really, really frustrated over how the world
> will start to look with all the "standardized" closed firmware
> platforms and their quirks and bug workarounds we'll have to add in
> the kernel.

Yes, and I will shout even louder when that will happen =)

Thanks !
Lorenzo

--
To unsubscribe from this list: send the line "unsubscribe linux-samsung-soc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Russell King - ARM Linux June 8, 2014, 2:34 p.m. UTC | #8
On Sun, Jun 08, 2014 at 01:45:30PM +0100, Lorenzo Pieralisi wrote:
> Olof, it is not puritanism, it is all about upstreaming code. If we
> keep accepting these hacks and we end up with mach code full of them
> we have a problem, do you agree ?

To see the kind of problem that accepting hacked up code can cause, you
only have to look at Olof's build logs to see the warnings from the L2x0
cache code, which I've been totally unable to complete the cleanup of
/because/ we've historically accepted hacks into it.

I think that the legacy code is just going to have to stay (and I don't
want the warnings papered over, because I /want/ that crap to stick out
like a sore thumb), until someone can get sufficient motivation to work
out how to finally unuse the old functions.

Had I been on top of the L2 cache code earlier, and prevented these hacks
from going in (insisting that it was done properly) we would not be in
this position today where no one seems to know to fix this stuff.

This is the whole point - and nicely illustrates how easy it is for code
to become unmaintainable by accepting hacks to it.  A bit of push-back at
code acceptance time helps to save us from these problems in the future.

So, what I would want to see is not only what Lorenzo is saying (the
disclaimer in the comments) but also a technical description it, so if
the code needs to be modified in the future, we don't end up in this
kind of situation wondering what the code is doing/why the code exists.
Nicolas Pitre June 8, 2014, 5:53 p.m. UTC | #9
On Sun, 8 Jun 2014, Lorenzo Pieralisi wrote:

> On Sun, Jun 08, 2014 at 12:53:34AM +0100, Olof Johansson wrote:
> > Lorenzo,
> > 
> > Since you're emailing from @arm.com, some of this is to the wider
> > recipient and maybe not directly to you:
> 
> I am glad to reply and take blame since this is a debate definitely worth
> having.

Great.  Because I would like to steer this debate a little towards the 
genuine cause rather than sticking to some particular consequences.

> Guys, do not get me wrong here. There are fixes that can be deemed
> acceptable in an OS, there are fixes that can't. I just can't help thinking
> that Nicolas' patch is a nasty hack (and I am far, really really far from
> blaming him for that, because that's the only patch that can fix that
> issue in the kernel), and he perfectly knows that.

You know what?  The more I think about my patch, the more I consider 
this should be the standard way of setting up things unconditionally on 
_all_ platforms using MCPM.  Why? Because that's the most coherent thing 
to do!

I really think the kernel should either be responsible for the CCI or it 
should not at all.  And conversely for the bootloader.  Right now we 
have an implicit requirement that the bootloader should turn on the CCI, 
but only for cold boot, and only for the boot cluster, and not for CPU 
resuming from idle, and what other case we haven't thought about yet.  
And as noticed this requirement is not documented.

> > > Whatever the outcome of this thread, a booting protocol update for CCI
> > > is in order, even if we have to debate it for 6 months or more to get
> > > an agreement.
> > 

And in the end I don't think the CCI should have to be documented as a 
boot requirement.  Instead of having firmware implementers understand 
when they should care about the CCI and when they shouldn't, I'd much 
prefer if they hadn't to care at all. I really prefer when 
responsibility for something is well encapsulated in one place and not 
shared across layers like the firmware or the kernel depending on some 
context. The complexity of a system (and therefore the probability for 
bugs) grows with the square of the number of interrelations between 
constituent parts. So we should always strive to make the boot protocol 
_simpler_ not more complex.

And if complete responsibility for the CCI in the kernel had been 
assumed from the beginning, we wouldn't be struggling in this power play 
to determine which side should give in.  Especially since the kernel has 
all the necessary infrastructure to do it all already, and I must say in 
a rather elegant manner.

> > I'm a very strong proponent of enabling upstream support for our
> > platform (for several reasons -- most of these are actually business
> > reasons for us, but also because it's the right thing to do). Finding
> > the trade-off for what workarounds are still reasonable to do in the
> > kernel for that situation is obviously hard and we're disagreeing. But
> > the scope for these workarounds is not large.

Will people ever realize that, if the kernel was more in control of the 
hardware (isn't that the role of an OS kernel to serve as the hardware 
abstraction layer?) then we wouldn't be talking about "workarounds" but 
rather "standard fixes"?

> > In this case, the change we're looking at is enabling the CCI port for
> > the boot cpu. It's perfectly containable in exynos-only code, and we
> > can surround it by however many comments of never ever using it as an
> > example for how to do it as you'll want.

In this case, to state my opinion clearly, it is the general design that 
was flawed and the kernel should be fixed to enable the CCI for the boot 
CPU itself _when_ it knows it is going to need it.  To start with, the 
bootloader has no need what so ever for using more than one CPU, unless 
it wants to become an operating system, so it shouldn't have to care at 
all.  The kernel, if booted without the CCI information in the DTB, will 
run with only one CPU and won't rely on the CCI.  Logically the CCI 
could be left turned off in that case, possibly increasing bus 
performance and saving some power.

> I agree with what you are saying, but if for any reason someone will
> copy that code to paper over yet another firmware quirk and think that's
> the right thing to do, that would be grave IMHO.

Someone shouldn't have to copy that code because I'm getting more and 
more convinced it should be made generic and unconditional, and by doing 
so removing any possibility for firmware to get that part wrong again.  
According to my quick experiment on TC2, this took only 271 microseconds 
to perform so this is not like if that would make a significant 
difference in boot time.

> > > I do not think they do. The kernel should not become a place where firmware
> > > bugs are fixed, if you refuse to fix the bug and this code does not get
> > > upstream I am pretty sure next time more attention will be paid.

Again, this is coming about because firmware is a MAGNITUDE harder to 
fix and IMPOSSIBLE to be bug free, just like any other software. So if I 
may get back to the genuine cause for this debate: this came about 
because of TOO MUCH firmware code and encouraging people to create more 
of it is *BAD*.

Sure, in the server world you are likely to want firmware and standards 
because that helps bringing maintenance costs down.  But server 
equipment has much longer life cycles than mobile devices and somewhat 
less aggressive and complex power management to perform. Firmware for 
servers may take *time* to be developed, tested, certified, etc.  To 
illustrate this, we've been working on UEFI and ACPI for a period tat 
can be measured in years at this point.  So, hopefully by the time 
server oriented firmware is available, it would be well tested and 
relied upon for a long time.

none of the above applies to consumer products with fast development and 
short life cycles.

> I understand your point, and I do not want to stop people from using
> this platform with upstream code, actually I am the first who is happy
> to see power management code getting in the mainline, but not at all costs,
> because this has consequences for US.

And those consequences are?

> ARM are pushing for open trusted firmware, ARM TRMs are available to
> partners with those sequences described, and I have always been willing
> to support developers.

Ahhhh...  Here we are. "ARM are pushing for open trusted firmware ..."

> We should do more, but that does not justify these bugs, really.

Bugs are never justifiable, but they happen _all_ the time.

Firmware is a MAGNITUDE harder to fix, and IMPOSSIBLE to be bug free
just like any other software.

> > > Where do we draw the line, that's my point.
> > 
> > You draw the line by giving vendors a place to do the nasty stuff that
> > needs to be done in a place that doesn't impact others, and where
> > others don't have to look. Quirk tables, fixup functions, or function
> > pointers that can be replaced on a specific platform if needed. When
> > it affects core code, you sort it out in a different way if you have
> > to.

Again this is missing the point.  No line would need to be drawn if the 
core code was responsible in the first place.  DMC parameters are 
conceptually so trivial that no one should normally mess that up, and 
the firmware must do it just so that memory is usable.  So there is no 
choice but to do that in firmware.  It is a completely different story 
with complex operations which should never ever be relegated to 
firmware.

> > Maybe it's just me, but I didn't use to see this disconnected puritan
> > world view from people until DT came along. I don't think it's DTs
> > fault, but I think the requirements of DT-as-ABI has tainted the
> > mindset of many developers in a way that they treat everything as
> > needing to be polished to a perfect shine in all aspects, all the
> > time.
> 
> Olof, it is not puritanism, it is all about upstreaming code. If we
> keep accepting these hacks and we end up with mach code full of them
> we have a problem, do you agree ?

Absolutely!

So once again, let's take a step back, open our eyes and look at the 
fundamental reason why hacks are there, and how they could fundamentally 
be avoided.  And no, hoping for fewer bugs in firmware is not realistic 
if people are encouraged to create more of it.

> > Expecting things to be perfect from day one is not realistic.
> 
> I do not buy this I am sorry. Fair enough, CCI is a new concept, but
> SMP power management has been implemented in older platforms with
> the same requirements, nothing new and still people are getting this
> wrong.

Lorenzo: what you say is not exact.  People screwed SMP power management 
in the past for sure.  And they still will because requirement are 
changing all the time they're not the same.  Maybe requirements are 
somewhat stable in the server space, but in the mobile space they're 
not. So this must be implemented where it is cheapest to fix.

> > > Nicolas: it is not a matter of PSCI vs. MCPM, firmware vs. the kernel,
> > > that's a debate worth having, not now.

Why not?  I'm saying that too much firmware is a fundamental design 
mistake for consumer products.  All the rest falls off from that.  Why 
not addressing the source of the problem rather than constantly 
suffering and debating its consequences?

Again I want to clearly state that I have nothing against PSCI the 
interface spec despite the appearances.  I've reviewed its draft and 
provided comments, etc.  My point is, when taking a step back, we may 
only conclude that more firmware does not create a better system overall 
because of real life costs and constraints associated to it. So PSCI is 
not the problem, the problem is at another conceptual level.

> > > Adding these hacks has serious maintainance consequences (eg CPUidle
> > > code) and that's the main reason I jumped into this discussion.

Sorry, I don't see the connection.

> > > Let me reiterate my point: it is not a kernel vs firmware debate,

But of *course* it is, unless you're too invested in your firmware 
strategy to be able to see all the downsides.

> > > it is about clean and maintainable code vs hackish and 
> > > unmaintainable code in the kernel.

No argument there.  Unfortunately, hackish code comes about because of 
broken firmware in most cases.  Kernel code can be cleaned at any moment 
but in practice firmware code cannot.

> > No, it's about having code that runs in the real world, versus some
> > random framework that doesn't actually fill a useful purpose since
> > nobody can make use of it without a bunch of out-of-tree code.
> 
> PSCI is not a random framework, it is a standard and it runs in real
> world platforms and would hide all these HW quirks where they belong.

Which real world platforms?  I'm curious.

> > Wow, you're going to be really, really frustrated over how the world
> > will start to look with all the "standardized" closed firmware
> > platforms and their quirks and bug workarounds we'll have to add in
> > the kernel.
> 
> Yes, and I will shout even louder when that will happen =)

That _will_ happen. Such is life. And you'll have only yourself to blame 
because you pushed for bigger firmware to be created in the first place.


Nicolas
--
To unsubscribe from this list: send the line "unsubscribe linux-samsung-soc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nicolas Pitre June 8, 2014, 6:26 p.m. UTC | #10
On Sun, 8 Jun 2014, Russell King - ARM Linux wrote:

> On Sat, Jun 07, 2014 at 04:53:34PM -0700, Olof Johansson wrote:
> > You do realize that you have absolutely zero leverage over us on this,
> > right? Our product is already shipped with kernel code that fixes
> > this.
> 
> That is never a justification for forcing /any/ code into the kernel.
> 
> We've been here before with the iPAQs, where there were all sorts of
> horrid hacks that were in the code for that device, and we said no to
> it, and we kept it out of the mainline kernel, and stopped those hacks
> polluting elsewhere (because people got to know on the whole that if
> they used those hacks, it would bar them from mainline participation.)

That's different.  The iPaq had very little in terms of firmware, and 
the one it had was field upgradable with almost no risk to brick it 
(unless you wanted to hack the firmware code but that's another story).  
The reason iPaq had never been well supported in mainline can be 
attributed to laziness for not wanting to make the code into a shape 
acceptable for mainline inclusion.  But nothing fundamentally prevented 
that from happening if someone had wanted to do it.

Here we're talking about firmware-induced hacks for which, had there 
been no firmware, the kernel would be in full position to fix properly 
because that would have been a kernel bug after all.

Hence my crusade against this "you should abstract things in firmware" 
mantra. Especially for mobile devices.

But, because firmware already exists out there and it is between 
prohibitive and impossible to fix, we have no choice but to compensate 
in the kernel some times.  In this very case my approach is to render 
the firmware irrelevant globally rather than trying to work around it 
for one particular device.


Nicolas
--
To unsubscribe from this list: send the line "unsubscribe linux-samsung-soc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Russell King - ARM Linux June 8, 2014, 6:29 p.m. UTC | #11
On Sun, Jun 08, 2014 at 02:26:43PM -0400, Nicolas Pitre wrote:
> On Sun, 8 Jun 2014, Russell King - ARM Linux wrote:
> 
> > On Sat, Jun 07, 2014 at 04:53:34PM -0700, Olof Johansson wrote:
> > > You do realize that you have absolutely zero leverage over us on this,
> > > right? Our product is already shipped with kernel code that fixes
> > > this.
> > 
> > That is never a justification for forcing /any/ code into the kernel.
> > 
> > We've been here before with the iPAQs, where there were all sorts of
> > horrid hacks that were in the code for that device, and we said no to
> > it, and we kept it out of the mainline kernel, and stopped those hacks
> > polluting elsewhere (because people got to know on the whole that if
> > they used those hacks, it would bar them from mainline participation.)
> 
> That's different.  The iPaq had very little in terms of firmware, and 
> the one it had was field upgradable with almost no risk to brick it 
> (unless you wanted to hack the firmware code but that's another story).  
> The reason iPaq had never been well supported in mainline can be 
> attributed to laziness for not wanting to make the code into a shape 
> acceptable for mainline inclusion.  But nothing fundamentally prevented 
> that from happening if someone had wanted to do it.

No, I was specifically thinking about the various iPAQ specific things
like the additional platform specific ATAGs that they invented with
zero reference to mainline, and then expected them to be accepted as-is.

I gave them a very clear message over that (which was "no way") and that
was essentially the last of their mainline submissions.
Nicolas Pitre June 8, 2014, 6:55 p.m. UTC | #12
On Sun, 8 Jun 2014, Russell King - ARM Linux wrote:

> On Sun, Jun 08, 2014 at 02:26:43PM -0400, Nicolas Pitre wrote:
> > On Sun, 8 Jun 2014, Russell King - ARM Linux wrote:
> > 
> > > On Sat, Jun 07, 2014 at 04:53:34PM -0700, Olof Johansson wrote:
> > > > You do realize that you have absolutely zero leverage over us on this,
> > > > right? Our product is already shipped with kernel code that fixes
> > > > this.
> > > 
> > > That is never a justification for forcing /any/ code into the kernel.
> > > 
> > > We've been here before with the iPAQs, where there were all sorts of
> > > horrid hacks that were in the code for that device, and we said no to
> > > it, and we kept it out of the mainline kernel, and stopped those hacks
> > > polluting elsewhere (because people got to know on the whole that if
> > > they used those hacks, it would bar them from mainline participation.)
> > 
> > That's different.  The iPaq had very little in terms of firmware, and 
> > the one it had was field upgradable with almost no risk to brick it 
> > (unless you wanted to hack the firmware code but that's another story).  
> > The reason iPaq had never been well supported in mainline can be 
> > attributed to laziness for not wanting to make the code into a shape 
> > acceptable for mainline inclusion.  But nothing fundamentally prevented 
> > that from happening if someone had wanted to do it.
> 
> No, I was specifically thinking about the various iPAQ specific things
> like the additional platform specific ATAGs that they invented with
> zero reference to mainline, and then expected them to be accepted as-is.
> 
> I gave them a very clear message over that (which was "no way") and that
> was essentially the last of their mainline submissions.

That's basically what I'm saying.  They were too lazy to fix their code.  
But nothing prevented that otherwise. They certainly couldn't use the 
"firmware is broken and too hard to update" excuse.


Nicolas
--
To unsubscribe from this list: send the line "unsubscribe linux-samsung-soc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Russell King - ARM Linux June 8, 2014, 7:02 p.m. UTC | #13
On Sun, Jun 08, 2014 at 02:55:03PM -0400, Nicolas Pitre wrote:
> On Sun, 8 Jun 2014, Russell King - ARM Linux wrote:
> > No, I was specifically thinking about the various iPAQ specific things
> > like the additional platform specific ATAGs that they invented with
> > zero reference to mainline, and then expected them to be accepted as-is.
> > 
> > I gave them a very clear message over that (which was "no way") and that
> > was essentially the last of their mainline submissions.
> 
> That's basically what I'm saying.  They were too lazy to fix their code.  
> But nothing prevented that otherwise. They certainly couldn't use the 
> "firmware is broken and too hard to update" excuse.

No, they just continued to use those ATAGs that they invented.
Kevin Hilman June 9, 2014, 8:27 p.m. UTC | #14
Nicolas Pitre <nicolas.pitre@linaro.org> writes:

> On Sat, 7 Jun 2014, Abhilash Kesavan wrote:
>
>> Hi Nicolas,
>> 
>> The first man of the incoming cluster enables its snoops via the
>> power_up_setup function. During secondary boot-up, this does not occur
>> for the boot cluster. Hence, I enable the snoops for the boot cluster
>> as a one-time setup from the u-boot prompt. After secondary boot-up
>> there is no modification that I do.
>
> OK that's good.
>
>> Where should this be ideally done ?
>
> If I remember correctly, the CCI can be safely activated only when the 
> cache is disabled.  So that means the CCI should ideally be turned on 
> for the boot cluster (and *only* for the boot CPU) by the bootloader.
>
> Now... If you _really_ prefer to do it from the kernel to avoid 
> difficulties with bootloader updates, then it should be possible to do 
> it from the kernel by temporarily turning the cache off.  This is not a 
> small thing but the MCPM infrastructure can be leveraged.  Here's what I 
> tried on a TC2 which might just work for you as well:

FWIW, I dropped the u-boot hack I was using to enable CCI and tested
this patch (with a cut/paste of the TC2 specific stuff into
mach-exynos/mcpm-exynos.c) along with Doug's patch[1] and 
and confirm that all 8 cores boot up on the Chromebook2 using linux-next.

Kevin

[1] http://lists.infradead.org/pipermail/linux-arm-kernel/2014-June/262440.html

--
To unsubscribe from this list: send the line "unsubscribe linux-samsung-soc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Kevin Hilman June 9, 2014, 8:47 p.m. UTC | #15
Nicolas Pitre <nicolas.pitre@linaro.org> writes:

> On Sun, 8 Jun 2014, Lorenzo Pieralisi wrote:
>
>> On Sun, Jun 08, 2014 at 12:53:34AM +0100, Olof Johansson wrote:
>> > Lorenzo,
>> > 
>> > Since you're emailing from @arm.com, some of this is to the wider
>> > recipient and maybe not directly to you:
>> 
>> I am glad to reply and take blame since this is a debate definitely worth
>> having.
>
> Great.  Because I would like to steer this debate a little towards the 
> genuine cause rather than sticking to some particular consequences.
>
>> Guys, do not get me wrong here. There are fixes that can be deemed
>> acceptable in an OS, there are fixes that can't. I just can't help thinking
>> that Nicolas' patch is a nasty hack (and I am far, really really far from
>> blaming him for that, because that's the only patch that can fix that
>> issue in the kernel), and he perfectly knows that.
>
> You know what?  The more I think about my patch, the more I consider 
> this should be the standard way of setting up things unconditionally on 
> _all_ platforms using MCPM.  Why? Because that's the most coherent thing 
> to do!

I agree.

> I really think the kernel should either be responsible for the CCI or it 
> should not at all.  And conversely for the bootloader.  Right now we 
> have an implicit requirement that the bootloader should turn on the CCI, 
> but only for cold boot, and only for the boot cluster, and not for CPU 
> resuming from idle, and what other case we haven't thought about yet.  
> And as noticed this requirement is not documented.

In addition to being a firmware minimalist like Nico, what I find most
objectional to the bootloader approach is that even with CCI enabled by
the firmware, since it's a runtime requirement (for low-power idle or
suspend), the kernel has to handle it anyways.  So you end up with a
partial solution in the firwmare (for boot cluster only) *and* a full
solution in the kernel.  This doesn't make any sense, expecially because
the kernel might then have to do things differently on cold boot
vs. low-power idle/suspend or differently on the boot cluster vs. other
clusters.  From a maintenance PoV, this is a mess and could easily lead
to just as many SoC specific hacks that are different across platforms.

Stated more simply: If the kernel has to manage the resource at runtime
due to low-power idle/suspend.  I don't see any reason why it shouldn't
manage it at cold boot time also.

Kevin
--
To unsubscribe from this list: send the line "unsubscribe linux-samsung-soc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Lorenzo Pieralisi June 9, 2014, 10:26 p.m. UTC | #16
On Mon, Jun 09, 2014 at 09:47:42PM +0100, Kevin Hilman wrote:
> Nicolas Pitre <nicolas.pitre@linaro.org> writes:
> 
> > On Sun, 8 Jun 2014, Lorenzo Pieralisi wrote:
> >
> >> On Sun, Jun 08, 2014 at 12:53:34AM +0100, Olof Johansson wrote:
> >> > Lorenzo,
> >> > 
> >> > Since you're emailing from @arm.com, some of this is to the wider
> >> > recipient and maybe not directly to you:
> >> 
> >> I am glad to reply and take blame since this is a debate definitely worth
> >> having.
> >
> > Great.  Because I would like to steer this debate a little towards the 
> > genuine cause rather than sticking to some particular consequences.

I commented on Nico's patch because I did not like how it was
implemented (at least remove the CPU PM notifier calls please, because
they are not needed). I also said that's the only thing he could do,
and I still think that's not a nice way to use the cpu_suspend API
for something it was not designed for, that's what I wanted to say,
period.

> >> Guys, do not get me wrong here. There are fixes that can be deemed
> >> acceptable in an OS, there are fixes that can't. I just can't help thinking
> >> that Nicolas' patch is a nasty hack (and I am far, really really far from
> >> blaming him for that, because that's the only patch that can fix that
> >> issue in the kernel), and he perfectly knows that.
> >
> > You know what?  The more I think about my patch, the more I consider 
> > this should be the standard way of setting up things unconditionally on 
> > _all_ platforms using MCPM.  Why? Because that's the most coherent thing 
> > to do!
> 
> I agree.
> 
> > I really think the kernel should either be responsible for the CCI or it 
> > should not at all.  And conversely for the bootloader.  Right now we 
> > have an implicit requirement that the bootloader should turn on the CCI, 
> > but only for cold boot, and only for the boot cluster, and not for CPU 
> > resuming from idle, and what other case we haven't thought about yet.  
> > And as noticed this requirement is not documented.
> 
> In addition to being a firmware minimalist like Nico, what I find most
> objectional to the bootloader approach is that even with CCI enabled by
> the firmware, since it's a runtime requirement (for low-power idle or
> suspend), the kernel has to handle it anyways.  So you end up with a
> partial solution in the firwmare (for boot cluster only) *and* a full
> solution in the kernel.  This doesn't make any sense, expecially because
> the kernel might then have to do things differently on cold boot
> vs. low-power idle/suspend or differently on the boot cluster vs. other
> clusters.  From a maintenance PoV, this is a mess and could easily lead
> to just as many SoC specific hacks that are different across platforms.
> 
> Stated more simply: If the kernel has to manage the resource at runtime
> due to low-power idle/suspend.  I don't see any reason why it shouldn't
> manage it at cold boot time also.

If I am allowed to say something, here is a couple of thoughts.

1) CCI snoops and DVM enablement are secure operations, to do them in
   non-secure world this must be overriden in firmware. You can argue,
   you can think whatever you want, that's a fact. So, to use this
   code SMP bit in SCTLR and CCI enablement must become non-secure
   operations. This is a boot requirement for MCPM, right or wrong
   it is up to platform designers to judge. If CCI and SMP enablement
   are secure operations, we should not start adding random SMC calls
   in the kernel, since managing coherency in the kernel would become
   problematic, with lots of platform quirks. We do not want that to
   happen, and I think we all agree on this.
2) (1) must be documented.
3) When I talked about consequences for CPUidle (implicit), I was referring
   to all sort of hacks we had to come up to bring devices like SPC
   (remember ? I remember very very well unfortunately for me), or whatever
   power controller up in the kernel early, too early to fit in any
   existing kernel device framework. There is still no solution to that, and
   the only way that code can exist is in mach- code. Right or wrong,
   that's a second fact and in my opinion that's not nice for the ARM
   kernel.
4) When I am talking about firmware I am talking about sequences that
   are very close to HW (disabling C bit, cleaning caches, exiting
   coherency). Erratas notwithstanding, they are being standardized at
   ARM the best we can. They might even end up being implemented in HW
   in the not so far future. I understand they are tricky, I understand
   they take lots of time to implement them and to debug them, what I
   want to say is that they are becoming standard and we _must_ reuse the
   same code for all ARM platforms. You can implement them in MCPM (see
   (1)) or in firmware (and please do not start painting me as firmware
   hugger here, I am referring to standard power down sequences that
   again, are very close to HW state machines and more importantly if they
   HAVE to run in secure world that's the only solution we have unless you
   want to split race conditions between kernel and secure world).
5) I agree that the CCI enablement in TC2 (bootmon for cold boot and
   kernel for warm-boot is wrong, nothing to say and it was not the
   reason I commented on Nico's patch - I think I explained to you
   thoroughly why now).

That's what I had to say, I hope it helps.

Thanks,
Lorenzo

--
To unsubscribe from this list: send the line "unsubscribe linux-samsung-soc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nicolas Pitre June 10, 2014, 4:25 a.m. UTC | #17
On Mon, 9 Jun 2014, Lorenzo Pieralisi wrote:

> I commented on Nico's patch because I did not like how it was
> implemented (at least remove the CPU PM notifier calls please, because
> they are not needed).

OK no problem.  That's easy enough.  I added them to play it safe as a 
test patch in case some VFP content could be lost somehow by looping 
back through the CPU init code for example, and needed to be saved.

> I also said that's the only thing he could do, and I still think 
> that's not a nice way to use the cpu_suspend API for something it was 
> not designed for, that's what I wanted to say, period.

Well... Maybe it wasn't designed for that, but it certainly can be used 
for that. And with no modifications to the core code, making this 
solution fairly elegant.  This is not so different from, say, the BPF 
code being reused for seccomp_filters. BPF wasn't designed for system 
call filtering, but it happens to work well.

> If I am allowed to say something, here is a couple of thoughts.
> 
> 1) CCI snoops and DVM enablement are secure operations, to do them in
>    non-secure world this must be overriden in firmware. You can argue,
>    you can think whatever you want, that's a fact. So, to use this
>    code SMP bit in SCTLR and CCI enablement must become non-secure
>    operations. This is a boot requirement for MCPM, right or wrong
>    it is up to platform designers to judge. If CCI and SMP enablement
>    are secure operations, we should not start adding random SMC calls
>    in the kernel, since managing coherency in the kernel would become
>    problematic, with lots of platform quirks. We do not want that to
>    happen, and I think we all agree on this.

One could certainly question the need for so many controls handled in 
secure world.  But that is not the point.

Here we're talking about MCPM.  That implies the kernel has control over 
SCTLR.SMP and the CCI.  If those things aren't under the kernel's 
control, then MCPM is of no use to you.

Therefore, if you do want to use MCPM, then this implies the kernel has 
access to the CCI. And if it has access to it, then it should turn it on 
by itself in all cases to be consistent, not only in half of the cases.

> 2) (1) must be documented.

Absolutely.  But let's be coherent in the implementation so the 
documentation is as simple as it can be.

> 3) When I talked about consequences for CPUidle (implicit), I was referring
>    to all sort of hacks we had to come up to bring devices like SPC
>    (remember ? I remember very very well unfortunately for me), or whatever
>    power controller up in the kernel early, too early to fit in any
>    existing kernel device framework. There is still no solution to that, and
>    the only way that code can exist is in mach- code. Right or wrong,
>    that's a second fact and in my opinion that's not nice for the ARM
>    kernel.

I disagree.  This can perfectly be turned into driver code. If we need 
it too early for existing kernel device framework to handle this 
properly, then the solution is to extend the existing framework or 
create another one specially for that purpose.  This may not be obvious 
when TC2 is the first/only platform in that situation, but if more 
platforms have the same need then it'll be easier to abstract 
commonalities into a framework.

Saying that no framework exists today or/and upstream maintainers are 
being difficult is _not_ a reason for throwing your hands up and e.g. 
shoving all this code into firmware instead.

> 4) When I am talking about firmware I am talking about sequences that
>    are very close to HW (disabling C bit, cleaning caches, exiting
>    coherency). Erratas notwithstanding, they are being standardized at
>    ARM the best we can. They might even end up being implemented in HW
>    in the not so far future. I understand they are tricky, I understand
>    they take lots of time to implement them and to debug them, what I
>    want to say is that they are becoming standard and we _must_ reuse the
>    same code for all ARM platforms. You can implement them in MCPM (see
>    (1)) or in firmware (and please do not start painting me as firmware
>    hugger here, I am referring to standard power down sequences that
>    again, are very close to HW state machines 

That's where the disconnect lies.  On the one hand you say "I understand 
they are tricky, I understand they take lots of time to implement them 
and to debug them" and on the other hand you say "They might end up being 
implemented in HW in the not so far future."  That simply makes no 
economical sense at all!

When some operation is 1) tricky and takes time to debug, and 2) not 
performance critical (no one is trying to get in and out of idle or 
hibernation a billion times per second), then you should never ever put 
such a thing in firmware, and hardware should be completely out of the 
question!

>    and more importantly if they
>    HAVE to run in secure world that's the only solution we have unless you
>    want to split race conditions between kernel and secure world).

If they HAVE to run in secure world then your secure world architecture 
is simply misdesigned, period.  Someone must have ignored the economics 
of modern software development to have come up with this.

> 5) I agree that the CCI enablement in TC2 (bootmon for cold boot and
>    kernel for warm-boot is wrong, nothing to say and it was not the
>    reason I commented on Nico's patch - I think I explained to you
>    thoroughly why now).

OK. Let's start by agreeing on the spirit behind my patch then.  The 
actual patch implementation details are a secondary concern and open for 
discussion.


Nicolas
--
To unsubscribe from this list: send the line "unsubscribe linux-samsung-soc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Lorenzo Pieralisi June 10, 2014, 9:19 a.m. UTC | #18
On Tue, Jun 10, 2014 at 05:25:47AM +0100, Nicolas Pitre wrote:
> On Mon, 9 Jun 2014, Lorenzo Pieralisi wrote:
> 
> > I commented on Nico's patch because I did not like how it was
> > implemented (at least remove the CPU PM notifier calls please, because
> > they are not needed).
> 
> OK no problem.  That's easy enough.  I added them to play it safe as a 
> test patch in case some VFP content could be lost somehow by looping 
> back through the CPU init code for example, and needed to be saved.

Ok, thanks.

> > I also said that's the only thing he could do, and I still think 
> > that's not a nice way to use the cpu_suspend API for something it was 
> > not designed for, that's what I wanted to say, period.
> 
> Well... Maybe it wasn't designed for that, but it certainly can be used 
> for that. And with no modifications to the core code, making this 
> solution fairly elegant.  This is not so different from, say, the BPF 
> code being reused for seccomp_filters. BPF wasn't designed for system 
> call filtering, but it happens to work well.

You defined yourself "not a small thing", you know what you are doing
and that's enough for me. Please respect that when I reviewed it I thought
that was a hack. cpu_suspend is being used for many things in the kernel
and consolidating that took a while, please comment the code, that's all I
am asking you.

> > If I am allowed to say something, here is a couple of thoughts.
> > 
> > 1) CCI snoops and DVM enablement are secure operations, to do them in
> >    non-secure world this must be overriden in firmware. You can argue,
> >    you can think whatever you want, that's a fact. So, to use this
> >    code SMP bit in SCTLR and CCI enablement must become non-secure
> >    operations. This is a boot requirement for MCPM, right or wrong
> >    it is up to platform designers to judge. If CCI and SMP enablement
> >    are secure operations, we should not start adding random SMC calls
> >    in the kernel, since managing coherency in the kernel would become
> >    problematic, with lots of platform quirks. We do not want that to
> >    happen, and I think we all agree on this.
> 
> One could certainly question the need for so many controls handled in 
> secure world.  But that is not the point.
> 
> Here we're talking about MCPM.  That implies the kernel has control over 
> SCTLR.SMP and the CCI.  If those things aren't under the kernel's 
> control, then MCPM is of no use to you.

ACTLR.SMP for the sake of precision and it was my typo, sorry. That's
all I wanted to read, so nothing to add.

> Therefore, if you do want to use MCPM, then this implies the kernel has 
> access to the CCI. And if it has access to it, then it should turn it on 
> by itself in all cases to be consistent, not only in half of the cases.

I agree.

> > 2) (1) must be documented.
> 
> Absolutely.  But let's be coherent in the implementation so the 
> documentation is as simple as it can be.

Ditto.

> > 3) When I talked about consequences for CPUidle (implicit), I was referring
> >    to all sort of hacks we had to come up to bring devices like SPC
> >    (remember ? I remember very very well unfortunately for me), or whatever
> >    power controller up in the kernel early, too early to fit in any
> >    existing kernel device framework. There is still no solution to that, and
> >    the only way that code can exist is in mach- code. Right or wrong,
> >    that's a second fact and in my opinion that's not nice for the ARM
> >    kernel.
> 
> I disagree.  This can perfectly be turned into driver code. If we need 
> it too early for existing kernel device framework to handle this 
> properly, then the solution is to extend the existing framework or 
> create another one specially for that purpose.  This may not be obvious 
> when TC2 is the first/only platform in that situation, but if more 
> platforms have the same need then it'll be easier to abstract 
> commonalities into a framework.
> 
> Saying that no framework exists today or/and upstream maintainers are 
> being difficult is _not_ a reason for throwing your hands up and e.g. 
> shoving all this code into firmware instead.

You have a point, as long as we are all aware and we do not forget this
is a major problem, not a minor one. I do want to see a consolidate
story for CPUidle for ARM and this bullet is definitely part of the
picture. On a side note, you made me smile, it sounded like I wanted
to bury SPC code in firmware or anywhere else as long as it is not in the
kernel, which in a way is a true statement since I abhor that code =)

> > 4) When I am talking about firmware I am talking about sequences that
> >    are very close to HW (disabling C bit, cleaning caches, exiting
> >    coherency). Erratas notwithstanding, they are being standardized at
> >    ARM the best we can. They might even end up being implemented in HW
> >    in the not so far future. I understand they are tricky, I understand
> >    they take lots of time to implement them and to debug them, what I
> >    want to say is that they are becoming standard and we _must_ reuse the
> >    same code for all ARM platforms. You can implement them in MCPM (see
> >    (1)) or in firmware (and please do not start painting me as firmware
> >    hugger here, I am referring to standard power down sequences that
> >    again, are very close to HW state machines 
> 
> That's where the disconnect lies.  On the one hand you say "I understand 
> they are tricky, I understand they take lots of time to implement them 
> and to debug them" and on the other hand you say "They might end up being 
> implemented in HW in the not so far future."  That simply makes no 
> economical sense at all!

I wanted to say Nico that those operations are so intrinsic for all ARM
cores you can almost think of them as part of the ISA and certainly
HW knows it better than SW when a processor has nothing to do, it does
idle core units all the time without software interaction, going power off
(on cue) could just be one step further and solve those pesky races in HW.

> When some operation is 1) tricky and takes time to debug, and 2) not 
> performance critical (no one is trying to get in and out of idle or 
> hibernation a billion times per second), then you should never ever put 
> such a thing in firmware, and hardware should be completely out of the 
> question!

We can debate this offline, it is an interesting topic in particular on
the performance critical side of things; as for firmware see my point
on security, as I already mentioned.

> >    and more importantly if they
> >    HAVE to run in secure world that's the only solution we have unless you
> >    want to split race conditions between kernel and secure world).
> 
> If they HAVE to run in secure world then your secure world architecture 
> is simply misdesigned, period.  Someone must have ignored the economics 
> of modern software development to have come up with this.

That's your opinion and I respect that. What I said, and that's a fact
not an opinion, is that if those operations are required to be secure in a
platform, there is not much you can do apart from preventing races where the
race conditions are, ie in secure world.

> > 5) I agree that the CCI enablement in TC2 (bootmon for cold boot and
> >    kernel for warm-boot is wrong, nothing to say and it was not the
> >    reason I commented on Nico's patch - I think I explained to you
> >    thoroughly why now).
> 
> OK. Let's start by agreeing on the spirit behind my patch then.  The 
> actual patch implementation details are a secondary concern and open for 
> discussion.

I agree on the spirit of the patch, my concern was about its implementation.

Thank you, this was a constructive discussion.

Lorenzo

--
To unsubscribe from this list: send the line "unsubscribe linux-samsung-soc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Catalin Marinas June 10, 2014, 2:14 p.m. UTC | #19
Hi Nico,

Sorry, I can't stay away from this thread ;)

On Tue, Jun 10, 2014 at 12:25:47AM -0400, Nicolas Pitre wrote:
> On Mon, 9 Jun 2014, Lorenzo Pieralisi wrote:
> > 4) When I am talking about firmware I am talking about sequences that
> >    are very close to HW (disabling C bit, cleaning caches, exiting
> >    coherency). Erratas notwithstanding, they are being standardized at
> >    ARM the best we can. They might even end up being implemented in HW
> >    in the not so far future. I understand they are tricky, I understand
> >    they take lots of time to implement them and to debug them, what I
> >    want to say is that they are becoming standard and we _must_ reuse the
> >    same code for all ARM platforms. You can implement them in MCPM (see
> >    (1)) or in firmware (and please do not start painting me as firmware
> >    hugger here, I am referring to standard power down sequences that
> >    again, are very close to HW state machines 
> 
> That's where the disconnect lies.  On the one hand you say "I understand 
> they are tricky, I understand they take lots of time to implement them 
> and to debug them" and on the other hand you say "They might end up being 
> implemented in HW in the not so far future."  That simply makes no 
> economical sense at all!

It makes lots of sense, though not from a software maintainability
perspective. It would be nice if everything still looked like ARM7TDMI
but in the race for performance (vs power), hardware becomes more
complex and it's not just the CPU but adjacent parts like interconnects,
caches, asynchronous bridges, voltage shifters, memory controllers,
clocks/PLLs etc. Many of these are simply hidden from the high level OS
like Linux because the OS assumes certain configuration (e.g. access to
memory) and it's only the hardware itself that knows in what order they
can be turned on or off (when triggered explicitly by the OS or an
external event). Having an dedicated power controller (e.g. M-class
processor) to handle some of these is a rather flexible approach, other
bits require RTL (and usually impossible to update).

> When some operation is 1) tricky and takes time to debug, and 2) not 
> performance critical (no one is trying to get in and out of idle or 
> hibernation a billion times per second), then you should never ever put 
> such a thing in firmware, and hardware should be completely out of the 
> question!

I agree that things can go wrong (both in hardware and software, no
matter where it runs) but please don't think that such power
architecture has been specifically engineered to hide the hardware from
Linux. It's a necessity for complex systems and the optimal solution is
not always simplification (it's not just ARM+vendors doing this, just
look at the power model of modern x86 processors, hidden nicely from the
software behind a few registers while making things harder for scheduler
which cannot rely on a constant performance level; but it's a trade-off
they are happy to make).

> >    and more importantly if they
> >    HAVE to run in secure world that's the only solution we have unless you
> >    want to split race conditions between kernel and secure world).
> 
> If they HAVE to run in secure world then your secure world architecture 
> is simply misdesigned, period.  Someone must have ignored the economics 
> of modern software development to have come up with this.

That's the trade-off between software complexity and hardware cost,
gates, power consumption. You can do proper physical separation of the
secure services but this would require a separate CPU that is rarely
used and adds to the overall SoC cost. On large scale hardware
deployment, it's exactly economics that matter and these translate into
hardware cost. The software cost is irrelevant here, whether we like it
or not.
Nicolas Pitre June 10, 2014, 4:49 p.m. UTC | #20
On Tue, 10 Jun 2014, Catalin Marinas wrote:

> Hi Nico,
> 
> Sorry, I can't stay away from this thread ;)

;-)

> On Tue, Jun 10, 2014 at 12:25:47AM -0400, Nicolas Pitre wrote:
> > On Mon, 9 Jun 2014, Lorenzo Pieralisi wrote:
> > > 4) When I am talking about firmware I am talking about sequences that
> > >    are very close to HW (disabling C bit, cleaning caches, exiting
> > >    coherency). Erratas notwithstanding, they are being standardized at
> > >    ARM the best we can. They might even end up being implemented in HW
> > >    in the not so far future. I understand they are tricky, I understand
> > >    they take lots of time to implement them and to debug them, what I
> > >    want to say is that they are becoming standard and we _must_ reuse the
> > >    same code for all ARM platforms. You can implement them in MCPM (see
> > >    (1)) or in firmware (and please do not start painting me as firmware
> > >    hugger here, I am referring to standard power down sequences that
> > >    again, are very close to HW state machines 
> > 
> > That's where the disconnect lies.  On the one hand you say "I understand 
> > they are tricky, I understand they take lots of time to implement them 
> > and to debug them" and on the other hand you say "They might end up being 
> > implemented in HW in the not so far future."  That simply makes no 
> > economical sense at all!
> 
> It makes lots of sense, though not from a software maintainability
> perspective. It would be nice if everything still looked like ARM7TDMI
> but in the race for performance (vs power), hardware becomes more
> complex and it's not just the CPU but adjacent parts like interconnects,
> caches, asynchronous bridges, voltage shifters, memory controllers,
> clocks/PLLs etc. Many of these are simply hidden from the high level OS
> like Linux because the OS assumes certain configuration (e.g. access to
> memory) and it's only the hardware itself that knows in what order they
> can be turned on or off (when triggered explicitly by the OS or an
> external event).

I agree when the hardware has to handle parallel dependencies ordered in 
waterfall style. In such cases there is usually no point relying on 
software to implement what is nevertheless simple determinism with 
no possible alternative usage.

But the *most* important thing is what you put in parents, so let me 
emphasize on what you just said:

	When triggered _explicitly_ by the OS or external events

> Having an dedicated power controller (e.g. M-class
> processor) to handle some of these is a rather flexible approach, other
> bits require RTL (and usually impossible to update).

The M-class processor should be treated the same way as firmware.  It 
ought to be flexible (certainly more than hardwired hardware), but it 
shares all the same downsides as firmware and the same concerns apply.

> > When some operation is 1) tricky and takes time to debug, and 2) not 
> > performance critical (no one is trying to get in and out of idle or 
> > hibernation a billion times per second), then you should never ever put 
> > such a thing in firmware, and hardware should be completely out of the 
> > question!
> 
> I agree that things can go wrong (both in hardware and software, no
> matter where it runs) but please don't think that such power
> architecture has been specifically engineered to hide the hardware from
> Linux. It's a necessity for complex systems and the optimal solution is
> not always simplification (it's not just ARM+vendors doing this, just
> look at the power model of modern x86 processors, hidden nicely from the
> software behind a few registers while making things harder for scheduler
> which cannot rely on a constant performance level; but it's a trade-off
> they are happy to make).

I'll claim that this is a bad tradeoff.  And the reason why some 
hardware architects might think it is a good one is because so far we 
really sucked at software based power management in Linux (and possibly 
other OSes as well).  Hence the (fairly recent) realization that power 
management has to be integrated and under control of the scheduler 
rather than existing as some ad hoc subsystem.

The reaction from the hardware people often is "the software is crap and 
makes our hardware look bad, we know better, so let's make it easier on 
those poor software dudes by handling power management in hardware 
instead".  But ultimately the hardware just can't predict things like 
software can.  It might do a better job than the current software state 
of affairs, but most likely not be as efficient as a proper software 
architecture.  The hardware may only be reactive, whereas the software 
can be proactive (when properly done that is).

I sense from your paragraph above that ARM might be going the same 
direction as X86 and that would be very sad.  Maybe the best compromise 
would be for all knobs to be made available to software if software 
wants to turn off the hardware auto-pilot and take control.  This way 
the hardware guys would cover their arses while still allowing for the 
possibility that software might be able to out perform the hardware 
solution.

> > >    and more importantly if they
> > >    HAVE to run in secure world that's the only solution we have unless you
> > >    want to split race conditions between kernel and secure world).
> > 
> > If they HAVE to run in secure world then your secure world architecture 
> > is simply misdesigned, period.  Someone must have ignored the economics 
> > of modern software development to have come up with this.
> 
> That's the trade-off between software complexity and hardware cost,
> gates, power consumption. You can do proper physical separation of the
> secure services but this would require a separate CPU that is rarely
> used and adds to the overall SoC cost. On large scale hardware
> deployment, it's exactly economics that matter and these translate into
> hardware cost. The software cost is irrelevant here, whether we like it
> or not.

I agree with you on the hardware cost (and the same argument applies to 
power management by the way).  But once the hardware is there, the 
software cost has to be optimized the same way.

From a cost perspective, firmware is always a magnitude more costly to 
develop and to fix and maintain afterwards than kernel code.  So, 
without requiring full physical separation increasing the hardware cost, 
I think the software architecture would benefit from a rethought, 
possibly with the help of small and cheap hardware enhancements.  I 
really think not enough attention has been dedicated to that aspect.


Nicolas
--
To unsubscribe from this list: send the line "unsubscribe linux-samsung-soc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Catalin Marinas June 10, 2014, 5:42 p.m. UTC | #21
On Tue, Jun 10, 2014 at 05:49:01PM +0100, Nicolas Pitre wrote:
> On Tue, 10 Jun 2014, Catalin Marinas wrote:
> > On Tue, Jun 10, 2014 at 12:25:47AM -0400, Nicolas Pitre wrote:
> > > On Mon, 9 Jun 2014, Lorenzo Pieralisi wrote:
> > > > 4) When I am talking about firmware I am talking about sequences that
> > > >    are very close to HW (disabling C bit, cleaning caches, exiting
> > > >    coherency). Erratas notwithstanding, they are being standardized at
> > > >    ARM the best we can. They might even end up being implemented in HW
> > > >    in the not so far future. I understand they are tricky, I understand
> > > >    they take lots of time to implement them and to debug them, what I
> > > >    want to say is that they are becoming standard and we _must_ reuse the
> > > >    same code for all ARM platforms. You can implement them in MCPM (see
> > > >    (1)) or in firmware (and please do not start painting me as firmware
> > > >    hugger here, I am referring to standard power down sequences that
> > > >    again, are very close to HW state machines 
> > > 
> > > That's where the disconnect lies.  On the one hand you say "I understand 
> > > they are tricky, I understand they take lots of time to implement them 
> > > and to debug them" and on the other hand you say "They might end up being 
> > > implemented in HW in the not so far future."  That simply makes no 
> > > economical sense at all!
> > 
> > It makes lots of sense, though not from a software maintainability
> > perspective. It would be nice if everything still looked like ARM7TDMI
> > but in the race for performance (vs power), hardware becomes more
> > complex and it's not just the CPU but adjacent parts like interconnects,
> > caches, asynchronous bridges, voltage shifters, memory controllers,
> > clocks/PLLs etc. Many of these are simply hidden from the high level OS
> > like Linux because the OS assumes certain configuration (e.g. access to
> > memory) and it's only the hardware itself that knows in what order they
> > can be turned on or off (when triggered explicitly by the OS or an
> > external event).
> 
> I agree when the hardware has to handle parallel dependencies ordered in 
> waterfall style. In such cases there is usually no point relying on 
> software to implement what is nevertheless simple determinism with 
> no possible alternative usage.
> 
> But the *most* important thing is what you put in parens, so let me 
> emphasize on what you just said:
> 
> 	When triggered _explicitly_ by the OS or external events

I don't think anyone is arguing that the policy should not be in the OS.
But part of the mechanism can be in the OS and part in firmware, SCP or
hardware. The kernel part can be a simple PSCI call or more complex
setup (possibly MCPM-based) which usually ends up with a WFI. This WFI,
however, triggers further hardware changes which may be handled by
dedicated power controller.

> > Having an dedicated power controller (e.g. M-class
> > processor) to handle some of these is a rather flexible approach, other
> > bits require RTL (and usually impossible to update).
> 
> The M-class processor should be treated the same way as firmware.  It 
> ought to be flexible (certainly more than hardwired hardware), but it 
> shares all the same downsides as firmware and the same concerns apply.

Yes, we can treat it as firmware, but we don't have a better alternative
to move the functionality into the kernel (well, we could at least allow
the kernel to load a binary blob and restart the controller).

> > > When some operation is 1) tricky and takes time to debug, and 2) not 
> > > performance critical (no one is trying to get in and out of idle or 
> > > hibernation a billion times per second), then you should never ever put 
> > > such a thing in firmware, and hardware should be completely out of the 
> > > question!
> > 
> > I agree that things can go wrong (both in hardware and software, no
> > matter where it runs) but please don't think that such power
> > architecture has been specifically engineered to hide the hardware from
> > Linux. It's a necessity for complex systems and the optimal solution is
> > not always simplification (it's not just ARM+vendors doing this, just
> > look at the power model of modern x86 processors, hidden nicely from the
> > software behind a few registers while making things harder for scheduler
> > which cannot rely on a constant performance level; but it's a trade-off
> > they are happy to make).
> 
> I'll claim that this is a bad tradeoff.  And the reason why some 
> hardware architects might think it is a good one is because so far we 
> really sucked at software based power management in Linux (and possibly 
> other OSes as well).  Hence the (fairly recent) realization that power 
> management has to be integrated and under control of the scheduler 
> rather than existing as some ad hoc subsystem.

But even this is a complex problem. Anyway, I don't think the (ARM at
least) hardware guys aim to take over the cpufreq or Linux scheduler
functionality. Their concern is rather mechanism rather than policy.

> The reaction from the hardware people often is "the software is crap and 
> makes our hardware look bad, we know better, so let's make it easier on 
> those poor software dudes by handling power management in hardware 
> instead".  But ultimately the hardware just can't predict things like 
> software can.  It might do a better job than the current software state 
> of affairs, but most likely not be as efficient as a proper software 
> architecture.  The hardware may only be reactive, whereas the software 
> can be proactive (when properly done that is).

Indeed. But that's not the aim of the power controller on our boards.
It's just a mechanism for safely changing sleep states, CPU frequencies
but entirely under the OS decision.

> I sense from your paragraph above that ARM might be going the same 
> direction as X86 and that would be very sad.  Maybe the best compromise 
> would be for all knobs to be made available to software if software 
> wants to turn off the hardware auto-pilot and take control.  This way 
> the hardware guys would cover their arses while still allowing for the 
> possibility that software might be able to out perform the hardware 
> solution.

I'm definitely not suggesting ARM is going the same route. Just trying
to show that ARM is slightly better here.

As a personal opinion, I like the simplicity of writing a register to
change the P-state but I don't like the non-determinism of the x86
hardware w.r.t. CPU performance. There are however some "policy" aspects
which I find interesting (like detecting whether the workload is memory
bound and automatically lowering the CPU frequency; the OS cannot react
this fast).

> > > >    and more importantly if they
> > > >    HAVE to run in secure world that's the only solution we have unless you
> > > >    want to split race conditions between kernel and secure world).
> > > 
> > > If they HAVE to run in secure world then your secure world architecture 
> > > is simply misdesigned, period.  Someone must have ignored the economics 
> > > of modern software development to have come up with this.
> > 
> > That's the trade-off between software complexity and hardware cost,
> > gates, power consumption. You can do proper physical separation of the
> > secure services but this would require a separate CPU that is rarely
> > used and adds to the overall SoC cost. On large scale hardware
> > deployment, it's exactly economics that matter and these translate into
> > hardware cost. The software cost is irrelevant here, whether we like it
> > or not.
> 
> I agree with you on the hardware cost (and the same argument applies to 
> power management by the way).  But once the hardware is there, the 
> software cost has to be optimized the same way.
> 
> From a cost perspective, firmware is always a magnitude more costly to 
> develop and to fix and maintain afterwards than kernel code.  So, 
> without requiring full physical separation increasing the hardware cost, 
> I think the software architecture would benefit from a rethought, 
> possibly with the help of small and cheap hardware enhancements.  I 
> really think not enough attention has been dedicated to that aspect.

I fully agree (and I think Linaro is well positioned for this ;),
possibly as an extension of the boot+firmware architecture).
Nicolas Pitre June 10, 2014, 7:15 p.m. UTC | #22
On Tue, 10 Jun 2014, Catalin Marinas wrote:
> On Tue, Jun 10, 2014 at 05:49:01PM +0100, Nicolas Pitre wrote:
> > The M-class processor should be treated the same way as firmware.  It 
> > ought to be flexible (certainly more than hardwired hardware), but it 
> > shares all the same downsides as firmware and the same concerns apply.
> 
> Yes, we can treat it as firmware, but we don't have a better alternative
> to move the functionality into the kernel (well, we could at least allow
> the kernel to load a binary blob and restart the controller).

That would address the "easy to update in the field" side of the story.  
So far I've not seen this aspect been addressed with a serious plan 
anywhere.

> > The reaction from the hardware people often is "the software is crap and 
> > makes our hardware look bad, we know better, so let's make it easier on 
> > those poor software dudes by handling power management in hardware 
> > instead".  But ultimately the hardware just can't predict things like 
> > software can.  It might do a better job than the current software state 
> > of affairs, but most likely not be as efficient as a proper software 
> > architecture.  The hardware may only be reactive, whereas the software 
> > can be proactive (when properly done that is).
> 
> Indeed. But that's not the aim of the power controller on our boards.
> It's just a mechanism for safely changing sleep states, CPU frequencies
> but entirely under the OS decision.

Sure.  But then you might want to consider that some usage scenarios 
might benefit from the ability to abort a request, or monitor the 
progress of a request for software timing purposes, or accept parallel 
requests rather than serialize them, etc.  Given the flexibility to 
extend beyond a rigid interface, the system may become even more 
efficient overall, albeit with added complexity in the implementation. 
But for that to work it has to be cheaply achievable.

> > I sense from your paragraph above that ARM might be going the same 
> > direction as X86 and that would be very sad.  Maybe the best compromise 
> > would be for all knobs to be made available to software if software 
> > wants to turn off the hardware auto-pilot and take control.  This way 
> > the hardware guys would cover their arses while still allowing for the 
> > possibility that software might be able to out perform the hardware 
> > solution.
> 
> I'm definitely not suggesting ARM is going the same route. Just trying
> to show that ARM is slightly better here.
> 
> As a personal opinion, I like the simplicity of writing a register to
> change the P-state but I don't like the non-determinism of the x86
> hardware w.r.t. CPU performance. There are however some "policy" aspects
> which I find interesting (like detecting whether the workload is memory
> bound and automatically lowering the CPU frequency; the OS cannot react
> this fast).

This is not really a policy.  This is a straight-forward waterfall 
dependency.  There is simply nothing you can do with those CPU clock 
cycles when stalled most of the time waiting for memory queries to come 
back so the choice is obvious.  If however this has a significant impact 
on code execution speed then this becomes a tradeoff and the choice to 
use this feature or not (the policy) must be implemented in software.


Nicolas
--
To unsubscribe from this list: send the line "unsubscribe linux-samsung-soc" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
diff mbox

Patch

diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c
index 86fd60fefb..1cc49de308 100644
--- a/arch/arm/common/mcpm_entry.c
+++ b/arch/arm/common/mcpm_entry.c
@@ -12,11 +12,13 @@ 
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/irqflags.h>
+#include <linux/cpu_pm.h>
 
 #include <asm/mcpm.h>
 #include <asm/cacheflush.h>
 #include <asm/idmap.h>
 #include <asm/cputype.h>
+#include <asm/suspend.h>
 
 extern unsigned long mcpm_entry_vectors[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER];
 
@@ -146,6 +148,44 @@  int mcpm_cpu_powered_up(void)
 	return 0;
 }
 
+static int go_down(unsigned long _arg)
+{
+	void (*cache_disable)(void) = (void *)_arg;
+	unsigned int mpidr = read_cpuid_mpidr();
+	unsigned int cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
+	unsigned int cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
+	phys_reset_t phys_reset;
+
+	mcpm_set_entry_vector(cpu, cluster, cpu_resume);
+	setup_mm_for_reboot();
+
+	__mcpm_cpu_going_down(cpu, cluster);
+	BUG_ON(!__mcpm_outbound_enter_critical(cpu, cluster));
+	cache_disable();
+	__mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
+	__mcpm_cpu_down(cpu, cluster);
+
+	phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset);
+	phys_reset(virt_to_phys(mcpm_entry_point));
+	BUG();
+}
+	
+int mcpm_loopback(void (*cache_disable)(void))
+{
+	int ret;
+
+	local_irq_disable();
+	local_fiq_disable();
+	ret = cpu_pm_enter();
+	if (!ret) {
+		ret = cpu_suspend((unsigned long)cache_disable, go_down);
+		cpu_pm_exit();
+	}
+	local_fiq_enable();
+	local_irq_enable();
+	return ret;
+}
+
 struct sync_struct mcpm_sync;
 
 /*
diff --git a/arch/arm/mach-vexpress/tc2_pm.c b/arch/arm/mach-vexpress/tc2_pm.c
index 29e7785a54..abecdd734f 100644
--- a/arch/arm/mach-vexpress/tc2_pm.c
+++ b/arch/arm/mach-vexpress/tc2_pm.c
@@ -323,6 +323,26 @@  static void __naked tc2_pm_power_up_setup(unsigned int affinity_level)
 "	b	cci_enable_port_for_self ");
 }
 
+int mcpm_loopback(void (*cache_disable)(void));
+
+static void tc2_cache_down(void)
+{
+	pr_warn("TC2: disabling cache\n");
+	if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A15) {
+		/*
+		 * On the Cortex-A15 we need to disable
+		 * L2 prefetching before flushing the cache.
+		 */
+		asm volatile(
+		"mcr	p15, 1, %0, c15, c0, 3 \n\t"
+		"isb	\n\t"
+		"dsb	"
+		: : "r" (0x400) );
+	}
+	v7_exit_coherency_flush(all);
+	cci_disable_port_by_cpu(read_cpuid_mpidr());
+}
+
 static int __init tc2_pm_init(void)
 {
 	int ret, irq;
@@ -370,6 +390,7 @@  static int __init tc2_pm_init(void)
 	ret = mcpm_platform_register(&tc2_pm_power_ops);
 	if (!ret) {
 		mcpm_sync_init(tc2_pm_power_up_setup);
+		BUG_ON(mcpm_loopback(tc2_cache_down) != 0);
 		pr_info("TC2 power management initialized\n");
 	}
 	return ret;