
[PATCHv2] soc: qcom: llcc: Support chipsets that can write to llcc registers

Message ID 20200817144722.6665-1-saiprakash.ranjan@codeaurora.org
State New
Series [PATCHv2] soc: qcom: llcc: Support chipsets that can write to llcc registers

Commit Message

Sai Prakash Ranjan Aug. 17, 2020, 2:47 p.m. UTC
From: "Isaac J. Manjarres" <isaacm@codeaurora.org>

Older chipsets may not be allowed to configure certain LLCC registers
as that is handled by the secure side software. However, this is not
the case for newer chipsets and they must configure these registers
according to the contents of the SCT table, while keeping in mind that
older targets may not have these capabilities. So add support to allow
such configuration of registers to enable capacity based allocation
and power collapse retention for capable chipsets.

Signed-off-by: Isaac J. Manjarres <isaacm@codeaurora.org>
(sai: use table instead of dt property and minor commit msg change)
Signed-off-by: Sai Prakash Ranjan <saiprakash.ranjan@codeaurora.org>
---

Changes in v2:
 * Fix build errors reported by kernel test robot.

---
 drivers/soc/qcom/llcc-qcom.c | 27 +++++++++++++++++++++++++++
 1 file changed, 27 insertions(+)

Comments

Doug Anderson Aug. 17, 2020, 9:05 p.m. UTC | #1
Hi,

On Mon, Aug 17, 2020 at 7:47 AM Sai Prakash Ranjan
<saiprakash.ranjan@codeaurora.org> wrote:
>
> From: "Isaac J. Manjarres" <isaacm@codeaurora.org>
>
> Older chipsets may not be allowed to configure certain LLCC registers
> as that is handled by the secure side software. However, this is not
> the case for newer chipsets and they must configure these registers
> according to the contents of the SCT table, while keeping in mind that
> older targets may not have these capabilities. So add support to allow
> such configuration of registers to enable capacity based allocation
> and power collapse retention for capable chipsets.

I have very little idea about what the above means.  That being said,
what's broken that this patch fixes?  Please include this in the CL
description.  It should answer, in the very least, the following two
questions:

a) Were existing attempts to do capacity based allocation failing, or
is capacity based allocation a new whizbang feature that a future
patch will add and you need this one to land first?

b) Why was it bad not to enable power collapse retention?  Was this
causing things to get corrupted after resume?  Was this causing us to
fail to suspend?  Were we burning too little power in S3 and the
battery vendors are looking for an excuse to sell bigger batteries?

I'm not very smart and am also lacking documentation for what the heck
all this is, so I'm looking for the "why" of your patch.


> Signed-off-by: Isaac J. Manjarres <isaacm@codeaurora.org>
> (sai: use table instead of dt property and minor commit msg change)
> Signed-off-by: Sai Prakash Ranjan <saiprakash.ranjan@codeaurora.org>
> ---
>
> Changes in v2:
>  * Fix build errors reported by kernel test robot.
>
> ---
>  drivers/soc/qcom/llcc-qcom.c | 27 +++++++++++++++++++++++++++
>  1 file changed, 27 insertions(+)
>
> diff --git a/drivers/soc/qcom/llcc-qcom.c b/drivers/soc/qcom/llcc-qcom.c
> index 429b5a60a1ba..865f607cf502 100644
> --- a/drivers/soc/qcom/llcc-qcom.c
> +++ b/drivers/soc/qcom/llcc-qcom.c
> @@ -45,6 +45,9 @@
>  #define LLCC_TRP_ATTR0_CFGn(n)        (0x21000 + SZ_8 * n)
>  #define LLCC_TRP_ATTR1_CFGn(n)        (0x21004 + SZ_8 * n)
>
> +#define LLCC_TRP_SCID_DIS_CAP_ALLOC   0x21F00
> +#define LLCC_TRP_PCB_ACT              0x21F04
> +
>  #define BANK_OFFSET_STRIDE           0x80000
>
>  /**
> @@ -318,6 +321,11 @@ size_t llcc_get_slice_size(struct llcc_slice_desc *desc)
>  }
>  EXPORT_SYMBOL_GPL(llcc_get_slice_size);
>
> +static const struct of_device_id __maybe_unused qcom_llcc_configure_of_match[] = {
> +       { .compatible = "qcom,sc7180-llcc" },
> +       { }
> +};

Why are you introducing a whole second table?  Shouldn't you just add
a field to "struct qcom_llcc_config" ?
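
For illustration, a minimal sketch of that direction; the new flag name is
just a placeholder, and the sc7180 entry is only meant to mirror the
existing per-SoC configs:

struct qcom_llcc_config {
	const struct llcc_slice_config *sct_data;
	int size;
	bool need_llcc_cfg;	/* hypothetical: SoC allows non-secure LLCC writes */
};

static const struct qcom_llcc_config sc7180_cfg = {
	.sct_data	= sc7180_data,
	.size		= ARRAY_SIZE(sc7180_data),
	.need_llcc_cfg	= true,
};

Members left out of a static initializer default to 0/false, so SoCs that
must not touch these registers (e.g. sdm845) would need no change at all.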


> +
>  static int qcom_llcc_cfg_program(struct platform_device *pdev)
>  {
>         int i;
> @@ -327,13 +335,17 @@ static int qcom_llcc_cfg_program(struct platform_device *pdev)
>         u32 attr0_val;
>         u32 max_cap_cacheline;
>         u32 sz;
> +       u32 disable_cap_alloc = 0, retain_pc = 0;

Don't init to 0.  See below.


>         int ret = 0;
>         const struct llcc_slice_config *llcc_table;
>         struct llcc_slice_desc desc;
> +       const struct of_device_id *llcc_configure;
>
>         sz = drv_data->cfg_size;
>         llcc_table = drv_data->cfg;
>
> +       llcc_configure = of_match_node(qcom_llcc_configure_of_match, pdev->dev.of_node);
> +

As per above, just use the existing config.
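
A rough sketch of how qcom_llcc_cfg_program() could consume that flag
instead (again assuming the hypothetical need_llcc_cfg field from above):

	const struct qcom_llcc_config *cfg;

	cfg = of_device_get_match_data(&pdev->dev);
	if (!cfg)
		return -ENODEV;
	...
	if (cfg->need_llcc_cfg) {
		/* program LLCC_TRP_SCID_DIS_CAP_ALLOC and LLCC_TRP_PCB_ACT */
	}

That avoids parsing a second of_device_id table on every probe and keeps
the per-SoC policy next to the rest of the SoC's config.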


>         for (i = 0; i < sz; i++) {
>                 attr1_cfg = LLCC_TRP_ATTR1_CFGn(llcc_table[i].slice_id);
>                 attr0_cfg = LLCC_TRP_ATTR0_CFGn(llcc_table[i].slice_id);
> @@ -369,6 +381,21 @@ static int qcom_llcc_cfg_program(struct platform_device *pdev)
>                                         attr0_val);
>                 if (ret)
>                         return ret;
> +
> +               if (llcc_configure) {
> +                       disable_cap_alloc |= llcc_table[i].dis_cap_alloc << llcc_table[i].slice_id;

Don't "|=".  You're the only place touching this variable.  Just set it.


> +                       ret = regmap_write(drv_data->bcast_regmap,
> +                                               LLCC_TRP_SCID_DIS_CAP_ALLOC, disable_cap_alloc);
> +                       if (ret)
> +                               return ret;
> +
> +                       retain_pc |= llcc_table[i].retain_on_pc << llcc_table[i].slice_id;

Don't "|=".  You're the only place touching this variable.  Just set it.


-Doug
Sai Prakash Ranjan Aug. 18, 2020, 9:40 a.m. UTC | #2
Hi,

On 2020-08-18 02:35, Doug Anderson wrote:
> Hi,
> 
> On Mon, Aug 17, 2020 at 7:47 AM Sai Prakash Ranjan
> <saiprakash.ranjan@codeaurora.org> wrote:
>> 
>> From: "Isaac J. Manjarres" <isaacm@codeaurora.org>
>> 
>> Older chipsets may not be allowed to configure certain LLCC registers
>> as that is handled by the secure side software. However, this is not
>> the case for newer chipsets and they must configure these registers
>> according to the contents of the SCT table, while keeping in mind that
>> older targets may not have these capabilities. So add support to allow
>> such configuration of registers to enable capacity based allocation
>> and power collapse retention for capable chipsets.
> 
> I have very little idea about what the above means.  That being said,
> what's broken that this patch fixes?  Please include this in the CL
> description.  It should answer, in the very least, the following two
> questions:
> 

As the commit message says, when the secure software configures these LLCC
registers, usually one of two things can happen:

1) Accessing those registers from the non-secure world (e.g. the kernel)
results in a fault which is trapped by the secure side.

2) Access to those registers is simply ignored and there is no fault.

So for older chipsets, this is a fix to keep them from accessing those
registers. For newer chipsets, we follow the recommended settings from the
HW/SW arch teams. But the upstream llcc driver currently only supports
SDM845, which is not required to configure those registers, and as per my
testing no crash is observed on SDM845. So we won't need a Fixes tag.

> a) Were existing attempts to do capacity based allocation failing, or
> is capacity based allocation a new whizbang feature that a future
> patch will add and you need this one to land first?
> 

Capacity-based allocation and way-based allocation are cache partitioning
schemes/algorithms usually used in shared LLCs. Which one to use, and why
one is preferred over the other, is decided and recommended by the HW/SW
architecture teams. So if the question is what capacity-based allocation is
and how it works, then I am afraid I will not be able to explain that
algorithm off the top of my head.

> b) Why was it bad not to enable power collapse retention?  Was this
> causing things to get corrupted after resume?  Was this causing us to
> fail to suspend?  Were we burning too little power in S3 and the
> battery vendors are looking for an excuse to sell bigger batteries?
> 
> I'm not very smart and am also lacking documentation for what the heck
> all this is, so I'm looking for the "why" of your patch.
> 

That's a fair point. I will try to dig through to find some context for
"question b" and check if there were any battery vendors involved in this
decision ;)

> 
>> Signed-off-by: Isaac J. Manjarres <isaacm@codeaurora.org>
>> (sai: use table instead of dt property and minor commit msg change)
>> Signed-off-by: Sai Prakash Ranjan <saiprakash.ranjan@codeaurora.org>
>> ---
>> 
>> Changes in v2:
>>  * Fix build errors reported by kernel test robot.
>> 
>> ---
>>  drivers/soc/qcom/llcc-qcom.c | 27 +++++++++++++++++++++++++++
>>  1 file changed, 27 insertions(+)
>> 
>> diff --git a/drivers/soc/qcom/llcc-qcom.c 
>> b/drivers/soc/qcom/llcc-qcom.c
>> index 429b5a60a1ba..865f607cf502 100644
>> --- a/drivers/soc/qcom/llcc-qcom.c
>> +++ b/drivers/soc/qcom/llcc-qcom.c
>> @@ -45,6 +45,9 @@
>>  #define LLCC_TRP_ATTR0_CFGn(n)        (0x21000 + SZ_8 * n)
>>  #define LLCC_TRP_ATTR1_CFGn(n)        (0x21004 + SZ_8 * n)
>> 
>> +#define LLCC_TRP_SCID_DIS_CAP_ALLOC   0x21F00
>> +#define LLCC_TRP_PCB_ACT              0x21F04
>> +
>>  #define BANK_OFFSET_STRIDE           0x80000
>> 
>>  /**
>> @@ -318,6 +321,11 @@ size_t llcc_get_slice_size(struct llcc_slice_desc 
>> *desc)
>>  }
>>  EXPORT_SYMBOL_GPL(llcc_get_slice_size);
>> 
>> +static const struct of_device_id __maybe_unused 
>> qcom_llcc_configure_of_match[] = {
>> +       { .compatible = "qcom,sc7180-llcc" },
>> +       { }
>> +};
> 
> Why are you introducing a whole second table?  Shouldn't you just add
> a field to "struct qcom_llcc_config" ?
> 

This was my 2nd option. The first one was to key this off the LLCC version
exposed by the hw info registers, but that didn't turn out well since I
couldn't find any relation between this property and the LLCC version.

The second option was, as you mentioned, to add a field to qcom_llcc_config.
That is good, but then I thought that if we add LLCC support for 20 (random
number) SoCs of which 10 are capable of cap_based_alloc and the other 10 are
not, we would still be adding 20 more lines, one to each SoC's llcc_config,
if we follow this 2nd option.

So why not opt for a 3rd option with the table, where you only need to list
the capable targets, which is just 10 in the sample case above.

Am I just overthinking this and should I just go with the 2nd option as you
mentioned?

> 
>> +
>>  static int qcom_llcc_cfg_program(struct platform_device *pdev)
>>  {
>>         int i;
>> @@ -327,13 +335,17 @@ static int qcom_llcc_cfg_program(struct 
>> platform_device *pdev)
>>         u32 attr0_val;
>>         u32 max_cap_cacheline;
>>         u32 sz;
>> +       u32 disable_cap_alloc = 0, retain_pc = 0;
> 
> Don't init to 0.  See below.
> 
> 
>>         int ret = 0;
>>         const struct llcc_slice_config *llcc_table;
>>         struct llcc_slice_desc desc;
>> +       const struct of_device_id *llcc_configure;
>> 
>>         sz = drv_data->cfg_size;
>>         llcc_table = drv_data->cfg;
>> 
>> +       llcc_configure = of_match_node(qcom_llcc_configure_of_match, 
>> pdev->dev.of_node);
>> +
> 
> As per above, just use the existing config.
> 

See above explanation.

> 
>>         for (i = 0; i < sz; i++) {
>>                 attr1_cfg = 
>> LLCC_TRP_ATTR1_CFGn(llcc_table[i].slice_id);
>>                 attr0_cfg = 
>> LLCC_TRP_ATTR0_CFGn(llcc_table[i].slice_id);
>> @@ -369,6 +381,21 @@ static int qcom_llcc_cfg_program(struct 
>> platform_device *pdev)
>>                                         attr0_val);
>>                 if (ret)
>>                         return ret;
>> +
>> +               if (llcc_configure) {
>> +                       disable_cap_alloc |= 
>> llcc_table[i].dis_cap_alloc << llcc_table[i].slice_id;
> 
> Don't "|=".  You're the only place touching this variable.  Just set 
> it.
> 

Ack, will change.

> 
>> +                       ret = regmap_write(drv_data->bcast_regmap,
>> +                                               
>> LLCC_TRP_SCID_DIS_CAP_ALLOC, disable_cap_alloc);
>> +                       if (ret)
>> +                               return ret;
>> +
>> +                       retain_pc |= llcc_table[i].retain_on_pc << 
>> llcc_table[i].slice_id;
> 
> Don't "|=".  You're the only place touching this variable.  Just set 
> it.
> 

Ack, will change.

Thanks,
Sai
Sai Prakash Ranjan Aug. 18, 2020, 3:37 p.m. UTC | #3
Hi Doug,

On 2020-08-18 20:42, Doug Anderson wrote:
> Hi,
> 
<snip>...

> 
> I guess to start, it wasn't obvious (to me) that there were two
> choices and we were picking one.  Mentioning that the other
> alternative was way-based allocation would help a lot.  Even if you
> can't fully explain the differences between the two, adding something
> to the commit message indicating that this is a policy decision (in
> other words, both work but each have their tradeoffs) would help.
> Something like this, if it's correct:
> 
> In general we try to enable capacity based allocation (instead of the
> default way based allocation) since that gives us better performance
> with the current software / hardware configuration.
> 

Thanks, I will add it for the next version. Let me also go poke some arch
teams to understand if we actually gain something with this selection; who
knows, we might get some additional details as well.

>> >
>> > Why are you introducing a whole second table?  Shouldn't you just add
>> > a field to "struct qcom_llcc_config" ?
>> >
>> 
>> This was my 2nd option, first one was to have this based on the 
>> version
>> of LLCC
>> which are exposed by hw info registers. But that didn't turn out good
>> since I
>> couldn't find any relation of this property with LLCC version.
>> 
>> Second option was as you mentioned to have a field to 
>> qcom_llcc_config.
>> Now this is good,
>> but then I thought that if we add LLCC support for 20(random number)
>> SoCs of which
>> 10 is capable of supporting cap_based_alloc and rest 10 are not, then 
>> we
>> will still be adding
>> 20 more lines to each SoC's llcc_config if we follow this 2nd option.
> 
> If you do it right, you'd only need to add lines to the SoCs that need
> it.  Linux conventions in general are that it's OK (and encouraged) to
> rely on the fact that if you don't mention a variable in static
> initialization it's initted to 0/False/NULL.  So if the member
> variable is "need_llcc_config" then you only need to add this in
> places where you're setting it to true.  It only needs to be a boolean
> so if later someone really is worried about all the bytes that flags
> like this are taking up we can use a bitfield.  For now (presumably)
> just adding a boolean would be more efficient since (presumably) the
> extra code needed to access a bitfield would be more than the extra
> data bytes used.  In theory this could also be initdata probably, too.
> 

Yes, but in this case we would better be explicit about the capable SoCs,
because while it is easy for someone in QCOM to confirm whether an SoC is
actually capable, it will not be obvious to outside folks that a particular
SoC supports this. I will use a boolean field, explicitly initialized, to
indicate whether a particular SoC is capable or not.

> 
>> So why not opt for a 3rd option with the table where you just need to
>> specify only the capable
>> targets which is just 10 in our sample case above.
> 
> Are you trying to save space?  ...or complexity?  Sure a good compiler
> will probably pool the constant string here so you won't need to
> allocate it twice, but IMO having a second match table is more
> complex.  You also need at least a pointer + bool per entry.  Each
> probe will now need to parse through all these strings, too.  None of
> this is a big deal, but I'd bet just adding a field to the existing
> struct is lower overhead all around.
> 

Mostly space, but I agree now that the boolean field is indeed better than
a separate table.

> 
>> Am I just overthinking this too much and should just go with the 2nd
>> option as you mentioned?
> 
> Someone could feel free to vote against me, but I'd just add to the
> existing config.
> 

I vote for you :)

Thanks,
Sai
Sai Prakash Ranjan Sept. 3, 2020, 9:58 a.m. UTC | #4
Hi,

On 2020-08-18 21:07, Sai Prakash Ranjan wrote:
> Hi Doug,
> 
>> 
>> I guess to start, it wasn't obvious (to me) that there were two
>> choices and we were picking one.  Mentioning that the other
>> alternative was way-based allocation would help a lot.  Even if you
>> can't fully explain the differences between the two, adding something
>> to the commit message indicating that this is a policy decision (in
>> other words, both work but each have their tradeoffs) would help.
>> Something like this, if it's correct:
>> 
>> In general we try to enable capacity based allocation (instead of the
>> default way based allocation) since that gives us better performance
>> with the current software / hardware configuration.
>> 
> 
> Thanks, I will add it for next version. Let me also go poke some arch 
> teams
> to understand if we actually do gain something with this selection, who 
> knows
> we might get some additional details as well.
> 

I got some information from the arch team today, to quote them exactly:

1) What benefits does capacity-based allocation bring over the default
way-based allocation?

"Capacity based allows finer grain partition. It is not about improved
performance but more flexibility in configuration."

2) Retain through power collapse, doesn’t it burn more power?

"This feature is similar to the standard feature of retention. Yes, when we
have cache in retention mode it burns more power but it keeps the values so
that when we wake up we can get more cache hits."


If it's good enough, then I will add this info to the commit msg and post
the next version.

Thanks,
Sai
Doug Anderson Sept. 3, 2020, 1:46 p.m. UTC | #5
Hi,

On Thu, Sep 3, 2020 at 2:58 AM Sai Prakash Ranjan
<saiprakash.ranjan@codeaurora.org> wrote:
>
> Hi,
>
> On 2020-08-18 21:07, Sai Prakash Ranjan wrote:
> > Hi Doug,
> >
> >>
> >> I guess to start, it wasn't obvious (to me) that there were two
> >> choices and we were picking one.  Mentioning that the other
> >> alternative was way-based allocation would help a lot.  Even if you
> >> can't fully explain the differences between the two, adding something
> >> to the commit message indicating that this is a policy decision (in
> >> other words, both work but each have their tradeoffs) would help.
> >> Something like this, if it's correct:
> >>
> >> In general we try to enable capacity based allocation (instead of the
> >> default way based allocation) since that gives us better performance
> >> with the current software / hardware configuration.
> >>
> >
> > Thanks, I will add it for next version. Let me also go poke some arch
> > teams
> > to understand if we actually do gain something with this selection, who
> > knows
> > we might get some additional details as well.
> >
>
> I got some information from arch team today, to quote them exactly:
>
> 1) What benefits capacity based allocation brings over the default way
> based allocation?
>
> "Capacity based allows finer grain partition. It is not about improved
> performance but more flexibility in configuration."
>
> 2) Retain through power collapse, doesn’t it burn more power?
>
> "This feature is similar to the standard feature of retention. Yes, when
> we
> have cache in retention mode it burns more power but it keeps the values
> so
> that when we wake up we can get more cache hits."
>
>
> If its good enough, then I will add this info to the commit msg and post
> next version.

Sounds fine to me.  I was mostly looking for a high-level idea of what
was happening here.  I am at least a little curious about the
retention bit.  Is that retention during S3, or during some sort of
Runtime PM?  Any idea how much power is burned?  Unless the power is
minuscule it seems hard to believe that it would be a net win to keep
a cache powered up during S3 unless you're planning on waking up a
lot.

-Doug
Sai Prakash Ranjan Sept. 3, 2020, 3:47 p.m. UTC | #6
On 2020-09-03 19:16, Doug Anderson wrote:
> Hi,
> 
> On Thu, Sep 3, 2020 at 2:58 AM Sai Prakash Ranjan
> <saiprakash.ranjan@codeaurora.org> wrote:
>> 
>> Hi,
>> 
>> On 2020-08-18 21:07, Sai Prakash Ranjan wrote:
>> > Hi Doug,
>> >
>> >>
>> >> I guess to start, it wasn't obvious (to me) that there were two
>> >> choices and we were picking one.  Mentioning that the other
>> >> alternative was way-based allocation would help a lot.  Even if you
>> >> can't fully explain the differences between the two, adding something
>> >> to the commit message indicating that this is a policy decision (in
>> >> other words, both work but each have their tradeoffs) would help.
>> >> Something like this, if it's correct:
>> >>
>> >> In general we try to enable capacity based allocation (instead of the
>> >> default way based allocation) since that gives us better performance
>> >> with the current software / hardware configuration.
>> >>
>> >
>> > Thanks, I will add it for next version. Let me also go poke some arch
>> > teams
>> > to understand if we actually do gain something with this selection, who
>> > knows
>> > we might get some additional details as well.
>> >
>> 
>> I got some information from arch team today, to quote them exactly:
>> 
>> 1) What benefits capacity based allocation brings over the default way
>> based allocation?
>> 
>> "Capacity based allows finer grain partition. It is not about improved
>> performance but more flexibility in configuration."
>> 
>> 2) Retain through power collapse, doesn’t it burn more power?
>> 
>> "This feature is similar to the standard feature of retention. Yes, 
>> when
>> we
>> have cache in retention mode it burns more power but it keeps the 
>> values
>> so
>> that when we wake up we can get more cache hits."
>> 
>> 
>> If its good enough, then I will add this info to the commit msg and 
>> post
>> next version.
> 
> Sounds fine to me.  I was mostly looking for a high level idea of what
> was happening here.  I am at least a little curious about the
> retention bit.  Is that retention during S3, or during some sort of
> Runtime PM?  Any idea how much power is burned?  Unless the power is
> miniscule it seems hard to believe that it would be a net win to keep
> a cache powered up during S3 unless you're planning on waking up a
> lot.
> 

The retention setting is per sub-cache ID (SCID), so I think it's for
runtime PM; the power numbers weren't provided. But I believe these
decisions are made after solid testing and not some random approximations.
Sai
Doug Anderson Sept. 3, 2020, 3:54 p.m. UTC | #7
Hi,

On Thu, Sep 3, 2020 at 8:47 AM Sai Prakash Ranjan
<saiprakash.ranjan@codeaurora.org> wrote:
>
> On 2020-09-03 19:16, Doug Anderson wrote:
> > Hi,
> >
> > On Thu, Sep 3, 2020 at 2:58 AM Sai Prakash Ranjan
> > <saiprakash.ranjan@codeaurora.org> wrote:
> >>
> >> Hi,
> >>
> >> On 2020-08-18 21:07, Sai Prakash Ranjan wrote:
> >> > Hi Doug,
> >> >
> >> >>
> >> >> I guess to start, it wasn't obvious (to me) that there were two
> >> >> choices and we were picking one.  Mentioning that the other
> >> >> alternative was way-based allocation would help a lot.  Even if you
> >> >> can't fully explain the differences between the two, adding something
> >> >> to the commit message indicating that this is a policy decision (in
> >> >> other words, both work but each have their tradeoffs) would help.
> >> >> Something like this, if it's correct:
> >> >>
> >> >> In general we try to enable capacity based allocation (instead of the
> >> >> default way based allocation) since that gives us better performance
> >> >> with the current software / hardware configuration.
> >> >>
> >> >
> >> > Thanks, I will add it for next version. Let me also go poke some arch
> >> > teams
> >> > to understand if we actually do gain something with this selection, who
> >> > knows
> >> > we might get some additional details as well.
> >> >
> >>
> >> I got some information from arch team today, to quote them exactly:
> >>
> >> 1) What benefits capacity based allocation brings over the default way
> >> based allocation?
> >>
> >> "Capacity based allows finer grain partition. It is not about improved
> >> performance but more flexibility in configuration."
> >>
> >> 2) Retain through power collapse, doesn’t it burn more power?
> >>
> >> "This feature is similar to the standard feature of retention. Yes,
> >> when
> >> we
> >> have cache in retention mode it burns more power but it keeps the
> >> values
> >> so
> >> that when we wake up we can get more cache hits."
> >>
> >>
> >> If its good enough, then I will add this info to the commit msg and
> >> post
> >> next version.
> >
> > Sounds fine to me.  I was mostly looking for a high level idea of what
> > was happening here.  I am at least a little curious about the
> > retention bit.  Is that retention during S3, or during some sort of
> > Runtime PM?  Any idea how much power is burned?  Unless the power is
> > miniscule it seems hard to believe that it would be a net win to keep
> > a cache powered up during S3 unless you're planning on waking up a
> > lot.
> >
>
> The retention setting is based on sub cache id(SCID), so I think its for
> runtime pm, the power numbers weren't provided. But I believe these
> decisions are made after solid testing and not some random
> approximations.

Right, I believe it was tested, I just wonder if it was tested on a
phone vs. a laptop.  A phone is almost constantly waking up to deal
with stuff (which is why my phone battery barely lasts till the end of
the day).  Phones also usually have some type of self refresh on their
panels so they can be suspended even when they look awake which means
even more constant wakeups.  A laptop (especially without panel self
refresh) may have very different usage models.  I'm trying to confirm
that this setting is appropriate for both classes of devices or if it
has been only measured / optimized for the cell phone use case.

-Doug
Sai Prakash Ranjan Sept. 3, 2020, 4:04 p.m. UTC | #8
Hi,

On 2020-09-03 21:24, Doug Anderson wrote:
> Hi,
> 
> On Thu, Sep 3, 2020 at 8:47 AM Sai Prakash Ranjan
> <saiprakash.ranjan@codeaurora.org> wrote:
>> 
>> On 2020-09-03 19:16, Doug Anderson wrote:
>> > Hi,
>> >
>> > On Thu, Sep 3, 2020 at 2:58 AM Sai Prakash Ranjan
>> > <saiprakash.ranjan@codeaurora.org> wrote:
>> >>
>> >> Hi,
>> >>
>> >> On 2020-08-18 21:07, Sai Prakash Ranjan wrote:
>> >> > Hi Doug,
>> >> >
>> >> >>
>> >> >> I guess to start, it wasn't obvious (to me) that there were two
>> >> >> choices and we were picking one.  Mentioning that the other
>> >> >> alternative was way-based allocation would help a lot.  Even if you
>> >> >> can't fully explain the differences between the two, adding something
>> >> >> to the commit message indicating that this is a policy decision (in
>> >> >> other words, both work but each have their tradeoffs) would help.
>> >> >> Something like this, if it's correct:
>> >> >>
>> >> >> In general we try to enable capacity based allocation (instead of the
>> >> >> default way based allocation) since that gives us better performance
>> >> >> with the current software / hardware configuration.
>> >> >>
>> >> >
>> >> > Thanks, I will add it for next version. Let me also go poke some arch
>> >> > teams
>> >> > to understand if we actually do gain something with this selection, who
>> >> > knows
>> >> > we might get some additional details as well.
>> >> >
>> >>
>> >> I got some information from arch team today, to quote them exactly:
>> >>
>> >> 1) What benefits capacity based allocation brings over the default way
>> >> based allocation?
>> >>
>> >> "Capacity based allows finer grain partition. It is not about improved
>> >> performance but more flexibility in configuration."
>> >>
>> >> 2) Retain through power collapse, doesn’t it burn more power?
>> >>
>> >> "This feature is similar to the standard feature of retention. Yes,
>> >> when
>> >> we
>> >> have cache in retention mode it burns more power but it keeps the
>> >> values
>> >> so
>> >> that when we wake up we can get more cache hits."
>> >>
>> >>
>> >> If its good enough, then I will add this info to the commit msg and
>> >> post
>> >> next version.
>> >
>> > Sounds fine to me.  I was mostly looking for a high level idea of what
>> > was happening here.  I am at least a little curious about the
>> > retention bit.  Is that retention during S3, or during some sort of
>> > Runtime PM?  Any idea how much power is burned?  Unless the power is
>> > miniscule it seems hard to believe that it would be a net win to keep
>> > a cache powered up during S3 unless you're planning on waking up a
>> > lot.
>> >
>> 
>> The retention setting is based on sub cache id(SCID), so I think its 
>> for
>> runtime pm, the power numbers weren't provided. But I believe these
>> decisions are made after solid testing and not some random
>> approximations.
> 
> Right, I believe it was tested, I just wonder if it was tested on a
> phone vs. a laptop.  A phone is almost constantly waking up to deal
> with stuff (which is why my phone battery barely lasts till the end of
> the day).  Phones also usually have some type of self refresh on their
> panels so they can be suspended even when they look awake which means
> even more constant wakeups.  A laptop (especially without panel self
> refresh) may have very different usage models.  I'm trying to confirm
> that this setting is appropriate for both classes of devices or if it
> has been only measured / optimized for the cell phone use case.
> 

Could be, but there are Windows laptops based on QCOM SoCs where these
must also have been tested (note that this setting can also be in firmware
and no one would know), but I don't have numbers to quantify it.

Thanks,
Sai
Doug Anderson Sept. 3, 2020, 5:38 p.m. UTC | #9
Hi,

On Thu, Sep 3, 2020 at 9:04 AM Sai Prakash Ranjan
<saiprakash.ranjan@codeaurora.org> wrote:
>
> Hi,
>
> On 2020-09-03 21:24, Doug Anderson wrote:
> > Hi,
> >
> > On Thu, Sep 3, 2020 at 8:47 AM Sai Prakash Ranjan
> > <saiprakash.ranjan@codeaurora.org> wrote:
> >>
> >> On 2020-09-03 19:16, Doug Anderson wrote:
> >> > Hi,
> >> >
> >> > On Thu, Sep 3, 2020 at 2:58 AM Sai Prakash Ranjan
> >> > <saiprakash.ranjan@codeaurora.org> wrote:
> >> >>
> >> >> Hi,
> >> >>
> >> >> On 2020-08-18 21:07, Sai Prakash Ranjan wrote:
> >> >> > Hi Doug,
> >> >> >
> >> >> >>
> >> >> >> I guess to start, it wasn't obvious (to me) that there were two
> >> >> >> choices and we were picking one.  Mentioning that the other
> >> >> >> alternative was way-based allocation would help a lot.  Even if you
> >> >> >> can't fully explain the differences between the two, adding something
> >> >> >> to the commit message indicating that this is a policy decision (in
> >> >> >> other words, both work but each have their tradeoffs) would help.
> >> >> >> Something like this, if it's correct:
> >> >> >>
> >> >> >> In general we try to enable capacity based allocation (instead of the
> >> >> >> default way based allocation) since that gives us better performance
> >> >> >> with the current software / hardware configuration.
> >> >> >>
> >> >> >
> >> >> > Thanks, I will add it for next version. Let me also go poke some arch
> >> >> > teams
> >> >> > to understand if we actually do gain something with this selection, who
> >> >> > knows
> >> >> > we might get some additional details as well.
> >> >> >
> >> >>
> >> >> I got some information from arch team today, to quote them exactly:
> >> >>
> >> >> 1) What benefits capacity based allocation brings over the default way
> >> >> based allocation?
> >> >>
> >> >> "Capacity based allows finer grain partition. It is not about improved
> >> >> performance but more flexibility in configuration."
> >> >>
> >> >> 2) Retain through power collapse, doesn’t it burn more power?
> >> >>
> >> >> "This feature is similar to the standard feature of retention. Yes,
> >> >> when
> >> >> we
> >> >> have cache in retention mode it burns more power but it keeps the
> >> >> values
> >> >> so
> >> >> that when we wake up we can get more cache hits."
> >> >>
> >> >>
> >> >> If its good enough, then I will add this info to the commit msg and
> >> >> post
> >> >> next version.
> >> >
> >> > Sounds fine to me.  I was mostly looking for a high level idea of what
> >> > was happening here.  I am at least a little curious about the
> >> > retention bit.  Is that retention during S3, or during some sort of
> >> > Runtime PM?  Any idea how much power is burned?  Unless the power is
> >> > miniscule it seems hard to believe that it would be a net win to keep
> >> > a cache powered up during S3 unless you're planning on waking up a
> >> > lot.
> >> >
> >>
> >> The retention setting is based on sub cache id(SCID), so I think its
> >> for
> >> runtime pm, the power numbers weren't provided. But I believe these
> >> decisions are made after solid testing and not some random
> >> approximations.
> >
> > Right, I believe it was tested, I just wonder if it was tested on a
> > phone vs. a laptop.  A phone is almost constantly waking up to deal
> > with stuff (which is why my phone battery barely lasts till the end of
> > the day).  Phones also usually have some type of self refresh on their
> > panels so they can be suspended even when they look awake which means
> > even more constant wakeups.  A laptop (especially without panel self
> > refresh) may have very different usage models.  I'm trying to confirm
> > that this setting is appropriate for both classes of devices or if it
> > has been only measured / optimized for the cell phone use case.
> >
>
> Could be, but there are windows laptops based on QCOM SoCs where these
> must have also been tested (note that this setting can also be in
> firmware
> and no one would know), but I don't have numbers to quantify.

OK, fair enough.  Thanks for the discussion.  I'm good with a somewhat
broad explanation in the commit message then, and if we find that this
somehow affects power numbers in a bad way we can track it down further.

-Doug
Sai Prakash Ranjan Sept. 3, 2020, 6:11 p.m. UTC | #10
On 2020-09-03 23:08, Doug Anderson wrote:
> Hi,
> 
> On Thu, Sep 3, 2020 at 9:04 AM Sai Prakash Ranjan
> <saiprakash.ranjan@codeaurora.org> wrote:
>> 
>> Hi,
>> 
>> On 2020-09-03 21:24, Doug Anderson wrote:
>> > Hi,
>> >
>> > On Thu, Sep 3, 2020 at 8:47 AM Sai Prakash Ranjan
>> > <saiprakash.ranjan@codeaurora.org> wrote:
>> >>
>> >> On 2020-09-03 19:16, Doug Anderson wrote:
>> >> > Hi,
>> >> >
>> >> > On Thu, Sep 3, 2020 at 2:58 AM Sai Prakash Ranjan
>> >> > <saiprakash.ranjan@codeaurora.org> wrote:
>> >> >>
>> >> >> Hi,
>> >> >>
>> >> >> On 2020-08-18 21:07, Sai Prakash Ranjan wrote:
>> >> >> > Hi Doug,
>> >> >> >
>> >> >> >>
>> >> >> >> I guess to start, it wasn't obvious (to me) that there were two
>> >> >> >> choices and we were picking one.  Mentioning that the other
>> >> >> >> alternative was way-based allocation would help a lot.  Even if you
>> >> >> >> can't fully explain the differences between the two, adding something
>> >> >> >> to the commit message indicating that this is a policy decision (in
>> >> >> >> other words, both work but each have their tradeoffs) would help.
>> >> >> >> Something like this, if it's correct:
>> >> >> >>
>> >> >> >> In general we try to enable capacity based allocation (instead of the
>> >> >> >> default way based allocation) since that gives us better performance
>> >> >> >> with the current software / hardware configuration.
>> >> >> >>
>> >> >> >
>> >> >> > Thanks, I will add it for next version. Let me also go poke some arch
>> >> >> > teams
>> >> >> > to understand if we actually do gain something with this selection, who
>> >> >> > knows
>> >> >> > we might get some additional details as well.
>> >> >> >
>> >> >>
>> >> >> I got some information from arch team today, to quote them exactly:
>> >> >>
>> >> >> 1) What benefits capacity based allocation brings over the default way
>> >> >> based allocation?
>> >> >>
>> >> >> "Capacity based allows finer grain partition. It is not about improved
>> >> >> performance but more flexibility in configuration."
>> >> >>
>> >> >> 2) Retain through power collapse, doesn’t it burn more power?
>> >> >>
>> >> >> "This feature is similar to the standard feature of retention. Yes,
>> >> >> when
>> >> >> we
>> >> >> have cache in retention mode it burns more power but it keeps the
>> >> >> values
>> >> >> so
>> >> >> that when we wake up we can get more cache hits."
>> >> >>
>> >> >>
>> >> >> If its good enough, then I will add this info to the commit msg and
>> >> >> post
>> >> >> next version.
>> >> >
>> >> > Sounds fine to me.  I was mostly looking for a high level idea of what
>> >> > was happening here.  I am at least a little curious about the
>> >> > retention bit.  Is that retention during S3, or during some sort of
>> >> > Runtime PM?  Any idea how much power is burned?  Unless the power is
>> >> > miniscule it seems hard to believe that it would be a net win to keep
>> >> > a cache powered up during S3 unless you're planning on waking up a
>> >> > lot.
>> >> >
>> >>
>> >> The retention setting is based on sub cache id(SCID), so I think its
>> >> for
>> >> runtime pm, the power numbers weren't provided. But I believe these
>> >> decisions are made after solid testing and not some random
>> >> approximations.
>> >
>> > Right, I believe it was tested, I just wonder if it was tested on a
>> > phone vs. a laptop.  A phone is almost constantly waking up to deal
>> > with stuff (which is why my phone battery barely lasts till the end of
>> > the day).  Phones also usually have some type of self refresh on their
>> > panels so they can be suspended even when they look awake which means
>> > even more constant wakeups.  A laptop (especially without panel self
>> > refresh) may have very different usage models.  I'm trying to confirm
>> > that this setting is appropriate for both classes of devices or if it
>> > has been only measured / optimized for the cell phone use case.
>> >
>> 
>> Could be, but there are windows laptops based on QCOM SoCs where these
>> must have also been tested (note that this setting can also be in
>> firmware
>> and no one would know), but I don't have numbers to quantify.
> 
> OK, fair enough.  Thanks for the discussion.  I'm good with a somewhat
> broad explanation in the commit message then and if we find that this
> somehow affects power numbers in a bad way we can track down further.
> 

Thanks, I agree that we should keep an eye out for any fluctuations in the
power numbers.

Thanks,
Sai

Patch

diff --git a/drivers/soc/qcom/llcc-qcom.c b/drivers/soc/qcom/llcc-qcom.c
index 429b5a60a1ba..865f607cf502 100644
--- a/drivers/soc/qcom/llcc-qcom.c
+++ b/drivers/soc/qcom/llcc-qcom.c
@@ -45,6 +45,9 @@ 
 #define LLCC_TRP_ATTR0_CFGn(n)        (0x21000 + SZ_8 * n)
 #define LLCC_TRP_ATTR1_CFGn(n)        (0x21004 + SZ_8 * n)
 
+#define LLCC_TRP_SCID_DIS_CAP_ALLOC   0x21F00
+#define LLCC_TRP_PCB_ACT              0x21F04
+
 #define BANK_OFFSET_STRIDE	      0x80000
 
 /**
@@ -318,6 +321,11 @@  size_t llcc_get_slice_size(struct llcc_slice_desc *desc)
 }
 EXPORT_SYMBOL_GPL(llcc_get_slice_size);
 
+static const struct of_device_id __maybe_unused qcom_llcc_configure_of_match[] = {
+	{ .compatible = "qcom,sc7180-llcc" },
+	{ }
+};
+
 static int qcom_llcc_cfg_program(struct platform_device *pdev)
 {
 	int i;
@@ -327,13 +335,17 @@  static int qcom_llcc_cfg_program(struct platform_device *pdev)
 	u32 attr0_val;
 	u32 max_cap_cacheline;
 	u32 sz;
+	u32 disable_cap_alloc = 0, retain_pc = 0;
 	int ret = 0;
 	const struct llcc_slice_config *llcc_table;
 	struct llcc_slice_desc desc;
+	const struct of_device_id *llcc_configure;
 
 	sz = drv_data->cfg_size;
 	llcc_table = drv_data->cfg;
 
+	llcc_configure = of_match_node(qcom_llcc_configure_of_match, pdev->dev.of_node);
+
 	for (i = 0; i < sz; i++) {
 		attr1_cfg = LLCC_TRP_ATTR1_CFGn(llcc_table[i].slice_id);
 		attr0_cfg = LLCC_TRP_ATTR0_CFGn(llcc_table[i].slice_id);
@@ -369,6 +381,21 @@  static int qcom_llcc_cfg_program(struct platform_device *pdev)
 					attr0_val);
 		if (ret)
 			return ret;
+
+		if (llcc_configure) {
+			disable_cap_alloc |= llcc_table[i].dis_cap_alloc << llcc_table[i].slice_id;
+			ret = regmap_write(drv_data->bcast_regmap,
+						LLCC_TRP_SCID_DIS_CAP_ALLOC, disable_cap_alloc);
+			if (ret)
+				return ret;
+
+			retain_pc |= llcc_table[i].retain_on_pc << llcc_table[i].slice_id;
+			ret = regmap_write(drv_data->bcast_regmap,
+						LLCC_TRP_PCB_ACT, retain_pc);
+			if (ret)
+				return ret;
+		}
+
 		if (llcc_table[i].activate_on_init) {
 			desc.slice_id = llcc_table[i].slice_id;
 			ret = llcc_slice_activate(&desc);