diff mbox series

[v14,5/6] iommu/arm-smmu: add ACTLR data and support for SC7280

Message ID 20240816174259.2056829-6-quic_bibekkum@quicinc.com
State New
Headers show
Series iommu/arm-smmu: introduction of ACTLR implementation for Qualcomm SoCs | expand

Commit Message

Bibek Kumar Patro Aug. 16, 2024, 5:42 p.m. UTC
Add ACTLR data table for SC7280 along with support for
same including SC7280 specific implementation operations.

Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
---
 drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 58 +++++++++++++++++++++-
 1 file changed, 57 insertions(+), 1 deletion(-)

--
2.34.1

Comments

Will Deacon Aug. 23, 2024, 3:59 p.m. UTC | #1
On Fri, Aug 16, 2024 at 11:12:58PM +0530, Bibek Kumar Patro wrote:
> Add ACTLR data table for SC7280 along with support for
> same including SC7280 specific implementation operations.
> 
> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
> ---
>  drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 58 +++++++++++++++++++++-
>  1 file changed, 57 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> index dc143b250704..a776c7906c76 100644
> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> @@ -31,6 +31,55 @@
>  #define PREFETCH_MODERATE	(2 << PREFETCH_SHIFT)
>  #define PREFETCH_DEEP		(3 << PREFETCH_SHIFT)
> 
> +static const struct actlr_config sc7280_apps_actlr_cfg[] = {
> +	{ 0x0800, 0x04e0, PREFETCH_DEFAULT | CMTLB },
> +	{ 0x0900, 0x0402, PREFETCH_SHALLOW | CPRE | CMTLB },
> +	{ 0x0901, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
> +	{ 0x0d01, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
> +	{ 0x1181, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> +	{ 0x1182, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> +	{ 0x1183, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> +	{ 0x1184, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> +	{ 0x1185, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> +	{ 0x1186, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> +	{ 0x1187, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> +	{ 0x1188, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> +	{ 0x1189, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> +	{ 0x118b, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> +	{ 0x118c, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> +	{ 0x118d, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> +	{ 0x118e, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> +	{ 0x118f, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> +	{ 0x2000, 0x0020, PREFETCH_DEFAULT | CMTLB },
> +	{ 0x2040, 0x0000, PREFETCH_DEFAULT | CMTLB },
> +	{ 0x2062, 0x0000, PREFETCH_DEFAULT | CMTLB },
> +	{ 0x2080, 0x0020, PREFETCH_DEFAULT | CMTLB },
> +	{ 0x20c0, 0x0020, PREFETCH_DEFAULT | CMTLB },
> +	{ 0x2100, 0x0020, PREFETCH_DEFAULT | CMTLB },
> +	{ 0x2140, 0x0000, PREFETCH_DEFAULT | CMTLB },
> +	{ 0x2180, 0x0020, PREFETCH_SHALLOW | CPRE | CMTLB },
> +	{ 0x2181, 0x0004, PREFETCH_SHALLOW | CPRE | CMTLB },
> +	{ 0x2183, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
> +	{ 0x2184, 0x0020, PREFETCH_SHALLOW | CPRE | CMTLB },
> +	{ 0x2187, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
> +};
> +
> +static const struct actlr_config sc7280_gfx_actlr_cfg[] = {
> +	{ 0x0000, 0x07ff, PREFETCH_DEEP | CPRE | CMTLB },
> +};

It's Will "stuck record" Deacon here again to say that I don't think
this data belongs in the driver.

Have a great weekend!

Will
Bibek Kumar Patro Aug. 26, 2024, 11:03 a.m. UTC | #2
On 8/23/2024 9:29 PM, Will Deacon wrote:
> On Fri, Aug 16, 2024 at 11:12:58PM +0530, Bibek Kumar Patro wrote:
>> Add ACTLR data table for SC7280 along with support for
>> same including SC7280 specific implementation operations.
>>
>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>> ---
>>   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 58 +++++++++++++++++++++-
>>   1 file changed, 57 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>> index dc143b250704..a776c7906c76 100644
>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>> @@ -31,6 +31,55 @@
>>   #define PREFETCH_MODERATE	(2 << PREFETCH_SHIFT)
>>   #define PREFETCH_DEEP		(3 << PREFETCH_SHIFT)
>>
>> +static const struct actlr_config sc7280_apps_actlr_cfg[] = {
>> +	{ 0x0800, 0x04e0, PREFETCH_DEFAULT | CMTLB },
>> +	{ 0x0900, 0x0402, PREFETCH_SHALLOW | CPRE | CMTLB },
>> +	{ 0x0901, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>> +	{ 0x0d01, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>> +	{ 0x1181, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>> +	{ 0x1182, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>> +	{ 0x1183, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>> +	{ 0x1184, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>> +	{ 0x1185, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>> +	{ 0x1186, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>> +	{ 0x1187, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>> +	{ 0x1188, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>> +	{ 0x1189, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>> +	{ 0x118b, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>> +	{ 0x118c, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>> +	{ 0x118d, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>> +	{ 0x118e, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>> +	{ 0x118f, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>> +	{ 0x2000, 0x0020, PREFETCH_DEFAULT | CMTLB },
>> +	{ 0x2040, 0x0000, PREFETCH_DEFAULT | CMTLB },
>> +	{ 0x2062, 0x0000, PREFETCH_DEFAULT | CMTLB },
>> +	{ 0x2080, 0x0020, PREFETCH_DEFAULT | CMTLB },
>> +	{ 0x20c0, 0x0020, PREFETCH_DEFAULT | CMTLB },
>> +	{ 0x2100, 0x0020, PREFETCH_DEFAULT | CMTLB },
>> +	{ 0x2140, 0x0000, PREFETCH_DEFAULT | CMTLB },
>> +	{ 0x2180, 0x0020, PREFETCH_SHALLOW | CPRE | CMTLB },
>> +	{ 0x2181, 0x0004, PREFETCH_SHALLOW | CPRE | CMTLB },
>> +	{ 0x2183, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>> +	{ 0x2184, 0x0020, PREFETCH_SHALLOW | CPRE | CMTLB },
>> +	{ 0x2187, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>> +};
>> +
>> +static const struct actlr_config sc7280_gfx_actlr_cfg[] = {
>> +	{ 0x0000, 0x07ff, PREFETCH_DEEP | CPRE | CMTLB },
>> +};
> 
> It's Will "stuck record" Deacon here again to say that I don't think
> this data belongs in the driver.
> 

Hi Will,

It will be difficult to reach a consensus here, with Robin and the DT 
folks okay to keep it in the driver, while you believe it doesn't belong 
there.

Robin, Rob, could you please share your thoughts on concluding the 
placement of this prefetch data?

As discussed earlier [1], the prefetch value for each client doesn’t 
define the hardware topology and is implementation-defined register 
writes used by the software driver.

We're okay with either approach, but these points [2] also raised in the
RFC led us to believe that switching from DT to the driver is the
right approach.


[1]:https://lore.kernel.org/all/2b0d8c5b-7e79-41ff-bc57-003d1b947c16@quicinc.com/
[2]:https://lore.kernel.org/all/a01e7e60-6ead-4a9e-ba90-22a8a6bbd03f@quicinc.com/


> Have a great weekend!
> 
> Will
Will Deacon Aug. 27, 2024, 12:47 p.m. UTC | #3
On Mon, Aug 26, 2024 at 04:33:24PM +0530, Bibek Kumar Patro wrote:
> 
> 
> On 8/23/2024 9:29 PM, Will Deacon wrote:
> > On Fri, Aug 16, 2024 at 11:12:58PM +0530, Bibek Kumar Patro wrote:
> > > Add ACTLR data table for SC7280 along with support for
> > > same including SC7280 specific implementation operations.
> > > 
> > > Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
> > > ---
> > >   drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 58 +++++++++++++++++++++-
> > >   1 file changed, 57 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> > > index dc143b250704..a776c7906c76 100644
> > > --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> > > +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> > > @@ -31,6 +31,55 @@
> > >   #define PREFETCH_MODERATE	(2 << PREFETCH_SHIFT)
> > >   #define PREFETCH_DEEP		(3 << PREFETCH_SHIFT)
> > > 
> > > +static const struct actlr_config sc7280_apps_actlr_cfg[] = {
> > > +	{ 0x0800, 0x04e0, PREFETCH_DEFAULT | CMTLB },
> > > +	{ 0x0900, 0x0402, PREFETCH_SHALLOW | CPRE | CMTLB },
> > > +	{ 0x0901, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
> > > +	{ 0x0d01, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
> > > +	{ 0x1181, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> > > +	{ 0x1182, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> > > +	{ 0x1183, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> > > +	{ 0x1184, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> > > +	{ 0x1185, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> > > +	{ 0x1186, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> > > +	{ 0x1187, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> > > +	{ 0x1188, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> > > +	{ 0x1189, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> > > +	{ 0x118b, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> > > +	{ 0x118c, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> > > +	{ 0x118d, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> > > +	{ 0x118e, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> > > +	{ 0x118f, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> > > +	{ 0x2000, 0x0020, PREFETCH_DEFAULT | CMTLB },
> > > +	{ 0x2040, 0x0000, PREFETCH_DEFAULT | CMTLB },
> > > +	{ 0x2062, 0x0000, PREFETCH_DEFAULT | CMTLB },
> > > +	{ 0x2080, 0x0020, PREFETCH_DEFAULT | CMTLB },
> > > +	{ 0x20c0, 0x0020, PREFETCH_DEFAULT | CMTLB },
> > > +	{ 0x2100, 0x0020, PREFETCH_DEFAULT | CMTLB },
> > > +	{ 0x2140, 0x0000, PREFETCH_DEFAULT | CMTLB },
> > > +	{ 0x2180, 0x0020, PREFETCH_SHALLOW | CPRE | CMTLB },
> > > +	{ 0x2181, 0x0004, PREFETCH_SHALLOW | CPRE | CMTLB },
> > > +	{ 0x2183, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
> > > +	{ 0x2184, 0x0020, PREFETCH_SHALLOW | CPRE | CMTLB },
> > > +	{ 0x2187, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
> > > +};
> > > +
> > > +static const struct actlr_config sc7280_gfx_actlr_cfg[] = {
> > > +	{ 0x0000, 0x07ff, PREFETCH_DEEP | CPRE | CMTLB },
> > > +};
> > 
> > It's Will "stuck record" Deacon here again to say that I don't think
> > this data belongs in the driver.
> > 
> 
> Hi Will,
> 
> It will be difficult to reach a consensus here, with Robin and the DT folks
> okay to keep it in the driver, while you believe it doesn't belong there.
> 
> Robin, Rob, could you please share your thoughts on concluding the placement
> of this prefetch data?
> 
> As discussed earlier [1], the prefetch value for each client doesn’t define
> the hardware topology and is implementation-defined register writes used by
> the software driver.

It does reflect the hardware topology though, doesn't it? Those magic hex
masks above refer to stream ids, so the table is hard-coding the prefetch
values for particular matches. If I run on a different SoC configuration
with the same table, then the prefetch settings will be applied to the
wrong devices. How is that not hardware topology?

WIll
Bibek Kumar Patro Aug. 30, 2024, 10 a.m. UTC | #4
On 8/27/2024 6:17 PM, Will Deacon wrote:
> On Mon, Aug 26, 2024 at 04:33:24PM +0530, Bibek Kumar Patro wrote:
>>
>>
>> On 8/23/2024 9:29 PM, Will Deacon wrote:
>>> On Fri, Aug 16, 2024 at 11:12:58PM +0530, Bibek Kumar Patro wrote:
>>>> Add ACTLR data table for SC7280 along with support for
>>>> same including SC7280 specific implementation operations.
>>>>
>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>>>> ---
>>>>    drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 58 +++++++++++++++++++++-
>>>>    1 file changed, 57 insertions(+), 1 deletion(-)
>>>>
>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>> index dc143b250704..a776c7906c76 100644
>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>> @@ -31,6 +31,55 @@
>>>>    #define PREFETCH_MODERATE	(2 << PREFETCH_SHIFT)
>>>>    #define PREFETCH_DEEP		(3 << PREFETCH_SHIFT)
>>>>
>>>> +static const struct actlr_config sc7280_apps_actlr_cfg[] = {
>>>> +	{ 0x0800, 0x04e0, PREFETCH_DEFAULT | CMTLB },
>>>> +	{ 0x0900, 0x0402, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>> +	{ 0x0901, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>> +	{ 0x0d01, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>> +	{ 0x1181, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +	{ 0x1182, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +	{ 0x1183, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +	{ 0x1184, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +	{ 0x1185, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +	{ 0x1186, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +	{ 0x1187, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +	{ 0x1188, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +	{ 0x1189, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +	{ 0x118b, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +	{ 0x118c, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +	{ 0x118d, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +	{ 0x118e, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +	{ 0x118f, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +	{ 0x2000, 0x0020, PREFETCH_DEFAULT | CMTLB },
>>>> +	{ 0x2040, 0x0000, PREFETCH_DEFAULT | CMTLB },
>>>> +	{ 0x2062, 0x0000, PREFETCH_DEFAULT | CMTLB },
>>>> +	{ 0x2080, 0x0020, PREFETCH_DEFAULT | CMTLB },
>>>> +	{ 0x20c0, 0x0020, PREFETCH_DEFAULT | CMTLB },
>>>> +	{ 0x2100, 0x0020, PREFETCH_DEFAULT | CMTLB },
>>>> +	{ 0x2140, 0x0000, PREFETCH_DEFAULT | CMTLB },
>>>> +	{ 0x2180, 0x0020, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>> +	{ 0x2181, 0x0004, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>> +	{ 0x2183, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>> +	{ 0x2184, 0x0020, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>> +	{ 0x2187, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>> +};
>>>> +
>>>> +static const struct actlr_config sc7280_gfx_actlr_cfg[] = {
>>>> +	{ 0x0000, 0x07ff, PREFETCH_DEEP | CPRE | CMTLB },
>>>> +};
>>>
>>> It's Will "stuck record" Deacon here again to say that I don't think
>>> this data belongs in the driver.
>>>
>>
>> Hi Will,
>>
>> It will be difficult to reach a consensus here, with Robin and the DT folks
>> okay to keep it in the driver, while you believe it doesn't belong there.
>>
>> Robin, Rob, could you please share your thoughts on concluding the placement
>> of this prefetch data?
>>
>> As discussed earlier [1], the prefetch value for each client doesn’t define
>> the hardware topology and is implementation-defined register writes used by
>> the software driver.
> 
> It does reflect the hardware topology though, doesn't it? Those magic hex
> masks above refer to stream ids, so the table is hard-coding the prefetch
> values for particular matches.

That is correct in the sense that stream id is mapped to context bank
where these configurations are applied.
However the other part of it is implementation-defined register/values
for which community opinion was register/value kind of data, should not
belong to device tree and are not generally approved of.

Would also like to point out that the prefetch values are recommended
settings and doesn’t mean these are the only configuration which would
work for the soc.
So the SID-to-prefetch isn't strictly SoC defined but is a software
configuration, IMO.

> If I run on a different SoC configuration > with the same table, then the prefetch settings will be applied to the
> wrong devices. How is that not hardware topology?
> 

The configuration table is tied to SoC compatible string however as I
mentioned above, its basically a s/w recommended setting.
(using prefetch settings other than the recommended values e.g 
PREFECH_DEFAULT instead of PREFETCH_DEEP would not render the device 
unusable unlike changing stream-ids which can make it unusable).

Since it is implementation specific we cannot have a generic DT binding,
tying stream ids to these recommended settings.
Even with qcom specific binding due to dependency on implementation, not
sure if we would be able to maintain consistency.

So from maintenance perspective carrying these in driver appear to be
simpler/flexible. And if it doesn’t violate existing precedence, we
would prefer to carry it that way.

This parallels how _"QoS settings"_ are handled within the driver 
(similar to this example [1]).

[1]. 
https://lore.kernel.org/linux-arm-msm/20231030-sc8280xp-dpu-safe-lut-v1-1-6d485d7b428f@quicinc.com/#t

Thanks & regards,
Bibek

> WIll
Robin Murphy Aug. 30, 2024, 12:31 p.m. UTC | #5
On 30/08/2024 11:00 am, Bibek Kumar Patro wrote:
> 
> 
> On 8/27/2024 6:17 PM, Will Deacon wrote:
>> On Mon, Aug 26, 2024 at 04:33:24PM +0530, Bibek Kumar Patro wrote:
>>>
>>>
>>> On 8/23/2024 9:29 PM, Will Deacon wrote:
>>>> On Fri, Aug 16, 2024 at 11:12:58PM +0530, Bibek Kumar Patro wrote:
>>>>> Add ACTLR data table for SC7280 along with support for
>>>>> same including SC7280 specific implementation operations.
>>>>>
>>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>>>>> ---
>>>>>    drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 58 
>>>>> +++++++++++++++++++++-
>>>>>    1 file changed, 57 insertions(+), 1 deletion(-)
>>>>>
>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c 
>>>>> b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>> index dc143b250704..a776c7906c76 100644
>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>> @@ -31,6 +31,55 @@
>>>>>    #define PREFETCH_MODERATE    (2 << PREFETCH_SHIFT)
>>>>>    #define PREFETCH_DEEP        (3 << PREFETCH_SHIFT)
>>>>>
>>>>> +static const struct actlr_config sc7280_apps_actlr_cfg[] = {
>>>>> +    { 0x0800, 0x04e0, PREFETCH_DEFAULT | CMTLB },
>>>>> +    { 0x0900, 0x0402, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>>> +    { 0x0901, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>>> +    { 0x0d01, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>>> +    { 0x1181, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>> +    { 0x1182, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>> +    { 0x1183, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>> +    { 0x1184, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>> +    { 0x1185, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>> +    { 0x1186, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>> +    { 0x1187, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>> +    { 0x1188, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>> +    { 0x1189, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>> +    { 0x118b, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>> +    { 0x118c, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>> +    { 0x118d, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>> +    { 0x118e, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>> +    { 0x118f, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>> +    { 0x2000, 0x0020, PREFETCH_DEFAULT | CMTLB },
>>>>> +    { 0x2040, 0x0000, PREFETCH_DEFAULT | CMTLB },
>>>>> +    { 0x2062, 0x0000, PREFETCH_DEFAULT | CMTLB },
>>>>> +    { 0x2080, 0x0020, PREFETCH_DEFAULT | CMTLB },
>>>>> +    { 0x20c0, 0x0020, PREFETCH_DEFAULT | CMTLB },
>>>>> +    { 0x2100, 0x0020, PREFETCH_DEFAULT | CMTLB },
>>>>> +    { 0x2140, 0x0000, PREFETCH_DEFAULT | CMTLB },
>>>>> +    { 0x2180, 0x0020, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>>> +    { 0x2181, 0x0004, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>>> +    { 0x2183, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>>> +    { 0x2184, 0x0020, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>>> +    { 0x2187, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>>> +};
>>>>> +
>>>>> +static const struct actlr_config sc7280_gfx_actlr_cfg[] = {
>>>>> +    { 0x0000, 0x07ff, PREFETCH_DEEP | CPRE | CMTLB },
>>>>> +};
>>>>
>>>> It's Will "stuck record" Deacon here again to say that I don't think
>>>> this data belongs in the driver.
>>>>
>>>
>>> Hi Will,
>>>
>>> It will be difficult to reach a consensus here, with Robin and the DT 
>>> folks
>>> okay to keep it in the driver, while you believe it doesn't belong 
>>> there.
>>>
>>> Robin, Rob, could you please share your thoughts on concluding the 
>>> placement
>>> of this prefetch data?
>>>
>>> As discussed earlier [1], the prefetch value for each client doesn’t 
>>> define
>>> the hardware topology and is implementation-defined register writes 
>>> used by
>>> the software driver.
>>
>> It does reflect the hardware topology though, doesn't it? Those magic hex
>> masks above refer to stream ids, so the table is hard-coding the prefetch
>> values for particular matches.
> 
> That is correct in the sense that stream id is mapped to context bank
> where these configurations are applied.
> However the other part of it is implementation-defined register/values
> for which community opinion was register/value kind of data, should not
> belong to device tree and are not generally approved of.
> 
> Would also like to point out that the prefetch values are recommended
> settings and doesn’t mean these are the only configuration which would
> work for the soc.
> So the SID-to-prefetch isn't strictly SoC defined but is a software
> configuration, IMO.

What's particularly confusing is that most of the IDs encoded here don't 
actually seem to line up with what's in the respective SoC DTSIs...

However by this point I'm wary of whether we've lost sight of *why* 
we're doing this, and that we're deep into begging the question of 
whether identifying devices by StreamID is the right thing to do in the 
first place. For example, as best I can tell from a quick skim, we have 
over 2 dozen lines of data here which all serve the exact same purpose 
of applying PREFETCH_DEEP | CPRE | CMTLB to instances of 
"qcom,fastrpc-compute-cb". In general it seems unlikely that the same 
device would want wildly different prefetch settings across different 
SoCs, or even between different instances in the same SoC, so I'm really 
coming round to the conclusion that this data would probably be best 
handled as an extension of the existing qcom_smmu_client_of_match mechanism.

Thanks,
Robin.

> 
>> If I run on a different SoC configuration > with the same table, then 
>> the prefetch settings will be applied to the
>> wrong devices. How is that not hardware topology?
>>
> 
> The configuration table is tied to SoC compatible string however as I
> mentioned above, its basically a s/w recommended setting.
> (using prefetch settings other than the recommended values e.g 
> PREFECH_DEFAULT instead of PREFETCH_DEEP would not render the device 
> unusable unlike changing stream-ids which can make it unusable).
> 
> Since it is implementation specific we cannot have a generic DT binding,
> tying stream ids to these recommended settings.
> Even with qcom specific binding due to dependency on implementation, not
> sure if we would be able to maintain consistency.
> 
> So from maintenance perspective carrying these in driver appear to be
> simpler/flexible. And if it doesn’t violate existing precedence, we
> would prefer to carry it that way.
> 
> This parallels how _"QoS settings"_ are handled within the driver 
> (similar to this example [1]).
> 
> [1]. 
> https://lore.kernel.org/linux-arm-msm/20231030-sc8280xp-dpu-safe-lut-v1-1-6d485d7b428f@quicinc.com/#t
> 
> Thanks & regards,
> Bibek
> 
>> WIll
Bibek Kumar Patro Sept. 3, 2024, 12:59 p.m. UTC | #6
On 8/30/2024 6:01 PM, Robin Murphy wrote:
> On 30/08/2024 11:00 am, Bibek Kumar Patro wrote:
>>
>>
>> On 8/27/2024 6:17 PM, Will Deacon wrote:
>>> On Mon, Aug 26, 2024 at 04:33:24PM +0530, Bibek Kumar Patro wrote:
>>>>
>>>>
>>>> On 8/23/2024 9:29 PM, Will Deacon wrote:
>>>>> On Fri, Aug 16, 2024 at 11:12:58PM +0530, Bibek Kumar Patro wrote:
>>>>>> Add ACTLR data table for SC7280 along with support for
>>>>>> same including SC7280 specific implementation operations.
>>>>>>
>>>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>>>>>> ---
>>>>>>    drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 58 
>>>>>> +++++++++++++++++++++-
>>>>>>    1 file changed, 57 insertions(+), 1 deletion(-)
>>>>>>
>>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c 
>>>>>> b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>> index dc143b250704..a776c7906c76 100644
>>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>> @@ -31,6 +31,55 @@
>>>>>>    #define PREFETCH_MODERATE    (2 << PREFETCH_SHIFT)
>>>>>>    #define PREFETCH_DEEP        (3 << PREFETCH_SHIFT)
>>>>>>
>>>>>> +static const struct actlr_config sc7280_apps_actlr_cfg[] = {
>>>>>> +    { 0x0800, 0x04e0, PREFETCH_DEFAULT | CMTLB },
>>>>>> +    { 0x0900, 0x0402, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>>>> +    { 0x0901, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>>>> +    { 0x0d01, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>>>> +    { 0x1181, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>>> +    { 0x1182, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>>> +    { 0x1183, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>>> +    { 0x1184, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>>> +    { 0x1185, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>>> +    { 0x1186, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>>> +    { 0x1187, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>>> +    { 0x1188, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>>> +    { 0x1189, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>>> +    { 0x118b, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>>> +    { 0x118c, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>>> +    { 0x118d, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>>> +    { 0x118e, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>>> +    { 0x118f, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>>> +    { 0x2000, 0x0020, PREFETCH_DEFAULT | CMTLB },
>>>>>> +    { 0x2040, 0x0000, PREFETCH_DEFAULT | CMTLB },
>>>>>> +    { 0x2062, 0x0000, PREFETCH_DEFAULT | CMTLB },
>>>>>> +    { 0x2080, 0x0020, PREFETCH_DEFAULT | CMTLB },
>>>>>> +    { 0x20c0, 0x0020, PREFETCH_DEFAULT | CMTLB },
>>>>>> +    { 0x2100, 0x0020, PREFETCH_DEFAULT | CMTLB },
>>>>>> +    { 0x2140, 0x0000, PREFETCH_DEFAULT | CMTLB },
>>>>>> +    { 0x2180, 0x0020, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>>>> +    { 0x2181, 0x0004, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>>>> +    { 0x2183, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>>>> +    { 0x2184, 0x0020, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>>>> +    { 0x2187, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>>>> +};
>>>>>> +
>>>>>> +static const struct actlr_config sc7280_gfx_actlr_cfg[] = {
>>>>>> +    { 0x0000, 0x07ff, PREFETCH_DEEP | CPRE | CMTLB },
>>>>>> +};
>>>>>
>>>>> It's Will "stuck record" Deacon here again to say that I don't think
>>>>> this data belongs in the driver.
>>>>>
>>>>
>>>> Hi Will,
>>>>
>>>> It will be difficult to reach a consensus here, with Robin and the 
>>>> DT folks
>>>> okay to keep it in the driver, while you believe it doesn't belong 
>>>> there.
>>>>
>>>> Robin, Rob, could you please share your thoughts on concluding the 
>>>> placement
>>>> of this prefetch data?
>>>>
>>>> As discussed earlier [1], the prefetch value for each client doesn’t 
>>>> define
>>>> the hardware topology and is implementation-defined register writes 
>>>> used by
>>>> the software driver.
>>>
>>> It does reflect the hardware topology though, doesn't it? Those magic 
>>> hex
>>> masks above refer to stream ids, so the table is hard-coding the 
>>> prefetch
>>> values for particular matches.
>>
>> That is correct in the sense that stream id is mapped to context bank
>> where these configurations are applied.
>> However the other part of it is implementation-defined register/values
>> for which community opinion was register/value kind of data, should not
>> belong to device tree and are not generally approved of.
>>
>> Would also like to point out that the prefetch values are recommended
>> settings and doesn’t mean these are the only configuration which would
>> work for the soc.
>> So the SID-to-prefetch isn't strictly SoC defined but is a software
>> configuration, IMO.
> 
> What's particularly confusing is that most of the IDs encoded here don't 
> actually seem to line up with what's in the respective SoC DTSIs...
> 
> However by this point I'm wary of whether we've lost sight of *why* 
> we're doing this, and that we're deep into begging the question of 
> whether identifying devices by StreamID is the right thing to do in the 
> first place. For example, as best I can tell from a quick skim, we have 
> over 2 dozen lines of data here which all serve the exact same purpose 
> of applying PREFETCH_DEEP | CPRE | CMTLB to instances of 
> "qcom,fastrpc-compute-cb". In general it seems unlikely that the same 
> device would want wildly different prefetch settings across different 
> SoCs, or even between different instances in the same SoC, so I'm really 
> coming round to the conclusion that this data would probably be best 
> handled as an extension of the existing qcom_smmu_client_of_match 
> mechanism.
> 

As per your design idea,do you mean to use qcom_smmu_client_of_match to 
identify the device using compatible string and apply the device 
specific settings for all the SoCs (instead of StreamID based device 
identification) ?

something like this rough snippet(?):

qcom_smmu_find_actlr_client(struct device *dev)
{

	if (of_match_device(qcom_smmu_client_of_match, dev) == 
qcom,fastrpc-compute-cb )
		qcom_smmu_set_actlr_value(dev, (PREFETCH_DEEP | CPRE | CMTLB)); 
/*where (PREFETCH_DEEP | CPRE | CMTLB) is used for compute-cb client.*/

	else if (of_match_device(qcom_smmu_client_of_match, dev) == qcom,adreno )
		qcom_smmu_set_actlr_value(dev, (PREFETCH_SHALLOW | CPRE | CMTLB)); 
/*Where (PREFETCH_SHALLOW | CPRE | CMTLB) is for adreno client. */

}

Let me know if my understanding is incorrect.
Then in this case if different SoC would have a different settings for 
same device, then everytime a new compatible would be necessary for same 
device on different SoC?

On similar lines there is another TBU based approach which I can think 
of. Identify the TBU -> Identify clients from TopoID derived from SID 
range specified in qcom,stream-id-range -> Apply the client
specific settings ?

Both approaches would be driver-based, as they are now.

Also I'd like to point out that in the current design, since we fixed 
the smr_is_subset arguments to make the stream IDs a subset of entries 
in the actlr_cfg table, we can reduce the number of entries in the 
table. This way, fewer SID-mask pairs can accommodate several stream IDs.

Thanks & regards,
Bibek

> Thanks,
> Robin.
> 
>>
>>> If I run on a different SoC configuration > with the same table, then 
>>> the prefetch settings will be applied to the
>>> wrong devices. How is that not hardware topology?
>>>
>>
>> The configuration table is tied to SoC compatible string however as I
>> mentioned above, its basically a s/w recommended setting.
>> (using prefetch settings other than the recommended values e.g 
>> PREFECH_DEFAULT instead of PREFETCH_DEEP would not render the device 
>> unusable unlike changing stream-ids which can make it unusable).
>>
>> Since it is implementation specific we cannot have a generic DT binding,
>> tying stream ids to these recommended settings.
>> Even with qcom specific binding due to dependency on implementation, not
>> sure if we would be able to maintain consistency.
>>
>> So from maintenance perspective carrying these in driver appear to be
>> simpler/flexible. And if it doesn’t violate existing precedence, we
>> would prefer to carry it that way.
>>
>> This parallels how _"QoS settings"_ are handled within the driver 
>> (similar to this example [1]).
>>
>> [1]. 
>> https://lore.kernel.org/linux-arm-msm/20231030-sc8280xp-dpu-safe-lut-v1-1-6d485d7b428f@quicinc.com/#t
>>
>> Thanks & regards,
>> Bibek
>>
>>> WIll
Dmitry Baryshkov Sept. 3, 2024, 1:13 p.m. UTC | #7
On Tue, 3 Sept 2024 at 15:59, Bibek Kumar Patro
<quic_bibekkum@quicinc.com> wrote:
>
>
>
> On 8/30/2024 6:01 PM, Robin Murphy wrote:
> > On 30/08/2024 11:00 am, Bibek Kumar Patro wrote:
> >>
> >>
> >> On 8/27/2024 6:17 PM, Will Deacon wrote:
> >>> On Mon, Aug 26, 2024 at 04:33:24PM +0530, Bibek Kumar Patro wrote:
> >>>>
> >>>>
> >>>> On 8/23/2024 9:29 PM, Will Deacon wrote:
> >>>>> On Fri, Aug 16, 2024 at 11:12:58PM +0530, Bibek Kumar Patro wrote:
> >>>>>> Add ACTLR data table for SC7280 along with support for
> >>>>>> same including SC7280 specific implementation operations.
> >>>>>>
> >>>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
> >>>>>> ---
> >>>>>>    drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 58
> >>>>>> +++++++++++++++++++++-
> >>>>>>    1 file changed, 57 insertions(+), 1 deletion(-)
> >>>>>>
> >>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>>>>> b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>>>>> index dc143b250704..a776c7906c76 100644
> >>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
> >>>>>> @@ -31,6 +31,55 @@
> >>>>>>    #define PREFETCH_MODERATE    (2 << PREFETCH_SHIFT)
> >>>>>>    #define PREFETCH_DEEP        (3 << PREFETCH_SHIFT)
> >>>>>>
> >>>>>> +static const struct actlr_config sc7280_apps_actlr_cfg[] = {
> >>>>>> +    { 0x0800, 0x04e0, PREFETCH_DEFAULT | CMTLB },
> >>>>>> +    { 0x0900, 0x0402, PREFETCH_SHALLOW | CPRE | CMTLB },
> >>>>>> +    { 0x0901, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
> >>>>>> +    { 0x0d01, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
> >>>>>> +    { 0x1181, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> >>>>>> +    { 0x1182, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> >>>>>> +    { 0x1183, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> >>>>>> +    { 0x1184, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> >>>>>> +    { 0x1185, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> >>>>>> +    { 0x1186, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> >>>>>> +    { 0x1187, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> >>>>>> +    { 0x1188, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> >>>>>> +    { 0x1189, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> >>>>>> +    { 0x118b, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> >>>>>> +    { 0x118c, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> >>>>>> +    { 0x118d, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> >>>>>> +    { 0x118e, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> >>>>>> +    { 0x118f, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
> >>>>>> +    { 0x2000, 0x0020, PREFETCH_DEFAULT | CMTLB },
> >>>>>> +    { 0x2040, 0x0000, PREFETCH_DEFAULT | CMTLB },
> >>>>>> +    { 0x2062, 0x0000, PREFETCH_DEFAULT | CMTLB },
> >>>>>> +    { 0x2080, 0x0020, PREFETCH_DEFAULT | CMTLB },
> >>>>>> +    { 0x20c0, 0x0020, PREFETCH_DEFAULT | CMTLB },
> >>>>>> +    { 0x2100, 0x0020, PREFETCH_DEFAULT | CMTLB },
> >>>>>> +    { 0x2140, 0x0000, PREFETCH_DEFAULT | CMTLB },
> >>>>>> +    { 0x2180, 0x0020, PREFETCH_SHALLOW | CPRE | CMTLB },
> >>>>>> +    { 0x2181, 0x0004, PREFETCH_SHALLOW | CPRE | CMTLB },
> >>>>>> +    { 0x2183, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
> >>>>>> +    { 0x2184, 0x0020, PREFETCH_SHALLOW | CPRE | CMTLB },
> >>>>>> +    { 0x2187, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
> >>>>>> +};
> >>>>>> +
> >>>>>> +static const struct actlr_config sc7280_gfx_actlr_cfg[] = {
> >>>>>> +    { 0x0000, 0x07ff, PREFETCH_DEEP | CPRE | CMTLB },
> >>>>>> +};
> >>>>>
> >>>>> It's Will "stuck record" Deacon here again to say that I don't think
> >>>>> this data belongs in the driver.
> >>>>>
> >>>>
> >>>> Hi Will,
> >>>>
> >>>> It will be difficult to reach a consensus here, with Robin and the
> >>>> DT folks
> >>>> okay to keep it in the driver, while you believe it doesn't belong
> >>>> there.
> >>>>
> >>>> Robin, Rob, could you please share your thoughts on concluding the
> >>>> placement
> >>>> of this prefetch data?
> >>>>
> >>>> As discussed earlier [1], the prefetch value for each client doesn’t
> >>>> define
> >>>> the hardware topology and is implementation-defined register writes
> >>>> used by
> >>>> the software driver.
> >>>
> >>> It does reflect the hardware topology though, doesn't it? Those magic
> >>> hex
> >>> masks above refer to stream ids, so the table is hard-coding the
> >>> prefetch
> >>> values for particular matches.
> >>
> >> That is correct in the sense that stream id is mapped to context bank
> >> where these configurations are applied.
> >> However the other part of it is implementation-defined register/values
> >> for which community opinion was register/value kind of data, should not
> >> belong to device tree and are not generally approved of.
> >>
> >> Would also like to point out that the prefetch values are recommended
> >> settings and doesn’t mean these are the only configuration which would
> >> work for the soc.
> >> So the SID-to-prefetch isn't strictly SoC defined but is a software
> >> configuration, IMO.
> >
> > What's particularly confusing is that most of the IDs encoded here don't
> > actually seem to line up with what's in the respective SoC DTSIs...
> >
> > However by this point I'm wary of whether we've lost sight of *why*
> > we're doing this, and that we're deep into begging the question of
> > whether identifying devices by StreamID is the right thing to do in the
> > first place. For example, as best I can tell from a quick skim, we have
> > over 2 dozen lines of data here which all serve the exact same purpose
> > of applying PREFETCH_DEEP | CPRE | CMTLB to instances of
> > "qcom,fastrpc-compute-cb". In general it seems unlikely that the same
> > device would want wildly different prefetch settings across different
> > SoCs, or even between different instances in the same SoC, so I'm really
> > coming round to the conclusion that this data would probably be best
> > handled as an extension of the existing qcom_smmu_client_of_match
> > mechanism.
> >
>
> As per your design idea,do you mean to use qcom_smmu_client_of_match to
> identify the device using compatible string and apply the device
> specific settings for all the SoCs (instead of StreamID based device
> identification) ?
>
> something like this rough snippet(?):
>
> qcom_smmu_find_actlr_client(struct device *dev)
> {
>
>         if (of_match_device(qcom_smmu_client_of_match, dev) ==
> qcom,fastrpc-compute-cb )
>                 qcom_smmu_set_actlr_value(dev, (PREFETCH_DEEP | CPRE | CMTLB));
> /*where (PREFETCH_DEEP | CPRE | CMTLB) is used for compute-cb client.*/
>
>         else if (of_match_device(qcom_smmu_client_of_match, dev) == qcom,adreno )
>                 qcom_smmu_set_actlr_value(dev, (PREFETCH_SHALLOW | CPRE | CMTLB));
> /*Where (PREFETCH_SHALLOW | CPRE | CMTLB) is for adreno client. */

I like this idea, especially once it gets converted into a per-SoC
table of compatibles.

>
> }
>
> Let me know if my understanding is incorrect.
> Then in this case if different SoC would have a different settings for
> same device, then everytime a new compatible would be necessary for same
> device on different SoC?
>
> On similar lines there is another TBU based approach which I can think
> of. Identify the TBU -> Identify clients from TopoID derived from SID
> range specified in qcom,stream-id-range -> Apply the client
> specific settings ?
>
> Both approaches would be driver-based, as they are now.
>
> Also I'd like to point out that in the current design, since we fixed
> the smr_is_subset arguments to make the stream IDs a subset of entries
> in the actlr_cfg table, we can reduce the number of entries in the
> table. This way, fewer SID-mask pairs can accommodate several stream IDs.
>
> Thanks & regards,
> Bibek
>
> > Thanks,
> > Robin.
> >
> >>
> >>> If I run on a different SoC configuration > with the same table, then
> >>> the prefetch settings will be applied to the
> >>> wrong devices. How is that not hardware topology?
> >>>
> >>
> >> The configuration table is tied to SoC compatible string however as I
> >> mentioned above, its basically a s/w recommended setting.
> >> (using prefetch settings other than the recommended values e.g
> >> PREFECH_DEFAULT instead of PREFETCH_DEEP would not render the device
> >> unusable unlike changing stream-ids which can make it unusable).
> >>
> >> Since it is implementation specific we cannot have a generic DT binding,
> >> tying stream ids to these recommended settings.
> >> Even with qcom specific binding due to dependency on implementation, not
> >> sure if we would be able to maintain consistency.
> >>
> >> So from maintenance perspective carrying these in driver appear to be
> >> simpler/flexible. And if it doesn’t violate existing precedence, we
> >> would prefer to carry it that way.
> >>
> >> This parallels how _"QoS settings"_ are handled within the driver
> >> (similar to this example [1]).
> >>
> >> [1].
> >> https://lore.kernel.org/linux-arm-msm/20231030-sc8280xp-dpu-safe-lut-v1-1-6d485d7b428f@quicinc.com/#t
> >>
> >> Thanks & regards,
> >> Bibek
> >>
> >>> WIll
Bibek Kumar Patro Sept. 20, 2024, 7:58 p.m. UTC | #8
On 9/3/2024 6:43 PM, Dmitry Baryshkov wrote:
> On Tue, 3 Sept 2024 at 15:59, Bibek Kumar Patro
> <quic_bibekkum@quicinc.com> wrote:
>>
>>
>>
>> On 8/30/2024 6:01 PM, Robin Murphy wrote:
>>> On 30/08/2024 11:00 am, Bibek Kumar Patro wrote:
>>>>
>>>>
>>>> On 8/27/2024 6:17 PM, Will Deacon wrote:
>>>>> On Mon, Aug 26, 2024 at 04:33:24PM +0530, Bibek Kumar Patro wrote:
>>>>>>
>>>>>>
>>>>>> On 8/23/2024 9:29 PM, Will Deacon wrote:
>>>>>>> On Fri, Aug 16, 2024 at 11:12:58PM +0530, Bibek Kumar Patro wrote:
>>>>>>>> Add ACTLR data table for SC7280 along with support for
>>>>>>>> same including SC7280 specific implementation operations.
>>>>>>>>
>>>>>>>> Signed-off-by: Bibek Kumar Patro <quic_bibekkum@quicinc.com>
>>>>>>>> ---
>>>>>>>>     drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c | 58
>>>>>>>> +++++++++++++++++++++-
>>>>>>>>     1 file changed, 57 insertions(+), 1 deletion(-)
>>>>>>>>
>>>>>>>> diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>>> b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>>> index dc143b250704..a776c7906c76 100644
>>>>>>>> --- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>>> +++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
>>>>>>>> @@ -31,6 +31,55 @@
>>>>>>>>     #define PREFETCH_MODERATE    (2 << PREFETCH_SHIFT)
>>>>>>>>     #define PREFETCH_DEEP        (3 << PREFETCH_SHIFT)
>>>>>>>>
>>>>>>>> +static const struct actlr_config sc7280_apps_actlr_cfg[] = {
>>>>>>>> +    { 0x0800, 0x04e0, PREFETCH_DEFAULT | CMTLB },
>>>>>>>> +    { 0x0900, 0x0402, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>>>>>> +    { 0x0901, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>>>>>> +    { 0x0d01, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>>>>>> +    { 0x1181, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>>>>> +    { 0x1182, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>>>>> +    { 0x1183, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>>>>> +    { 0x1184, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>>>>> +    { 0x1185, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>>>>> +    { 0x1186, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>>>>> +    { 0x1187, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>>>>> +    { 0x1188, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>>>>> +    { 0x1189, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>>>>> +    { 0x118b, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>>>>> +    { 0x118c, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>>>>> +    { 0x118d, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>>>>> +    { 0x118e, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>>>>> +    { 0x118f, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
>>>>>>>> +    { 0x2000, 0x0020, PREFETCH_DEFAULT | CMTLB },
>>>>>>>> +    { 0x2040, 0x0000, PREFETCH_DEFAULT | CMTLB },
>>>>>>>> +    { 0x2062, 0x0000, PREFETCH_DEFAULT | CMTLB },
>>>>>>>> +    { 0x2080, 0x0020, PREFETCH_DEFAULT | CMTLB },
>>>>>>>> +    { 0x20c0, 0x0020, PREFETCH_DEFAULT | CMTLB },
>>>>>>>> +    { 0x2100, 0x0020, PREFETCH_DEFAULT | CMTLB },
>>>>>>>> +    { 0x2140, 0x0000, PREFETCH_DEFAULT | CMTLB },
>>>>>>>> +    { 0x2180, 0x0020, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>>>>>> +    { 0x2181, 0x0004, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>>>>>> +    { 0x2183, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>>>>>> +    { 0x2184, 0x0020, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>>>>>> +    { 0x2187, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
>>>>>>>> +};
>>>>>>>> +
>>>>>>>> +static const struct actlr_config sc7280_gfx_actlr_cfg[] = {
>>>>>>>> +    { 0x0000, 0x07ff, PREFETCH_DEEP | CPRE | CMTLB },
>>>>>>>> +};
>>>>>>>
>>>>>>> It's Will "stuck record" Deacon here again to say that I don't think
>>>>>>> this data belongs in the driver.
>>>>>>>
>>>>>>
>>>>>> Hi Will,
>>>>>>
>>>>>> It will be difficult to reach a consensus here, with Robin and the
>>>>>> DT folks
>>>>>> okay to keep it in the driver, while you believe it doesn't belong
>>>>>> there.
>>>>>>
>>>>>> Robin, Rob, could you please share your thoughts on concluding the
>>>>>> placement
>>>>>> of this prefetch data?
>>>>>>
>>>>>> As discussed earlier [1], the prefetch value for each client doesn’t
>>>>>> define
>>>>>> the hardware topology and is implementation-defined register writes
>>>>>> used by
>>>>>> the software driver.
>>>>>
>>>>> It does reflect the hardware topology though, doesn't it? Those magic
>>>>> hex
>>>>> masks above refer to stream ids, so the table is hard-coding the
>>>>> prefetch
>>>>> values for particular matches.
>>>>
>>>> That is correct in the sense that stream id is mapped to context bank
>>>> where these configurations are applied.
>>>> However the other part of it is implementation-defined register/values
>>>> for which community opinion was register/value kind of data, should not
>>>> belong to device tree and are not generally approved of.
>>>>
>>>> Would also like to point out that the prefetch values are recommended
>>>> settings and doesn’t mean these are the only configuration which would
>>>> work for the soc.
>>>> So the SID-to-prefetch isn't strictly SoC defined but is a software
>>>> configuration, IMO.
>>>
>>> What's particularly confusing is that most of the IDs encoded here don't
>>> actually seem to line up with what's in the respective SoC DTSIs...
>>>
>>> However by this point I'm wary of whether we've lost sight of *why*
>>> we're doing this, and that we're deep into begging the question of
>>> whether identifying devices by StreamID is the right thing to do in the
>>> first place. For example, as best I can tell from a quick skim, we have
>>> over 2 dozen lines of data here which all serve the exact same purpose
>>> of applying PREFETCH_DEEP | CPRE | CMTLB to instances of
>>> "qcom,fastrpc-compute-cb". In general it seems unlikely that the same
>>> device would want wildly different prefetch settings across different
>>> SoCs, or even between different instances in the same SoC, so I'm really
>>> coming round to the conclusion that this data would probably be best
>>> handled as an extension of the existing qcom_smmu_client_of_match
>>> mechanism.
>>>
>>
>> As per your design idea,do you mean to use qcom_smmu_client_of_match to
>> identify the device using compatible string and apply the device
>> specific settings for all the SoCs (instead of StreamID based device
>> identification) ?
>>
>> something like this rough snippet(?):
>>
>> qcom_smmu_find_actlr_client(struct device *dev)
>> {
>>
>>          if (of_match_device(qcom_smmu_client_of_match, dev) ==
>> qcom,fastrpc-compute-cb )
>>                  qcom_smmu_set_actlr_value(dev, (PREFETCH_DEEP | CPRE | CMTLB));
>> /*where (PREFETCH_DEEP | CPRE | CMTLB) is used for compute-cb client.*/
>>
>>          else if (of_match_device(qcom_smmu_client_of_match, dev) == qcom,adreno )
>>                  qcom_smmu_set_actlr_value(dev, (PREFETCH_SHALLOW | CPRE | CMTLB));
>> /*Where (PREFETCH_SHALLOW | CPRE | CMTLB) is for adreno client. */
> 
> I like this idea, especially once it gets converted into a per-SoC
> table of compatibles.

Ack, Just posted the latest version v15 for this series , based on the 
compatible string based design approach [1] as discussed.

While completion of patch, I came to know that it might sometimes
be possible on Qualcomm SoC's, same IP/device on 2 different SoC
can have different ACTLR settings as well.
[1] _can take care of that case_ as well through different compatible
strings but this can be handled through per-SoC solution as well.

In the latest patch [1], I am going forward with Robin's suggestion
of single table per smmu, and can later follow up there if any
conflict happens.

[1]: 
https://lore.kernel.org/all/20240920155813.3434021-6-quic_bibekkum@quicinc.com/>

Thanks & regards,
Bibek

> 
>>
>> }
>>
>> Let me know if my understanding is incorrect.
>> Then in this case if different SoC would have a different settings for
>> same device, then everytime a new compatible would be necessary for same
>> device on different SoC?
>>
>> On similar lines there is another TBU based approach which I can think
>> of. Identify the TBU -> Identify clients from TopoID derived from SID
>> range specified in qcom,stream-id-range -> Apply the client
>> specific settings ?
>>
>> Both approaches would be driver-based, as they are now.
>>
>> Also I'd like to point out that in the current design, since we fixed
>> the smr_is_subset arguments to make the stream IDs a subset of entries
>> in the actlr_cfg table, we can reduce the number of entries in the
>> table. This way, fewer SID-mask pairs can accommodate several stream IDs.
>>
>> Thanks & regards,
>> Bibek
>>
>>> Thanks,
>>> Robin.
>>>
>>>>
>>>>> If I run on a different SoC configuration > with the same table, then
>>>>> the prefetch settings will be applied to the
>>>>> wrong devices. How is that not hardware topology?
>>>>>
>>>>
>>>> The configuration table is tied to SoC compatible string however as I
>>>> mentioned above, its basically a s/w recommended setting.
>>>> (using prefetch settings other than the recommended values e.g
>>>> PREFECH_DEFAULT instead of PREFETCH_DEEP would not render the device
>>>> unusable unlike changing stream-ids which can make it unusable).
>>>>
>>>> Since it is implementation specific we cannot have a generic DT binding,
>>>> tying stream ids to these recommended settings.
>>>> Even with qcom specific binding due to dependency on implementation, not
>>>> sure if we would be able to maintain consistency.
>>>>
>>>> So from maintenance perspective carrying these in driver appear to be
>>>> simpler/flexible. And if it doesn’t violate existing precedence, we
>>>> would prefer to carry it that way.
>>>>
>>>> This parallels how _"QoS settings"_ are handled within the driver
>>>> (similar to this example [1]).
>>>>
>>>> [1].
>>>> https://lore.kernel.org/linux-arm-msm/20231030-sc8280xp-dpu-safe-lut-v1-1-6d485d7b428f@quicinc.com/#t
>>>>
>>>> Thanks & regards,
>>>> Bibek
>>>>
>>>>> WIll
> 
> 
>
diff mbox series

Patch

diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
index dc143b250704..a776c7906c76 100644
--- a/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
+++ b/drivers/iommu/arm/arm-smmu/arm-smmu-qcom.c
@@ -31,6 +31,55 @@ 
 #define PREFETCH_MODERATE	(2 << PREFETCH_SHIFT)
 #define PREFETCH_DEEP		(3 << PREFETCH_SHIFT)

+static const struct actlr_config sc7280_apps_actlr_cfg[] = {
+	{ 0x0800, 0x04e0, PREFETCH_DEFAULT | CMTLB },
+	{ 0x0900, 0x0402, PREFETCH_SHALLOW | CPRE | CMTLB },
+	{ 0x0901, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
+	{ 0x0d01, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
+	{ 0x1181, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x1182, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x1183, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x1184, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x1185, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x1186, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x1187, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x1188, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x1189, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x118b, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x118c, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x118d, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x118e, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x118f, 0x0420, PREFETCH_DEEP | CPRE | CMTLB },
+	{ 0x2000, 0x0020, PREFETCH_DEFAULT | CMTLB },
+	{ 0x2040, 0x0000, PREFETCH_DEFAULT | CMTLB },
+	{ 0x2062, 0x0000, PREFETCH_DEFAULT | CMTLB },
+	{ 0x2080, 0x0020, PREFETCH_DEFAULT | CMTLB },
+	{ 0x20c0, 0x0020, PREFETCH_DEFAULT | CMTLB },
+	{ 0x2100, 0x0020, PREFETCH_DEFAULT | CMTLB },
+	{ 0x2140, 0x0000, PREFETCH_DEFAULT | CMTLB },
+	{ 0x2180, 0x0020, PREFETCH_SHALLOW | CPRE | CMTLB },
+	{ 0x2181, 0x0004, PREFETCH_SHALLOW | CPRE | CMTLB },
+	{ 0x2183, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
+	{ 0x2184, 0x0020, PREFETCH_SHALLOW | CPRE | CMTLB },
+	{ 0x2187, 0x0000, PREFETCH_SHALLOW | CPRE | CMTLB },
+};
+
+static const struct actlr_config sc7280_gfx_actlr_cfg[] = {
+	{ 0x0000, 0x07ff, PREFETCH_DEEP | CPRE | CMTLB },
+};
+
+static const struct actlr_variant sc7280_actlr[] = {
+	{
+		.io_start = 0x15000000,
+		.actlrcfg = sc7280_apps_actlr_cfg,
+		.num_actlrcfg = ARRAY_SIZE(sc7280_apps_actlr_cfg)
+	}, {
+		.io_start = 0x03da0000,
+		.actlrcfg = sc7280_gfx_actlr_cfg,
+		.num_actlrcfg = ARRAY_SIZE(sc7280_gfx_actlr_cfg)
+	},
+};
+
 static const struct actlr_config sm8550_apps_actlr_cfg[] = {
 	{ 0x0800, 0x0020, PREFETCH_DEFAULT | CMTLB },
 	{ 0x1800, 0x00c0, PREFETCH_DEFAULT | CMTLB },
@@ -689,6 +738,13 @@  static const struct qcom_smmu_match_data sdm845_smmu_500_data = {
 	/* Also no debug configuration. */
 };

+static const struct qcom_smmu_match_data sc7280_smmu_500_impl0_data = {
+	.impl = &qcom_smmu_500_impl,
+	.adreno_impl = &qcom_adreno_smmu_500_impl,
+	.cfg = &qcom_smmu_impl0_cfg,
+	.actlrvar = sc7280_actlr,
+	.num_smmu = ARRAY_SIZE(sc7280_actlr),
+};

 static const struct qcom_smmu_match_data sm8550_smmu_500_impl0_data = {
 	.impl = &qcom_smmu_500_impl,
@@ -715,7 +771,7 @@  static const struct of_device_id __maybe_unused qcom_smmu_impl_of_match[] = {
 	{ .compatible = "qcom,qdu1000-smmu-500", .data = &qcom_smmu_500_impl0_data  },
 	{ .compatible = "qcom,sc7180-smmu-500", .data = &qcom_smmu_500_impl0_data },
 	{ .compatible = "qcom,sc7180-smmu-v2", .data = &qcom_smmu_v2_data },
-	{ .compatible = "qcom,sc7280-smmu-500", .data = &qcom_smmu_500_impl0_data },
+	{ .compatible = "qcom,sc7280-smmu-500", .data = &sc7280_smmu_500_impl0_data },
 	{ .compatible = "qcom,sc8180x-smmu-500", .data = &qcom_smmu_500_impl0_data },
 	{ .compatible = "qcom,sc8280xp-smmu-500", .data = &qcom_smmu_500_impl0_data },
 	{ .compatible = "qcom,sdm630-smmu-v2", .data = &qcom_smmu_v2_data },