diff mbox series

net: dwc_eth_qos: Pad descriptors to cacheline size

Message ID 20200429191403.112487-1-marex@denx.de
State New
Headers show
Series net: dwc_eth_qos: Pad descriptors to cacheline size | expand

Commit Message

Marek Vasut April 29, 2020, 7:14 p.m. UTC
The DWMAC4 IP has the possibility to skip up to 7 64bit words after
the descriptor, use this to pad the descriptors to cacheline size.

Signed-off-by: Marek Vasut <marex at denx.de>
Cc: Joe Hershberger <joe.hershberger at ni.com>
Cc: Patrice Chotard <patrice.chotard at st.com>
Cc: Patrick Delaunay <patrick.delaunay at st.com>
Cc: Ramon Fried <rfried.dev at gmail.com>
Cc: Stephen Warren <swarren at nvidia.com>
---
 drivers/net/dwc_eth_qos.c | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

Comments

Stephen Warren April 29, 2020, 7:51 p.m. UTC | #1
On 4/29/20 1:14 PM, Marek Vasut wrote:
> The DWMAC4 IP has the possibility to skip up to 7 64bit words after
> the descriptor, use this to pad the descriptors to cacheline size.

> diff --git a/drivers/net/dwc_eth_qos.c b/drivers/net/dwc_eth_qos.c

>  /* Descriptors */
> +/*

Nit: Blank line between those two comments to match the existing style.

> + * #if EQOS_DESCRIPTOR_SIZE < ARCH_DMA_MINALIGN fails if we use sizeof()
> + * or offsetof() to calculate descriptor size, since neither is allowed
> + * in C preprocessor macros, so just hard-code this to 16=4*sizeof(u32).
> + */
> +#define EQOS_DESCRIPTOR_SIZE	16
> +#define EQOS_DESCRIPTOR_PAD	((ARCH_DMA_MINALIGN - EQOS_DESCRIPTOR_SIZE) / 4)
> +struct eqos_desc {
> +	u32 des0;
> +	u32 des1;
> +	u32 des2;
> +	u32 des3;
> +	u32 __pad[EQOS_DESCRIPTOR_PAD];
> +};

I think the padding needs to be optional based on the same condition in
the following existing warning logic, and the warning logic may need to
be removed/updated to account for the fact that padding is now being
used to avoid the issue:

(quoting this from existing code)
> /*
>  * Warn if the cache-line size is larger than the descriptor size. In such
>  * cases the driver will likely fail because the CPU needs to flush the cache
>  * when requeuing RX buffers, therefore descriptors written by the hardware
>  * may be discarded. Architectures with full IO coherence, such as x86, do not
>  * experience this issue, and hence are excluded from this condition.
>  *
>  * This can be fixed by defining CONFIG_SYS_NONCACHED_MEMORY which will cause
>  * the driver to allocate descriptors from a pool of non-cached memory.
>  */
> #if EQOS_DESCRIPTOR_SIZE < ARCH_DMA_MINALIGN
> #if !defined(CONFIG_SYS_NONCACHED_MEMORY) && \
>         !CONFIG_IS_ENABLED(SYS_DCACHE_OFF) && !defined(CONFIG_X86)
> #warning Cache line size is larger than descriptor size
> #endif
> #endif

(back to quoting from the patch)
> -#define EQOS_DESCRIPTOR_WORDS	4
> -#define EQOS_DESCRIPTOR_SIZE	(EQOS_DESCRIPTOR_WORDS * 4)
...
> -struct eqos_desc {
> -	u32 des0;
> -	u32 des1;
> -	u32 des2;
> -	u32 des3;
> -};

This patch would be a lot smaller if those definitions were kept in
their existing location instead of moving them...
Marek Vasut April 29, 2020, 7:56 p.m. UTC | #2
On 4/29/20 9:51 PM, Stephen Warren wrote:
> On 4/29/20 1:14 PM, Marek Vasut wrote:
>> The DWMAC4 IP has the possibility to skip up to 7 64bit words after
>> the descriptor, use this to pad the descriptors to cacheline size.
> 
>> diff --git a/drivers/net/dwc_eth_qos.c b/drivers/net/dwc_eth_qos.c
> 
>>  /* Descriptors */
>> +/*
> 
> Nit: Blank line between those two comments to match the existing style.
> 
>> + * #if EQOS_DESCRIPTOR_SIZE < ARCH_DMA_MINALIGN fails if we use sizeof()
>> + * or offsetof() to calculate descriptor size, since neither is allowed
>> + * in C preprocessor macros, so just hard-code this to 16=4*sizeof(u32).
>> + */
>> +#define EQOS_DESCRIPTOR_SIZE	16
>> +#define EQOS_DESCRIPTOR_PAD	((ARCH_DMA_MINALIGN - EQOS_DESCRIPTOR_SIZE) / 4)
>> +struct eqos_desc {
>> +	u32 des0;
>> +	u32 des1;
>> +	u32 des2;
>> +	u32 des3;
>> +	u32 __pad[EQOS_DESCRIPTOR_PAD];
>> +};
> 
> I think the padding needs to be optional based on the same condition in
> the following existing warning logic, and the warning logic may need to
> be removed/updated to account for the fact that padding is now being
> used to avoid the issue:
> 
> (quoting this from existing code)
>> /*
>>  * Warn if the cache-line size is larger than the descriptor size. In such
>>  * cases the driver will likely fail because the CPU needs to flush the cache
>>  * when requeuing RX buffers, therefore descriptors written by the hardware
>>  * may be discarded. Architectures with full IO coherence, such as x86, do not
>>  * experience this issue, and hence are excluded from this condition.
>>  *
>>  * This can be fixed by defining CONFIG_SYS_NONCACHED_MEMORY which will cause
>>  * the driver to allocate descriptors from a pool of non-cached memory.
>>  */
>> #if EQOS_DESCRIPTOR_SIZE < ARCH_DMA_MINALIGN
>> #if !defined(CONFIG_SYS_NONCACHED_MEMORY) && \
>>         !CONFIG_IS_ENABLED(SYS_DCACHE_OFF) && !defined(CONFIG_X86)
>> #warning Cache line size is larger than descriptor size
>> #endif
>> #endif
> 
> (back to quoting from the patch)
>> -#define EQOS_DESCRIPTOR_WORDS	4
>> -#define EQOS_DESCRIPTOR_SIZE	(EQOS_DESCRIPTOR_WORDS * 4)
> ...
>> -struct eqos_desc {
>> -	u32 des0;
>> -	u32 des1;
>> -	u32 des2;
>> -	u32 des3;
>> -};
> 
> This patch would be a lot smaller if those definitions were kept in
> their existing location instead of moving them...

Could you at least test it on the tegra ?
Stephen Warren April 29, 2020, 9:25 p.m. UTC | #3
On 4/29/20 1:56 PM, Marek Vasut wrote:
> On 4/29/20 9:51 PM, Stephen Warren wrote:
>> On 4/29/20 1:14 PM, Marek Vasut wrote:
>>> The DWMAC4 IP has the possibility to skip up to 7 64bit words after
>>> the descriptor, use this to pad the descriptors to cacheline size.
>>
>>> diff --git a/drivers/net/dwc_eth_qos.c b/drivers/net/dwc_eth_qos.c
>>
>>>  /* Descriptors */
>>> +/*
>>
>> Nit: Blank line between those two comments to match the existing style.
>>
>>> + * #if EQOS_DESCRIPTOR_SIZE < ARCH_DMA_MINALIGN fails if we use sizeof()
>>> + * or offsetof() to calculate descriptor size, since neither is allowed
>>> + * in C preprocessor macros, so just hard-code this to 16=4*sizeof(u32).
>>> + */
>>> +#define EQOS_DESCRIPTOR_SIZE	16
>>> +#define EQOS_DESCRIPTOR_PAD	((ARCH_DMA_MINALIGN - EQOS_DESCRIPTOR_SIZE) / 4)
>>> +struct eqos_desc {
>>> +	u32 des0;
>>> +	u32 des1;
>>> +	u32 des2;
>>> +	u32 des3;
>>> +	u32 __pad[EQOS_DESCRIPTOR_PAD];
>>> +};
>>
>> I think the padding needs to be optional based on the same condition in
>> the following existing warning logic, and the warning logic may need to
>> be removed/updated to account for the fact that padding is now being
>> used to avoid the issue:
>>
>> (quoting this from existing code)
>>> /*
>>>  * Warn if the cache-line size is larger than the descriptor size. In such
>>>  * cases the driver will likely fail because the CPU needs to flush the cache
>>>  * when requeuing RX buffers, therefore descriptors written by the hardware
>>>  * may be discarded. Architectures with full IO coherence, such as x86, do not
>>>  * experience this issue, and hence are excluded from this condition.
>>>  *
>>>  * This can be fixed by defining CONFIG_SYS_NONCACHED_MEMORY which will cause
>>>  * the driver to allocate descriptors from a pool of non-cached memory.
>>>  */
>>> #if EQOS_DESCRIPTOR_SIZE < ARCH_DMA_MINALIGN
>>> #if !defined(CONFIG_SYS_NONCACHED_MEMORY) && \
>>>         !CONFIG_IS_ENABLED(SYS_DCACHE_OFF) && !defined(CONFIG_X86)
>>> #warning Cache line size is larger than descriptor size
>>> #endif
>>> #endif
>>
>> (back to quoting from the patch)
>>> -#define EQOS_DESCRIPTOR_WORDS	4
>>> -#define EQOS_DESCRIPTOR_SIZE	(EQOS_DESCRIPTOR_WORDS * 4)
>> ...
>>> -struct eqos_desc {
>>> -	u32 des0;
>>> -	u32 des1;
>>> -	u32 des2;
>>> -	u32 des3;
>>> -};
>>
>> This patch would be a lot smaller if those definitions were kept in
>> their existing location instead of moving them...
> 
> Could you at least test it on the tegra ?

This patch (applied on top of current u-boot/master) does cause network
failures on Jetson TX2 (Tegra186), which obviously uses this driver.

The docs for our version of the IP block do indicate that the DSL shift
is supported, so I think the patch should work. I wonder if it's because
of the bus width of our EQoS IP block? Our docs indicate that DSL is a
multiple of the bus width, so describes a skip of word/dword/lword count
based on 32/64/128 bus width. Perhaps the bus width needs to be
parametrized when calculating the value? Our docs don't seem to indicate
which bus width our HW uses:-( Aha, but changing the "/2" to "/4" in the
code in this patch which calculates DSL value does yield a working
system, so I guess we have a 128-bit bus width. And indeed this matches
a comment I wrote in the driver:

        /*
         * Burst length must be < 1/2 FIFO size.
         * FIFO size in tqs is encoded as (n / 256) - 1.
         * Each burst is n * 8 (PBLX8) * 16 (AXI width) == 128 bytes.

16 byte AXI width == 128 bit width.
Marek Vasut April 29, 2020, 9:41 p.m. UTC | #4
On 4/29/20 11:25 PM, Stephen Warren wrote:
> On 4/29/20 1:56 PM, Marek Vasut wrote:
>> On 4/29/20 9:51 PM, Stephen Warren wrote:
>>> On 4/29/20 1:14 PM, Marek Vasut wrote:
>>>> The DWMAC4 IP has the possibility to skip up to 7 64bit words after
>>>> the descriptor, use this to pad the descriptors to cacheline size.
>>>
>>>> diff --git a/drivers/net/dwc_eth_qos.c b/drivers/net/dwc_eth_qos.c
>>>
>>>>  /* Descriptors */
>>>> +/*
>>>
>>> Nit: Blank line between those two comments to match the existing style.
>>>
>>>> + * #if EQOS_DESCRIPTOR_SIZE < ARCH_DMA_MINALIGN fails if we use sizeof()
>>>> + * or offsetof() to calculate descriptor size, since neither is allowed
>>>> + * in C preprocessor macros, so just hard-code this to 16=4*sizeof(u32).
>>>> + */
>>>> +#define EQOS_DESCRIPTOR_SIZE	16
>>>> +#define EQOS_DESCRIPTOR_PAD	((ARCH_DMA_MINALIGN - EQOS_DESCRIPTOR_SIZE) / 4)
>>>> +struct eqos_desc {
>>>> +	u32 des0;
>>>> +	u32 des1;
>>>> +	u32 des2;
>>>> +	u32 des3;
>>>> +	u32 __pad[EQOS_DESCRIPTOR_PAD];
>>>> +};
>>>
>>> I think the padding needs to be optional based on the same condition in
>>> the following existing warning logic, and the warning logic may need to
>>> be removed/updated to account for the fact that padding is now being
>>> used to avoid the issue:
>>>
>>> (quoting this from existing code)
>>>> /*
>>>>  * Warn if the cache-line size is larger than the descriptor size. In such
>>>>  * cases the driver will likely fail because the CPU needs to flush the cache
>>>>  * when requeuing RX buffers, therefore descriptors written by the hardware
>>>>  * may be discarded. Architectures with full IO coherence, such as x86, do not
>>>>  * experience this issue, and hence are excluded from this condition.
>>>>  *
>>>>  * This can be fixed by defining CONFIG_SYS_NONCACHED_MEMORY which will cause
>>>>  * the driver to allocate descriptors from a pool of non-cached memory.
>>>>  */
>>>> #if EQOS_DESCRIPTOR_SIZE < ARCH_DMA_MINALIGN
>>>> #if !defined(CONFIG_SYS_NONCACHED_MEMORY) && \
>>>>         !CONFIG_IS_ENABLED(SYS_DCACHE_OFF) && !defined(CONFIG_X86)
>>>> #warning Cache line size is larger than descriptor size
>>>> #endif
>>>> #endif
>>>
>>> (back to quoting from the patch)
>>>> -#define EQOS_DESCRIPTOR_WORDS	4
>>>> -#define EQOS_DESCRIPTOR_SIZE	(EQOS_DESCRIPTOR_WORDS * 4)
>>> ...
>>>> -struct eqos_desc {
>>>> -	u32 des0;
>>>> -	u32 des1;
>>>> -	u32 des2;
>>>> -	u32 des3;
>>>> -};
>>>
>>> This patch would be a lot smaller if those definitions were kept in
>>> their existing location instead of moving them...
>>
>> Could you at least test it on the tegra ?
> 
> This patch (applied on top of current u-boot/master) does cause network
> failures on Jetson TX2 (Tegra186), which obviously uses this driver.

That's what I was afraid of.

> The docs for our version of the IP block do indicate that the DSL shift
> is supported, so I think the patch should work. I wonder if it's because
> of the bus width of our EQoS IP block?

The STM32MP1 docs say that the shift is in 64bit words, maybe that
actually translate into bus words rather than 8 bytes indeed ?

btw what is your ARCH_DMA_MINALIGN on t186, 64 ?
I'm worried that if something has it set to 128 or more, then this DSL
padding won't be able to cover it, since it can only skip up to 7 64bit
words.

> Our docs indicate that DSL is a
> multiple of the bus width, so describes a skip of word/dword/lword count
> based on 32/64/128 bus width. Perhaps the bus width needs to be
> parametrized when calculating the value? Our docs don't seem to indicate
> which bus width our HW uses:-( Aha, but changing the "/2" to "/4" in the
> code in this patch which calculates DSL value does yield a working
> system, so I guess we have a 128-bit bus width. And indeed this matches
> a comment I wrote in the driver:
> 
>         /*
>          * Burst length must be < 1/2 FIFO size.
>          * FIFO size in tqs is encoded as (n / 256) - 1.
>          * Each burst is n * 8 (PBLX8) * 16 (AXI width) == 128 bytes.
> 
> 16 byte AXI width == 128 bit width.

So how do we parametrize that, based on compatible string I guess ?
We already have params for the tegra186 and stm32mp1 variants of the IP,
so adding one more should be OK.

Also, thanks for looking into it.
Stephen Warren April 29, 2020, 9:51 p.m. UTC | #5
On 4/29/20 3:41 PM, Marek Vasut wrote:
> On 4/29/20 11:25 PM, Stephen Warren wrote:
>> On 4/29/20 1:56 PM, Marek Vasut wrote:
>>> On 4/29/20 9:51 PM, Stephen Warren wrote:
>>>> On 4/29/20 1:14 PM, Marek Vasut wrote:
>>>>> The DWMAC4 IP has the possibility to skip up to 7 64bit words after
>>>>> the descriptor, use this to pad the descriptors to cacheline size.
>>>>
>>>>> diff --git a/drivers/net/dwc_eth_qos.c b/drivers/net/dwc_eth_qos.c
>>>>
>>>>>  /* Descriptors */
>>>>> +/*
>>>>
>>>> Nit: Blank line between those two comments to match the existing style.
>>>>
>>>>> + * #if EQOS_DESCRIPTOR_SIZE < ARCH_DMA_MINALIGN fails if we use sizeof()
>>>>> + * or offsetof() to calculate descriptor size, since neither is allowed
>>>>> + * in C preprocessor macros, so just hard-code this to 16=4*sizeof(u32).
>>>>> + */
>>>>> +#define EQOS_DESCRIPTOR_SIZE	16
>>>>> +#define EQOS_DESCRIPTOR_PAD	((ARCH_DMA_MINALIGN - EQOS_DESCRIPTOR_SIZE) / 4)
>>>>> +struct eqos_desc {
>>>>> +	u32 des0;
>>>>> +	u32 des1;
>>>>> +	u32 des2;
>>>>> +	u32 des3;
>>>>> +	u32 __pad[EQOS_DESCRIPTOR_PAD];
>>>>> +};
>>>>
>>>> I think the padding needs to be optional based on the same condition in
>>>> the following existing warning logic, and the warning logic may need to
>>>> be removed/updated to account for the fact that padding is now being
>>>> used to avoid the issue:
>>>>
>>>> (quoting this from existing code)
>>>>> /*
>>>>>  * Warn if the cache-line size is larger than the descriptor size. In such
>>>>>  * cases the driver will likely fail because the CPU needs to flush the cache
>>>>>  * when requeuing RX buffers, therefore descriptors written by the hardware
>>>>>  * may be discarded. Architectures with full IO coherence, such as x86, do not
>>>>>  * experience this issue, and hence are excluded from this condition.
>>>>>  *
>>>>>  * This can be fixed by defining CONFIG_SYS_NONCACHED_MEMORY which will cause
>>>>>  * the driver to allocate descriptors from a pool of non-cached memory.
>>>>>  */
>>>>> #if EQOS_DESCRIPTOR_SIZE < ARCH_DMA_MINALIGN
>>>>> #if !defined(CONFIG_SYS_NONCACHED_MEMORY) && \
>>>>>         !CONFIG_IS_ENABLED(SYS_DCACHE_OFF) && !defined(CONFIG_X86)
>>>>> #warning Cache line size is larger than descriptor size
>>>>> #endif
>>>>> #endif
>>>>
>>>> (back to quoting from the patch)
>>>>> -#define EQOS_DESCRIPTOR_WORDS	4
>>>>> -#define EQOS_DESCRIPTOR_SIZE	(EQOS_DESCRIPTOR_WORDS * 4)
>>>> ...
>>>>> -struct eqos_desc {
>>>>> -	u32 des0;
>>>>> -	u32 des1;
>>>>> -	u32 des2;
>>>>> -	u32 des3;
>>>>> -};
>>>>
>>>> This patch would be a lot smaller if those definitions were kept in
>>>> their existing location instead of moving them...
>>>
>>> Could you at least test it on the tegra ?
>>
>> This patch (applied on top of current u-boot/master) does cause network
>> failures on Jetson TX2 (Tegra186), which obviously uses this driver.
> 
> That's what I was afraid of.
> 
>> The docs for our version of the IP block do indicate that the DSL shift
>> is supported, so I think the patch should work. I wonder if it's because
>> of the bus width of our EQoS IP block?
> 
> The STM32MP1 docs say that the shift is in 64bit words, maybe that
> actually translate into bus words rather than 8 bytes indeed ?

I assume the STM32 doc writer only wrote the specific case that applies
to their HW, which presumably has a 64-bit bus. Our docs are IIRC
unmodified from the generic IP docs that Synopsis supplied, so mention
all the cases and leave the reader to figure out which option applies!

> btw what is your ARCH_DMA_MINALIGN on t186, 64 ?

It's 64 I believe, since ARM64 selects SYS_CACHE_SHIFT_6, which then
triggers the logic below, and I don't think Tegra overrides any of this:

config SYS_CACHELINE_SIZE
        int
        default 64 if SYS_CACHE_SHIFT_6

> I'm worried that if something has it set to 128 or more, then this DSL
> padding won't be able to cover it, since it can only skip up to 7 64bit
> words.

Oh yes, I meant to mention that in the patch; it would be good to
validate the calculated DSL value doesn't overflow its field width.

>> Our docs indicate that DSL is a
>> multiple of the bus width, so describes a skip of word/dword/lword count
>> based on 32/64/128 bus width. Perhaps the bus width needs to be
>> parametrized when calculating the value? Our docs don't seem to indicate
>> which bus width our HW uses:-( Aha, but changing the "/2" to "/4" in the
>> code in this patch which calculates DSL value does yield a working
>> system, so I guess we have a 128-bit bus width. And indeed this matches
>> a comment I wrote in the driver:
>>
>>         /*
>>          * Burst length must be < 1/2 FIFO size.
>>          * FIFO size in tqs is encoded as (n / 256) - 1.
>>          * Each burst is n * 8 (PBLX8) * 16 (AXI width) == 128 bytes.
>>
>> 16 byte AXI width == 128 bit width.
> 
> So how do we parametrize that, based on compatible string I guess ?
> We already have params for the tegra186 and stm32mp1 variants of the IP,
> so adding one more should be OK.

Yes, adding another field to struct eqos_config would make sense.

> Also, thanks for looking into it.
Patrick Delaunay May 13, 2020, 10:16 a.m. UTC | #6
Hi Stephen and Marek

> From: Stephen Warren <swarren at wwwdotorg.org>
> Sent: mercredi 29 avril 2020 23:51
> To: Marek Vasut <marex at denx.de>
> Cc: u-boot at lists.denx.de; Joe Hershberger <joe.hershberger at ni.com>; Patrice
> CHOTARD <patrice.chotard at st.com>; Patrick DELAUNAY
> <patrick.delaunay at st.com>; Ramon Fried <rfried.dev at gmail.com>; Stephen
> Warren <swarren at nvidia.com>; Tom Warren <twarren at nvidia.com>
> Subject: Re: [PATCH] net: dwc_eth_qos: Pad descriptors to cacheline size
> Importance: High
> 
> On 4/29/20 3:41 PM, Marek Vasut wrote:
> > On 4/29/20 11:25 PM, Stephen Warren wrote:
> >> On 4/29/20 1:56 PM, Marek Vasut wrote:
> >>> On 4/29/20 9:51 PM, Stephen Warren wrote:
> >>>> On 4/29/20 1:14 PM, Marek Vasut wrote:


[...]

> >>>
> >>> Could you at least test it on the tegra ?
> >>
> >> This patch (applied on top of current u-boot/master) does cause
> >> network failures on Jetson TX2 (Tegra186), which obviously uses this driver.
> >
> > That's what I was afraid of.
> >
> >> The docs for our version of the IP block do indicate that the DSL
> >> shift is supported, so I think the patch should work. I wonder if
> >> it's because of the bus width of our EQoS IP block?
> >
> > The STM32MP1 docs say that the shift is in 64bit words, maybe that
> > actually translate into bus words rather than 8 bytes indeed ?
> 
> I assume the STM32 doc writer only wrote the specific case that applies to their
> HW, which presumably has a 64-bit bus. Our docs are IIRC unmodified from the
> generic IP docs that Synopsis supplied, so mention all the cases and leave the
> reader to figure out which option applies!
> 

[...]

> 
> >> Our docs indicate that DSL is a
> >> multiple of the bus width, so describes a skip of word/dword/lword
> >> count based on 32/64/128 bus width. Perhaps the bus width needs to be
> >> parametrized when calculating the value? Our docs don't seem to
> >> indicate which bus width our HW uses:-( Aha, but changing the "/2" to
> >> "/4" in the code in this patch which calculates DSL value does yield
> >> a working system, so I guess we have a 128-bit bus width. And indeed
> >> this matches a comment I wrote in the driver:
> >>
> >>         /*
> >>          * Burst length must be < 1/2 FIFO size.
> >>          * FIFO size in tqs is encoded as (n / 256) - 1.
> >>          * Each burst is n * 8 (PBLX8) * 16 (AXI width) == 128 bytes.
> >>
> >> 16 byte AXI width == 128 bit width.
> >
> > So how do we parametrize that, based on compatible string I guess ?
> > We already have params for the tegra186 and stm32mp1 variants of the
> > IP, so adding one more should be OK.
> 
> Yes, adding another field to struct eqos_config would make sense.
> 
> > Also, thanks for looking into it.

I confirm that ETH is configurated with AXI 64bit on STM32MP15x.

But we have the same description of DSL in ETH documentation before final Datasheet generation
(DSL in Word, Dword, or Lword, depending on the 32-bit, 64-bit, or 128-bit bus).

NB: 32/64/128-bit also imply alignment of address used for DMA
       (ch0_txdesc_tail_pointer/ch0_rxdesc_tail_pointer for example).

The width of this field depends on the configuration:
? 31:2 for 32-bit configuration
? 31:3 for 64-bit configuration
? 31:4 for 128-bit configuration

Regards 

Patrick
diff mbox series

Patch

diff --git a/drivers/net/dwc_eth_qos.c b/drivers/net/dwc_eth_qos.c
index 60dfd17a74..7fc91ed9a5 100644
--- a/drivers/net/dwc_eth_qos.c
+++ b/drivers/net/dwc_eth_qos.c
@@ -188,6 +188,7 @@  struct eqos_dma_regs {
 #define EQOS_DMA_SYSBUS_MODE_BLEN8			BIT(2)
 #define EQOS_DMA_SYSBUS_MODE_BLEN4			BIT(1)
 
+#define EQOS_DMA_CH0_CONTROL_DSL_SHIFT			18
 #define EQOS_DMA_CH0_CONTROL_PBLX8			BIT(16)
 
 #define EQOS_DMA_CH0_TX_CONTROL_TXPBL_SHIFT		16
@@ -218,9 +219,21 @@  struct eqos_tegra186_regs {
 #define EQOS_AUTO_CAL_STATUS_ACTIVE			BIT(31)
 
 /* Descriptors */
+/*
+ * #if EQOS_DESCRIPTOR_SIZE < ARCH_DMA_MINALIGN fails if we use sizeof()
+ * or offsetof() to calculate descriptor size, since neither is allowed
+ * in C preprocessor macros, so just hard-code this to 16=4*sizeof(u32).
+ */
+#define EQOS_DESCRIPTOR_SIZE	16
+#define EQOS_DESCRIPTOR_PAD	((ARCH_DMA_MINALIGN - EQOS_DESCRIPTOR_SIZE) / 4)
+struct eqos_desc {
+	u32 des0;
+	u32 des1;
+	u32 des2;
+	u32 des3;
+	u32 __pad[EQOS_DESCRIPTOR_PAD];
+};
 
-#define EQOS_DESCRIPTOR_WORDS	4
-#define EQOS_DESCRIPTOR_SIZE	(EQOS_DESCRIPTOR_WORDS * 4)
 /* We assume ARCH_DMA_MINALIGN >= 16; 16 is the EQOS HW minimum */
 #define EQOS_DESCRIPTOR_ALIGN	ARCH_DMA_MINALIGN
 #define EQOS_DESCRIPTORS_TX	4
@@ -249,13 +262,6 @@  struct eqos_tegra186_regs {
 #endif
 #endif
 
-struct eqos_desc {
-	u32 des0;
-	u32 des1;
-	u32 des2;
-	u32 des3;
-};
-
 #define EQOS_DESC3_OWN		BIT(31)
 #define EQOS_DESC3_FD		BIT(29)
 #define EQOS_DESC3_LD		BIT(28)
@@ -1254,7 +1260,9 @@  static int eqos_start(struct udevice *dev)
 			EQOS_DMA_CH0_RX_CONTROL_RBSZ_SHIFT);
 
 	setbits_le32(&eqos->dma_regs->ch0_control,
-		     EQOS_DMA_CH0_CONTROL_PBLX8);
+		     EQOS_DMA_CH0_CONTROL_PBLX8 |
+		     ((EQOS_DESCRIPTOR_PAD / 2) <<
+		     EQOS_DMA_CH0_CONTROL_DSL_SHIFT));
 
 	/*
 	 * Burst length must be < 1/2 FIFO size.