diff mbox

[1/2] DMA: fix AMBA PL08x driver issue with 64bit DMA address type

Message ID 1376484729-11826-2-git-send-email-andre.przywara@linaro.org
State New
Headers show

Commit Message

Andre Przywara Aug. 14, 2013, 12:52 p.m. UTC
In Rob's recent pull request the patch
	ARM: highbank: select ARCH_DMA_ADDR_T_64BIT for LPAE
promotes dma_addr_t to 64bit, which breaks compilation of the
AMBA PL08x DMA driver.
GCC has no function for the 64bit/8bit modulo operation.
Looking more closely the divisor can only be 1, 2 or 4, so the full
featured '%' modulo operation is overkill and can be replaced by
simple bit masking.

Signed-off-by: Andre Przywara <andre.przywara@linaro.org>
---
 drivers/dma/amba-pl08x.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

Comments

Russell King - ARM Linux Aug. 14, 2013, 1 p.m. UTC | #1
NAK.

On Wed, Aug 14, 2013 at 02:52:08PM +0200, Andre Przywara wrote:
> In Rob's recent pull request the patch
> 	ARM: highbank: select ARCH_DMA_ADDR_T_64BIT for LPAE
> promotes dma_addr_t to 64bit, which breaks compilation of the
> AMBA PL08x DMA driver.
> GCC has no function for the 64bit/8bit modulo operation.
> Looking more closely the divisor can only be 1, 2 or 4, so the full
> featured '%' modulo operation is overkill and can be replaced by
> simple bit masking.
> 
> Signed-off-by: Andre Przywara <andre.przywara@linaro.org>
> ---
>  drivers/dma/amba-pl08x.c | 16 ++++++++++------
>  1 file changed, 10 insertions(+), 6 deletions(-)
> 
> diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
> index 06fe45c..29e1cf9 100644
> --- a/drivers/dma/amba-pl08x.c
> +++ b/drivers/dma/amba-pl08x.c
> @@ -286,6 +286,11 @@ static inline struct pl08x_txd *to_pl08x_txd(struct dma_async_tx_descriptor *tx)
>  	return container_of(tx, struct pl08x_txd, vd.tx);
>  }
>  
> +static int bus_addr_offset(struct pl08x_bus_data *bus)
> +{
> +	return bus->addr & (bus->buswidth - 1);
> +}
> +
>  /*
>   * Mux handling.
>   *
> @@ -886,8 +891,8 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
>  				return 0;
>  			}
>  
> -			if ((bd.srcbus.addr % bd.srcbus.buswidth) ||
> -					(bd.dstbus.addr % bd.dstbus.buswidth)) {
> +			if (bus_addr_offset(&bd.srcbus) ||
> +					bus_addr_offset(&bd.dstbus)) {
>  				dev_err(&pl08x->adev->dev,
>  					"%s src & dst address must be aligned to src"
>  					" & dst width if peripheral is flow controller",
> @@ -908,9 +913,8 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
>  		 */
>  		if (bd.remainder < mbus->buswidth)
>  			early_bytes = bd.remainder;
> -		else if ((mbus->addr) % (mbus->buswidth)) {
> -			early_bytes = mbus->buswidth - (mbus->addr) %
> -				(mbus->buswidth);
> +		else if (bus_addr_offset(mbus)) {
> +			early_bytes = mbus->buswidth - bus_addr_offset(mbus);
>  			if ((bd.remainder - early_bytes) < mbus->buswidth)
>  				early_bytes = bd.remainder;
>  		}
> @@ -928,7 +932,7 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
>  			 * Master now aligned
>  			 * - if slave is not then we must set its width down
>  			 */
> -			if (sbus->addr % sbus->buswidth) {
> +			if (bus_addr_offset(sbus)) {
>  				dev_dbg(&pl08x->adev->dev,
>  					"%s set down bus width to one byte\n",
>  					__func__);
> -- 
> 1.7.12.1
>
Rob Herring Aug. 14, 2013, 7:13 p.m. UTC | #2
On Wed, Aug 14, 2013 at 8:00 AM, Russell King - ARM Linux
<linux@arm.linux.org.uk> wrote:
> NAK.

This patch has nothing to do with dma masks or your dma mask series.
The code deals with bus alignment and cleans up the code to do
alignment operations in a sane way compared to modulo operator. The
only thing 64-bit dma_addr_t did was expose crap code.

Perhaps bus_addr_offset needs a better name to indicate it is dealing
with bus alignment rather than bus address offset.

Rob

> On Wed, Aug 14, 2013 at 02:52:08PM +0200, Andre Przywara wrote:
>> In Rob's recent pull request the patch
>>       ARM: highbank: select ARCH_DMA_ADDR_T_64BIT for LPAE
>> promotes dma_addr_t to 64bit, which breaks compilation of the
>> AMBA PL08x DMA driver.
>> GCC has no function for the 64bit/8bit modulo operation.
>> Looking more closely the divisor can only be 1, 2 or 4, so the full
>> featured '%' modulo operation is overkill and can be replaced by
>> simple bit masking.
>>
>> Signed-off-by: Andre Przywara <andre.przywara@linaro.org>
>> ---
>>  drivers/dma/amba-pl08x.c | 16 ++++++++++------
>>  1 file changed, 10 insertions(+), 6 deletions(-)
>>
>> diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
>> index 06fe45c..29e1cf9 100644
>> --- a/drivers/dma/amba-pl08x.c
>> +++ b/drivers/dma/amba-pl08x.c
>> @@ -286,6 +286,11 @@ static inline struct pl08x_txd *to_pl08x_txd(struct dma_async_tx_descriptor *tx)
>>       return container_of(tx, struct pl08x_txd, vd.tx);
>>  }
>>
>> +static int bus_addr_offset(struct pl08x_bus_data *bus)
>> +{
>> +     return bus->addr & (bus->buswidth - 1);
>> +}
>> +
>>  /*
>>   * Mux handling.
>>   *
>> @@ -886,8 +891,8 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
>>                               return 0;
>>                       }
>>
>> -                     if ((bd.srcbus.addr % bd.srcbus.buswidth) ||
>> -                                     (bd.dstbus.addr % bd.dstbus.buswidth)) {
>> +                     if (bus_addr_offset(&bd.srcbus) ||
>> +                                     bus_addr_offset(&bd.dstbus)) {
>>                               dev_err(&pl08x->adev->dev,
>>                                       "%s src & dst address must be aligned to src"
>>                                       " & dst width if peripheral is flow controller",
>> @@ -908,9 +913,8 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
>>                */
>>               if (bd.remainder < mbus->buswidth)
>>                       early_bytes = bd.remainder;
>> -             else if ((mbus->addr) % (mbus->buswidth)) {
>> -                     early_bytes = mbus->buswidth - (mbus->addr) %
>> -                             (mbus->buswidth);
>> +             else if (bus_addr_offset(mbus)) {
>> +                     early_bytes = mbus->buswidth - bus_addr_offset(mbus);
>>                       if ((bd.remainder - early_bytes) < mbus->buswidth)
>>                               early_bytes = bd.remainder;
>>               }
>> @@ -928,7 +932,7 @@ static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
>>                        * Master now aligned
>>                        * - if slave is not then we must set its width down
>>                        */
>> -                     if (sbus->addr % sbus->buswidth) {
>> +                     if (bus_addr_offset(sbus)) {
>>                               dev_dbg(&pl08x->adev->dev,
>>                                       "%s set down bus width to one byte\n",
>>                                       __func__);
>> --
>> 1.7.12.1
>>
>
> _______________________________________________
> linux-arm-kernel mailing list
> linux-arm-kernel@lists.infradead.org
> http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
Linus Walleij Aug. 21, 2013, 9:01 p.m. UTC | #3
On Wed, Aug 14, 2013 at 2:52 PM, Andre Przywara
<andre.przywara@linaro.org> wrote:

> In Rob's recent pull request the patch
>         ARM: highbank: select ARCH_DMA_ADDR_T_64BIT for LPAE
> promotes dma_addr_t to 64bit, which breaks compilation of the
> AMBA PL08x DMA driver.
> GCC has no function for the 64bit/8bit modulo operation.
> Looking more closely the divisor can only be 1, 2 or 4, so the full
> featured '%' modulo operation is overkill and can be replaced by
> simple bit masking.
>
> Signed-off-by: Andre Przywara <andre.przywara@linaro.org>

Can you give some context? Which platform have you tested
this on after the change, or is this only compile-tested?

Of the current defconfigs selecting this driver, none is 64bit,
so makes me a bit curious if there is really some PL08x
on the Highbank or similar?

Yours,
Linus Walleij
Andre Przywara Aug. 21, 2013, 9:22 p.m. UTC | #4
On 08/21/2013 11:01 PM, Linus Walleij wrote:
> On Wed, Aug 14, 2013 at 2:52 PM, Andre Przywara
> <andre.przywara@linaro.org> wrote:
>
>> In Rob's recent pull request the patch
>>          ARM: highbank: select ARCH_DMA_ADDR_T_64BIT for LPAE
>> promotes dma_addr_t to 64bit, which breaks compilation of the
>> AMBA PL08x DMA driver.
>> GCC has no function for the 64bit/8bit modulo operation.
>> Looking more closely the divisor can only be 1, 2 or 4, so the full
>> featured '%' modulo operation is overkill and can be replaced by
>> simple bit masking.
>>
>> Signed-off-by: Andre Przywara <andre.przywara@linaro.org>
>
> Can you give some context? Which platform have you tested
> this on after the change, or is this only compile-tested?

Only compile tested (sorry, forgot to mention that).
Actually I don't have this hardware anywhere, I just happened to had 
this in my config - probably just by accident as a leftover from some 
experiments.
So if you have access to any of those platforms, please give it a test.

> Of the current defconfigs selecting this driver, none is 64bit,

But it would be selected by allyesconfig or randconfig, right? And with 
more and more platforms moving into single zImage, we will spot more of 
those, as one "64-bit platform" will promote the types for every other.

> so makes me a bit curious if there is really some PL08x
> on the Highbank or similar?

Sorry, no hidden DMA controller on Highbank ;-)

Regards,
Andre.
Matt Sealey Aug. 21, 2013, 9:49 p.m. UTC | #5
On Wed, Aug 21, 2013 at 4:01 PM, Linus Walleij <linus.walleij@linaro.org> wrote:
> On Wed, Aug 14, 2013 at 2:52 PM, Andre Przywara
> <andre.przywara@linaro.org> wrote:
>
>> In Rob's recent pull request the patch
>>         ARM: highbank: select ARCH_DMA_ADDR_T_64BIT for LPAE
>> promotes dma_addr_t to 64bit, which breaks compilation of the
>> AMBA PL08x DMA driver.
>> GCC has no function for the 64bit/8bit modulo operation.
>> Looking more closely the divisor can only be 1, 2 or 4, so the full
>> featured '%' modulo operation is overkill and can be replaced by
>> simple bit masking.
>>
>> Signed-off-by: Andre Przywara <andre.przywara@linaro.org>
>
> Can you give some context? Which platform have you tested
> this on after the change, or is this only compile-tested?
>
> Of the current defconfigs selecting this driver, none is 64bit,
> so makes me a bit curious if there is really some PL08x
> on the Highbank or similar?

I thought the whole PL08x thing was just a side-effect, no?

The issues here I see that Rob is trying to fix:

* If you want to put a 64-bit address into a DMA controller somewhere
- be it a register (64-bit) or descriptor (64-bit), dma_addr_t is how
the kernel handles it. If dma_addr_t is only 32-bit, how can you
specify the address there? How can you do some mapping and return a
pointer into 64-bit address space into a 32-bit value? How can you
even allocate memory (dma_alloc_coherent, for example) for the range
you want?

The objections here are obvious - as below:

1) In the case where you have an LPAE platform which enables Rob's
patch, and you use dma_addr_t and the write descriptors in memory that
are 32-bit or registers that are 32-bit using specific knowledge of
the size of dma_addr_t or just assume it's a 32-bit wide value (why
would anyone do that?) then these drivers ARE buggy and should be
fixed. But that could be all of them if they're not setting their
masks correctly.. considering you can mix DMA controllers in a system,
some with 32-bit address spaces and some with >32-bit address spaces,
making dma_addr_t 64-bit might fix one and break all the others
depending on driver quality (i.e. masks..)

2) dma_addr_t being 64-bit everywhere couldn't be that much of a
problem, though, with properly written drivers. The main culprits will
be (if they're broken):

- If they're writing values into descriptors and just using writel()
then surely there'll be a compiler warning first, which may be ignored
by the developer, and then end up writing the wrong 32-bit value to
memory or just the lower 32-bits giving a perfectly 'random' DMA
failure where it silently walks over something 4GB below where it
needed to be or somewhere else (so you'd see memory trashing, random
bus faults..)

- Passing dma_addr_t in function calls to assembly code, with an
assumption of the register usage in the following case where it's
called from C (rarer but, could happen):

void write_some_dma_thing_in_assembly(int a, dma_addr_t b, char c);

 -> r0, r1, r2 on 32-bit dma_addr_t
-> r0, r2+r3, stack on 64-bit dma_addr_t

If the assembly code assumes the dma_addr_t is 32-bit and therefore
the address (per EABI) is in r1 and not r2+r3 it will get garbage from
elsewhere in the 64-bit dma_addr_t case, and plug it into the target
descriptor or register. Same memory trashing and random bus faults as
above.

They're going to explode and there's not going to be ANY warning until
there's an explosion..

On the second point, I wonder if sparse or checkpatch can figure out
somehow if a 64-bit value is being passed as the second argument to a
function where the first argument is smaller, and print a nitpick
warning about how one might want to be careful about doing this
considering possible ABI differences depending on possible
configuration differences? Does it already?

~~

So, here's the question; how do you determine what size dma_addr_t is
considering that it may be possible that the actual size for the DMA
controller may be either, but you have a minimum of 64-bit because at
least ONE DMA controller needs that can address the full range? If any
drivers are accidentally setting >32-bit masks on non-LPAE then
there're bad addresses coming back anyway and these should have been
caught already, right?

So nothing will magically get trashed in the beginning, only during
rafts of patches to "update the dma mask to full 64-bit" where it
actually depends on the runtime CPU and config options in play anyway?

I recall something around late-to-discontinued time on Windows
XP/Server 2003 where they quit certifying 32-bit drivers that would
bail or do weird things given a 64-bit pointer. The driver core of
Windows managed to move up to 64-bit dma addresses (even on "32-bit"
hardware) internally forcing the driver developers with it. The main
reason: every x86 chip worth a damn that they supported had PAE so it
could have had 8GB of RAM, on Server 2003 Datacenter Edition I think
the upper limit was 32GB. It was perfectly possible to have a "32-bit"
system with DMA going right up to the end of the extended physical
memory address..

Did I miss something here?

-- Matt
diff mbox

Patch

diff --git a/drivers/dma/amba-pl08x.c b/drivers/dma/amba-pl08x.c
index 06fe45c..29e1cf9 100644
--- a/drivers/dma/amba-pl08x.c
+++ b/drivers/dma/amba-pl08x.c
@@ -286,6 +286,11 @@  static inline struct pl08x_txd *to_pl08x_txd(struct dma_async_tx_descriptor *tx)
 	return container_of(tx, struct pl08x_txd, vd.tx);
 }
 
+static int bus_addr_offset(struct pl08x_bus_data *bus)
+{
+	return bus->addr & (bus->buswidth - 1);
+}
+
 /*
  * Mux handling.
  *
@@ -886,8 +891,8 @@  static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 				return 0;
 			}
 
-			if ((bd.srcbus.addr % bd.srcbus.buswidth) ||
-					(bd.dstbus.addr % bd.dstbus.buswidth)) {
+			if (bus_addr_offset(&bd.srcbus) ||
+					bus_addr_offset(&bd.dstbus)) {
 				dev_err(&pl08x->adev->dev,
 					"%s src & dst address must be aligned to src"
 					" & dst width if peripheral is flow controller",
@@ -908,9 +913,8 @@  static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 		 */
 		if (bd.remainder < mbus->buswidth)
 			early_bytes = bd.remainder;
-		else if ((mbus->addr) % (mbus->buswidth)) {
-			early_bytes = mbus->buswidth - (mbus->addr) %
-				(mbus->buswidth);
+		else if (bus_addr_offset(mbus)) {
+			early_bytes = mbus->buswidth - bus_addr_offset(mbus);
 			if ((bd.remainder - early_bytes) < mbus->buswidth)
 				early_bytes = bd.remainder;
 		}
@@ -928,7 +932,7 @@  static int pl08x_fill_llis_for_desc(struct pl08x_driver_data *pl08x,
 			 * Master now aligned
 			 * - if slave is not then we must set its width down
 			 */
-			if (sbus->addr % sbus->buswidth) {
+			if (bus_addr_offset(sbus)) {
 				dev_dbg(&pl08x->adev->dev,
 					"%s set down bus width to one byte\n",
 					__func__);