diff mbox series

[1/2] uas: revert from scsi_add_host_with_dma() to scsi_add_host()

Message ID 20201128154849.3193-1-tom.ty89@gmail.com
State New
Headers show
Series [1/2] uas: revert from scsi_add_host_with_dma() to scsi_add_host() | expand

Commit Message

Tom Yan Nov. 28, 2020, 3:48 p.m. UTC
Apparently the former (with the chosen dma_dev) may cause problem in certain
case (e.g. where thunderbolt dock and intel iommu are involved). The error
observed was:

XHCI swiotlb buffer is full / DMAR: Device bounce map failed

For now we retain the clamp for hw_max_sectors against the dma_max_mapping_size.
Since the device/size for the clamp that is applied when the scsi request queue
is initialized/allocated is different than the one used here, we invalidate the
early clamping by making a fallback blk_queue_max_hw_sectors() call.

Signed-off-by: Tom Yan <tom.ty89@gmail.com>
---
 drivers/usb/storage/uas.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

Comments

Hans de Goede Nov. 30, 2020, 9:48 a.m. UTC | #1
Hi,

On 11/28/20 4:48 PM, Tom Yan wrote:
> Apparently the former (with the chosen dma_dev) may cause problem in certain

> case (e.g. where thunderbolt dock and intel iommu are involved). The error

> observed was:

> 

> XHCI swiotlb buffer is full / DMAR: Device bounce map failed

> 

> For now we retain the clamp for hw_max_sectors against the dma_max_mapping_size.

> Since the device/size for the clamp that is applied when the scsi request queue

> is initialized/allocated is different than the one used here, we invalidate the

> early clamping by making a fallback blk_queue_max_hw_sectors() call.

> 

> Signed-off-by: Tom Yan <tom.ty89@gmail.com>


I can confirm that this fixes the network performance on a Lenovo Thunderbolt
dock generation 2, which uses an USB attach NIC.

With this patch added on top of 5.10-rc5 scp performance to another machine
on the local gbit LAN goes back from the regressed 1 MB/s to its original 100MB/s
as it should be:

Tested-by: Hans de Goede <hdegoede@redhat.com>


Regards,

Hans


> ---

>  drivers/usb/storage/uas.c | 11 +++++++----

>  1 file changed, 7 insertions(+), 4 deletions(-)

> 

> diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c

> index c8a577309e8f..5db1325cea20 100644

> --- a/drivers/usb/storage/uas.c

> +++ b/drivers/usb/storage/uas.c

> @@ -843,18 +843,21 @@ static int uas_slave_alloc(struct scsi_device *sdev)

>  static int uas_slave_configure(struct scsi_device *sdev)

>  {

>  	struct uas_dev_info *devinfo = sdev->hostdata;

> -	struct device *dev = sdev->host->dma_dev;

> +	struct usb_device *udev = devinfo->udev;

>  

>  	if (devinfo->flags & US_FL_MAX_SECTORS_64)

>  		blk_queue_max_hw_sectors(sdev->request_queue, 64);

>  	else if (devinfo->flags & US_FL_MAX_SECTORS_240)

>  		blk_queue_max_hw_sectors(sdev->request_queue, 240);

> -	else if (devinfo->udev->speed >= USB_SPEED_SUPER)

> +	else if (udev->speed >= USB_SPEED_SUPER)

>  		blk_queue_max_hw_sectors(sdev->request_queue, 2048);

> +	else

> +		blk_queue_max_hw_sectors(sdev->request_queue,

> +					 SCSI_DEFAULT_MAX_SECTORS);

>  

>  	blk_queue_max_hw_sectors(sdev->request_queue,

>  		min_t(size_t, queue_max_hw_sectors(sdev->request_queue),

> -		      dma_max_mapping_size(dev) >> SECTOR_SHIFT));

> +		      dma_max_mapping_size(udev->bus->sysdev) >> SECTOR_SHIFT));

>  

>  	if (devinfo->flags & US_FL_NO_REPORT_OPCODES)

>  		sdev->no_report_opcodes = 1;

> @@ -1040,7 +1043,7 @@ static int uas_probe(struct usb_interface *intf, const struct usb_device_id *id)

>  	shost->can_queue = devinfo->qdepth - 2;

>  

>  	usb_set_intfdata(intf, shost);

> -	result = scsi_add_host_with_dma(shost, &intf->dev, udev->bus->sysdev);

> +	result = scsi_add_host(shost, &intf->dev);

>  	if (result)

>  		goto free_streams;

>  

>
Tom Yan Nov. 30, 2020, 7:30 p.m. UTC | #2
Hmm, I wonder if I/we wrongly assumed that the dma_dev used for the
hw_max_sectors clamping in __scsi_init_queue() is wrong.

So instead of adding a fallback else-clause here or using "sysdev" as
dma_dev like in the current upstream code, maybe we should actually do
a three-way min: the "changed" hw_max_sectors, dma_max_mapping_size of
dma_dev("dev") and dma_max_mapping_size of sysdev...?

On Mon, 30 Nov 2020 at 17:48, Hans de Goede <hdegoede@redhat.com> wrote:
>

> Hi,

>

> On 11/28/20 4:48 PM, Tom Yan wrote:

> > Apparently the former (with the chosen dma_dev) may cause problem in certain

> > case (e.g. where thunderbolt dock and intel iommu are involved). The error

> > observed was:

> >

> > XHCI swiotlb buffer is full / DMAR: Device bounce map failed

> >

> > For now we retain the clamp for hw_max_sectors against the dma_max_mapping_size.

> > Since the device/size for the clamp that is applied when the scsi request queue

> > is initialized/allocated is different than the one used here, we invalidate the

> > early clamping by making a fallback blk_queue_max_hw_sectors() call.

> >

> > Signed-off-by: Tom Yan <tom.ty89@gmail.com>

>

> I can confirm that this fixes the network performance on a Lenovo Thunderbolt

> dock generation 2, which uses an USB attach NIC.

>

> With this patch added on top of 5.10-rc5 scp performance to another machine

> on the local gbit LAN goes back from the regressed 1 MB/s to its original 100MB/s

> as it should be:

>

> Tested-by: Hans de Goede <hdegoede@redhat.com>

>

> Regards,

>

> Hans

>

>

> > ---

> >  drivers/usb/storage/uas.c | 11 +++++++----

> >  1 file changed, 7 insertions(+), 4 deletions(-)

> >

> > diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c

> > index c8a577309e8f..5db1325cea20 100644

> > --- a/drivers/usb/storage/uas.c

> > +++ b/drivers/usb/storage/uas.c

> > @@ -843,18 +843,21 @@ static int uas_slave_alloc(struct scsi_device *sdev)

> >  static int uas_slave_configure(struct scsi_device *sdev)

> >  {

> >       struct uas_dev_info *devinfo = sdev->hostdata;

> > -     struct device *dev = sdev->host->dma_dev;

> > +     struct usb_device *udev = devinfo->udev;

> >

> >       if (devinfo->flags & US_FL_MAX_SECTORS_64)

> >               blk_queue_max_hw_sectors(sdev->request_queue, 64);

> >       else if (devinfo->flags & US_FL_MAX_SECTORS_240)

> >               blk_queue_max_hw_sectors(sdev->request_queue, 240);

> > -     else if (devinfo->udev->speed >= USB_SPEED_SUPER)

> > +     else if (udev->speed >= USB_SPEED_SUPER)

> >               blk_queue_max_hw_sectors(sdev->request_queue, 2048);

> > +     else

> > +             blk_queue_max_hw_sectors(sdev->request_queue,

> > +                                      SCSI_DEFAULT_MAX_SECTORS);

> >

> >       blk_queue_max_hw_sectors(sdev->request_queue,

> >               min_t(size_t, queue_max_hw_sectors(sdev->request_queue),

> > -                   dma_max_mapping_size(dev) >> SECTOR_SHIFT));

> > +                   dma_max_mapping_size(udev->bus->sysdev) >> SECTOR_SHIFT));

> >

> >       if (devinfo->flags & US_FL_NO_REPORT_OPCODES)

> >               sdev->no_report_opcodes = 1;

> > @@ -1040,7 +1043,7 @@ static int uas_probe(struct usb_interface *intf, const struct usb_device_id *id)

> >       shost->can_queue = devinfo->qdepth - 2;

> >

> >       usb_set_intfdata(intf, shost);

> > -     result = scsi_add_host_with_dma(shost, &intf->dev, udev->bus->sysdev);

> > +     result = scsi_add_host(shost, &intf->dev);

> >       if (result)

> >               goto free_streams;

> >

> >

>
Hans de Goede Dec. 1, 2020, 11:09 a.m. UTC | #3
Hi,

On 11/30/20 8:30 PM, Tom Yan wrote:
> Hmm, I wonder if I/we wrongly assumed that the dma_dev used for the

> hw_max_sectors clamping in __scsi_init_queue() is wrong.

> 

> So instead of adding a fallback else-clause here or using "sysdev" as

> dma_dev like in the current upstream code, maybe we should actually do

> a three-way min: the "changed" hw_max_sectors, dma_max_mapping_size of

> dma_dev("dev") and dma_max_mapping_size of sysdev...?


So both here and in the 2/2 patch thread there are lots of open questions,
which to me suggests that for 5.10 we really should just go with the 3 reverts
which I suggested earlier.

Regards,

Hans



> 

> On Mon, 30 Nov 2020 at 17:48, Hans de Goede <hdegoede@redhat.com> wrote:

>>

>> Hi,

>>

>> On 11/28/20 4:48 PM, Tom Yan wrote:

>>> Apparently the former (with the chosen dma_dev) may cause problem in certain

>>> case (e.g. where thunderbolt dock and intel iommu are involved). The error

>>> observed was:

>>>

>>> XHCI swiotlb buffer is full / DMAR: Device bounce map failed

>>>

>>> For now we retain the clamp for hw_max_sectors against the dma_max_mapping_size.

>>> Since the device/size for the clamp that is applied when the scsi request queue

>>> is initialized/allocated is different than the one used here, we invalidate the

>>> early clamping by making a fallback blk_queue_max_hw_sectors() call.

>>>

>>> Signed-off-by: Tom Yan <tom.ty89@gmail.com>

>>

>> I can confirm that this fixes the network performance on a Lenovo Thunderbolt

>> dock generation 2, which uses an USB attach NIC.

>>

>> With this patch added on top of 5.10-rc5 scp performance to another machine

>> on the local gbit LAN goes back from the regressed 1 MB/s to its original 100MB/s

>> as it should be:

>>

>> Tested-by: Hans de Goede <hdegoede@redhat.com>

>>

>> Regards,

>>

>> Hans

>>

>>

>>> ---

>>>  drivers/usb/storage/uas.c | 11 +++++++----

>>>  1 file changed, 7 insertions(+), 4 deletions(-)

>>>

>>> diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c

>>> index c8a577309e8f..5db1325cea20 100644

>>> --- a/drivers/usb/storage/uas.c

>>> +++ b/drivers/usb/storage/uas.c

>>> @@ -843,18 +843,21 @@ static int uas_slave_alloc(struct scsi_device *sdev)

>>>  static int uas_slave_configure(struct scsi_device *sdev)

>>>  {

>>>       struct uas_dev_info *devinfo = sdev->hostdata;

>>> -     struct device *dev = sdev->host->dma_dev;

>>> +     struct usb_device *udev = devinfo->udev;

>>>

>>>       if (devinfo->flags & US_FL_MAX_SECTORS_64)

>>>               blk_queue_max_hw_sectors(sdev->request_queue, 64);

>>>       else if (devinfo->flags & US_FL_MAX_SECTORS_240)

>>>               blk_queue_max_hw_sectors(sdev->request_queue, 240);

>>> -     else if (devinfo->udev->speed >= USB_SPEED_SUPER)

>>> +     else if (udev->speed >= USB_SPEED_SUPER)

>>>               blk_queue_max_hw_sectors(sdev->request_queue, 2048);

>>> +     else

>>> +             blk_queue_max_hw_sectors(sdev->request_queue,

>>> +                                      SCSI_DEFAULT_MAX_SECTORS);

>>>

>>>       blk_queue_max_hw_sectors(sdev->request_queue,

>>>               min_t(size_t, queue_max_hw_sectors(sdev->request_queue),

>>> -                   dma_max_mapping_size(dev) >> SECTOR_SHIFT));

>>> +                   dma_max_mapping_size(udev->bus->sysdev) >> SECTOR_SHIFT));

>>>

>>>       if (devinfo->flags & US_FL_NO_REPORT_OPCODES)

>>>               sdev->no_report_opcodes = 1;

>>> @@ -1040,7 +1043,7 @@ static int uas_probe(struct usb_interface *intf, const struct usb_device_id *id)

>>>       shost->can_queue = devinfo->qdepth - 2;

>>>

>>>       usb_set_intfdata(intf, shost);

>>> -     result = scsi_add_host_with_dma(shost, &intf->dev, udev->bus->sysdev);

>>> +     result = scsi_add_host(shost, &intf->dev);

>>>       if (result)

>>>               goto free_streams;

>>>

>>>

>>

>
diff mbox series

Patch

diff --git a/drivers/usb/storage/uas.c b/drivers/usb/storage/uas.c
index c8a577309e8f..5db1325cea20 100644
--- a/drivers/usb/storage/uas.c
+++ b/drivers/usb/storage/uas.c
@@ -843,18 +843,21 @@  static int uas_slave_alloc(struct scsi_device *sdev)
 static int uas_slave_configure(struct scsi_device *sdev)
 {
 	struct uas_dev_info *devinfo = sdev->hostdata;
-	struct device *dev = sdev->host->dma_dev;
+	struct usb_device *udev = devinfo->udev;
 
 	if (devinfo->flags & US_FL_MAX_SECTORS_64)
 		blk_queue_max_hw_sectors(sdev->request_queue, 64);
 	else if (devinfo->flags & US_FL_MAX_SECTORS_240)
 		blk_queue_max_hw_sectors(sdev->request_queue, 240);
-	else if (devinfo->udev->speed >= USB_SPEED_SUPER)
+	else if (udev->speed >= USB_SPEED_SUPER)
 		blk_queue_max_hw_sectors(sdev->request_queue, 2048);
+	else
+		blk_queue_max_hw_sectors(sdev->request_queue,
+					 SCSI_DEFAULT_MAX_SECTORS);
 
 	blk_queue_max_hw_sectors(sdev->request_queue,
 		min_t(size_t, queue_max_hw_sectors(sdev->request_queue),
-		      dma_max_mapping_size(dev) >> SECTOR_SHIFT));
+		      dma_max_mapping_size(udev->bus->sysdev) >> SECTOR_SHIFT));
 
 	if (devinfo->flags & US_FL_NO_REPORT_OPCODES)
 		sdev->no_report_opcodes = 1;
@@ -1040,7 +1043,7 @@  static int uas_probe(struct usb_interface *intf, const struct usb_device_id *id)
 	shost->can_queue = devinfo->qdepth - 2;
 
 	usb_set_intfdata(intf, shost);
-	result = scsi_add_host_with_dma(shost, &intf->dev, udev->bus->sysdev);
+	result = scsi_add_host(shost, &intf->dev);
 	if (result)
 		goto free_streams;