diff mbox series

[v2] ata: libata-scsi: Fix get identity data failed

Message ID 20230505025712.19438-1-yangxingui@huawei.com
State New
Headers show
Series [v2] ata: libata-scsi: Fix get identity data failed | expand

Commit Message

Xingui Yang May 5, 2023, 2:57 a.m. UTC
The function ata_get_identity() uses the helper ata_scsi_find_dev() to get
the ata_device structure of a scsi device. However, when the ata device is
managed by libsas, ata_scsi_find_dev() returns NULL, turning
ata_get_identity() into a nop and always returns -ENOMSG.

Fix this by checking whether ATA_FLAG_SAS_HOST is set for ap->flags in
ata_scsi_find_dev(), as the flag is only used in libsas. If
ATA_FLAG_SAS_HOST is set, use sas_to_ata_dev() to find associated ATA
device.

Signed-off-by: Xingui Yang <yangxingui@huawei.com>
---
Changes to v1 
- Let ata_scsi_find_dev() return the correct value and don't keep replacing
calls to ata_scsi_find_dev().

 drivers/ata/libata-scsi.c | 12 ++++++++++--
 drivers/ata/libata.h      |  2 +-
 2 files changed, 11 insertions(+), 3 deletions(-)

Comments

John Garry May 5, 2023, 8:25 a.m. UTC | #1
On 05/05/2023 09:17, Damien Le Moal wrote:
>> --- a/drivers/ata/libata-scsi.c
>> +++ b/drivers/ata/libata-scsi.c
>> @@ -26,6 +26,7 @@
>>   #include <scsi/scsi_device.h>
>>   #include <scsi/scsi_tcq.h>
>>   #include <scsi/scsi_transport.h>
>> +#include <scsi/libsas.h>

hmmm... is it really acceptable that libata is referencing libsas? I 
didn't think that it would be. libsas uses libata, not the other way around.

>>   #include <linux/libata.h>
>>   #include <linux/hdreg.h>
>>   #include <linux/uaccess.h>
>> @@ -2745,10 +2746,17 @@ static struct ata_device *__ata_scsi_find_dev(struct ata_port *ap,
>>    *	Associated ATA device, or %NULL if not found.
>>    */
>>   struct ata_device *
>> -ata_scsi_find_dev(struct ata_port *ap, const struct scsi_device *scsidev)
> Why drop the const ?
> 
>> +ata_scsi_find_dev(struct ata_port *ap, struct scsi_device *scsidev)
>>   {
>> -	struct ata_device *dev = __ata_scsi_find_dev(ap, scsidev);
>> +	struct ata_device *dev;
>> +
>> +	if (ap->flags & ATA_FLAG_SAS_HOST) {

And this is SAS host. Not necessarily libsas (even though with ipr 
libata usage gone, it would be the only user).

>> +		struct domain_device *ddev = sdev_to_domain_dev(scsidev);
>> +
>> +		return sas_to_ata_dev(ddev);
> Do you really need the ddev variable ? Also, this really should be a libsas
> helper. I beleive this pattern is repeated in several places in libsas, so that
> would nicely clean things up.
>
Thanks,
John
Xingui Yang May 5, 2023, 9:14 a.m. UTC | #2
On 2023/5/5 16:25, John Garry wrote:
> On 05/05/2023 09:17, Damien Le Moal wrote:
>>> --- a/drivers/ata/libata-scsi.c
>>> +++ b/drivers/ata/libata-scsi.c
>>> @@ -26,6 +26,7 @@
>>>   #include <scsi/scsi_device.h>
>>>   #include <scsi/scsi_tcq.h>
>>>   #include <scsi/scsi_transport.h>
>>> +#include <scsi/libsas.h>
> 
> hmmm... is it really acceptable that libata is referencing libsas? I 
> didn't think that it would be. libsas uses libata, not the other way 
> around.
Yeah, I didn't expect that either. Is there any other way? If so, is 
patch v1 OK?
> 
>>>   #include <linux/libata.h>
>>>   #include <linux/hdreg.h>
>>>   #include <linux/uaccess.h>
>>> @@ -2745,10 +2746,17 @@ static struct ata_device 
>>> *__ata_scsi_find_dev(struct ata_port *ap,
>>>    *    Associated ATA device, or %NULL if not found.
>>>    */
>>>   struct ata_device *
>>> -ata_scsi_find_dev(struct ata_port *ap, const struct scsi_device 
>>> *scsidev)
>> Why drop the const ?
>>
>>> +ata_scsi_find_dev(struct ata_port *ap, struct scsi_device *scsidev)
>>>   {
>>> -    struct ata_device *dev = __ata_scsi_find_dev(ap, scsidev);
>>> +    struct ata_device *dev;
>>> +
>>> +    if (ap->flags & ATA_FLAG_SAS_HOST) {
> 
> And this is SAS host. Not necessarily libsas (even though with ipr 
> libata usage gone, it would be the only user).
Add a new flag only for libsas?

Thanks,
Xingui
.
> 
>>> +        struct domain_device *ddev = sdev_to_domain_dev(scsidev);
>>> +
>>> +        return sas_to_ata_dev(ddev);
>> Do you really need the ddev variable ? Also, this really should be a 
>> libsas
>> helper. I beleive this pattern is repeated in several places in 
>> libsas, so that
>> would nicely clean things up.
>>
> Thanks,
> John
> .
John Garry May 5, 2023, 9:51 a.m. UTC | #3
On 05/05/2023 10:14, yangxingui wrote:
>> hmmm... is it really acceptable that libata is referencing libsas? I 
>> didn't think that it would be. libsas uses libata, not the other way 
>> around.
> Yeah, I didn't expect that either. Is there any other way? If so, is 
> patch v1 OK?

I still think that we can do better than v1.

>>
>>>>   #include <linux/libata.h>
>>>>   #include <linux/hdreg.h>
>>>>   #include <linux/uaccess.h>
>>>> @@ -2745,10 +2746,17 @@ static struct ata_device 
>>>> *__ata_scsi_find_dev(struct ata_port *ap,
>>>>    *    Associated ATA device, or %NULL if not found.
>>>>    */
>>>>   struct ata_device *
>>>> -ata_scsi_find_dev(struct ata_port *ap, const struct scsi_device 
>>>> *scsidev)
>>> Why drop the const ?
>>>
>>>> +ata_scsi_find_dev(struct ata_port *ap, struct scsi_device *scsidev)
>>>>   {
>>>> -    struct ata_device *dev = __ata_scsi_find_dev(ap, scsidev);
>>>> +    struct ata_device *dev;
>>>> +
>>>> +    if (ap->flags & ATA_FLAG_SAS_HOST) {
>>
>> And this is SAS host. Not necessarily libsas (even though with ipr 
>> libata usage gone, it would be the only user).
> Add a new flag only for libsas?

No, because of previous reason.

Please remind me - at what point do we error within ata_scsi_find_dev() 
and return NULL for a libsas host?

Note: it would be good to include that commit message for future reference.

Maybe we could add a method to ata_port_operations to do this lookup. I 
assume that is abusing ata_port_operations purpose, since it's mostly 
for HW methods.

Or do we actually use sdev->hostdata for libata or libsas? If not, maybe 
we could store the struct ata_device pointer there.

I'm just thinking out loud now...

Thanks,
John
Jason Yan May 6, 2023, 2:11 a.m. UTC | #4
On 2023/5/5 17:14, yangxingui wrote:
> 
> 
> On 2023/5/5 16:25, John Garry wrote:
>> On 05/05/2023 09:17, Damien Le Moal wrote:
>>>> --- a/drivers/ata/libata-scsi.c
>>>> +++ b/drivers/ata/libata-scsi.c
>>>> @@ -26,6 +26,7 @@
>>>>   #include <scsi/scsi_device.h>
>>>>   #include <scsi/scsi_tcq.h>
>>>>   #include <scsi/scsi_transport.h>
>>>> +#include <scsi/libsas.h>
>>
>> hmmm... is it really acceptable that libata is referencing libsas? I 
>> didn't think that it would be. libsas uses libata, not the other way 
>> around.
> Yeah, I didn't expect that either. Is there any other way? If so, is 
> patch v1 OK?

Hi Xingui,

Libsas should follow the standard way of libata to manage the ata 
structures. Not the opposite way. So what you should do is to find a way 
for libsas to behave as a normal ata driver. It's not good to make 
libata aware of libsas or referencing libsas.

If you have detailed questions you can ask me internally(which will be 
faster) or publicly through the maillist(which may have some delay).

Thanks,
Jason
Xingui Yang May 6, 2023, 9:57 a.m. UTC | #5
On 2023/5/5 17:51, John Garry wrote:
> On 05/05/2023 10:14, yangxingui wrote:
>>> hmmm... is it really acceptable that libata is referencing libsas? I 
>>> didn't think that it would be. libsas uses libata, not the other way 
>>> around.
>> Yeah, I didn't expect that either. Is there any other way? If so, is 
>> patch v1 OK?
> 
> I still think that we can do better than v1.
OK
> 
>>>
>>>>>   #include <linux/libata.h>
>>>>>   #include <linux/hdreg.h>
>>>>>   #include <linux/uaccess.h>
>>>>> @@ -2745,10 +2746,17 @@ static struct ata_device 
>>>>> *__ata_scsi_find_dev(struct ata_port *ap,
>>>>>    *    Associated ATA device, or %NULL if not found.
>>>>>    */
>>>>>   struct ata_device *
>>>>> -ata_scsi_find_dev(struct ata_port *ap, const struct scsi_device 
>>>>> *scsidev)
>>>> Why drop the const ?
>>>>
>>>>> +ata_scsi_find_dev(struct ata_port *ap, struct scsi_device *scsidev)
>>>>>   {
>>>>> -    struct ata_device *dev = __ata_scsi_find_dev(ap, scsidev);
>>>>> +    struct ata_device *dev;
>>>>> +
>>>>> +    if (ap->flags & ATA_FLAG_SAS_HOST) {
>>>
>>> And this is SAS host. Not necessarily libsas (even though with ipr 
>>> libata usage gone, it would be the only user).
>> Add a new flag only for libsas?
> 
> No, because of previous reason.
> 
> Please remind me - at what point do we error within ata_scsi_find_dev() 
> and return NULL for a libsas host?
The scsi_host_template filled by libsas and call ata_scsi_find_dev() has 
this problem. After sas_change_queue_depth() is fixed, only sas_ioctl() 
has this problem now.
> 
> Note: it would be good to include that commit message for future reference.
> 
> Maybe we could add a method to ata_port_operations to do this lookup. I 
> assume that is abusing ata_port_operations purpose, since it's mostly 
> for HW methods.
> 
> Or do we actually use sdev->hostdata for libata or libsas? If not, maybe 
> we could store the struct ata_device pointer there.
Well, it might be a way.

Thanks,
Xingui
.
> 
> I'm just thinking out loud now...
> 
> Thanks,
> John
> 
> 
> .
Damien Le Moal May 7, 2023, 2:51 p.m. UTC | #6
On 2023/05/05 18:06, yangxingui wrote:
> 
> 
> On 2023/5/5 16:17, Damien Le Moal wrote:
>> On 2023/05/05 11:57, Xingui Yang wrote:
>>> The function ata_get_identity() uses the helper ata_scsi_find_dev() to get
>>> the ata_device structure of a scsi device. However, when the ata device is
>>> managed by libsas, ata_scsi_find_dev() returns NULL, turning
>>> ata_get_identity() into a nop and always returns -ENOMSG.
>>
>> What do you do to hit the issue ? A while back for me it was the queue depth
>> setting causing problems. As Garry mentioned, this led to patch 141f3d6256e5
>> ("ata: libata-sata: Fix device queue depth control").
> Attempt to return the correct value at ata_scsi_find_dev() instead of 
> NULL, when the ata device is managed by libsas?

That I understand. My question is *what* user operation/command triggers this ?
Because on my test setup, under normal use, I do not see this issue (beside what
was already corrected with the queue depth control). Is the issue showing up
when using passthrough commands only ?
Damien Le Moal May 7, 2023, 3:02 p.m. UTC | #7
On 2023/05/05 18:51, John Garry wrote:
> On 05/05/2023 10:14, yangxingui wrote:
>>> hmmm... is it really acceptable that libata is referencing libsas? I 
>>> didn't think that it would be. libsas uses libata, not the other way 
>>> around.
>> Yeah, I didn't expect that either. Is there any other way? If so, is 
>> patch v1 OK?
> 
> I still think that we can do better than v1.
> 
>>>
>>>>>   #include <linux/libata.h>
>>>>>   #include <linux/hdreg.h>
>>>>>   #include <linux/uaccess.h>
>>>>> @@ -2745,10 +2746,17 @@ static struct ata_device 
>>>>> *__ata_scsi_find_dev(struct ata_port *ap,
>>>>>    *    Associated ATA device, or %NULL if not found.
>>>>>    */
>>>>>   struct ata_device *
>>>>> -ata_scsi_find_dev(struct ata_port *ap, const struct scsi_device 
>>>>> *scsidev)
>>>> Why drop the const ?
>>>>
>>>>> +ata_scsi_find_dev(struct ata_port *ap, struct scsi_device *scsidev)
>>>>>   {
>>>>> -    struct ata_device *dev = __ata_scsi_find_dev(ap, scsidev);
>>>>> +    struct ata_device *dev;
>>>>> +
>>>>> +    if (ap->flags & ATA_FLAG_SAS_HOST) {
>>>
>>> And this is SAS host. Not necessarily libsas (even though with ipr 
>>> libata usage gone, it would be the only user).
>> Add a new flag only for libsas?
> 
> No, because of previous reason.
> 
> Please remind me - at what point do we error within ata_scsi_find_dev() 
> and return NULL for a libsas host?
> 
> Note: it would be good to include that commit message for future reference.
> 
> Maybe we could add a method to ata_port_operations to do this lookup. I 
> assume that is abusing ata_port_operations purpose, since it's mostly 
> for HW methods.
> 
> Or do we actually use sdev->hostdata for libata or libsas? If not, maybe 
> we could store the struct ata_device pointer there.
> 
> I'm just thinking out loud now...

Agree. Ideally, libasas should not be any different than a for a drive used with
ahci/sata/pata adapters. After all, all of them are scsi devices as well. So we
need to understand why this happens only with libsas and correct the device
setup there.
Damien Le Moal May 22, 2023, 1:35 a.m. UTC | #8
On 5/8/23 10:11, yangxingui wrote:
> 
> 
> On 2023/5/7 22:51, Damien Le Moal wrote:
>> On 2023/05/05 18:06, yangxingui wrote:
>>>
>>>
>>> On 2023/5/5 16:17, Damien Le Moal wrote:
>>>> On 2023/05/05 11:57, Xingui Yang wrote:
>>>>> The function ata_get_identity() uses the helper ata_scsi_find_dev() to get
>>>>> the ata_device structure of a scsi device. However, when the ata device is
>>>>> managed by libsas, ata_scsi_find_dev() returns NULL, turning
>>>>> ata_get_identity() into a nop and always returns -ENOMSG.
>>>>
>>>> What do you do to hit the issue ? A while back for me it was the queue depth
>>>> setting causing problems. As Garry mentioned, this led to patch 141f3d6256e5
>>>> ("ata: libata-sata: Fix device queue depth control").
>>> Attempt to return the correct value at ata_scsi_find_dev() instead of
>>> NULL, when the ata device is managed by libsas?
>>
>> That I understand. My question is *what* user operation/command triggers this ?
>> Because on my test setup, under normal use, I do not see this issue (beside what
>> was already corrected with the queue depth control). Is the issue showing up
>> when using passthrough commands only ?
> Yeah, we found that command "hdparm -i /dev/sdc" always return faild for 
> SATA HDD disk. as follows:
> [root@localhost ~]# hdparm -i /dev/sdc
> 
> /dev/sdc:
>   HDIO_GET_IDENTITY failed: Invalid argument

I cannot recreate this issue exactly like this. Here is my setup with a pm80xx
driver (Adaptec HBA):

[7:0:0:0]    disk    ATA      WDC  WUH721818AL W232  /dev/sdd   /dev/sg5
[7:0:1:0]    disk    ATA      WDC  WUH721818AL WTW2  /dev/sdi   /dev/sg6
[7:0:2:0]    disk    ATA      WDC  WUH722222AL Wf86  /dev/sdf   /dev/sg7
[7:0:3:0]    zbc     ATA      WDC  WSH722020AL W803  /dev/sdg   /dev/sg8

Using the first drive, I get:

sudo hdparm -i /dev/sdd

/dev/sdd:

 Model=WDC  WUH721818ALN604, FwRev=PCGNW232, SerialNo=3KG10LBK
 Config={ HardSect NotMFM HdSw>15uSec Fixed DTR>10Mbs }
 RawCHS=16383/16/63, TrkSize=0, SectSize=0, ECCbytes=56
 BuffType=DualPortCache, BuffSize=unknown, MaxMultSect=2, MultSect=off
 CurCHS=16383/16/63, CurSects=16514064, LBA=yes, LBAsects=4394582016
 IORDY=on/off, tPIO={min:120,w/IORDY:120}, tDMA={min:120,rec:120}
 PIO modes:  pio0 pio1 pio2 pio3 pio4
 DMA modes:  mdma0 mdma1 mdma2
 UDMA modes: udma0 udma1 udma2 udma3 udma4 udma5 *udma6
 AdvancedPM=yes: disabled (255) WriteCache=enabled
 Drive conforms to: unknown:  ATA/ATAPI-2,3,4,5,6,7

 * signifies the current active mode

So all good. However, for the following drives, I get:

sudo hdparm -i /dev/sdi

/dev/sdi:
 HDIO_GET_IDENTITY failed: No message of desired type

(same for sdf and sdg).

Will dig into this.
Damien Le Moal May 22, 2023, 7:02 a.m. UTC | #9
On 5/22/23 10:35, Damien Le Moal wrote:
> On 5/8/23 10:11, yangxingui wrote:
>>
>>
>> On 2023/5/7 22:51, Damien Le Moal wrote:
>>> On 2023/05/05 18:06, yangxingui wrote:
>>>>
>>>>
>>>> On 2023/5/5 16:17, Damien Le Moal wrote:
>>>>> On 2023/05/05 11:57, Xingui Yang wrote:
>>>>>> The function ata_get_identity() uses the helper ata_scsi_find_dev() to get
>>>>>> the ata_device structure of a scsi device. However, when the ata device is
>>>>>> managed by libsas, ata_scsi_find_dev() returns NULL, turning
>>>>>> ata_get_identity() into a nop and always returns -ENOMSG.
>>>>>
>>>>> What do you do to hit the issue ? A while back for me it was the queue depth
>>>>> setting causing problems. As Garry mentioned, this led to patch 141f3d6256e5
>>>>> ("ata: libata-sata: Fix device queue depth control").
>>>> Attempt to return the correct value at ata_scsi_find_dev() instead of
>>>> NULL, when the ata device is managed by libsas?
>>>
>>> That I understand. My question is *what* user operation/command triggers this ?
>>> Because on my test setup, under normal use, I do not see this issue (beside what
>>> was already corrected with the queue depth control). Is the issue showing up
>>> when using passthrough commands only ?
>> Yeah, we found that command "hdparm -i /dev/sdc" always return faild for 
>> SATA HDD disk. as follows:
>> [root@localhost ~]# hdparm -i /dev/sdc
>>
>> /dev/sdc:
>>   HDIO_GET_IDENTITY failed: Invalid argument
> 
> I cannot recreate this issue exactly like this. Here is my setup with a pm80xx
> driver (Adaptec HBA):
> 
> [7:0:0:0]    disk    ATA      WDC  WUH721818AL W232  /dev/sdd   /dev/sg5
> [7:0:1:0]    disk    ATA      WDC  WUH721818AL WTW2  /dev/sdi   /dev/sg6
> [7:0:2:0]    disk    ATA      WDC  WUH722222AL Wf86  /dev/sdf   /dev/sg7
> [7:0:3:0]    zbc     ATA      WDC  WSH722020AL W803  /dev/sdg   /dev/sg8
> 
> Using the first drive, I get:
> 
> sudo hdparm -i /dev/sdd
> 
> /dev/sdd:
> 
>  Model=WDC  WUH721818ALN604, FwRev=PCGNW232, SerialNo=3KG10LBK
>  Config={ HardSect NotMFM HdSw>15uSec Fixed DTR>10Mbs }
>  RawCHS=16383/16/63, TrkSize=0, SectSize=0, ECCbytes=56
>  BuffType=DualPortCache, BuffSize=unknown, MaxMultSect=2, MultSect=off
>  CurCHS=16383/16/63, CurSects=16514064, LBA=yes, LBAsects=4394582016
>  IORDY=on/off, tPIO={min:120,w/IORDY:120}, tDMA={min:120,rec:120}
>  PIO modes:  pio0 pio1 pio2 pio3 pio4
>  DMA modes:  mdma0 mdma1 mdma2
>  UDMA modes: udma0 udma1 udma2 udma3 udma4 udma5 *udma6
>  AdvancedPM=yes: disabled (255) WriteCache=enabled
>  Drive conforms to: unknown:  ATA/ATAPI-2,3,4,5,6,7
> 
>  * signifies the current active mode
> 
> So all good. However, for the following drives, I get:
> 
> sudo hdparm -i /dev/sdi
> 
> /dev/sdi:
>  HDIO_GET_IDENTITY failed: No message of desired type
> 
> (same for sdf and sdg).
> 
> Will dig into this.

OK, so the issue is that __ata_scsi_find_dev() calls ata_find_dev() with devno
== scsidev->id. This leads to devno being 0, 1, 2 and 3 for connected drives
sdd, sd1, sdf and sdg, as shown by lsscsi. However, each drive has its own
port+link, with the link for each one having  ata_link_max_devices() == 1, so
ata_find_dev() works only for the first drive with scsidev->id == 0 and fails
for the others. A naive fix would be this:

diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 7bb12deab70c..e4d6f17d7ccc 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -2718,7 +2718,7 @@ static struct ata_device *__ata_scsi_find_dev(struct
ata_port *ap,
        if (!sata_pmp_attached(ap)) {
                if (unlikely(scsidev->channel || scsidev->lun))
                        return NULL;
-               devno = scsidev->id;
+               devno = 0;
        } else {
                if (unlikely(scsidev->id || scsidev->lun))
                        return NULL;

And running this on my setup, it works. This makes libsas added ports/devices
look like AHCI ones, where all devices have ID 0 for the !pmp case.

However, I am not sure this would be OK for all setups...

John,

Any idea if there is any cases where libsas managed drives would endup not being
correctly identified by this change ? As long as a device always has its own
port, I do not see any issue. But is there a case where we could have multiple
devices on the same port ? Per libata, max is 2, and that is only for the IDE
master/slave case. Otherwise, it is always 1.

Not that looking at the pmp case, I am not confident at all that the
identification is correct for libsas. But I do not think that anyone would ever
connect a pmp box to a libsas HBA...
Jason Yan May 22, 2023, 8 a.m. UTC | #10
On 2023/5/22 15:02, Damien Le Moal wrote:
> On 5/22/23 10:35, Damien Le Moal wrote:
>> On 5/8/23 10:11, yangxingui wrote:
>>>
>>>
>>> On 2023/5/7 22:51, Damien Le Moal wrote:
>>>> On 2023/05/05 18:06, yangxingui wrote:
>>>>>
>>>>>
>>>>> On 2023/5/5 16:17, Damien Le Moal wrote:
>>>>>> On 2023/05/05 11:57, Xingui Yang wrote:
>>>>>>> The function ata_get_identity() uses the helper ata_scsi_find_dev() to get
>>>>>>> the ata_device structure of a scsi device. However, when the ata device is
>>>>>>> managed by libsas, ata_scsi_find_dev() returns NULL, turning
>>>>>>> ata_get_identity() into a nop and always returns -ENOMSG.
>>>>>>
>>>>>> What do you do to hit the issue ? A while back for me it was the queue depth
>>>>>> setting causing problems. As Garry mentioned, this led to patch 141f3d6256e5
>>>>>> ("ata: libata-sata: Fix device queue depth control").
>>>>> Attempt to return the correct value at ata_scsi_find_dev() instead of
>>>>> NULL, when the ata device is managed by libsas?
>>>>
>>>> That I understand. My question is *what* user operation/command triggers this ?
>>>> Because on my test setup, under normal use, I do not see this issue (beside what
>>>> was already corrected with the queue depth control). Is the issue showing up
>>>> when using passthrough commands only ?
>>> Yeah, we found that command "hdparm -i /dev/sdc" always return faild for
>>> SATA HDD disk. as follows:
>>> [root@localhost ~]# hdparm -i /dev/sdc
>>>
>>> /dev/sdc:
>>>    HDIO_GET_IDENTITY failed: Invalid argument
>>
>> I cannot recreate this issue exactly like this. Here is my setup with a pm80xx
>> driver (Adaptec HBA):
>>
>> [7:0:0:0]    disk    ATA      WDC  WUH721818AL W232  /dev/sdd   /dev/sg5
>> [7:0:1:0]    disk    ATA      WDC  WUH721818AL WTW2  /dev/sdi   /dev/sg6
>> [7:0:2:0]    disk    ATA      WDC  WUH722222AL Wf86  /dev/sdf   /dev/sg7
>> [7:0:3:0]    zbc     ATA      WDC  WSH722020AL W803  /dev/sdg   /dev/sg8
>>
>> Using the first drive, I get:
>>
>> sudo hdparm -i /dev/sdd
>>
>> /dev/sdd:
>>
>>   Model=WDC  WUH721818ALN604, FwRev=PCGNW232, SerialNo=3KG10LBK
>>   Config={ HardSect NotMFM HdSw>15uSec Fixed DTR>10Mbs }
>>   RawCHS=16383/16/63, TrkSize=0, SectSize=0, ECCbytes=56
>>   BuffType=DualPortCache, BuffSize=unknown, MaxMultSect=2, MultSect=off
>>   CurCHS=16383/16/63, CurSects=16514064, LBA=yes, LBAsects=4394582016
>>   IORDY=on/off, tPIO={min:120,w/IORDY:120}, tDMA={min:120,rec:120}
>>   PIO modes:  pio0 pio1 pio2 pio3 pio4
>>   DMA modes:  mdma0 mdma1 mdma2
>>   UDMA modes: udma0 udma1 udma2 udma3 udma4 udma5 *udma6
>>   AdvancedPM=yes: disabled (255) WriteCache=enabled
>>   Drive conforms to: unknown:  ATA/ATAPI-2,3,4,5,6,7
>>
>>   * signifies the current active mode
>>
>> So all good. However, for the following drives, I get:
>>
>> sudo hdparm -i /dev/sdi
>>
>> /dev/sdi:
>>   HDIO_GET_IDENTITY failed: No message of desired type
>>
>> (same for sdf and sdg).
>>
>> Will dig into this.
> 
> OK, so the issue is that __ata_scsi_find_dev() calls ata_find_dev() with devno
> == scsidev->id. This leads to devno being 0, 1, 2 and 3 for connected drives
> sdd, sd1, sdf and sdg, as shown by lsscsi. However, each drive has its own
> port+link, with the link for each one having  ata_link_max_devices() == 1, so
> ata_find_dev() works only for the first drive with scsidev->id == 0 and fails
> for the others. A naive fix would be this:
> 
> diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
> index 7bb12deab70c..e4d6f17d7ccc 100644
> --- a/drivers/ata/libata-scsi.c
> +++ b/drivers/ata/libata-scsi.c
> @@ -2718,7 +2718,7 @@ static struct ata_device *__ata_scsi_find_dev(struct
> ata_port *ap,
>          if (!sata_pmp_attached(ap)) {
>                  if (unlikely(scsidev->channel || scsidev->lun))
>                          return NULL;
> -               devno = scsidev->id;
> +               devno = 0;
>          } else {
>                  if (unlikely(scsidev->id || scsidev->lun))
>                          return NULL;
> 
> And running this on my setup, it works. This makes libsas added ports/devices
> look like AHCI ones, where all devices have ID 0 for the !pmp case.
> 
> However, I am not sure this would be OK for all setups...
> 
> John,
> 
> Any idea if there is any cases where libsas managed drives would endup not being
> correctly identified by this change ? As long as a device always has its own
> port, I do not see any issue. But is there a case where we could have multiple
> devices on the same port ? Per libata, max is 2, and that is only for the IDE
> master/slave case. Otherwise, it is always 1.
> 

AFAIK, libsas does not support multiple devices on the same port. So 
this change is ok for libsas.

> Not that looking at the pmp case, I am not confident at all that the
> identification is correct for libsas. But I do not think that anyone would ever
> connect a pmp box to a libsas HBA...
> 

libsas's does not support pmp either, and I do not see any future plans 
to support pmp.

So the above change (needs a ATA_FLAG_SAS_HOST check) looks good to me.
It's better to make libsas behave as other ata drivers so that we can 
drop the ATA_FLAG_SAS_HOST check. But this need tons of work for libsas.

Thanks,
Jason
Damien Le Moal May 22, 2023, 9:44 a.m. UTC | #11
On 5/22/23 17:00, Jason Yan wrote:
>> OK, so the issue is that __ata_scsi_find_dev() calls ata_find_dev() with devno
>> == scsidev->id. This leads to devno being 0, 1, 2 and 3 for connected drives
>> sdd, sd1, sdf and sdg, as shown by lsscsi. However, each drive has its own
>> port+link, with the link for each one having  ata_link_max_devices() == 1, so
>> ata_find_dev() works only for the first drive with scsidev->id == 0 and fails
>> for the others. A naive fix would be this:
>>
>> diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
>> index 7bb12deab70c..e4d6f17d7ccc 100644
>> --- a/drivers/ata/libata-scsi.c
>> +++ b/drivers/ata/libata-scsi.c
>> @@ -2718,7 +2718,7 @@ static struct ata_device *__ata_scsi_find_dev(struct
>> ata_port *ap,
>>          if (!sata_pmp_attached(ap)) {
>>                  if (unlikely(scsidev->channel || scsidev->lun))
>>                          return NULL;
>> -               devno = scsidev->id;
>> +               devno = 0;
>>          } else {
>>                  if (unlikely(scsidev->id || scsidev->lun))
>>                          return NULL;
>>
>> And running this on my setup, it works. This makes libsas added ports/devices
>> look like AHCI ones, where all devices have ID 0 for the !pmp case.
>>
>> However, I am not sure this would be OK for all setups...
>>
>> John,
>>
>> Any idea if there is any cases where libsas managed drives would endup not being
>> correctly identified by this change ? As long as a device always has its own
>> port, I do not see any issue. But is there a case where we could have multiple
>> devices on the same port ? Per libata, max is 2, and that is only for the IDE
>> master/slave case. Otherwise, it is always 1.
>>
> 
> AFAIK, libsas does not support multiple devices on the same port. So 
> this change is ok for libsas.

Yes, for libsas it is OK. But as is, it will break master+slave IDE setups... So
the fix needs to be finer than this.

> 
>> Not that looking at the pmp case, I am not confident at all that the
>> identification is correct for libsas. But I do not think that anyone would ever
>> connect a pmp box to a libsas HBA...
>>
> 
> libsas's does not support pmp either, and I do not see any future plans 
> to support pmp.

Good. Dealing with that one is always painful.

> So the above change (needs a ATA_FLAG_SAS_HOST check) looks good to me.

Yes, this flag check is needed to avoid breaking IDE/pata.

> It's better to make libsas behave as other ata drivers so that we can 
> drop the ATA_FLAG_SAS_HOST check. But this need tons of work for libsas.

Yes, getting rid of this special casing with this flag would be really nice. It
should not be needed. I will try to write a proper fix not using it for now, to
facilitate removing the flag later.
John Garry May 22, 2023, 11:28 a.m. UTC | #12
On 22/05/2023 09:00, Jason Yan wrote:
> 
> OK, so the issue is that __ata_scsi_find_dev() calls ata_find_dev() with 
> devno
> == scsidev->id. This leads to devno being 0, 1, 2 and 3 for connected 
> drives

This numbering comes from sas_rphy_add():
...
if (identify->device_type == SAS_END_DEVICE &&
     (identify->target_port_protocols &
      (SAS_PROTOCOL_SSP | SAS_PROTOCOL_STP | SAS_PROTOCOL_SATA)))
	rphy->scsi_target_id = sas_host->next_target_id++;

..

	scsi_scan_target(&rphy->dev, 0, rphy->scsi_target_id, lun,
SCSI_SCAN_INITIAL);
}

So libata and scsi_transport_sas just use different sdev id numbering 
schemes for host scan.

> sdd, sd1, sdf and sdg, as shown by lsscsi. However, each drive has its own
> port+link, with the link for each one having  ata_link_max_devices() == 
> 1, so
> ata_find_dev() works only for the first drive with scsidev->id == 0 and 
> fails
> for the others. A naive fix would be this:
> 
> diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
> index 7bb12deab70c..e4d6f17d7ccc 100644
> --- a/drivers/ata/libata-scsi.c
> +++ b/drivers/ata/libata-scsi.c
> @@ -2718,7 +2718,7 @@ static struct ata_device *__ata_scsi_find_dev(struct
> ata_port *ap,
>           if (!sata_pmp_attached(ap)) {
>                   if (unlikely(scsidev->channel || scsidev->lun))
>                           return NULL;
> -               devno = scsidev->id;
> +               devno = 0;
Would this pattern work:

ata_for_each_dev(ata_dev, link, ALL) {
	if (ata_dev->sdev == sdev)
		return ata_dev;
}

If not, I think it's ok to have devno = 0 assignment under SAS_HOST 
flag, even though it's far from ideal. Not both of these are not 
preferred, then, as I mentioned before, some per-port callback to do the 
conversion.

Thanks,
John
Damien Le Moal May 22, 2023, 11:47 a.m. UTC | #13
On 5/22/23 20:28, John Garry wrote:
> On 22/05/2023 09:00, Jason Yan wrote:
>>
>> OK, so the issue is that __ata_scsi_find_dev() calls ata_find_dev() with 
>> devno
>> == scsidev->id. This leads to devno being 0, 1, 2 and 3 for connected 
>> drives
> 
> This numbering comes from sas_rphy_add():
> ...
> if (identify->device_type == SAS_END_DEVICE &&
>      (identify->target_port_protocols &
>       (SAS_PROTOCOL_SSP | SAS_PROTOCOL_STP | SAS_PROTOCOL_SATA)))
> 	rphy->scsi_target_id = sas_host->next_target_id++;
> 
> ..
> 
> 	scsi_scan_target(&rphy->dev, 0, rphy->scsi_target_id, lun,
> SCSI_SCAN_INITIAL);
> }
> 
> So libata and scsi_transport_sas just use different sdev id numbering 
> schemes for host scan.
> 
>> sdd, sd1, sdf and sdg, as shown by lsscsi. However, each drive has its own
>> port+link, with the link for each one having  ata_link_max_devices() == 
>> 1, so
>> ata_find_dev() works only for the first drive with scsidev->id == 0 and 
>> fails
>> for the others. A naive fix would be this:
>>
>> diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
>> index 7bb12deab70c..e4d6f17d7ccc 100644
>> --- a/drivers/ata/libata-scsi.c
>> +++ b/drivers/ata/libata-scsi.c
>> @@ -2718,7 +2718,7 @@ static struct ata_device *__ata_scsi_find_dev(struct
>> ata_port *ap,
>>           if (!sata_pmp_attached(ap)) {
>>                   if (unlikely(scsidev->channel || scsidev->lun))
>>                           return NULL;
>> -               devno = scsidev->id;
>> +               devno = 0;
> Would this pattern work:
> 
> ata_for_each_dev(ata_dev, link, ALL) {
> 	if (ata_dev->sdev == sdev)
> 		return ata_dev;
> }

That would work too I think, even though a loop is a bit ugly...

> 
> If not, I think it's ok to have devno = 0 assignment under SAS_HOST 
> flag, even though it's far from ideal. Not both of these are not 
> preferred, then, as I mentioned before, some per-port callback to do the 
> conversion.

See the proper patch I posted a few min ago (I cc-ed you). I do not use SAS_HOST
flag :)

> 
> Thanks,
> John
diff mbox series

Patch

diff --git a/drivers/ata/libata-scsi.c b/drivers/ata/libata-scsi.c
index 7bb12deab70c..aa580ea341fa 100644
--- a/drivers/ata/libata-scsi.c
+++ b/drivers/ata/libata-scsi.c
@@ -26,6 +26,7 @@ 
 #include <scsi/scsi_device.h>
 #include <scsi/scsi_tcq.h>
 #include <scsi/scsi_transport.h>
+#include <scsi/libsas.h>
 #include <linux/libata.h>
 #include <linux/hdreg.h>
 #include <linux/uaccess.h>
@@ -2745,10 +2746,17 @@  static struct ata_device *__ata_scsi_find_dev(struct ata_port *ap,
  *	Associated ATA device, or %NULL if not found.
  */
 struct ata_device *
-ata_scsi_find_dev(struct ata_port *ap, const struct scsi_device *scsidev)
+ata_scsi_find_dev(struct ata_port *ap, struct scsi_device *scsidev)
 {
-	struct ata_device *dev = __ata_scsi_find_dev(ap, scsidev);
+	struct ata_device *dev;
+
+	if (ap->flags & ATA_FLAG_SAS_HOST) {
+		struct domain_device *ddev = sdev_to_domain_dev(scsidev);
+
+		return sas_to_ata_dev(ddev);
+	}
 
+	dev = __ata_scsi_find_dev(ap, scsidev);
 	if (unlikely(!dev || !ata_dev_enabled(dev)))
 		return NULL;
 
diff --git a/drivers/ata/libata.h b/drivers/ata/libata.h
index 926d0d33cd29..6d66f46da064 100644
--- a/drivers/ata/libata.h
+++ b/drivers/ata/libata.h
@@ -109,7 +109,7 @@  static inline void ata_acpi_bind_dev(struct ata_device *dev) {}
 
 /* libata-scsi.c */
 extern struct ata_device *ata_scsi_find_dev(struct ata_port *ap,
-					    const struct scsi_device *scsidev);
+					    struct scsi_device *scsidev);
 extern int ata_scsi_add_hosts(struct ata_host *host,
 			      const struct scsi_host_template *sht);
 extern void ata_scsi_scan_host(struct ata_port *ap, int sync);