diff mbox

[RFC] scsi: libsas: fix WARN on device removal

Message ID 1478185120-5509-1-git-send-email-john.garry@huawei.com
State New
Headers show

Commit Message

John Garry Nov. 3, 2016, 2:58 p.m. UTC
The following patch introduces an annoying WARN
when a device is removed from the SAS topology:
[SCSI] libsas: prevent domain rediscovery competing with ata error handling

A sample WARN is as follows:
[  236.842227] WARNING: CPU: 7 PID: 1520 at fs/sysfs/group.c:237 sysfs_remove_group+0x90/0x98
[  236.850465] Modules linked in:
[  236.853544]
[  236.855045] CPU: 7 PID: 1520 Comm: kworker/u64:4 Tainted: G        W       4.9.0-rc1-15310-g3fbc29e-dirty #676
[  236.865010] Hardware name: Huawei Taishan 2180 /D03, BIOS Estuary v2.3 D03 UEFI 08/17/2016
[  236.873249] Workqueue: scsi_wq_0 sas_destruct_devices
[  236.878317] task: ffff8027ba31b200 task.stack: ffff8027b9d44000
[  236.884225] PC is at sysfs_remove_group+0x90/0x98
[  236.888920] LR is at sysfs_remove_group+0x90/0x98
[  236.893616] pc : [<ffff000008256df8>] lr : [<ffff000008256df8>] pstate: 60000145
[  236.900989] sp : ffff8027b9d47bf0

< snip >

[  237.116463] [<ffff000008256df8>] sysfs_remove_group+0x90/0x98
[  237.122197] [<ffff00000851fe68>] dpm_sysfs_remove+0x58/0x68
[  237.127758] [<ffff000008513678>] device_del+0x40/0x218
[  237.132886] [<ffff000008513864>] device_unregister+0x14/0x2c
[  237.138536] [<ffff0000083670c4>] bsg_unregister_queue+0x5c/0xa0
[  237.144442] [<ffff00000855b984>] sas_rphy_remove+0x44/0x80
[  237.149915] [<ffff00000855b9d4>] sas_rphy_delete+0x14/0x28
[  237.155388] [<ffff00000855f9d8>] sas_destruct_devices+0x64/0x98
[  237.161293] [<ffff0000080d2c1c>] process_one_work+0x128/0x2e4
[  237.167027] [<ffff0000080d2e30>] worker_thread+0x58/0x434
[  237.172415] [<ffff0000080d8c24>] kthread+0xd4/0xe8
[  237.177198] [<ffff000008082e80>] ret_from_fork+0x10/0x50
[  237.182557] sysfs group 'power' not found for kobject 'end_device-0:0:5'

(this can be really huge when an expander is unplugged)

The problem is with the process of sas_port and domain_device
destruction in domain revalidation. There is a 2-stage process:
In domain revalidation (which runs in work queue context), if a
domain_device is discovered to be gone, then the following happens:
- the domain_device is queued for destruction in a separate work item
- the associated sas_port is destroyed immediately

This causes a problem in that the sas_port associated with
a domain_device is destroyed prior the domain_device: this causes
the sysfs WARN. Essentially the "rug has been pulled from underneath".

Also, likewise, when a root port is deformed due to loss of signal,
we have the same issue.

To solve, destroy the sas_port in a separate work item to which
we do the domain revalidation with a new discovery event, as follows:
- When a domain_device is detected to be gone, the domain_device is
  queued for destruction in a separate work item. The associated
  sas_port is also queued for destruction in another separate work item
  (needs to be queued 2nd)
- the domain_device is destroyed
- the sas_port is destroyed
[similar is done for loss of signal event, in sas_port_deformed()].

Fixes: 87c8331fcf72e501c3a3c0cdc5c [SCSI] libsas: prevent domain
rediscovery competing with ata error handling

Signed-off-by: John Garry <john.garry@huawei.com>


-- 
1.9.1

Comments

John Garry Nov. 9, 2016, 12:28 p.m. UTC | #1
On 03/11/2016 14:58, John Garry wrote:
> The following patch introduces an annoying WARN

> when a device is removed from the SAS topology:

> [SCSI] libsas: prevent domain rediscovery competing with ata error handling

>


Are there any views on this patch? I would have thought that the parties 
who use the drivers based on libsas would be interested in fixing this bug.

BTW, We are internally testing, hence the RFC.

Thanks in advance,
John

> A sample WARN is as follows:

> [  236.842227] WARNING: CPU: 7 PID: 1520 at fs/sysfs/group.c:237 sysfs_remove_group+0x90/0x98

> [  236.850465] Modules linked in:

> [  236.853544]

> [  236.855045] CPU: 7 PID: 1520 Comm: kworker/u64:4 Tainted: G        W       4.9.0-rc1-15310-g3fbc29e-dirty #676

> [  236.865010] Hardware name: Huawei Taishan 2180 /D03, BIOS Estuary v2.3 D03 UEFI 08/17/2016

> [  236.873249] Workqueue: scsi_wq_0 sas_destruct_devices

> [  236.878317] task: ffff8027ba31b200 task.stack: ffff8027b9d44000

> [  236.884225] PC is at sysfs_remove_group+0x90/0x98

> [  236.888920] LR is at sysfs_remove_group+0x90/0x98

> [  236.893616] pc : [<ffff000008256df8>] lr : [<ffff000008256df8>] pstate: 60000145

> [  236.900989] sp : ffff8027b9d47bf0

>

> < snip >

>

> [  237.116463] [<ffff000008256df8>] sysfs_remove_group+0x90/0x98

> [  237.122197] [<ffff00000851fe68>] dpm_sysfs_remove+0x58/0x68

> [  237.127758] [<ffff000008513678>] device_del+0x40/0x218

> [  237.132886] [<ffff000008513864>] device_unregister+0x14/0x2c

> [  237.138536] [<ffff0000083670c4>] bsg_unregister_queue+0x5c/0xa0

> [  237.144442] [<ffff00000855b984>] sas_rphy_remove+0x44/0x80

> [  237.149915] [<ffff00000855b9d4>] sas_rphy_delete+0x14/0x28

> [  237.155388] [<ffff00000855f9d8>] sas_destruct_devices+0x64/0x98

> [  237.161293] [<ffff0000080d2c1c>] process_one_work+0x128/0x2e4

> [  237.167027] [<ffff0000080d2e30>] worker_thread+0x58/0x434

> [  237.172415] [<ffff0000080d8c24>] kthread+0xd4/0xe8

> [  237.177198] [<ffff000008082e80>] ret_from_fork+0x10/0x50

> [  237.182557] sysfs group 'power' not found for kobject 'end_device-0:0:5'

>

> (this can be really huge when an expander is unplugged)

>

> The problem is with the process of sas_port and domain_device

> destruction in domain revalidation. There is a 2-stage process:

> In domain revalidation (which runs in work queue context), if a

> domain_device is discovered to be gone, then the following happens:

> - the domain_device is queued for destruction in a separate work item

> - the associated sas_port is destroyed immediately

>

> This causes a problem in that the sas_port associated with

> a domain_device is destroyed prior the domain_device: this causes

> the sysfs WARN. Essentially the "rug has been pulled from underneath".

>

> Also, likewise, when a root port is deformed due to loss of signal,

> we have the same issue.

>

> To solve, destroy the sas_port in a separate work item to which

> we do the domain revalidation with a new discovery event, as follows:

> - When a domain_device is detected to be gone, the domain_device is

>   queued for destruction in a separate work item. The associated

>   sas_port is also queued for destruction in another separate work item

>   (needs to be queued 2nd)

> - the domain_device is destroyed

> - the sas_port is destroyed

> [similar is done for loss of signal event, in sas_port_deformed()].

>

> Fixes: 87c8331fcf72e501c3a3c0cdc5c [SCSI] libsas: prevent domain

> rediscovery competing with ata error handling

>

> Signed-off-by: John Garry <john.garry@huawei.com>

>

> diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c

> index 60de662..01d0fe2 100644

> --- a/drivers/scsi/libsas/sas_discover.c

> +++ b/drivers/scsi/libsas/sas_discover.c

> @@ -361,7 +361,7 @@ static void sas_destruct_devices(struct work_struct *work)

>

>  	clear_bit(DISCE_DESTRUCT, &port->disc.pending);

>

> -	list_for_each_entry_safe(dev, n, &port->destroy_list, disco_list_node) {

> +	list_for_each_entry_safe(dev, n, &port->dev_destroy_list, disco_list_node) {

>  		list_del_init(&dev->disco_list_node);

>

>  		sas_remove_children(&dev->rphy->dev);

> @@ -383,7 +383,7 @@ void sas_unregister_dev(struct asd_sas_port *port, struct domain_device *dev)

>

>  	if (!test_and_set_bit(SAS_DEV_DESTROY, &dev->state)) {

>  		sas_rphy_unlink(dev->rphy);

> -		list_move_tail(&dev->disco_list_node, &port->destroy_list);

> +		list_move_tail(&dev->disco_list_node, &port->dev_destroy_list);

>  		sas_discover_event(dev->port, DISCE_DESTRUCT);

>  	}

>  }

> @@ -525,6 +525,28 @@ static void sas_revalidate_domain(struct work_struct *work)

>  	mutex_unlock(&ha->disco_mutex);

>  }

>

> +/* ---------- Async Port destruct ---------- */

> +static void sas_async_port_destruct(struct work_struct *work)

> +{

> +	struct sas_discovery_event *ev = to_sas_discovery_event(work);

> +	struct asd_sas_port *port = ev->port;

> +	struct sas_port *sas_port, *n;

> +

> +	clear_bit(DISCE_PORT_DESTRUCT, &port->disc.pending);

> +

> +	list_for_each_entry_safe(sas_port, n, &port->port_destroy_list, destroy_list) {

> +		list_del_init(&port->port_destroy_list);

> +

> +		sas_port_delete(sas_port);

> +	}

> +}

> +

> +void sas_port_destruct(struct asd_sas_port *port, struct sas_port *sas_port)

> +{

> +	list_move_tail(&sas_port->destroy_list, &port->port_destroy_list);

> +	sas_discover_event(port, DISCE_PORT_DESTRUCT);

> +}

> +

>  /* ---------- Events ---------- */

>

>  static void sas_chain_work(struct sas_ha_struct *ha, struct sas_work *sw)

> @@ -582,6 +604,7 @@ void sas_init_disc(struct sas_discovery *disc, struct asd_sas_port *port)

>  		[DISCE_SUSPEND] = sas_suspend_devices,

>  		[DISCE_RESUME] = sas_resume_devices,

>  		[DISCE_DESTRUCT] = sas_destruct_devices,

> +		[DISCE_PORT_DESTRUCT] = sas_async_port_destruct,

>  	};

>

>  	disc->pending = 0;

> diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c

> index 022bb6e..f9522a0 100644

> --- a/drivers/scsi/libsas/sas_expander.c

> +++ b/drivers/scsi/libsas/sas_expander.c

> @@ -1900,10 +1900,11 @@ static void sas_unregister_devs_sas_addr(struct domain_device *parent,

>  	}

>  	memset(phy->attached_sas_addr, 0, SAS_ADDR_SIZE);

>  	if (phy->port) {

> +		struct asd_sas_port *port = found->port;

>  		sas_port_delete_phy(phy->port, phy->phy);

>  		sas_device_set_phy(found, phy->port);

>  		if (phy->port->num_phys == 0)

> -			sas_port_delete(phy->port);

> +			sas_port_destruct(port, phy->port);

>  		phy->port = NULL;

>  	}

>  }

> diff --git a/drivers/scsi/libsas/sas_port.c b/drivers/scsi/libsas/sas_port.c

> index d3c5297..1a32f86 100644

> --- a/drivers/scsi/libsas/sas_port.c

> +++ b/drivers/scsi/libsas/sas_port.c

> @@ -219,7 +219,7 @@ void sas_deform_port(struct asd_sas_phy *phy, int gone)

>

>  	if (port->num_phys == 1) {

>  		sas_unregister_domain_devices(port, gone);

> -		sas_port_delete(port->port);

> +		sas_port_destruct(port, port->port);

>  		port->port = NULL;

>  	} else {

>  		sas_port_delete_phy(port->port, phy->phy);

> @@ -322,7 +322,8 @@ static void sas_init_port(struct asd_sas_port *port,

>  	port->id = i;

>  	INIT_LIST_HEAD(&port->dev_list);

>  	INIT_LIST_HEAD(&port->disco_list);

> -	INIT_LIST_HEAD(&port->destroy_list);

> +	INIT_LIST_HEAD(&port->dev_destroy_list);

> +	INIT_LIST_HEAD(&port->port_destroy_list);

>  	spin_lock_init(&port->phy_list_lock);

>  	INIT_LIST_HEAD(&port->phy_list);

>  	port->ha = sas_ha;

> diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c

> index 60b651b..062c03c 100644

> --- a/drivers/scsi/scsi_transport_sas.c

> +++ b/drivers/scsi/scsi_transport_sas.c

> @@ -934,6 +934,7 @@ struct sas_port *sas_port_alloc(struct device *parent, int port_id)

>

>  	mutex_init(&port->phy_list_mutex);

>  	INIT_LIST_HEAD(&port->phy_list);

> +	INIT_LIST_HEAD(&port->destroy_list);

>

>  	if (scsi_is_sas_expander_device(parent)) {

>  		struct sas_rphy *rphy = dev_to_rphy(parent);

> diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h

> index dae99d7..a7953c8 100644

> --- a/include/scsi/libsas.h

> +++ b/include/scsi/libsas.h

> @@ -91,7 +91,8 @@ enum discover_event {

>  	DISCE_SUSPEND		= 4,

>  	DISCE_RESUME		= 5,

>  	DISCE_DESTRUCT		= 6,

> -	DISC_NUM_EVENTS		= 7,

> +	DISCE_PORT_DESTRUCT	= 7,

> +	DISC_NUM_EVENTS,

>  };

>

>  /* ---------- Expander Devices ---------- */

> @@ -268,7 +269,8 @@ struct asd_sas_port {

>  	spinlock_t dev_list_lock;

>  	struct list_head dev_list;

>  	struct list_head disco_list;

> -	struct list_head destroy_list;

> +	struct list_head dev_destroy_list;

> +	struct list_head port_destroy_list;

>  	enum   sas_linkrate linkrate;

>

>  	struct sas_work work;

> @@ -702,6 +704,7 @@ extern int sas_bios_param(struct scsi_device *,

>  int  sas_ex_revalidate_domain(struct domain_device *);

>

>  void sas_unregister_domain_devices(struct asd_sas_port *port, int gone);

> +void sas_port_destruct(struct asd_sas_port *port, struct sas_port *sas_port);

>  void sas_init_disc(struct sas_discovery *disc, struct asd_sas_port *);

>  int  sas_discover_event(struct asd_sas_port *, enum discover_event ev);

>

> diff --git a/include/scsi/scsi_transport_sas.h b/include/scsi/scsi_transport_sas.h

> index 73d8709..b495aac 100644

> --- a/include/scsi/scsi_transport_sas.h

> +++ b/include/scsi/scsi_transport_sas.h

> @@ -154,6 +154,7 @@ struct sas_port {

>

>  	struct mutex		phy_list_mutex;

>  	struct list_head	phy_list;

> +	struct list_head	destroy_list; /* only used by libsas */

>  };

>

>  #define dev_to_sas_port(d) \

>
Dan Williams Nov. 9, 2016, 7:09 p.m. UTC | #2
On Wed, Nov 9, 2016 at 9:36 AM, John Garry <john.garry@huawei.com> wrote:
> On 09/11/2016 12:28, John Garry wrote:

>>

>> On 03/11/2016 14:58, John Garry wrote:

>>>

>>> The following patch introduces an annoying WARN

>>> when a device is removed from the SAS topology:

>>> [SCSI] libsas: prevent domain rediscovery competing with ata error

>>> handling

>>>

>>

>> Are there any views on this patch? I would have thought that the parties

>> who use the drivers based on libsas would be interested in fixing this

>> bug.

>>

>

> I should have added the before and after logs earlier, so the issue is

> illustrated. Now attached. When a 24-port expander is unplugged we get >6k

> lines of WARN on the console, lasting >30 seconds. Not nice.

>


I might be mistaken, but this patch seems functionally identical to
this attempt:

http://marc.info/?l=linux-scsi&m=143459794823595&w=2

i.e. it moves the port destruction to the workqueue and still suffers
from the flutter problem:

http://marc.info/?l=linux-scsi&m=143801026028006&w=2
http://marc.info/?l=linux-scsi&m=143801971131073&w=2

Perhaps we instead need to quiet this warning?

http://marc.info/?l=linux-scsi&m=143802229932175&w=2
Dan Williams Nov. 9, 2016, 8:35 p.m. UTC | #3
On Wed, Nov 9, 2016 at 11:09 AM, Dan Williams <dan.j.williams@intel.com> wrote:
> On Wed, Nov 9, 2016 at 9:36 AM, John Garry <john.garry@huawei.com> wrote:

>> On 09/11/2016 12:28, John Garry wrote:

>>>

>>> On 03/11/2016 14:58, John Garry wrote:

>>>>

>>>> The following patch introduces an annoying WARN

>>>> when a device is removed from the SAS topology:

>>>> [SCSI] libsas: prevent domain rediscovery competing with ata error

>>>> handling

>>>>

>>>

>>> Are there any views on this patch? I would have thought that the parties

>>> who use the drivers based on libsas would be interested in fixing this

>>> bug.

>>>

>>

>> I should have added the before and after logs earlier, so the issue is

>> illustrated. Now attached. When a 24-port expander is unplugged we get >6k

>> lines of WARN on the console, lasting >30 seconds. Not nice.

>>

>

> I might be mistaken, but this patch seems functionally identical to

> this attempt:

>

> http://marc.info/?l=linux-scsi&m=143459794823595&w=2

>

> i.e. it moves the port destruction to the workqueue and still suffers

> from the flutter problem:

>

> http://marc.info/?l=linux-scsi&m=143801026028006&w=2

> http://marc.info/?l=linux-scsi&m=143801971131073&w=2

>

> Perhaps we instead need to quiet this warning?

>

> http://marc.info/?l=linux-scsi&m=143802229932175&w=2


Alternatively we need a mechanism to cancel in-flight port shutdown
requests when we start re-attaching devices before queued port
destruction events have run.
John Garry Nov. 10, 2016, 11:53 a.m. UTC | #4
On 09/11/2016 20:35, Dan Williams wrote:
> On Wed, Nov 9, 2016 at 11:09 AM, Dan Williams <dan.j.williams@intel.com> wrote:

>> On Wed, Nov 9, 2016 at 9:36 AM, John Garry <john.garry@huawei.com> wrote:

>>> On 09/11/2016 12:28, John Garry wrote:

>>>>

>>>> On 03/11/2016 14:58, John Garry wrote:

>>>>>

>>>>> The following patch introduces an annoying WARN

>>>>> when a device is removed from the SAS topology:

>>>>> [SCSI] libsas: prevent domain rediscovery competing with ata error

>>>>> handling

>>>>>

>>>>

>>>> Are there any views on this patch? I would have thought that the parties

>>>> who use the drivers based on libsas would be interested in fixing this

>>>> bug.

>>>>

>>>

>>> I should have added the before and after logs earlier, so the issue is

>>> illustrated. Now attached. When a 24-port expander is unplugged we get >6k

>>> lines of WARN on the console, lasting >30 seconds. Not nice.

>>>

>>

>> I might be mistaken, but this patch seems functionally identical to

>> this attempt:

>>

>> http://marc.info/?l=linux-scsi&m=143459794823595&w=2


Hi Dan,

They're not the same. I don't see how your solution properly deals with 
remote sas_port deletion.

When we unplug a device connected to an expander, can't the sas_port be 
deleted twice, in sas_unregister_devs_sas_addr() from domain 
revalidation and also now in sas_destruct_devices()? I think that this 
gives a NULL dereference.
And we still get the WARN as the sas_port has still been deleted before 
the device.

In my solution, we should always delete the sas_port after the attached 
device.

>>

>> i.e. it moves the port destruction to the workqueue and still suffers

>> from the flutter problem:

>>

>> http://marc.info/?l=linux-scsi&m=143801026028006&w=2

>> http://marc.info/?l=linux-scsi&m=143801971131073&w=2

>>

>> Perhaps we instead need to quiet this warning?

>>

>> http://marc.info/?l=linux-scsi&m=143802229932175&w=2


I have not seen the flutter issue. I am just trying to solve the 
horrible WARN dump.
However I do understand that there may be a issue related to how we 
queue the events; there was a recent attempt to fix this, but it came to 
nothing:
https://www.spinics.net/lists/linux-scsi/msg99991.html

Cheers,
John

>

> Alternatively we need a mechanism to cancel in-flight port shutdown

> requests when we start re-attaching devices before queued port

> destruction events have run.

>

> .

>
wangyijing Nov. 11, 2016, 8:12 a.m. UTC | #5
> 

> They're not the same. I don't see how your solution properly deals with remote sas_port deletion.

> 

> When we unplug a device connected to an expander, can't the sas_port be deleted twice, in sas_unregister_devs_sas_addr() from domain revalidation and also now in sas_destruct_devices()? I think that this gives a NULL dereference.

> And we still get the WARN as the sas_port has still been deleted before the device.

> 

> In my solution, we should always delete the sas_port after the attached device.

> 

>>>

>>> i.e. it moves the port destruction to the workqueue and still suffers

>>> from the flutter problem:

>>>

>>> http://marc.info/?l=linux-scsi&m=143801026028006&w=2

>>> http://marc.info/?l=linux-scsi&m=143801971131073&w=2

>>>

>>> Perhaps we instead need to quiet this warning?

>>>

>>> http://marc.info/?l=linux-scsi&m=143802229932175&w=2

> 

> I have not seen the flutter issue. I am just trying to solve the horrible WARN dump.

> However I do understand that there may be a issue related to how we queue the events; there was a recent attempt to fix this, but it came to nothing:

> https://www.spinics.net/lists/linux-scsi/msg99991.html


We found libsas hotplug several problems:
1. sysfs warning calltrace(like the case you found);
2. hot-add and hot-remove work events may process out of order;
3. in some extreme cases, libsas may miss some events, if the same event is still pending in workqueue.

It's a complex issue, we posted two patches, try to fix these issues, but now few people are interested in it  :(

> 

> Cheers,

> John

> 

>>

>> Alternatively we need a mechanism to cancel in-flight port shutdown

>> requests when we start re-attaching devices before queued port

>> destruction events have run.

>>

>> .

>>

> 

> 

> _______________________________________________

> linuxarm mailing list

> linuxarm@huawei.com

> http://rnd-openeuler.huawei.com/mailman/listinfo/linuxarm

> 

> .

>
John Garry Nov. 11, 2016, 8:23 a.m. UTC | #6
On 11/11/2016 08:12, wangyijing wrote:
>>

>> They're not the same. I don't see how your solution properly deals with remote sas_port deletion.

>>

>> When we unplug a device connected to an expander, can't the sas_port be deleted twice, in sas_unregister_devs_sas_addr() from domain revalidation and also now in sas_destruct_devices()? I think that this gives a NULL dereference.

>> And we still get the WARN as the sas_port has still been deleted before the device.

>>

>> In my solution, we should always delete the sas_port after the attached device.

>>

>>>>

>>>> i.e. it moves the port destruction to the workqueue and still suffers

>>>> from the flutter problem:

>>>>

>>>> http://marc.info/?l=linux-scsi&m=143801026028006&w=2

>>>> http://marc.info/?l=linux-scsi&m=143801971131073&w=2

>>>>

>>>> Perhaps we instead need to quiet this warning?

>>>>

>>>> http://marc.info/?l=linux-scsi&m=143802229932175&w=2

>>

>> I have not seen the flutter issue. I am just trying to solve the horrible WARN dump.

>> However I do understand that there may be a issue related to how we queue the events; there was a recent attempt to fix this, but it came to nothing:

>> https://www.spinics.net/lists/linux-scsi/msg99991.html

>

> We found libsas hotplug several problems:

> 1. sysfs warning calltrace(like the case you found);


Maybe you can then review my patch.

> 2. hot-add and hot-remove work events may process out of order;

> 3. in some extreme cases, libsas may miss some events, if the same event is still pending in workqueue.

>


Can you tell me how to recreate #2 and #3?

> It's a complex issue, we posted two patches, try to fix these issues, but now few people are interested in it  :(

>


IIRC, you sent as RFC and got a "reviewed-by" from Hannes, so I'm not 
sure what else you want. BTW, I thought that the changes were quite drastic.

John

>>

>>>

>>> Alternatively we need a mechanism to cancel in-flight port shutdown

>>> requests when we start re-attaching devices before queued port

>>> destruction events have run.

>>>

>>> .

>>>

>>

>>

>> _______________________________________________

>> linuxarm mailing list

>> linuxarm@huawei.com

>> http://rnd-openeuler.huawei.com/mailman/listinfo/linuxarm

>>

>> .

>>

>

>

> .

>
wangyijing Nov. 11, 2016, 8:49 a.m. UTC | #7
>>> I have not seen the flutter issue. I am just trying to solve the horrible WARN dump.

>>> However I do understand that there may be a issue related to how we queue the events; there was a recent attempt to fix this, but it came to nothing:

>>> https://www.spinics.net/lists/linux-scsi/msg99991.html

>>

>> We found libsas hotplug several problems:

>> 1. sysfs warning calltrace(like the case you found);

> 

> Maybe you can then review my patch.


I did it, I think your solution to fix the sysfs calltrace issue is ok, and what I worried about is we still need to fix
the rest issues. So it's better if we could fix all issues one time.

> 

>> 2. hot-add and hot-remove work events may process out of order;

>> 3. in some extreme cases, libsas may miss some events, if the same event is still pending in workqueue.

>>

> 

> Can you tell me how to recreate #2 and #3?


Qilin Chen and Yousong He help me to reproduce it, I told them to reply this mail to tell you the test steps.
Some tests we did is make sas phy link flutter, so hardware would post phy down and phy up events sequentially.

1. scsi host workqueue receive phy down and phy up events.                                             in process                 new added
2. sas_deform_port would post a new destruct event to scsi host workqueue, so things in workqueue like [phy down-----phy up -----destruct]

So the phy down logic is separated by phy up, and it's not atomic, not safe, something unexpected would happen.

For case 3, we make hardware burst post lots pair of phy up and phy down events, so if libsas is processing the phy up event, the next
phy up event can not queue to scsi host workqueue again, it will lost, it's not we expect.

> 

>> It's a complex issue, we posted two patches, try to fix these issues, but now few people are interested in it  :(

>>

> 

> IIRC, you sent as RFC and got a "reviewed-by" from Hannes, so I'm not sure what else you want. BTW, I thought that the changes were quite drastic.


I agree, the changes seems something drastic. But I think current libsas hotplug framework has a big flaw.

> 

> John

> 

>>>

>>>>

>>>> Alternatively we need a mechanism to cancel in-flight port shutdown

>>>> requests when we start re-attaching devices before queued port

>>>> destruction events have run.

>>>>

>>>> .

>>>>

>>>

>>>

>>> _______________________________________________

>>> linuxarm mailing list

>>> linuxarm@huawei.com

>>> http://rnd-openeuler.huawei.com/mailman/listinfo/linuxarm

>>>

>>> .

>>>

>>

>>

>> .

>>

> 

> 

> 

> .

>
John Garry Nov. 17, 2016, 3:23 p.m. UTC | #8
On 11/11/2016 08:49, wangyijing wrote:
>>>> I have not seen the flutter issue. I am just trying to solve the horrible WARN dump.

>>>> However I do understand that there may be a issue related to how we queue the events; there was a recent attempt to fix this, but it came to nothing:

>>>> https://www.spinics.net/lists/linux-scsi/msg99991.html

>>>

>>> We found libsas hotplug several problems:

>>> 1. sysfs warning calltrace(like the case you found);

>>

>> Maybe you can then review my patch.

>

> I did it, I think your solution to fix the sysfs calltrace issue is ok, and what I worried about is we still need to fix

> the rest issues. So it's better if we could fix all issues one time.

>


@Maintainers, would you be willing to accept this patch as an interim 
fix for the dastardly WARN while we try to fix the flutter issue?

>>

>>> 2. hot-add and hot-remove work events may process out of order;

>>> 3. in some extreme cases, libsas may miss some events, if the same event is still pending in workqueue.

>>>

>>

>> Can you tell me how to recreate #2 and #3?

>

> Qilin Chen and Yousong He help me to reproduce it, I told them to reply this mail to tell you the test steps.

> Some tests we did is make sas phy link flutter, so hardware would post phy down and phy up events sequentially.

>

> 1. scsi host workqueue receive phy down and phy up events.                                             in process                 new added

> 2. sas_deform_port would post a new destruct event to scsi host workqueue, so things in workqueue like [phy down-----phy up -----destruct]

>

> So the phy down logic is separated by phy up, and it's not atomic, not safe, something unexpected would happen.

>

> For case 3, we make hardware burst post lots pair of phy up and phy down events, so if libsas is processing the phy up event, the next

> phy up event can not queue to scsi host workqueue again, it will lost, it's not we expect.

>

>>

>>> It's a complex issue, we posted two patches, try to fix these issues, but now few people are interested in it  :(

>>>

>>

>> IIRC, you sent as RFC and got a "reviewed-by" from Hannes, so I'm not sure what else you want. BTW, I thought that the changes were quite drastic.

>

> I agree, the changes seems something drastic. But I think current libsas hotplug framework has a big flaw.

>

>>

>> John

>>

>>>>

>>>>>

>>>>> Alternatively we need a mechanism to cancel in-flight port shutdown

>>>>> requests when we start re-attaching devices before queued port

>>>>> destruction events have run.

>>>>>

>>>>> .

>>>>>

>>>>

>>>>

>>>> _______________________________________________

>>>> linuxarm mailing list

>>>> linuxarm@huawei.com

>>>> http://rnd-openeuler.huawei.com/mailman/listinfo/linuxarm

>>>>

>>>> .

>>>>

>>>

>>>

>>> .

>>>

>>

>>

>>

>> .

>>

>

>

> .

>
Martin K. Petersen Nov. 18, 2016, 1:51 a.m. UTC | #9
>>>>> "John" == John Garry <john.garry@huawei.com> writes:


John> @Maintainers, would you be willing to accept this patch as an
John> interim fix for the dastardly WARN while we try to fix the flutter
John> issue?

I'll defer to James since I don't have much libsas experience.

-- 
Martin K. Petersen	Oracle Linux Engineering
John Garry Nov. 18, 2016, 9 a.m. UTC | #10
On 18/11/2016 01:53, Dan Williams wrote:
> On Thu, Nov 17, 2016 at 7:23 AM, John Garry <john.garry@huawei.com> wrote:

>> On 11/11/2016 08:49, wangyijing wrote:

>>>>>>

>>>>>> I have not seen the flutter issue. I am just trying to solve the

>>>>>> horrible WARN dump.

>>>>>> However I do understand that there may be a issue related to how we

>>>>>> queue the events; there was a recent attempt to fix this, but it came to

>>>>>> nothing:

>>>>>> https://www.spinics.net/lists/linux-scsi/msg99991.html

>>>>>

>>>>>

>>>>> We found libsas hotplug several problems:

>>>>> 1. sysfs warning calltrace(like the case you found);

>>>>

>>>>

>>>> Maybe you can then review my patch.

>>>

>>>

>>> I did it, I think your solution to fix the sysfs calltrace issue is ok,

>>> and what I worried about is we still need to fix

>>> the rest issues. So it's better if we could fix all issues one time.

>>>

>>

>> @Maintainers, would you be willing to accept this patch as an interim fix

>> for the dastardly WARN while we try to fix the flutter issue?

>

> To me this adds a bug to quiet a benign, albeit noisy, warning.

>


What is the bug which is being added?

And it's a very noisy warning, as in 6K lines on the console when an 
expander is unplugged.

> .

>
Dan Williams Nov. 18, 2016, 7:08 p.m. UTC | #11
On Fri, Nov 18, 2016 at 1:00 AM, John Garry <john.garry@huawei.com> wrote:
> On 18/11/2016 01:53, Dan Williams wrote:

>>

>> On Thu, Nov 17, 2016 at 7:23 AM, John Garry <john.garry@huawei.com> wrote:

>>>

>>> On 11/11/2016 08:49, wangyijing wrote:

>>>>>>>

>>>>>>>

>>>>>>> I have not seen the flutter issue. I am just trying to solve the

>>>>>>> horrible WARN dump.

>>>>>>> However I do understand that there may be a issue related to how we

>>>>>>> queue the events; there was a recent attempt to fix this, but it came

>>>>>>> to

>>>>>>> nothing:

>>>>>>> https://www.spinics.net/lists/linux-scsi/msg99991.html

>>>>>>

>>>>>>

>>>>>>

>>>>>> We found libsas hotplug several problems:

>>>>>> 1. sysfs warning calltrace(like the case you found);

>>>>>

>>>>>

>>>>>

>>>>> Maybe you can then review my patch.

>>>>

>>>>

>>>>

>>>> I did it, I think your solution to fix the sysfs calltrace issue is ok,

>>>> and what I worried about is we still need to fix

>>>> the rest issues. So it's better if we could fix all issues one time.

>>>>

>>>

>>> @Maintainers, would you be willing to accept this patch as an interim fix

>>> for the dastardly WARN while we try to fix the flutter issue?

>>

>>

>> To me this adds a bug to quiet a benign, albeit noisy, warning.

>>

>

> What is the bug which is being added?


The bug where we queue a port teardown, but see a port formation event
in the meantime.

> And it's a very noisy warning, as in 6K lines on the console when an

> expander is unplugged.


Does something like this modulate the failure?

diff --git a/drivers/scsi/scsi_transport_sas.c
b/drivers/scsi/scsi_transport_sas.c            index
60b651bfaa01..11401e5c88ba 100644
                 --- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -262,9 +262,10 @@ static void sas_bsg_remove(struct Scsi_Host
*shost, struct sas_rphy *rphy
 {
        struct request_queue *q;

-       if (rphy)
+       if (rphy) {
                q = rphy->q;
-       else
+               rphy->q = NULL;
+       } else
                q = to_sas_host_attrs(shost)->q;

        if (!q)
John Garry Nov. 21, 2016, 3:16 p.m. UTC | #12
>>>> @Maintainers, would you be willing to accept this patch as an interim fix

>>>> for the dastardly WARN while we try to fix the flutter issue?

>>>

>>>

>>> To me this adds a bug to quiet a benign, albeit noisy, warning.

>>>

>>

>> What is the bug which is being added?

>

> The bug where we queue a port teardown, but see a port formation event

> in the meantime.


As I understand, this vulnerability already exists:
http://marc.info/?l=linux-scsi&m=143801026028006&w=2

I actually don't understand how libsas dealt with flutter (which I take 
to mean a burst of up and down events) before these changes, as it can 
only queue simultaneously one up and one down event per port. So, if we 
get a flutter, then the events are lost and we get indeterminate state.

>

>> And it's a very noisy warning, as in 6K lines on the console when an

>> expander is unplugged.

>

> Does something like this modulate the failure?

>

> diff --git a/drivers/scsi/scsi_transport_sas.c

> b/drivers/scsi/scsi_transport_sas.c            index

> 60b651bfaa01..11401e5c88ba 100644

>                  --- a/drivers/scsi/scsi_transport_sas.c

> +++ b/drivers/scsi/scsi_transport_sas.c

> @@ -262,9 +262,10 @@ static void sas_bsg_remove(struct Scsi_Host

> *shost, struct sas_rphy *rphy

>  {

>         struct request_queue *q;

>

> -       if (rphy)

> +       if (rphy) {

>                 q = rphy->q;

> -       else

> +               rphy->q = NULL;

> +       } else

>                 q = to_sas_host_attrs(shost)->q;

>

>         if (!q)

>

> .

>
Dan Williams Nov. 21, 2016, 5:13 p.m. UTC | #13
On Mon, Nov 21, 2016 at 7:16 AM, John Garry <john.garry@huawei.com> wrote:
>>>>> @Maintainers, would you be willing to accept this patch as an interim

>>>>> fix

>>>>> for the dastardly WARN while we try to fix the flutter issue?

>>>>

>>>>

>>>>

>>>> To me this adds a bug to quiet a benign, albeit noisy, warning.

>>>>

>>>

>>> What is the bug which is being added?

>>

>>

>> The bug where we queue a port teardown, but see a port formation event

>> in the meantime.

>

>

> As I understand, this vulnerability already exists:

> http://marc.info/?l=linux-scsi&m=143801026028006&w=2

>

> I actually don't understand how libsas dealt with flutter (which I take to

> mean a burst of up and down events) before these changes, as it can only

> queue simultaneously one up and one down event per port. So, if we get a

> flutter, then the events are lost and we get indeterminate state.

>


The events are not lost.  The new problem this patch introduces is
delaying sas port deletion where it was previously immediate.  So now
we can get into a situation where the port has gone down and can start
processing a port up event before the previous deletion work has run.

>>

>>> And it's a very noisy warning, as in 6K lines on the console when an

>>> expander is unplugged.

>>

>>

>> Does something like this modulate the failure?


I'm curious if we simply need to fix the double deletion of the
sas_port bsg queue, could you try the changes below?

>>

>> diff --git a/drivers/scsi/scsi_transport_sas.c

>> b/drivers/scsi/scsi_transport_sas.c            index

>> 60b651bfaa01..11401e5c88ba 100644

>>                  --- a/drivers/scsi/scsi_transport_sas.c

>> +++ b/drivers/scsi/scsi_transport_sas.c

>> @@ -262,9 +262,10 @@ static void sas_bsg_remove(struct Scsi_Host

>> *shost, struct sas_rphy *rphy

>>  {

>>         struct request_queue *q;

>>

>> -       if (rphy)

>> +       if (rphy) {

>>                 q = rphy->q;

>> -       else

>> +               rphy->q = NULL;

>> +       } else

>>                 q = to_sas_host_attrs(shost)->q;

>>

>>         if (!q)

>>

>> .

>>

>

>
John Garry Nov. 22, 2016, 4:56 p.m. UTC | #14
On 21/11/2016 17:13, Dan Williams wrote:
> On Mon, Nov 21, 2016 at 7:16 AM, John Garry <john.garry@huawei.com> wrote:

>>>>>> @Maintainers, would you be willing to accept this patch as an interim

>>>>>> fix

>>>>>> for the dastardly WARN while we try to fix the flutter issue?

>>>>>

>>>>>

>>>>>

>>>>> To me this adds a bug to quiet a benign, albeit noisy, warning.

>>>>>

>>>>

>>>> What is the bug which is being added?

>>>

>>>

>>> The bug where we queue a port teardown, but see a port formation event

>>> in the meantime.

>>

>>

>> As I understand, this vulnerability already exists:

>> http://marc.info/?l=linux-scsi&m=143801026028006&w=2

>>

>> I actually don't understand how libsas dealt with flutter (which I take to

>> mean a burst of up and down events) before these changes, as it can only

>> queue simultaneously one up and one down event per port. So, if we get a

>> flutter, then the events are lost and we get indeterminate state.

>>

>

> The events are not lost.


In sas_queue_event(), if there is a particular event pending for a 
port/PHY, we cannot queue further same event types for that port/PHY. I 
think my colleagues found issue where we try to enqueue multiple 
complementary events.

> The new problem this patch introduces is

> delaying sas port deletion where it was previously immediate.  So now

> we can get into a situation where the port has gone down and can start

> processing a port up event before the previous deletion work has run.

>

>>>

>>>> And it's a very noisy warning, as in 6K lines on the console when an

>>>> expander is unplugged.

>>>

>>>

>>> Does something like this modulate the failure?

>

> I'm curious if we simply need to fix the double deletion of the

> sas_port bsg queue, could you try the changes below?

>


No, I just tested it on a root port and we get the same WARN.

>>>

>>> diff --git a/drivers/scsi/scsi_transport_sas.c

>>> b/drivers/scsi/scsi_transport_sas.c            index

>>> 60b651bfaa01..11401e5c88ba 100644

>>>                  --- a/drivers/scsi/scsi_transport_sas.c

>>> +++ b/drivers/scsi/scsi_transport_sas.c

>>> @@ -262,9 +262,10 @@ static void sas_bsg_remove(struct Scsi_Host

>>> *shost, struct sas_rphy *rphy

>>>  {

>>>         struct request_queue *q;

>>>

>>> -       if (rphy)

>>> +       if (rphy) {

>>>                 q = rphy->q;

>>> -       else

>>> +               rphy->q = NULL;

>>> +       } else

>>>                 q = to_sas_host_attrs(shost)->q;

>>>

>>>         if (!q)

>>>

>>> .

>>>

>>

>>

>

> .

>
diff mbox

Patch

diff --git a/drivers/scsi/libsas/sas_discover.c b/drivers/scsi/libsas/sas_discover.c
index 60de662..01d0fe2 100644
--- a/drivers/scsi/libsas/sas_discover.c
+++ b/drivers/scsi/libsas/sas_discover.c
@@ -361,7 +361,7 @@  static void sas_destruct_devices(struct work_struct *work)
 
 	clear_bit(DISCE_DESTRUCT, &port->disc.pending);
 
-	list_for_each_entry_safe(dev, n, &port->destroy_list, disco_list_node) {
+	list_for_each_entry_safe(dev, n, &port->dev_destroy_list, disco_list_node) {
 		list_del_init(&dev->disco_list_node);
 
 		sas_remove_children(&dev->rphy->dev);
@@ -383,7 +383,7 @@  void sas_unregister_dev(struct asd_sas_port *port, struct domain_device *dev)
 
 	if (!test_and_set_bit(SAS_DEV_DESTROY, &dev->state)) {
 		sas_rphy_unlink(dev->rphy);
-		list_move_tail(&dev->disco_list_node, &port->destroy_list);
+		list_move_tail(&dev->disco_list_node, &port->dev_destroy_list);
 		sas_discover_event(dev->port, DISCE_DESTRUCT);
 	}
 }
@@ -525,6 +525,28 @@  static void sas_revalidate_domain(struct work_struct *work)
 	mutex_unlock(&ha->disco_mutex);
 }
 
+/* ---------- Async Port destruct ---------- */
+static void sas_async_port_destruct(struct work_struct *work)
+{
+	struct sas_discovery_event *ev = to_sas_discovery_event(work);
+	struct asd_sas_port *port = ev->port;
+	struct sas_port *sas_port, *n;
+
+	clear_bit(DISCE_PORT_DESTRUCT, &port->disc.pending);
+
+	list_for_each_entry_safe(sas_port, n, &port->port_destroy_list, destroy_list) {
+		list_del_init(&port->port_destroy_list);
+
+		sas_port_delete(sas_port);
+	}
+}
+
+void sas_port_destruct(struct asd_sas_port *port, struct sas_port *sas_port)
+{
+	list_move_tail(&sas_port->destroy_list, &port->port_destroy_list);
+	sas_discover_event(port, DISCE_PORT_DESTRUCT);
+}
+
 /* ---------- Events ---------- */
 
 static void sas_chain_work(struct sas_ha_struct *ha, struct sas_work *sw)
@@ -582,6 +604,7 @@  void sas_init_disc(struct sas_discovery *disc, struct asd_sas_port *port)
 		[DISCE_SUSPEND] = sas_suspend_devices,
 		[DISCE_RESUME] = sas_resume_devices,
 		[DISCE_DESTRUCT] = sas_destruct_devices,
+		[DISCE_PORT_DESTRUCT] = sas_async_port_destruct,
 	};
 
 	disc->pending = 0;
diff --git a/drivers/scsi/libsas/sas_expander.c b/drivers/scsi/libsas/sas_expander.c
index 022bb6e..f9522a0 100644
--- a/drivers/scsi/libsas/sas_expander.c
+++ b/drivers/scsi/libsas/sas_expander.c
@@ -1900,10 +1900,11 @@  static void sas_unregister_devs_sas_addr(struct domain_device *parent,
 	}
 	memset(phy->attached_sas_addr, 0, SAS_ADDR_SIZE);
 	if (phy->port) {
+		struct asd_sas_port *port = found->port;
 		sas_port_delete_phy(phy->port, phy->phy);
 		sas_device_set_phy(found, phy->port);
 		if (phy->port->num_phys == 0)
-			sas_port_delete(phy->port);
+			sas_port_destruct(port, phy->port);
 		phy->port = NULL;
 	}
 }
diff --git a/drivers/scsi/libsas/sas_port.c b/drivers/scsi/libsas/sas_port.c
index d3c5297..1a32f86 100644
--- a/drivers/scsi/libsas/sas_port.c
+++ b/drivers/scsi/libsas/sas_port.c
@@ -219,7 +219,7 @@  void sas_deform_port(struct asd_sas_phy *phy, int gone)
 
 	if (port->num_phys == 1) {
 		sas_unregister_domain_devices(port, gone);
-		sas_port_delete(port->port);
+		sas_port_destruct(port, port->port);
 		port->port = NULL;
 	} else {
 		sas_port_delete_phy(port->port, phy->phy);
@@ -322,7 +322,8 @@  static void sas_init_port(struct asd_sas_port *port,
 	port->id = i;
 	INIT_LIST_HEAD(&port->dev_list);
 	INIT_LIST_HEAD(&port->disco_list);
-	INIT_LIST_HEAD(&port->destroy_list);
+	INIT_LIST_HEAD(&port->dev_destroy_list);
+	INIT_LIST_HEAD(&port->port_destroy_list);
 	spin_lock_init(&port->phy_list_lock);
 	INIT_LIST_HEAD(&port->phy_list);
 	port->ha = sas_ha;
diff --git a/drivers/scsi/scsi_transport_sas.c b/drivers/scsi/scsi_transport_sas.c
index 60b651b..062c03c 100644
--- a/drivers/scsi/scsi_transport_sas.c
+++ b/drivers/scsi/scsi_transport_sas.c
@@ -934,6 +934,7 @@  struct sas_port *sas_port_alloc(struct device *parent, int port_id)
 
 	mutex_init(&port->phy_list_mutex);
 	INIT_LIST_HEAD(&port->phy_list);
+	INIT_LIST_HEAD(&port->destroy_list);
 
 	if (scsi_is_sas_expander_device(parent)) {
 		struct sas_rphy *rphy = dev_to_rphy(parent);
diff --git a/include/scsi/libsas.h b/include/scsi/libsas.h
index dae99d7..a7953c8 100644
--- a/include/scsi/libsas.h
+++ b/include/scsi/libsas.h
@@ -91,7 +91,8 @@  enum discover_event {
 	DISCE_SUSPEND		= 4,
 	DISCE_RESUME		= 5,
 	DISCE_DESTRUCT		= 6,
-	DISC_NUM_EVENTS		= 7,
+	DISCE_PORT_DESTRUCT	= 7,
+	DISC_NUM_EVENTS,
 };
 
 /* ---------- Expander Devices ---------- */
@@ -268,7 +269,8 @@  struct asd_sas_port {
 	spinlock_t dev_list_lock;
 	struct list_head dev_list;
 	struct list_head disco_list;
-	struct list_head destroy_list;
+	struct list_head dev_destroy_list;
+	struct list_head port_destroy_list;
 	enum   sas_linkrate linkrate;
 
 	struct sas_work work;
@@ -702,6 +704,7 @@  extern int sas_bios_param(struct scsi_device *,
 int  sas_ex_revalidate_domain(struct domain_device *);
 
 void sas_unregister_domain_devices(struct asd_sas_port *port, int gone);
+void sas_port_destruct(struct asd_sas_port *port, struct sas_port *sas_port);
 void sas_init_disc(struct sas_discovery *disc, struct asd_sas_port *);
 int  sas_discover_event(struct asd_sas_port *, enum discover_event ev);
 
diff --git a/include/scsi/scsi_transport_sas.h b/include/scsi/scsi_transport_sas.h
index 73d8709..b495aac 100644
--- a/include/scsi/scsi_transport_sas.h
+++ b/include/scsi/scsi_transport_sas.h
@@ -154,6 +154,7 @@  struct sas_port {
 
 	struct mutex		phy_list_mutex;
 	struct list_head	phy_list;
+	struct list_head	destroy_list; /* only used by libsas */
 };
 
 #define dev_to_sas_port(d) \