diff mbox series

[v2,1/2] Bluetooth: btintel_pcie: Fix potential race condition in firmware download

Message ID 20250525101619.530255-1-kiran.k@intel.com
State New
Headers show
Series [v2,1/2] Bluetooth: btintel_pcie: Fix potential race condition in firmware download | expand

Commit Message

K, Kiran May 25, 2025, 10:16 a.m. UTC
During firmware download, if an error occurs, interrupts must be
disabled, synchronized, and re-enabled before retrying the download.
This change ensures proper interrupt handling to prevent race
conditions.

Signed-off-by: Chandrashekar Devegowda <chandrashekar.devegowda@intel.com>
Signed-off-by: Kiran K <kiran.k@intel.com>
---
 drivers/bluetooth/btintel_pcie.c | 33 ++++++++++++++++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

Comments

Paul Menzel May 26, 2025, 7:33 a.m. UTC | #1
Dear Kiran, dear Chandrashekar,


Thank you for your patch.

Am 25.05.25 um 12:16 schrieb Kiran K:
> From: Chandrashekar Devegowda <chandrashekar.devegowda@intel.com>
> 
> Support function level reset (flr) on hardware exception to recover
> controller. Driver also implements the back-off time of 5 seconds and
> the maximum number of retries are limited to 5 before giving up.

Could you elaborate on the implementation, and detail how this can be 
tested?

> Signed-off-by: Chandrashekar Devegowda <chandrashekar.devegowda@intel.com>
> Signed-off-by: Kiran K <kiran.k@intel.com>
> ---
>   drivers/bluetooth/btintel_pcie.c | 234 ++++++++++++++++++++++++++++++-
>   drivers/bluetooth/btintel_pcie.h |   4 +-
>   2 files changed, 235 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c
> index 74a957784525..6779a2cfa75d 100644
> --- a/drivers/bluetooth/btintel_pcie.c
> +++ b/drivers/bluetooth/btintel_pcie.c
> @@ -41,6 +41,13 @@ static const struct pci_device_id btintel_pcie_table[] = {
>   };
>   MODULE_DEVICE_TABLE(pci, btintel_pcie_table);
>   
> +struct btintel_pcie_dev_restart_data {
> +	struct list_head list;
> +	u8 restart_count;
> +	time64_t last_error;
> +	char name[];
> +};
> +
>   /* Intel PCIe uses 4 bytes of HCI type instead of 1 byte BT SIG HCI type */
>   #define BTINTEL_PCIE_HCI_TYPE_LEN	4
>   #define BTINTEL_PCIE_HCI_CMD_PKT	0x00000001
> @@ -62,6 +69,9 @@ MODULE_DEVICE_TABLE(pci, btintel_pcie_table);
>   #define BTINTEL_PCIE_TRIGGER_REASON_USER_TRIGGER	0x17A2
>   #define BTINTEL_PCIE_TRIGGER_REASON_FW_ASSERT		0x1E61
>   
> +#define BTINTEL_PCIE_RESET_OK_TIME_SECS		5
> +#define BTINTEL_PCIE_FLR_RESET_MAX_RETRY	5
> +
>   /* Alive interrupt context */
>   enum {
>   	BTINTEL_PCIE_ROM,
> @@ -99,6 +109,14 @@ struct btintel_pcie_dbgc_ctxt {
>   	struct btintel_pcie_dbgc_ctxt_buf bufs[BTINTEL_PCIE_DBGC_BUFFER_COUNT];
>   };
>   
> +struct btintel_pcie_removal {
> +	struct pci_dev *pdev;
> +	struct work_struct work;
> +};
> +
> +static LIST_HEAD(btintel_pcie_restart_data_list);
> +static DEFINE_SPINLOCK(btintel_pcie_restart_data_lock);
> +
>   /* This function initializes the memory for DBGC buffers and formats the
>    * DBGC fragment which consists header info and DBGC buffer's LSB, MSB and
>    * size as the payload
> @@ -1927,6 +1945,9 @@ static int btintel_pcie_send_frame(struct hci_dev *hdev,
>   	u32 type;
>   	u32 old_ctxt;
>   
> +	if (test_bit(BTINTEL_PCIE_CORE_HALTED, &data->flags))
> +		return -ENODEV;
> +
>   	/* Due to the fw limitation, the type header of the packet should be
>   	 * 4 bytes unlike 1 byte for UART. In UART, the firmware can read
>   	 * the first byte to get the packet type and redirect the rest of data
> @@ -2187,9 +2208,204 @@ static int btintel_pcie_setup(struct hci_dev *hdev)
>   		}
>   		btintel_pcie_start_rx(data);
>   	}
> +
> +	if (!err)
> +		set_bit(BTINTEL_PCIE_SETUP_DONE, &data->flags);
>   	return err;
>   }
>   
> +static struct btintel_pcie_dev_restart_data *btintel_pcie_get_restart_data(struct pci_dev *pdev,
> +									   struct device *dev)
> +{
> +	struct btintel_pcie_dev_restart_data *tmp, *data = NULL;
> +	const char *name = pci_name(pdev);
> +	struct hci_dev *hdev = to_hci_dev(dev);
> +
> +	spin_lock(&btintel_pcie_restart_data_lock);
> +	list_for_each_entry(tmp, &btintel_pcie_restart_data_list, list) {
> +		if (strcmp(tmp->name, name))
> +			continue;
> +		data = tmp;
> +		break;
> +	}
> +	spin_unlock(&btintel_pcie_restart_data_lock);
> +
> +	if (data) {
> +		bt_dev_dbg(hdev, "Found restart data for BDF:%s", data->name);

Add a space after the colon?

> +		return data;
> +	}
> +
> +	/* First time allocate */
> +	data = kzalloc(struct_size(data, name, strlen(name) + 1), GFP_ATOMIC);
> +	if (!data)
> +		return NULL;
> +
> +	strscpy_pad(data->name, name, strlen(name) + 1);
> +	spin_lock(&btintel_pcie_restart_data_lock);
> +	list_add_tail(&data->list, &btintel_pcie_restart_data_list);
> +	spin_unlock(&btintel_pcie_restart_data_lock);
> +
> +	return data;
> +}
> +
> +static void btintel_pcie_free_restart_list(void)
> +{
> +	struct btintel_pcie_dev_restart_data *tmp;
> +
> +	while ((tmp = list_first_entry_or_null(&btintel_pcie_restart_data_list,
> +					       typeof(*tmp), list))) {
> +		list_del(&tmp->list);
> +		kfree(tmp);
> +	}
> +}
> +
> +static void btintel_pcie_inc_restart_count(struct pci_dev *pdev,
> +					   struct device *dev)
> +{
> +	struct btintel_pcie_dev_restart_data *data;
> +	struct hci_dev *hdev = to_hci_dev(dev);
> +	time64_t retry_window;
> +
> +	data = btintel_pcie_get_restart_data(pdev, dev);
> +	if (!data)
> +		return;
> +
> +	retry_window = ktime_get_boottime_seconds() - data->last_error;
> +	if (data->restart_count == 0) {
> +		/* First iteration initialise the time and counter */

initialises

But with the debug print below, the comment is not needed, in my opinion.

> +		data->last_error = ktime_get_boottime_seconds();
> +		data->restart_count++;
> +		bt_dev_dbg(hdev, "First iteration initialise. last_error:%lld seconds restart_count:%d",

Please add a space after the colon.

> +			   data->last_error, data->restart_count);
> +	} else if (retry_window < BTINTEL_PCIE_RESET_OK_TIME_SECS &&
> +		   data->restart_count <= BTINTEL_PCIE_FLR_RESET_MAX_RETRY) {
> +		/* FLR triggered still within the Max retry time so
> +		 * increment the counter
> +		 */
> +		data->restart_count++;
> +		bt_dev_dbg(hdev, "flr triggered still within the max retry time so increment the restart_count:%d",
> +			   data->restart_count);

Space after colon.

> +	} else if (retry_window > BTINTEL_PCIE_RESET_OK_TIME_SECS) {
> +		/* FLR triggered out of the retry window so reset */
> +		bt_dev_dbg(hdev, "flr triggered out of retry window. last_error:%lld seconds restart_count:%d",

Space after colons.

> +			   data->last_error, data->restart_count);
> +		data->last_error = 0;
> +		data->restart_count = 0;
> +	}
> +}
> +
> +static int btintel_pcie_setup_hdev(struct btintel_pcie_data *data);
> +
> +static void btintel_pcie_removal_work(struct work_struct *wk)
> +{
> +	struct btintel_pcie_removal *removal =
> +		container_of(wk, struct btintel_pcie_removal, work);
> +	struct pci_dev *pdev = removal->pdev;
> +	struct btintel_pcie_data *data;
> +	int err;
> +
> +	pci_lock_rescan_remove();
> +
> +	if (!pdev->bus)
> +		goto error;
> +
> +	data = pci_get_drvdata(pdev);
> +
> +	btintel_pcie_disable_interrupts(data);
> +	btintel_pcie_synchronize_irqs(data);
> +	flush_workqueue(data->workqueue);
> +
> +	bt_dev_dbg(data->hdev, "Release bluetooth interface");
> +	btintel_pcie_release_hdev(data);
> +
> +	err = pci_reset_function(pdev);
> +	if (err) {
> +		BT_ERR("Failed resetting the pcie device (%d)", err);
> +		goto error;
> +	}
> +
> +	btintel_pcie_enable_interrupts(data);
> +	btintel_pcie_config_msix(data);
> +
> +	err = btintel_pcie_enable_bt(data);
> +	if (err) {
> +		BT_ERR("Failed to enable bluetooth hardware after reset (%d)",
> +		       err);

What should the user reading this error do?

> +		goto error;
> +	}
> +
> +	btintel_pcie_reset_ia(data);
> +	btintel_pcie_start_rx(data);
> +	data->flags = 0;
> +
> +	err = btintel_pcie_setup_hdev(data);
> +	if (err) {
> +		BT_ERR("Failed registering hdev (%d)", err);
> +		goto error;
> +	}
> +error:
> +	pci_dev_put(pdev);
> +	pci_unlock_rescan_remove();
> +	kfree(removal);
> +}
> +
> +static void btintel_pcie_reset(struct hci_dev *hdev)
> +{
> +	struct btintel_pcie_removal *removal;
> +	struct btintel_pcie_data *data;
> +
> +	data = hci_get_drvdata(hdev);
> +
> +	if (!test_bit(BTINTEL_PCIE_SETUP_DONE, &data->flags))
> +		return;
> +
> +	removal = kzalloc(sizeof(*removal), GFP_ATOMIC);
> +	if (!removal)
> +		return;
> +
> +	flush_work(&data->rx_work);
> +	flush_work(&hdev->dump.dump_rx);
> +
> +	removal->pdev = data->pdev;
> +	INIT_WORK(&removal->work, btintel_pcie_removal_work);
> +	pci_dev_get(removal->pdev);
> +	schedule_work(&removal->work);
> +}
> +
> +static void btintel_pcie_hw_error(struct hci_dev *hdev, u8 code)
> +{
> +	struct  btintel_pcie_dev_restart_data *data;
> +	struct btintel_pcie_data *dev_data = hci_get_drvdata(hdev);
> +	struct pci_dev *pdev = dev_data->pdev;
> +	time64_t retry_window;
> +
> +	if (code == 0x13) {
> +		bt_dev_err(hdev, "Encountered top exception");
> +		return;
> +	}
> +
> +	data = btintel_pcie_get_restart_data(pdev, &hdev->dev);
> +	if (!data)
> +		return;
> +
> +	retry_window = ktime_get_boottime_seconds() - data->last_error;
> +
> +	/* If within 5 seconds max 5 attempts have already been made
> +	 * then stop any more retry and indicate to user for cold boot
> +	 */

The messages below should be improved, and the comment above can be 
removed then.

> +	if (retry_window < BTINTEL_PCIE_RESET_OK_TIME_SECS &&
> +	    data->restart_count >= BTINTEL_PCIE_FLR_RESET_MAX_RETRY) {
> +		bt_dev_err(hdev, "Max recovery retries(%d)  exhausted.",

Please add a space before (, exactly one space before *exhausted*. What 
should the user do?

> +			   BTINTEL_PCIE_FLR_RESET_MAX_RETRY);
> +		bt_dev_dbg(hdev, "Boot time:%lld seconds first_flr at:%lld seconds restart_count:%d",

Spaces after colons.

> +			   ktime_get_boottime_seconds(), data->last_error,
> +			   data->restart_count);
> +		return;
> +	}
> +	btintel_pcie_inc_restart_count(pdev, &hdev->dev);
> +	btintel_pcie_reset(hdev);
> +}
> +
>   static int btintel_pcie_setup_hdev(struct btintel_pcie_data *data)
>   {
>   	int err;
> @@ -2211,9 +2427,10 @@ static int btintel_pcie_setup_hdev(struct btintel_pcie_data *data)
>   	hdev->send = btintel_pcie_send_frame;
>   	hdev->setup = btintel_pcie_setup;
>   	hdev->shutdown = btintel_shutdown_combined;
> -	hdev->hw_error = btintel_hw_error;
> +	hdev->hw_error = btintel_pcie_hw_error;
>   	hdev->set_diag = btintel_set_diag;
>   	hdev->set_bdaddr = btintel_set_bdaddr;
> +	hdev->reset = btintel_pcie_reset;
>   
>   	err = hci_register_dev(hdev);
>   	if (err < 0) {
> @@ -2361,7 +2578,20 @@ static struct pci_driver btintel_pcie_driver = {
>   	.driver.coredump = btintel_pcie_coredump
>   #endif
>   };
> -module_pci_driver(btintel_pcie_driver);
> +
> +static int __init btintel_pcie_init(void)
> +{
> +	return pci_register_driver(&btintel_pcie_driver);
> +}
> +
> +static void __exit btintel_pcie_exit(void)
> +{
> +	pci_unregister_driver(&btintel_pcie_driver);
> +	btintel_pcie_free_restart_list();
> +}
> +
> +module_init(btintel_pcie_init);
> +module_exit(btintel_pcie_exit);
>   
>   MODULE_AUTHOR("Tedd Ho-Jeong An <tedd.an@intel.com>");
>   MODULE_DESCRIPTION("Intel Bluetooth PCIe transport driver ver " VERSION);
> diff --git a/drivers/bluetooth/btintel_pcie.h b/drivers/bluetooth/btintel_pcie.h
> index 21b964b15c1c..62b6bcdaf10f 100644
> --- a/drivers/bluetooth/btintel_pcie.h
> +++ b/drivers/bluetooth/btintel_pcie.h
> @@ -117,7 +117,9 @@ enum {
>   enum {
>   	BTINTEL_PCIE_CORE_HALTED,
>   	BTINTEL_PCIE_HWEXP_INPROGRESS,
> -	BTINTEL_PCIE_COREDUMP_INPROGRESS
> +	BTINTEL_PCIE_COREDUMP_INPROGRESS,
> +	BTINTEL_PCIE_RECOVERY_IN_PROGRESS,
> +	BTINTEL_PCIE_SETUP_DONE
>   };
>   
>   enum btintel_pcie_tlv_type {


Kind regards,

Paul
K, Kiran May 28, 2025, 2:21 a.m. UTC | #2
Hi Paul,

Thanks for your  comments.

>Subject: Re: [PATCH v2 2/2] Bluetooth: btintel_pcie: Support Function level
>reset
>
>Dear Kiran, dear Chandrashekar,
>
>
>Thank you for your patch.
>
>Am 25.05.25 um 12:16 schrieb Kiran K:
>> From: Chandrashekar Devegowda <chandrashekar.devegowda@intel.com>
>>
>> Support function level reset (flr) on hardware exception to recover
>> controller. Driver also implements the back-off time of 5 seconds and
>> the maximum number of retries are limited to 5 before giving up.
>
>Could you elaborate on the implementation, and detail how this can be tested?

1. Whenever there is an exception in the firmware, driver gets notified via MSIx.
     a. Driver shall unregister  hdev
     b. invoke pci_reset_function() to reset controller
     c. make a new instance of hdev and register to bluetooth subsystem

2. The driver must monitor the number of reset attempts triggered within a 5-second window:
    a. If the number of resets exceeds 5 attempts within the last 5 seconds, the driver should cease further reset attempts, as this indicates a serious issue with the firmware.

>
>> Signed-off-by: Chandrashekar Devegowda
>> <chandrashekar.devegowda@intel.com>
>> Signed-off-by: Kiran K <kiran.k@intel.com>
>> ---
>>   drivers/bluetooth/btintel_pcie.c | 234
>++++++++++++++++++++++++++++++-
>>   drivers/bluetooth/btintel_pcie.h |   4 +-
>>   2 files changed, 235 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/bluetooth/btintel_pcie.c
>> b/drivers/bluetooth/btintel_pcie.c
>> index 74a957784525..6779a2cfa75d 100644
>> --- a/drivers/bluetooth/btintel_pcie.c
>> +++ b/drivers/bluetooth/btintel_pcie.c
>> @@ -41,6 +41,13 @@ static const struct pci_device_id btintel_pcie_table[] =
>{
>>   };
>>   MODULE_DEVICE_TABLE(pci, btintel_pcie_table);
>>
>> +struct btintel_pcie_dev_restart_data {
>> +	struct list_head list;
>> +	u8 restart_count;
>> +	time64_t last_error;
>> +	char name[];
>> +};
>> +
>>   /* Intel PCIe uses 4 bytes of HCI type instead of 1 byte BT SIG HCI type */
>>   #define BTINTEL_PCIE_HCI_TYPE_LEN	4
>>   #define BTINTEL_PCIE_HCI_CMD_PKT	0x00000001
>> @@ -62,6 +69,9 @@ MODULE_DEVICE_TABLE(pci, btintel_pcie_table);
>>   #define BTINTEL_PCIE_TRIGGER_REASON_USER_TRIGGER	0x17A2
>>   #define BTINTEL_PCIE_TRIGGER_REASON_FW_ASSERT
>	0x1E61
>>
>> +#define BTINTEL_PCIE_RESET_OK_TIME_SECS		5
>> +#define BTINTEL_PCIE_FLR_RESET_MAX_RETRY	5
>> +
>>   /* Alive interrupt context */
>>   enum {
>>   	BTINTEL_PCIE_ROM,
>> @@ -99,6 +109,14 @@ struct btintel_pcie_dbgc_ctxt {
>>   	struct btintel_pcie_dbgc_ctxt_buf
>bufs[BTINTEL_PCIE_DBGC_BUFFER_COUNT];
>>   };
>>
>> +struct btintel_pcie_removal {
>> +	struct pci_dev *pdev;
>> +	struct work_struct work;
>> +};
>> +
>> +static LIST_HEAD(btintel_pcie_restart_data_list);
>> +static DEFINE_SPINLOCK(btintel_pcie_restart_data_lock);
>> +
>>   /* This function initializes the memory for DBGC buffers and formats the
>>    * DBGC fragment which consists header info and DBGC buffer's LSB, MSB
>and
>>    * size as the payload
>> @@ -1927,6 +1945,9 @@ static int btintel_pcie_send_frame(struct hci_dev
>*hdev,
>>   	u32 type;
>>   	u32 old_ctxt;
>>
>> +	if (test_bit(BTINTEL_PCIE_CORE_HALTED, &data->flags))
>> +		return -ENODEV;
>> +
>>   	/* Due to the fw limitation, the type header of the packet should be
>>   	 * 4 bytes unlike 1 byte for UART. In UART, the firmware can read
>>   	 * the first byte to get the packet type and redirect the rest of
>> data @@ -2187,9 +2208,204 @@ static int btintel_pcie_setup(struct hci_dev
>*hdev)
>>   		}
>>   		btintel_pcie_start_rx(data);
>>   	}
>> +
>> +	if (!err)
>> +		set_bit(BTINTEL_PCIE_SETUP_DONE, &data->flags);
>>   	return err;
>>   }
>>
>> +static struct btintel_pcie_dev_restart_data
>*btintel_pcie_get_restart_data(struct pci_dev *pdev,
>> +									   struct
>device *dev)
>> +{
>> +	struct btintel_pcie_dev_restart_data *tmp, *data = NULL;
>> +	const char *name = pci_name(pdev);
>> +	struct hci_dev *hdev = to_hci_dev(dev);
>> +
>> +	spin_lock(&btintel_pcie_restart_data_lock);
>> +	list_for_each_entry(tmp, &btintel_pcie_restart_data_list, list) {
>> +		if (strcmp(tmp->name, name))
>> +			continue;
>> +		data = tmp;
>> +		break;
>> +	}
>> +	spin_unlock(&btintel_pcie_restart_data_lock);
>> +
>> +	if (data) {
>> +		bt_dev_dbg(hdev, "Found restart data for BDF:%s", data-
>>name);
>
>Add a space after the colon?
Ack

>
>> +		return data;
>> +	}
>> +
>> +	/* First time allocate */
>> +	data = kzalloc(struct_size(data, name, strlen(name) + 1),
>GFP_ATOMIC);
>> +	if (!data)
>> +		return NULL;
>> +
>> +	strscpy_pad(data->name, name, strlen(name) + 1);
>> +	spin_lock(&btintel_pcie_restart_data_lock);
>> +	list_add_tail(&data->list, &btintel_pcie_restart_data_list);
>> +	spin_unlock(&btintel_pcie_restart_data_lock);
>> +
>> +	return data;
>> +}
>> +
>> +static void btintel_pcie_free_restart_list(void)
>> +{
>> +	struct btintel_pcie_dev_restart_data *tmp;
>> +
>> +	while ((tmp = list_first_entry_or_null(&btintel_pcie_restart_data_list,
>> +					       typeof(*tmp), list))) {
>> +		list_del(&tmp->list);
>> +		kfree(tmp);
>> +	}
>> +}
>> +
>> +static void btintel_pcie_inc_restart_count(struct pci_dev *pdev,
>> +					   struct device *dev)
>> +{
>> +	struct btintel_pcie_dev_restart_data *data;
>> +	struct hci_dev *hdev = to_hci_dev(dev);
>> +	time64_t retry_window;
>> +
>> +	data = btintel_pcie_get_restart_data(pdev, dev);
>> +	if (!data)
>> +		return;
>> +
>> +	retry_window = ktime_get_boottime_seconds() - data->last_error;
>> +	if (data->restart_count == 0) {
>> +		/* First iteration initialise the time and counter */
>
>initialises
>
>But with the debug print below, the comment is not needed, in my opinion.
>
>> +		data->last_error = ktime_get_boottime_seconds();
>> +		data->restart_count++;
>> +		bt_dev_dbg(hdev, "First iteration initialise. last_error:%lld
>> +seconds restart_count:%d",
>
>Please add a space after the colon.
>
>> +			   data->last_error, data->restart_count);
>> +	} else if (retry_window < BTINTEL_PCIE_RESET_OK_TIME_SECS &&
>> +		   data->restart_count <=
>BTINTEL_PCIE_FLR_RESET_MAX_RETRY) {
>> +		/* FLR triggered still within the Max retry time so
>> +		 * increment the counter
>> +		 */
>> +		data->restart_count++;
>> +		bt_dev_dbg(hdev, "flr triggered still within the max retry time
>so increment the restart_count:%d",
>> +			   data->restart_count);
>
>Space after colon.
Ack
>
>> +	} else if (retry_window > BTINTEL_PCIE_RESET_OK_TIME_SECS) {
>> +		/* FLR triggered out of the retry window so reset */
>> +		bt_dev_dbg(hdev, "flr triggered out of retry window.
>> +last_error:%lld seconds restart_count:%d",
>
>Space after colons.
Ack
>
>> +			   data->last_error, data->restart_count);
>> +		data->last_error = 0;
>> +		data->restart_count = 0;
>> +	}
>> +}
>> +
>> +static int btintel_pcie_setup_hdev(struct btintel_pcie_data *data);
>> +
>> +static void btintel_pcie_removal_work(struct work_struct *wk) {
>> +	struct btintel_pcie_removal *removal =
>> +		container_of(wk, struct btintel_pcie_removal, work);
>> +	struct pci_dev *pdev = removal->pdev;
>> +	struct btintel_pcie_data *data;
>> +	int err;
>> +
>> +	pci_lock_rescan_remove();
>> +
>> +	if (!pdev->bus)
>> +		goto error;
>> +
>> +	data = pci_get_drvdata(pdev);
>> +
>> +	btintel_pcie_disable_interrupts(data);
>> +	btintel_pcie_synchronize_irqs(data);
>> +	flush_workqueue(data->workqueue);
>> +
>> +	bt_dev_dbg(data->hdev, "Release bluetooth interface");
>> +	btintel_pcie_release_hdev(data);
>> +
>> +	err = pci_reset_function(pdev);
>> +	if (err) {
>> +		BT_ERR("Failed resetting the pcie device (%d)", err);
>> +		goto error;
>> +	}
>> +
>> +	btintel_pcie_enable_interrupts(data);
>> +	btintel_pcie_config_msix(data);
>> +
>> +	err = btintel_pcie_enable_bt(data);
>> +	if (err) {
>> +		BT_ERR("Failed to enable bluetooth hardware after reset (%d)",
>> +		       err);
>
>What should the user reading this error do?
The recovery attempt would fail. 
>
>> +		goto error;
>> +	}
>> +
>> +	btintel_pcie_reset_ia(data);
>> +	btintel_pcie_start_rx(data);
>> +	data->flags = 0;
>> +
>> +	err = btintel_pcie_setup_hdev(data);
>> +	if (err) {
>> +		BT_ERR("Failed registering hdev (%d)", err);
>> +		goto error;
>> +	}
>> +error:
>> +	pci_dev_put(pdev);
>> +	pci_unlock_rescan_remove();
>> +	kfree(removal);
>> +}
>> +
>> +static void btintel_pcie_reset(struct hci_dev *hdev) {
>> +	struct btintel_pcie_removal *removal;
>> +	struct btintel_pcie_data *data;
>> +
>> +	data = hci_get_drvdata(hdev);
>> +
>> +	if (!test_bit(BTINTEL_PCIE_SETUP_DONE, &data->flags))
>> +		return;
>> +
>> +	removal = kzalloc(sizeof(*removal), GFP_ATOMIC);
>> +	if (!removal)
>> +		return;
>> +
>> +	flush_work(&data->rx_work);
>> +	flush_work(&hdev->dump.dump_rx);
>> +
>> +	removal->pdev = data->pdev;
>> +	INIT_WORK(&removal->work, btintel_pcie_removal_work);
>> +	pci_dev_get(removal->pdev);
>> +	schedule_work(&removal->work);
>> +}
>> +
>> +static void btintel_pcie_hw_error(struct hci_dev *hdev, u8 code) {
>> +	struct  btintel_pcie_dev_restart_data *data;
>> +	struct btintel_pcie_data *dev_data = hci_get_drvdata(hdev);
>> +	struct pci_dev *pdev = dev_data->pdev;
>> +	time64_t retry_window;
>> +
>> +	if (code == 0x13) {
>> +		bt_dev_err(hdev, "Encountered top exception");
>> +		return;
>> +	}
>> +
>> +	data = btintel_pcie_get_restart_data(pdev, &hdev->dev);
>> +	if (!data)
>> +		return;
>> +
>> +	retry_window = ktime_get_boottime_seconds() - data->last_error;
>> +
>> +	/* If within 5 seconds max 5 attempts have already been made
>> +	 * then stop any more retry and indicate to user for cold boot
>> +	 */
>
>The messages below should be improved, and the comment above can be
>removed then.
Ack. I will rework on this. 
>
>> +	if (retry_window < BTINTEL_PCIE_RESET_OK_TIME_SECS &&
>> +	    data->restart_count >= BTINTEL_PCIE_FLR_RESET_MAX_RETRY) {
>> +		bt_dev_err(hdev, "Max recovery retries(%d)  exhausted.",
>
>Please add a space before (, exactly one space before *exhausted*. What
>should the user do?
Ack. I hope I have explained in my first comment.
>
>> +			   BTINTEL_PCIE_FLR_RESET_MAX_RETRY);
>> +		bt_dev_dbg(hdev, "Boot time:%lld seconds first_flr at:%lld
>seconds
>> +restart_count:%d",
>
>Spaces after colons.
Ack.
>
>> +			   ktime_get_boottime_seconds(), data->last_error,
>> +			   data->restart_count);
>> +		return;
>> +	}
>> +	btintel_pcie_inc_restart_count(pdev, &hdev->dev);
>> +	btintel_pcie_reset(hdev);
>> +}
>> +
>>   static int btintel_pcie_setup_hdev(struct btintel_pcie_data *data)
>>   {
>>   	int err;
>> @@ -2211,9 +2427,10 @@ static int btintel_pcie_setup_hdev(struct
>btintel_pcie_data *data)
>>   	hdev->send = btintel_pcie_send_frame;
>>   	hdev->setup = btintel_pcie_setup;
>>   	hdev->shutdown = btintel_shutdown_combined;
>> -	hdev->hw_error = btintel_hw_error;
>> +	hdev->hw_error = btintel_pcie_hw_error;
>>   	hdev->set_diag = btintel_set_diag;
>>   	hdev->set_bdaddr = btintel_set_bdaddr;
>> +	hdev->reset = btintel_pcie_reset;
>>
>>   	err = hci_register_dev(hdev);
>>   	if (err < 0) {
>> @@ -2361,7 +2578,20 @@ static struct pci_driver btintel_pcie_driver = {
>>   	.driver.coredump = btintel_pcie_coredump
>>   #endif
>>   };
>> -module_pci_driver(btintel_pcie_driver);
>> +
>> +static int __init btintel_pcie_init(void) {
>> +	return pci_register_driver(&btintel_pcie_driver);
>> +}
>> +
>> +static void __exit btintel_pcie_exit(void) {
>> +	pci_unregister_driver(&btintel_pcie_driver);
>> +	btintel_pcie_free_restart_list();
>> +}
>> +
>> +module_init(btintel_pcie_init);
>> +module_exit(btintel_pcie_exit);
>>
>>   MODULE_AUTHOR("Tedd Ho-Jeong An <tedd.an@intel.com>");
>>   MODULE_DESCRIPTION("Intel Bluetooth PCIe transport driver ver "
>> VERSION); diff --git a/drivers/bluetooth/btintel_pcie.h
>> b/drivers/bluetooth/btintel_pcie.h
>> index 21b964b15c1c..62b6bcdaf10f 100644
>> --- a/drivers/bluetooth/btintel_pcie.h
>> +++ b/drivers/bluetooth/btintel_pcie.h
>> @@ -117,7 +117,9 @@ enum {
>>   enum {
>>   	BTINTEL_PCIE_CORE_HALTED,
>>   	BTINTEL_PCIE_HWEXP_INPROGRESS,
>> -	BTINTEL_PCIE_COREDUMP_INPROGRESS
>> +	BTINTEL_PCIE_COREDUMP_INPROGRESS,
>> +	BTINTEL_PCIE_RECOVERY_IN_PROGRESS,
>> +	BTINTEL_PCIE_SETUP_DONE
>>   };
>>
>>   enum btintel_pcie_tlv_type {
>
>
>Kind regards,
>
>Paul

Thanks,
Kiran
diff mbox series

Patch

diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c
index 50fe17f1e1d1..74a957784525 100644
--- a/drivers/bluetooth/btintel_pcie.c
+++ b/drivers/bluetooth/btintel_pcie.c
@@ -2028,6 +2028,28 @@  static void btintel_pcie_release_hdev(struct btintel_pcie_data *data)
 	data->hdev = NULL;
 }
 
+static void btintel_pcie_disable_interrupts(struct btintel_pcie_data *data)
+{
+	spin_lock(&data->irq_lock);
+	btintel_pcie_wr_reg32(data, BTINTEL_PCIE_CSR_MSIX_FH_INT_MASK, data->fh_init_mask);
+	btintel_pcie_wr_reg32(data, BTINTEL_PCIE_CSR_MSIX_HW_INT_MASK, data->hw_init_mask);
+	spin_unlock(&data->irq_lock);
+}
+
+static void btintel_pcie_enable_interrupts(struct btintel_pcie_data *data)
+{
+	spin_lock(&data->irq_lock);
+	btintel_pcie_wr_reg32(data, BTINTEL_PCIE_CSR_MSIX_FH_INT_MASK, ~data->fh_init_mask);
+	btintel_pcie_wr_reg32(data, BTINTEL_PCIE_CSR_MSIX_HW_INT_MASK, ~data->hw_init_mask);
+	spin_unlock(&data->irq_lock);
+}
+
+static void btintel_pcie_synchronize_irqs(struct btintel_pcie_data *data)
+{
+	for (int i = 0; i < data->alloc_vecs; i++)
+		synchronize_irq(data->msix_entries[i].vector);
+}
+
 static int btintel_pcie_setup_internal(struct hci_dev *hdev)
 {
 	struct btintel_pcie_data *data = hci_get_drvdata(hdev);
@@ -2147,6 +2169,8 @@  static int btintel_pcie_setup(struct hci_dev *hdev)
 		bt_dev_err(hdev, "Firmware download retry count: %d",
 			   fw_dl_retry);
 		btintel_pcie_dump_debug_registers(hdev);
+		btintel_pcie_disable_interrupts(data);
+		btintel_pcie_synchronize_irqs(data);
 		err = btintel_pcie_reset_bt(data);
 		if (err) {
 			bt_dev_err(hdev, "Failed to do shr reset: %d", err);
@@ -2154,6 +2178,7 @@  static int btintel_pcie_setup(struct hci_dev *hdev)
 		}
 		usleep_range(10000, 12000);
 		btintel_pcie_reset_ia(data);
+		btintel_pcie_enable_interrupts(data);
 		btintel_pcie_config_msix(data);
 		err = btintel_pcie_enable_bt(data);
 		if (err) {
@@ -2286,6 +2311,12 @@  static void btintel_pcie_remove(struct pci_dev *pdev)
 
 	data = pci_get_drvdata(pdev);
 
+	btintel_pcie_disable_interrupts(data);
+
+	btintel_pcie_synchronize_irqs(data);
+
+	flush_workqueue(data->workqueue);
+
 	btintel_pcie_reset_bt(data);
 	for (int i = 0; i < data->alloc_vecs; i++) {
 		struct msix_entry *msix_entry;
@@ -2298,8 +2329,6 @@  static void btintel_pcie_remove(struct pci_dev *pdev)
 
 	btintel_pcie_release_hdev(data);
 
-	flush_work(&data->rx_work);
-
 	destroy_workqueue(data->workqueue);
 
 	btintel_pcie_free(data);