diff mbox series

[v2,2/2] Bluetooth: btintel_pcie: Support Function level reset

Message ID 20250525101619.530255-2-kiran.k@intel.com
State New
Headers show
Series [v2,1/2] Bluetooth: btintel_pcie: Fix potential race condition in firmware download | expand

Commit Message

K, Kiran May 25, 2025, 10:16 a.m. UTC
From: Chandrashekar Devegowda <chandrashekar.devegowda@intel.com>

Support function level reset (flr) on hardware exception to recover
controller. Driver also implements the back-off time of 5 seconds and
the maximum number of retries are limited to 5 before giving up.

Signed-off-by: Chandrashekar Devegowda <chandrashekar.devegowda@intel.com>
Signed-off-by: Kiran K <kiran.k@intel.com>
---
 drivers/bluetooth/btintel_pcie.c | 234 ++++++++++++++++++++++++++++++-
 drivers/bluetooth/btintel_pcie.h |   4 +-
 2 files changed, 235 insertions(+), 3 deletions(-)
diff mbox series

Patch

diff --git a/drivers/bluetooth/btintel_pcie.c b/drivers/bluetooth/btintel_pcie.c
index 74a957784525..6779a2cfa75d 100644
--- a/drivers/bluetooth/btintel_pcie.c
+++ b/drivers/bluetooth/btintel_pcie.c
@@ -41,6 +41,13 @@  static const struct pci_device_id btintel_pcie_table[] = {
 };
 MODULE_DEVICE_TABLE(pci, btintel_pcie_table);
 
+struct btintel_pcie_dev_restart_data {
+	struct list_head list;
+	u8 restart_count;
+	time64_t last_error;
+	char name[];
+};
+
 /* Intel PCIe uses 4 bytes of HCI type instead of 1 byte BT SIG HCI type */
 #define BTINTEL_PCIE_HCI_TYPE_LEN	4
 #define BTINTEL_PCIE_HCI_CMD_PKT	0x00000001
@@ -62,6 +69,9 @@  MODULE_DEVICE_TABLE(pci, btintel_pcie_table);
 #define BTINTEL_PCIE_TRIGGER_REASON_USER_TRIGGER	0x17A2
 #define BTINTEL_PCIE_TRIGGER_REASON_FW_ASSERT		0x1E61
 
+#define BTINTEL_PCIE_RESET_OK_TIME_SECS		5
+#define BTINTEL_PCIE_FLR_RESET_MAX_RETRY	5
+
 /* Alive interrupt context */
 enum {
 	BTINTEL_PCIE_ROM,
@@ -99,6 +109,14 @@  struct btintel_pcie_dbgc_ctxt {
 	struct btintel_pcie_dbgc_ctxt_buf bufs[BTINTEL_PCIE_DBGC_BUFFER_COUNT];
 };
 
+struct btintel_pcie_removal {
+	struct pci_dev *pdev;
+	struct work_struct work;
+};
+
+static LIST_HEAD(btintel_pcie_restart_data_list);
+static DEFINE_SPINLOCK(btintel_pcie_restart_data_lock);
+
 /* This function initializes the memory for DBGC buffers and formats the
  * DBGC fragment which consists header info and DBGC buffer's LSB, MSB and
  * size as the payload
@@ -1927,6 +1945,9 @@  static int btintel_pcie_send_frame(struct hci_dev *hdev,
 	u32 type;
 	u32 old_ctxt;
 
+	if (test_bit(BTINTEL_PCIE_CORE_HALTED, &data->flags))
+		return -ENODEV;
+
 	/* Due to the fw limitation, the type header of the packet should be
 	 * 4 bytes unlike 1 byte for UART. In UART, the firmware can read
 	 * the first byte to get the packet type and redirect the rest of data
@@ -2187,9 +2208,204 @@  static int btintel_pcie_setup(struct hci_dev *hdev)
 		}
 		btintel_pcie_start_rx(data);
 	}
+
+	if (!err)
+		set_bit(BTINTEL_PCIE_SETUP_DONE, &data->flags);
 	return err;
 }
 
+static struct btintel_pcie_dev_restart_data *btintel_pcie_get_restart_data(struct pci_dev *pdev,
+									   struct device *dev)
+{
+	struct btintel_pcie_dev_restart_data *tmp, *data = NULL;
+	const char *name = pci_name(pdev);
+	struct hci_dev *hdev = to_hci_dev(dev);
+
+	spin_lock(&btintel_pcie_restart_data_lock);
+	list_for_each_entry(tmp, &btintel_pcie_restart_data_list, list) {
+		if (strcmp(tmp->name, name))
+			continue;
+		data = tmp;
+		break;
+	}
+	spin_unlock(&btintel_pcie_restart_data_lock);
+
+	if (data) {
+		bt_dev_dbg(hdev, "Found restart data for BDF:%s", data->name);
+		return data;
+	}
+
+	/* First time allocate */
+	data = kzalloc(struct_size(data, name, strlen(name) + 1), GFP_ATOMIC);
+	if (!data)
+		return NULL;
+
+	strscpy_pad(data->name, name, strlen(name) + 1);
+	spin_lock(&btintel_pcie_restart_data_lock);
+	list_add_tail(&data->list, &btintel_pcie_restart_data_list);
+	spin_unlock(&btintel_pcie_restart_data_lock);
+
+	return data;
+}
+
+static void btintel_pcie_free_restart_list(void)
+{
+	struct btintel_pcie_dev_restart_data *tmp;
+
+	while ((tmp = list_first_entry_or_null(&btintel_pcie_restart_data_list,
+					       typeof(*tmp), list))) {
+		list_del(&tmp->list);
+		kfree(tmp);
+	}
+}
+
+static void btintel_pcie_inc_restart_count(struct pci_dev *pdev,
+					   struct device *dev)
+{
+	struct btintel_pcie_dev_restart_data *data;
+	struct hci_dev *hdev = to_hci_dev(dev);
+	time64_t retry_window;
+
+	data = btintel_pcie_get_restart_data(pdev, dev);
+	if (!data)
+		return;
+
+	retry_window = ktime_get_boottime_seconds() - data->last_error;
+	if (data->restart_count == 0) {
+		/* First iteration initialise the time and counter */
+		data->last_error = ktime_get_boottime_seconds();
+		data->restart_count++;
+		bt_dev_dbg(hdev, "First iteration initialise. last_error:%lld seconds restart_count:%d",
+			   data->last_error, data->restart_count);
+	} else if (retry_window < BTINTEL_PCIE_RESET_OK_TIME_SECS &&
+		   data->restart_count <= BTINTEL_PCIE_FLR_RESET_MAX_RETRY) {
+		/* FLR triggered still within the Max retry time so
+		 * increment the counter
+		 */
+		data->restart_count++;
+		bt_dev_dbg(hdev, "flr triggered still within the max retry time so increment the restart_count:%d",
+			   data->restart_count);
+	} else if (retry_window > BTINTEL_PCIE_RESET_OK_TIME_SECS) {
+		/* FLR triggered out of the retry window so reset */
+		bt_dev_dbg(hdev, "flr triggered out of retry window. last_error:%lld seconds restart_count:%d",
+			   data->last_error, data->restart_count);
+		data->last_error = 0;
+		data->restart_count = 0;
+	}
+}
+
+static int btintel_pcie_setup_hdev(struct btintel_pcie_data *data);
+
+static void btintel_pcie_removal_work(struct work_struct *wk)
+{
+	struct btintel_pcie_removal *removal =
+		container_of(wk, struct btintel_pcie_removal, work);
+	struct pci_dev *pdev = removal->pdev;
+	struct btintel_pcie_data *data;
+	int err;
+
+	pci_lock_rescan_remove();
+
+	if (!pdev->bus)
+		goto error;
+
+	data = pci_get_drvdata(pdev);
+
+	btintel_pcie_disable_interrupts(data);
+	btintel_pcie_synchronize_irqs(data);
+	flush_workqueue(data->workqueue);
+
+	bt_dev_dbg(data->hdev, "Release bluetooth interface");
+	btintel_pcie_release_hdev(data);
+
+	err = pci_reset_function(pdev);
+	if (err) {
+		BT_ERR("Failed resetting the pcie device (%d)", err);
+		goto error;
+	}
+
+	btintel_pcie_enable_interrupts(data);
+	btintel_pcie_config_msix(data);
+
+	err = btintel_pcie_enable_bt(data);
+	if (err) {
+		BT_ERR("Failed to enable bluetooth hardware after reset (%d)",
+		       err);
+		goto error;
+	}
+
+	btintel_pcie_reset_ia(data);
+	btintel_pcie_start_rx(data);
+	data->flags = 0;
+
+	err = btintel_pcie_setup_hdev(data);
+	if (err) {
+		BT_ERR("Failed registering hdev (%d)", err);
+		goto error;
+	}
+error:
+	pci_dev_put(pdev);
+	pci_unlock_rescan_remove();
+	kfree(removal);
+}
+
+static void btintel_pcie_reset(struct hci_dev *hdev)
+{
+	struct btintel_pcie_removal *removal;
+	struct btintel_pcie_data *data;
+
+	data = hci_get_drvdata(hdev);
+
+	if (!test_bit(BTINTEL_PCIE_SETUP_DONE, &data->flags))
+		return;
+
+	removal = kzalloc(sizeof(*removal), GFP_ATOMIC);
+	if (!removal)
+		return;
+
+	flush_work(&data->rx_work);
+	flush_work(&hdev->dump.dump_rx);
+
+	removal->pdev = data->pdev;
+	INIT_WORK(&removal->work, btintel_pcie_removal_work);
+	pci_dev_get(removal->pdev);
+	schedule_work(&removal->work);
+}
+
+static void btintel_pcie_hw_error(struct hci_dev *hdev, u8 code)
+{
+	struct  btintel_pcie_dev_restart_data *data;
+	struct btintel_pcie_data *dev_data = hci_get_drvdata(hdev);
+	struct pci_dev *pdev = dev_data->pdev;
+	time64_t retry_window;
+
+	if (code == 0x13) {
+		bt_dev_err(hdev, "Encountered top exception");
+		return;
+	}
+
+	data = btintel_pcie_get_restart_data(pdev, &hdev->dev);
+	if (!data)
+		return;
+
+	retry_window = ktime_get_boottime_seconds() - data->last_error;
+
+	/* If within 5 seconds max 5 attempts have already been made
+	 * then stop any more retry and indicate to user for cold boot
+	 */
+	if (retry_window < BTINTEL_PCIE_RESET_OK_TIME_SECS &&
+	    data->restart_count >= BTINTEL_PCIE_FLR_RESET_MAX_RETRY) {
+		bt_dev_err(hdev, "Max recovery retries(%d)  exhausted.",
+			   BTINTEL_PCIE_FLR_RESET_MAX_RETRY);
+		bt_dev_dbg(hdev, "Boot time:%lld seconds first_flr at:%lld seconds restart_count:%d",
+			   ktime_get_boottime_seconds(), data->last_error,
+			   data->restart_count);
+		return;
+	}
+	btintel_pcie_inc_restart_count(pdev, &hdev->dev);
+	btintel_pcie_reset(hdev);
+}
+
 static int btintel_pcie_setup_hdev(struct btintel_pcie_data *data)
 {
 	int err;
@@ -2211,9 +2427,10 @@  static int btintel_pcie_setup_hdev(struct btintel_pcie_data *data)
 	hdev->send = btintel_pcie_send_frame;
 	hdev->setup = btintel_pcie_setup;
 	hdev->shutdown = btintel_shutdown_combined;
-	hdev->hw_error = btintel_hw_error;
+	hdev->hw_error = btintel_pcie_hw_error;
 	hdev->set_diag = btintel_set_diag;
 	hdev->set_bdaddr = btintel_set_bdaddr;
+	hdev->reset = btintel_pcie_reset;
 
 	err = hci_register_dev(hdev);
 	if (err < 0) {
@@ -2361,7 +2578,20 @@  static struct pci_driver btintel_pcie_driver = {
 	.driver.coredump = btintel_pcie_coredump
 #endif
 };
-module_pci_driver(btintel_pcie_driver);
+
+static int __init btintel_pcie_init(void)
+{
+	return pci_register_driver(&btintel_pcie_driver);
+}
+
+static void __exit btintel_pcie_exit(void)
+{
+	pci_unregister_driver(&btintel_pcie_driver);
+	btintel_pcie_free_restart_list();
+}
+
+module_init(btintel_pcie_init);
+module_exit(btintel_pcie_exit);
 
 MODULE_AUTHOR("Tedd Ho-Jeong An <tedd.an@intel.com>");
 MODULE_DESCRIPTION("Intel Bluetooth PCIe transport driver ver " VERSION);
diff --git a/drivers/bluetooth/btintel_pcie.h b/drivers/bluetooth/btintel_pcie.h
index 21b964b15c1c..62b6bcdaf10f 100644
--- a/drivers/bluetooth/btintel_pcie.h
+++ b/drivers/bluetooth/btintel_pcie.h
@@ -117,7 +117,9 @@  enum {
 enum {
 	BTINTEL_PCIE_CORE_HALTED,
 	BTINTEL_PCIE_HWEXP_INPROGRESS,
-	BTINTEL_PCIE_COREDUMP_INPROGRESS
+	BTINTEL_PCIE_COREDUMP_INPROGRESS,
+	BTINTEL_PCIE_RECOVERY_IN_PROGRESS,
+	BTINTEL_PCIE_SETUP_DONE
 };
 
 enum btintel_pcie_tlv_type {