diff mbox series

[v2,1/3] wifi: rt2x00: introduce DMA busy check watchdog for rt2800

Message ID TYAP286MB0315D7462CE08A119A99DE34BCA4A@TYAP286MB0315.JPNP286.PROD.OUTLOOK.COM
State New
Headers show
Series [v2,1/3] wifi: rt2x00: introduce DMA busy check watchdog for rt2800 | expand

Commit Message

Shiji Yang Nov. 4, 2023, 8:57 a.m. UTC
When I tried to fix the watchdog of rt2800, I found that sometimes
the watchdog can not reset the hung device. This is because the
queue is not completely stuck, it just becomes very slow. The MTK
vendor driver for the new chip MT7603/MT7612 has a DMA busy watchdog
to detect device hangs by checking DMA busy status. This watchdog
implementation is something similar to it. To reduce unnecessary
reset, we can check the INT_SOURCE_CSR register together as I found
that when the radio hung, the RX/TX coherent interrupt will always
stuck at triggered state.

The 'watchdog' module parameter has been extended to control all
watchdogs(0=disabled, 1=hang watchdog, 2=DMA watchdog, 3=both). This
new watchdog function is a slight schedule and it won't affect the
transmission speed. So we can turn on it by default. Due to the
INT_SOURCE_CSR register is invalid on rt2800 USB NICs, the DMA busy
watchdog will be automatically disabled for them.

Tested on MT7620 and RT5350.

Signed-off-by: Shiji Yang <yangshiji66@outlook.com>
Acked-by: Stanislaw Gruszka <stf_xl@wp.pl>
---
 drivers/net/wireless/ralink/rt2x00/rt2800.h   |  4 +
 .../net/wireless/ralink/rt2x00/rt2800lib.c    | 77 ++++++++++++++++---
 drivers/net/wireless/ralink/rt2x00/rt2x00.h   |  3 +
 3 files changed, 73 insertions(+), 11 deletions(-)

Comments

Kalle Valo Nov. 8, 2023, 6:14 p.m. UTC | #1
Shiji Yang <yangshiji66@outlook.com> wrote:

> When I tried to fix the watchdog of rt2800, I found that sometimes
> the watchdog can not reset the hung device. This is because the
> queue is not completely stuck, it just becomes very slow. The MTK
> vendor driver for the new chip MT7603/MT7612 has a DMA busy watchdog
> to detect device hangs by checking DMA busy status. This watchdog
> implementation is something similar to it. To reduce unnecessary
> reset, we can check the INT_SOURCE_CSR register together as I found
> that when the radio hung, the RX/TX coherent interrupt will always
> stuck at triggered state.
> 
> The 'watchdog' module parameter has been extended to control all
> watchdogs(0=disabled, 1=hang watchdog, 2=DMA watchdog, 3=both). This
> new watchdog function is a slight schedule and it won't affect the
> transmission speed. So we can turn on it by default. Due to the
> INT_SOURCE_CSR register is invalid on rt2800 USB NICs, the DMA busy
> watchdog will be automatically disabled for them.
> 
> Tested on MT7620 and RT5350.
> 
> Signed-off-by: Shiji Yang <yangshiji66@outlook.com>
> Acked-by: Stanislaw Gruszka <stf_xl@wp.pl>

3 patches applied to wireless-next.git, thanks.

b1275cdd7456 wifi: rt2x00: introduce DMA busy check watchdog for rt2800
570beb6285fd wifi: rt2x00: disable RTS threshold for rt2800 by default
a11d965a218f wifi: rt2x00: restart beacon queue when hardware reset
diff mbox series

Patch

diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800.h b/drivers/net/wireless/ralink/rt2x00/rt2800.h
index 48521e455..8930589b4 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2800.h
+++ b/drivers/net/wireless/ralink/rt2x00/rt2800.h
@@ -3194,4 +3194,8 @@  enum rt2800_eeprom_word {
  */
 #define BCN_TBTT_OFFSET 64
 
+/* Watchdog type mask */
+#define RT2800_WATCHDOG_HANG		BIT(0)
+#define RT2800_WATCHDOG_DMA_BUSY	BIT(1)
+
 #endif /* RT2800_H */
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c
index 594dd3d9f..be4f7c144 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c
@@ -30,9 +30,10 @@ 
 #include "rt2800lib.h"
 #include "rt2800.h"
 
-static bool modparam_watchdog;
-module_param_named(watchdog, modparam_watchdog, bool, S_IRUGO);
-MODULE_PARM_DESC(watchdog, "Enable watchdog to detect tx/rx hangs and reset hardware if detected");
+static unsigned int modparam_watchdog = RT2800_WATCHDOG_DMA_BUSY;
+module_param_named(watchdog, modparam_watchdog, uint, 0444);
+MODULE_PARM_DESC(watchdog, "Enable watchdog to recover tx/rx hangs.\n"
+		 "\t\t(0=disabled, 1=hang watchdog, 2=DMA watchdog(default), 3=both)");
 
 /*
  * Register access.
@@ -1260,15 +1261,12 @@  static void rt2800_update_survey(struct rt2x00_dev *rt2x00dev)
 	chan_survey->time_ext_busy += rt2800_register_read(rt2x00dev, CH_BUSY_STA_SEC);
 }
 
-void rt2800_watchdog(struct rt2x00_dev *rt2x00dev)
+static bool rt2800_watchdog_hung(struct rt2x00_dev *rt2x00dev)
 {
 	struct data_queue *queue;
 	bool hung_tx = false;
 	bool hung_rx = false;
 
-	if (test_bit(DEVICE_STATE_SCANNING, &rt2x00dev->flags))
-		return;
-
 	rt2800_update_survey(rt2x00dev);
 
 	queue_for_each(rt2x00dev, queue) {
@@ -1296,18 +1294,72 @@  void rt2800_watchdog(struct rt2x00_dev *rt2x00dev)
 		}
 	}
 
+	if (!hung_tx && !hung_rx)
+		return false;
+
 	if (hung_tx)
 		rt2x00_warn(rt2x00dev, "Watchdog TX hung detected\n");
 
 	if (hung_rx)
 		rt2x00_warn(rt2x00dev, "Watchdog RX hung detected\n");
 
-	if (hung_tx || hung_rx) {
-		queue_for_each(rt2x00dev, queue)
-			queue->wd_count = 0;
+	queue_for_each(rt2x00dev, queue)
+		queue->wd_count = 0;
+
+	return true;
+}
+
+static bool rt2800_watchdog_dma_busy(struct rt2x00_dev *rt2x00dev)
+{
+	bool busy_rx, busy_tx;
+	u32 reg_cfg = rt2800_register_read(rt2x00dev, WPDMA_GLO_CFG);
+	u32 reg_int = rt2800_register_read(rt2x00dev, INT_SOURCE_CSR);
+
+	if (rt2x00_get_field32(reg_cfg, WPDMA_GLO_CFG_RX_DMA_BUSY) &&
+	    rt2x00_get_field32(reg_int, INT_SOURCE_CSR_RX_COHERENT))
+		rt2x00dev->rxdma_busy++;
+	else
+		rt2x00dev->rxdma_busy = 0;
 
+	if (rt2x00_get_field32(reg_cfg, WPDMA_GLO_CFG_TX_DMA_BUSY) &&
+	    rt2x00_get_field32(reg_int, INT_SOURCE_CSR_TX_COHERENT))
+		rt2x00dev->txdma_busy++;
+	else
+		rt2x00dev->txdma_busy = 0;
+
+	busy_rx = rt2x00dev->rxdma_busy > 30 ? true : false;
+	busy_tx = rt2x00dev->txdma_busy > 30 ? true : false;
+
+	if (!busy_rx && !busy_tx)
+		return false;
+
+	if (busy_rx)
+		rt2x00_warn(rt2x00dev, "Watchdog RX DMA busy detected\n");
+
+	if (busy_tx)
+		rt2x00_warn(rt2x00dev, "Watchdog TX DMA busy detected\n");
+
+	rt2x00dev->rxdma_busy = 0;
+	rt2x00dev->txdma_busy = 0;
+
+	return true;
+}
+
+void rt2800_watchdog(struct rt2x00_dev *rt2x00dev)
+{
+	bool reset = false;
+
+	if (test_bit(DEVICE_STATE_SCANNING, &rt2x00dev->flags))
+		return;
+
+	if (modparam_watchdog & RT2800_WATCHDOG_DMA_BUSY)
+		reset = rt2800_watchdog_dma_busy(rt2x00dev);
+
+	if (modparam_watchdog & RT2800_WATCHDOG_HANG)
+		reset = rt2800_watchdog_hung(rt2x00dev) || reset;
+
+	if (reset)
 		ieee80211_restart_hw(rt2x00dev->hw);
-	}
 }
 EXPORT_SYMBOL_GPL(rt2800_watchdog);
 
@@ -12015,6 +12067,9 @@  int rt2800_probe_hw(struct rt2x00_dev *rt2x00dev)
 		__set_bit(REQUIRE_TASKLET_CONTEXT, &rt2x00dev->cap_flags);
 	}
 
+	/* USB NICs don't support DMA watchdog as INT_SOURCE_CSR is invalid */
+	if (rt2x00_is_usb(rt2x00dev))
+		modparam_watchdog &= ~RT2800_WATCHDOG_DMA_BUSY;
 	if (modparam_watchdog) {
 		__set_bit(CAPABILITY_RESTART_HW, &rt2x00dev->cap_flags);
 		rt2x00dev->link.watchdog_interval = msecs_to_jiffies(100);
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00.h b/drivers/net/wireless/ralink/rt2x00/rt2x00.h
index aaaf99331..62fed38f4 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2x00.h
+++ b/drivers/net/wireless/ralink/rt2x00/rt2x00.h
@@ -926,6 +926,9 @@  struct rt2x00_dev {
 	 */
 	u16 beacon_int;
 
+	/* Rx/Tx DMA busy watchdog counter */
+	u16 rxdma_busy, txdma_busy;
+
 	/**
 	 * Timestamp of last received beacon
 	 */