diff mbox series

[v4] Extend watchdog timeout during kernel panic.

Message ID 20210203153602.82063-1-jp.ertola@hpe.com
State Superseded
Headers show
Series [v4] Extend watchdog timeout during kernel panic. | expand

Commit Message

JP Ertola Feb. 3, 2021, 3:36 p.m. UTC
If the watchdog timeout is set such that the crash kernel does not
have time to collect a coredump and the crash kernel is not equipped to
ping the watchdog timer itself, then a kernel coredump will not be collected
before the watchdog fires. This change registers a panic notifier and
callback so the watchdog timeout can be extended if a kernel panic occurs.
This timeout extension would give the crash kernel enough time to collect
a coredump before the CPU resets. The watchdog timeout is extended if and only
if a crash kernel image is loaded in memory, the watchdog is active at the
time of the panic, and the kconfig setting is set.

A Kconfig option has been added to configure the timeout duration at
compile-time. Default is zero seconds.

Signed-off-by: JP Ertola <jp.ertola@hpe.com>
---
v4: Remove optional callback mechanism alltogether. I agree with Guenter, 
not widely used.
v3: Fix logic so timeout extension is not longer than wdd->max_timeout
v2: Remove dead code and comments.
 drivers/watchdog/Kconfig        | 13 ++++++
 drivers/watchdog/watchdog_dev.c | 80 ++++++++++++++++++++++++++++++++-
 include/linux/watchdog.h        |  1 +
 3 files changed, 93 insertions(+), 1 deletion(-)

Comments

Guenter Roeck Feb. 4, 2021, 2:01 a.m. UTC | #1
On 2/3/21 7:36 AM, JP Ertola wrote:
> If the watchdog timeout is set such that the crash kernel does not

> have time to collect a coredump and the crash kernel is not equipped to

> ping the watchdog timer itself, then a kernel coredump will not be collected

> before the watchdog fires. This change registers a panic notifier and

> callback so the watchdog timeout can be extended if a kernel panic occurs.

> This timeout extension would give the crash kernel enough time to collect

> a coredump before the CPU resets. The watchdog timeout is extended if and only

> if a crash kernel image is loaded in memory, the watchdog is active at the

> time of the panic, and the kconfig setting is set.

> 

> A Kconfig option has been added to configure the timeout duration at

> compile-time. Default is zero seconds.

> 

> Signed-off-by: JP Ertola <jp.ertola@hpe.com>

> ---

> v4: Remove optional callback mechanism alltogether. I agree with Guenter, 

> not widely used.

> v3: Fix logic so timeout extension is not longer than wdd->max_timeout

> v2: Remove dead code and comments.

>  drivers/watchdog/Kconfig        | 13 ++++++

>  drivers/watchdog/watchdog_dev.c | 80 ++++++++++++++++++++++++++++++++-

>  include/linux/watchdog.h        |  1 +

>  3 files changed, 93 insertions(+), 1 deletion(-)

> 

> diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig

> index fd7968635e6d..f1055985e100 100644

> --- a/drivers/watchdog/Kconfig

> +++ b/drivers/watchdog/Kconfig

> @@ -141,6 +141,19 @@ comment "Watchdog Device Drivers"

>  

>  # Architecture Independent

>  

> +config DEFAULT_WATCHDOG_CRASH_KERNEL_TIMEOUT

> +	int "Default timeout for watchdog timer before crash kernel starts (seconds)"

> +	default 0

> +	help

> +	  This option allows an extended timeout to be used for the watchdog when

> +	  the kernel panics and a crash kernel is about to start. This is helpful

> +	  when the existing WDT timeout value is less than the time required for

> +	  crash kernel to run and the crash kernel is unable to handle the

> +	  the watchdog itself. The timeout extension happens last in chain of

> +	  kernel panic handler callbacks just in case another panic handler

> +	  hangs unexpectedly. When this value is set to 0, the watchdog timeout

> +	  will not be changed.

> +

>  config SOFT_WATCHDOG

>  	tristate "Software watchdog"

>  	select WATCHDOG_CORE

> diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c

> index 2946f3a63110..cf91c08b0606 100644

> --- a/drivers/watchdog/watchdog_dev.c

> +++ b/drivers/watchdog/watchdog_dev.c

> @@ -34,6 +34,7 @@

>  #include <linux/init.h>		/* For __init/__exit/... */

>  #include <linux/hrtimer.h>	/* For hrtimers */

>  #include <linux/kernel.h>	/* For printk/panic/... */

> +#include <linux/kexec.h>	/* For checking if crash kernel is loaded */

>  #include <linux/kthread.h>	/* For kthread_work */

>  #include <linux/miscdevice.h>	/* For handling misc devices */

>  #include <linux/module.h>	/* For module stuff/... */

> @@ -82,6 +83,8 @@ static bool handle_boot_enabled =

>  

>  static unsigned open_timeout = CONFIG_WATCHDOG_OPEN_TIMEOUT;

>  

> +static unsigned int wdt_panic_timeout = CONFIG_DEFAULT_WATCHDOG_CRASH_KERNEL_TIMEOUT;

> +

>  static bool watchdog_past_open_deadline(struct watchdog_core_data *data)

>  {

>  	return ktime_after(ktime_get(), data->open_deadline);

> @@ -658,6 +661,59 @@ static int watchdog_ioctl_op(struct watchdog_device *wdd, unsigned int cmd,

>   *	off the watchdog (if 'nowayout' is not set).

>   */

>  

> +static int watchdog_panic_notifier(struct notifier_block *nb,

> +	unsigned long code, void *data)

> +{

> +	struct watchdog_device *wdd;

> +	int ret = 0;

> +	unsigned int time_out = wdt_panic_timeout;


Please use timeout as variable name.

> +

> +	if (wdt_panic_timeout == 0)

> +		return NOTIFY_DONE;

> +

> +	wdd = container_of(nb, struct watchdog_device, panic_nb);

> +

> +	if (watchdog_timeout_invalid(wdd, wdt_panic_timeout)) {

> +		time_out = min(wdt_panic_timeout, wdd->max_timeout);

> +		pr_err("watchdog%d: timeout extension value "

> +			" invalid. Falling back to %d seconds\n", wdd->id,


String in single line, please.

> +			time_out);

> +	}

> +

> +	if (kexec_crash_image && watchdog_active(wdd)) {

> +		int ping_ret = 0;


Unnecessary assignment.

> +

> +		pr_info("watchdog%d: Extending watchdog timeout to "

> +			"%d seconds\n", wdd->id, time_out);

> +

> +		ret = watchdog_set_timeout(wdd, time_out);

> +

Unnecessary empty line

> +		if (ret) {

> +			pr_err("watchdog%d: Unable to extend watchdog timeout "

> +				"before starting crash kernel (%d)",

> +				wdd->id, ret);

> +		}

> +

> +		/* Many watchdog implementations will reset the timer

> +		* when the timeout is changed, but ping again to be

> +		* safe.

> +		*/


This needs to call __watchdog_ping(). Some watchdog ping functions have
a minimum time between calls of the ping function, and others may not
have a ping function but use the start function instead.

> +		if (wdd->ops->ping) {

> +			ping_ret = wdd->ops->ping(wdd);

> +			if (ping_ret) {

> +				pr_warn("watchdog%d: Unable to ping "

> +					"watchdog before starting "

> +					"crash kernel (%d)\n",

> +					wdd->id, ping_ret);

> +			}

> +		}

> +

> +

Please drop those unnecessary empty lines.

> +	}

> +

> +	return notifier_from_errno(ret);


Are you sure about that ? I don't know the implication when NOTIFY_BAD is returned.

> +}

> +

>  static ssize_t watchdog_write(struct file *file, const char __user *data,

>  						size_t len, loff_t *ppos)

>  {

> @@ -1118,8 +1174,25 @@ int watchdog_dev_register(struct watchdog_device *wdd)

>  		return ret;

>  

>  	ret = watchdog_register_pretimeout(wdd);

> -	if (ret)

> +	if (ret) {

>  		watchdog_cdev_unregister(wdd);

> +		return ret;

> +	}

> +

> +	/*

> +	 * Setting panic_nb priority to minimum ensures the watchdog device

> +	 * panic callback runs last in the chain of kernel panic callbacks.

> +	 * This way, the watchdog device will still fire in the event another

> +	 * panic callback hangs.

> +	 */

> +	wdd->panic_nb.priority = INT_MIN;

> +	wdd->panic_nb.notifier_call = watchdog_panic_notifier;

> +

> +	ret = atomic_notifier_chain_register(&panic_notifier_list,

> +					&wdd->panic_nb);

> +	if (ret)

> +		pr_err("watchdog%d: Cannot register panic notifier (%d)\n",

> +			wdd->id, ret);


Either make this a warning and ignore the error (return 0),
or this needs a proper cleanup.

>  

>  	return ret;>  }

> @@ -1228,3 +1301,8 @@ module_param(open_timeout, uint, 0644);

>  MODULE_PARM_DESC(open_timeout,

>  	"Maximum time (in seconds, 0 means infinity) for userspace to take over a running watchdog (default="

>  	__MODULE_STRING(CONFIG_WATCHDOG_OPEN_TIMEOUT) ")");

> +

> +module_param(wdt_panic_timeout, uint, 0444);

> +MODULE_PARM_DESC(wdt_panic_timeout,

> +	"Watchdog timeout extension duration upon kernel panic. (default="

> +	__MODULE_STRING(CONFIG_DEFAULT_WATCHDOG_CRASH_KERNEL_TIMEOUT) " seconds)");

> diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h

> index 9b19e6bb68b5..bc7e6e8aa7ab 100644

> --- a/include/linux/watchdog.h

> +++ b/include/linux/watchdog.h

> @@ -107,6 +107,7 @@ struct watchdog_device {

>  	unsigned int max_hw_heartbeat_ms;

>  	struct notifier_block reboot_nb;

>  	struct notifier_block restart_nb;

> +	struct notifier_block panic_nb;

>  	void *driver_data;

>  	struct watchdog_core_data *wd_data;

>  	unsigned long status;

>
diff mbox series

Patch

diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index fd7968635e6d..f1055985e100 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -141,6 +141,19 @@  comment "Watchdog Device Drivers"
 
 # Architecture Independent
 
+config DEFAULT_WATCHDOG_CRASH_KERNEL_TIMEOUT
+	int "Default timeout for watchdog timer before crash kernel starts (seconds)"
+	default 0
+	help
+	  This option allows an extended timeout to be used for the watchdog when
+	  the kernel panics and a crash kernel is about to start. This is helpful
+	  when the existing WDT timeout value is less than the time required for
+	  crash kernel to run and the crash kernel is unable to handle the
+	  the watchdog itself. The timeout extension happens last in chain of
+	  kernel panic handler callbacks just in case another panic handler
+	  hangs unexpectedly. When this value is set to 0, the watchdog timeout
+	  will not be changed.
+
 config SOFT_WATCHDOG
 	tristate "Software watchdog"
 	select WATCHDOG_CORE
diff --git a/drivers/watchdog/watchdog_dev.c b/drivers/watchdog/watchdog_dev.c
index 2946f3a63110..cf91c08b0606 100644
--- a/drivers/watchdog/watchdog_dev.c
+++ b/drivers/watchdog/watchdog_dev.c
@@ -34,6 +34,7 @@ 
 #include <linux/init.h>		/* For __init/__exit/... */
 #include <linux/hrtimer.h>	/* For hrtimers */
 #include <linux/kernel.h>	/* For printk/panic/... */
+#include <linux/kexec.h>	/* For checking if crash kernel is loaded */
 #include <linux/kthread.h>	/* For kthread_work */
 #include <linux/miscdevice.h>	/* For handling misc devices */
 #include <linux/module.h>	/* For module stuff/... */
@@ -82,6 +83,8 @@  static bool handle_boot_enabled =
 
 static unsigned open_timeout = CONFIG_WATCHDOG_OPEN_TIMEOUT;
 
+static unsigned int wdt_panic_timeout = CONFIG_DEFAULT_WATCHDOG_CRASH_KERNEL_TIMEOUT;
+
 static bool watchdog_past_open_deadline(struct watchdog_core_data *data)
 {
 	return ktime_after(ktime_get(), data->open_deadline);
@@ -658,6 +661,59 @@  static int watchdog_ioctl_op(struct watchdog_device *wdd, unsigned int cmd,
  *	off the watchdog (if 'nowayout' is not set).
  */
 
+static int watchdog_panic_notifier(struct notifier_block *nb,
+	unsigned long code, void *data)
+{
+	struct watchdog_device *wdd;
+	int ret = 0;
+	unsigned int time_out = wdt_panic_timeout;
+
+	if (wdt_panic_timeout == 0)
+		return NOTIFY_DONE;
+
+	wdd = container_of(nb, struct watchdog_device, panic_nb);
+
+	if (watchdog_timeout_invalid(wdd, wdt_panic_timeout)) {
+		time_out = min(wdt_panic_timeout, wdd->max_timeout);
+		pr_err("watchdog%d: timeout extension value "
+			" invalid. Falling back to %d seconds\n", wdd->id,
+			time_out);
+	}
+
+	if (kexec_crash_image && watchdog_active(wdd)) {
+		int ping_ret = 0;
+
+		pr_info("watchdog%d: Extending watchdog timeout to "
+			"%d seconds\n", wdd->id, time_out);
+
+		ret = watchdog_set_timeout(wdd, time_out);
+
+		if (ret) {
+			pr_err("watchdog%d: Unable to extend watchdog timeout "
+				"before starting crash kernel (%d)",
+				wdd->id, ret);
+		}
+
+		/* Many watchdog implementations will reset the timer
+		* when the timeout is changed, but ping again to be
+		* safe.
+		*/
+		if (wdd->ops->ping) {
+			ping_ret = wdd->ops->ping(wdd);
+			if (ping_ret) {
+				pr_warn("watchdog%d: Unable to ping "
+					"watchdog before starting "
+					"crash kernel (%d)\n",
+					wdd->id, ping_ret);
+			}
+		}
+
+
+	}
+
+	return notifier_from_errno(ret);
+}
+
 static ssize_t watchdog_write(struct file *file, const char __user *data,
 						size_t len, loff_t *ppos)
 {
@@ -1118,8 +1174,25 @@  int watchdog_dev_register(struct watchdog_device *wdd)
 		return ret;
 
 	ret = watchdog_register_pretimeout(wdd);
-	if (ret)
+	if (ret) {
 		watchdog_cdev_unregister(wdd);
+		return ret;
+	}
+
+	/*
+	 * Setting panic_nb priority to minimum ensures the watchdog device
+	 * panic callback runs last in the chain of kernel panic callbacks.
+	 * This way, the watchdog device will still fire in the event another
+	 * panic callback hangs.
+	 */
+	wdd->panic_nb.priority = INT_MIN;
+	wdd->panic_nb.notifier_call = watchdog_panic_notifier;
+
+	ret = atomic_notifier_chain_register(&panic_notifier_list,
+					&wdd->panic_nb);
+	if (ret)
+		pr_err("watchdog%d: Cannot register panic notifier (%d)\n",
+			wdd->id, ret);
 
 	return ret;
 }
@@ -1228,3 +1301,8 @@  module_param(open_timeout, uint, 0644);
 MODULE_PARM_DESC(open_timeout,
 	"Maximum time (in seconds, 0 means infinity) for userspace to take over a running watchdog (default="
 	__MODULE_STRING(CONFIG_WATCHDOG_OPEN_TIMEOUT) ")");
+
+module_param(wdt_panic_timeout, uint, 0444);
+MODULE_PARM_DESC(wdt_panic_timeout,
+	"Watchdog timeout extension duration upon kernel panic. (default="
+	__MODULE_STRING(CONFIG_DEFAULT_WATCHDOG_CRASH_KERNEL_TIMEOUT) " seconds)");
diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h
index 9b19e6bb68b5..bc7e6e8aa7ab 100644
--- a/include/linux/watchdog.h
+++ b/include/linux/watchdog.h
@@ -107,6 +107,7 @@  struct watchdog_device {
 	unsigned int max_hw_heartbeat_ms;
 	struct notifier_block reboot_nb;
 	struct notifier_block restart_nb;
+	struct notifier_block panic_nb;
 	void *driver_data;
 	struct watchdog_core_data *wd_data;
 	unsigned long status;