diff mbox series

[RFC] watchdog: Introduce panic notifier for watchdog device on kdump

Message ID 20250525121939.43112-1-weiguixiong@bytedance.com
State New
Headers show
Series [RFC] watchdog: Introduce panic notifier for watchdog device on kdump | expand

Commit Message

Guixiong Wei May 25, 2025, 12:19 p.m. UTC
The watchdog device is not stopped after a kernel crash. But the kexec
kernel may not enable the watchdog device. This will interrupt a
long-running kdump process (e.g., on TDX VMs with large memory).

So introduce a panic notifier for the watchdog device. When the kernel
crashes, the handler invokes the watchdog stop callback, so the kdump
process will not be interrupted by the watchdog device.

Signed-off-by: Guixiong Wei <weiguixiong@bytedance.com>
---
 drivers/watchdog/watchdog_core.c | 22 ++++++++++++++++++++++
 include/linux/watchdog.h         |  9 +++++++++
 2 files changed, 31 insertions(+)

Comments

George Cherian May 26, 2025, 5:50 a.m. UTC | #1
Hi Guixiong,

On 2025-05-25 at 17:49:39, Guixiong Wei (weiguixiong@bytedance.com) wrote:
> The watchdog device is not stopped after a kernel crash. But the kexec
> kernel may not enable the watchdog device. This will interrupt a
> long-running kdump process (e.g., on TDX VMs with large memory).
> 
> So introduce a panic notifier for the watchdog device. When the kernel
> crashes, the handler invokes the watchdog stop callback, so the kdump
> process will not be interrupted by the watchdog device.
> 
> Signed-off-by: Guixiong Wei <weiguixiong@bytedance.com>
A similar attempt was made in the following series:
Patch 1: https://patchwork.kernel.org/project/linux-watchdog/patch/20250522055715.3533356-2-george.cherian@marvell.com/
Patch 2: https://patchwork.kernel.org/project/linux-watchdog/patch/20250522055715.3533356-3-george.cherian@marvell.com/ 

For your particular use case, you need to set reset_on_panic to zero via
sysfs. This would disable the watchdog in the panic notifier.
It would be helpful if you could test the series above and give your
comments/Tested-by.

Regards,
-George
diff mbox series

Patch

diff --git a/drivers/watchdog/watchdog_core.c b/drivers/watchdog/watchdog_core.c
index 6152dba4b52c..7a1ad9935bf7 100644
--- a/drivers/watchdog/watchdog_core.c
+++ b/drivers/watchdog/watchdog_core.c
@@ -36,6 +36,7 @@ 
 #include <linux/of.h>		/* For of_alias_get_id */
 #include <linux/property.h>	/* For device_property_read_u32 */
 #include <linux/suspend.h>
+#include <linux/panic_notifier.h>
 
 #include "watchdog_core.h"	/* For watchdog_dev_register/... */
 
@@ -155,6 +156,22 @@  int watchdog_init_timeout(struct watchdog_device *wdd,
 }
 EXPORT_SYMBOL_GPL(watchdog_init_timeout);
 
+/* Panic notifier: stop a running hardware watchdog so that it does not interrupt kdump. */
+static int watchdog_panic_notifier(struct notifier_block *nb, unsigned long code, void *data)
+{
+	struct watchdog_device *wdd = container_of(nb, struct watchdog_device, panic_nb);
+
+	/* ->stop is optional for drivers, so never call through a NULL pointer here. */
+	if (!watchdog_hw_running(wdd) || !wdd->ops->stop)
+		return NOTIFY_DONE;
+
+	/* Propagate a failing ->stop() to the notifier chain as NOTIFY_BAD. */
+	if (wdd->ops->stop(wdd))
+		return NOTIFY_BAD;
+
+	return NOTIFY_DONE;
+}
+
 static int watchdog_reboot_notifier(struct notifier_block *nb,
 				    unsigned long code, void *data)
 {
@@ -334,6 +351,11 @@  static int ___watchdog_register_device(struct watchdog_device *wdd)
 				wdd->id, ret);
 	}
 
+	if (test_bit(WDOG_STOP_ON_PANIC, &wdd->status)) {
+		wdd->panic_nb.notifier_call = watchdog_panic_notifier;
+		atomic_notifier_chain_register(&panic_notifier_list, &wdd->panic_nb);
+	}
+
 	return 0;
 }
 
diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h
index 99660197a36c..2a74373aed28 100644
--- a/include/linux/watchdog.h
+++ b/include/linux/watchdog.h
@@ -79,6 +79,7 @@  struct watchdog_ops {
  *		Replaces max_timeout if specified.
  * @reboot_nb:	The notifier block to stop watchdog on reboot.
  * @restart_nb:	The notifier block to register a restart function.
+ * @panic_nb:	The notifier block to stop watchdog on panic.
  * @driver_data:Pointer to the drivers private data.
  * @wd_data:	Pointer to watchdog core internal data.
  * @status:	Field that contains the devices internal status bits.
@@ -107,6 +108,7 @@  struct watchdog_device {
 	unsigned int max_hw_heartbeat_ms;
 	struct notifier_block reboot_nb;
 	struct notifier_block restart_nb;
+	struct notifier_block panic_nb;
 	struct notifier_block pm_nb;
 	void *driver_data;
 	struct watchdog_core_data *wd_data;
@@ -118,6 +120,7 @@  struct watchdog_device {
 #define WDOG_HW_RUNNING		3	/* True if HW watchdog running */
 #define WDOG_STOP_ON_UNREGISTER	4	/* Should be stopped on unregister */
 #define WDOG_NO_PING_ON_SUSPEND	5	/* Ping worker should be stopped on suspend */
+#define WDOG_STOP_ON_PANIC	6	/* Should be stopped on panic */
 	struct list_head deferred;
 };
 
@@ -146,6 +149,12 @@  static inline void watchdog_set_nowayout(struct watchdog_device *wdd, bool noway
 		set_bit(WDOG_NO_WAY_OUT, &wdd->status);
 }
 
+/* Use the following function, before registering the device, to stop the watchdog on panic */
+static inline void watchdog_stop_on_panic(struct watchdog_device *wdd)
+{
+	set_bit(WDOG_STOP_ON_PANIC, &wdd->status);
+}
+
 /* Use the following function to stop the watchdog on reboot */
 static inline void watchdog_stop_on_reboot(struct watchdog_device *wdd)
 {