diff mbox series

[RFC,v1,2/2] watchdog: pseries-wdt: initial support for PAPR virtual watchdog timers

Message ID 20220413165104.179144-3-cheloha@linux.ibm.com
State New
Headers show
Series Add driver for PAPR watchdog timers | expand

Commit Message

Scott Cheloha April 13, 2022, 4:51 p.m. UTC
A future revision of the PAPR will define a new hypercall, H_WATCHDOG.
The hypercall will permit guest control of one or more virtual
watchdog timers.

This patch adds a new watchdog driver for these timers, "pseries-wdt".
For now, the driver only exposes a single timer.  In the future it
could expose additional timers when more than one is available.

Signed-off-by: Scott Cheloha <cheloha@linux.ibm.com>
---
 drivers/watchdog/Kconfig       |   8 +
 drivers/watchdog/Makefile      |   1 +
 drivers/watchdog/pseries-wdt.c | 337 +++++++++++++++++++++++++++++++++
 3 files changed, 346 insertions(+)
 create mode 100644 drivers/watchdog/pseries-wdt.c

Comments

Tzung-Bi Shih April 14, 2022, 3:20 a.m. UTC | #1
On Wed, Apr 13, 2022 at 11:51:04AM -0500, Scott Cheloha wrote:
> diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
> index c4e82a8d863f..f3e6db5bed74 100644
> --- a/drivers/watchdog/Kconfig
> +++ b/drivers/watchdog/Kconfig
> @@ -1941,6 +1941,14 @@ config WATCHDOG_RTAS
>  	  To compile this driver as a module, choose M here. The module
>  	  will be called wdrtas.
>  
> +config PSERIES_WDT
> +	tristate "POWER Architecture Platform Watchdog Timer"
> +	depends on PPC_PSERIES
> +	select WATCHDOG_CORE
> +	help
> +	  Driver for virtual watchdog timers provided by PAPR
> +	  hypervisors (e.g. pHyp, KVM).
> +

To maintain alphabetical order, the option should be prior to WATCHDOG_RTAS.

> diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
> index f7da867e8782..3ae1f7d9f1ec 100644
> --- a/drivers/watchdog/Makefile
> +++ b/drivers/watchdog/Makefile
> @@ -185,6 +185,7 @@ obj-$(CONFIG_MEN_A21_WDT) += mena21_wdt.o
>  
>  # PPC64 Architecture
>  obj-$(CONFIG_WATCHDOG_RTAS) += wdrtas.o
> +obj-$(CONFIG_PSERIES_WDT) += pseries-wdt.o

Same here.

> diff --git a/drivers/watchdog/pseries-wdt.c b/drivers/watchdog/pseries-wdt.c
> new file mode 100644
> index 000000000000..0d22ddf12a7f
> --- /dev/null
> +++ b/drivers/watchdog/pseries-wdt.c
> @@ -0,0 +1,337 @@
> +/* SPDX-License-Identifier: GPL-2.0-or-later */
> +
> +#define DRV_NAME "pseries-wdt"
> +#define pr_fmt(fmt) DRV_NAME ": " fmt

`pr_fmt` is unused.

> +/*
> + * The PAPR's MSB->LSB bit ordering is is 0->63.  These macros
> + * simplify defining bitfields as described in the PAPR without
> + * needing to transpose values to the more C-like 63->0 ordering.
> + */
> +#define SETFIELD(_v, _b, _e)	\
> +    (((unsigned long)(_v) << PPC_BITLSHIFT(_e)) & PPC_BITMASK(_b, _e))
> +#define GETFIELD(_v, _b, _e)	\
> +    (((unsigned long)(_v) & PPC_BITMASK(_b, _e)) >> PPC_BITLSHIFT(_e))

Would it be better to enclose parentheses for _b and _e in PPC_BITMASK()?

> +/*
> + * R5: "watchdogNumber":
> + *
> + *     The target watchdog.  Watchdog numbers are 1-based.  The
> + *     maximum supported watchdog number may be obtained via the
> + *     "Query Watchdog Capabilities" operation.
> + *
> + *     This input is ignored for the "Query Watchdog Capabilities"
> + *     operation.
> + *
> + * R6: "timeoutInMs":
> + *
> + *     The timeout in milliseconds.  The minimum supported timeout may
> + *     be obtained via the "Query Watchdog Capabilities" operation.
> + *
> + *     This input is ignored for the "Stop Watchdog", "Query Watchdog
> + *     Capabilities", and "Query LPM Requirement" operations.
> + */
> +
> +/*
> + * H_WATCHDOG Hypercall Output
> + *
> + * R3: Return code
> + *
> + *     H_SUCCESS    The operation completed.
> + *
> + *     H_BUSY	    The hypervisor is too busy; retry the operation.
> + *
> + *     H_PARAMETER  The given "flags" are somehow invalid.  Either the
> + *                  "operation" or "timeoutAction" is invalid, or a
> + *                  reserved bit is set.
> + *
> + *     H_P2         The given "watchdogNumber" is zero or exceeds the
> + *                  supported maximum value.
> + *
> + *     H_P3         The given "timeoutInMs" is below the supported
> + *                  minimum value.
> + *
> + *     H_NOOP       The given "watchdogNumber" is already stopped.
> + *
> + *     H_HARDWARE   The operation failed for ineffable reasons.
> + *
> + *     H_FUNCTION   The H_WATCHDOG hypercall is not supported by this
> + *                  hypervisor.

The above registers/bits have no corresponding macros for manipulating.  Are
they constructing?

> +static struct platform_device *pseries_wdt_pdev;

I wonder if it could be dropped.  See below.

> +static unsigned long action = PSERIES_WDTF_ACTION_HARD_RESTART;

It looks like `action` can only be in the scope of pseries_wdt_start().

> +static unsigned int min_timeout = 0;

I wonder if it could be dropped.  See below.

> +struct pseries_wdt {
> +	struct watchdog_device wd;
> +	unsigned long num;		/* NB: Watchdog numbers are 1-based */
> +};
> +#define wd_to_pseries_wdt(ptr)	container_of(ptr, struct pseries_wdt, wd)

Given that it already calls watchdog_set_drvdata() in pseries_wdt_probe(), it
doesn't need the container_of() to get the struct pseries_wdt.

> +static int pseries_wdt_start(struct watchdog_device *wdd)
> +{
> +	struct pseries_wdt *pw = wd_to_pseries_wdt(wdd);

Use watchdog_get_drvdata().

> +	rc = plpar_hcall_norets(H_WATCHDOG, flags, pw->num, msecs);
> +	if (rc != H_SUCCESS) {
> +		pr_crit("H_WATCHDOG: %ld: failed to start timer %lu",
> +			rc, pw->num);

If it really needs to print something, save &pdev->dev in struct pseries_wdt
in pseries_wdt_probe() and use dev_err() here.

> +		return -EIO;	/* XXX What is the right errno here? */

I think it is fine as long as the errno makes sense for the context.  Watchdog
framework doesn't propagate the errno[1].

[1]: https://elixir.bootlin.com/linux/v5.18-rc2/source/drivers/watchdog/watchdog_core.c#L166

> +static int pseries_wdt_stop(struct watchdog_device *wdd)
> +{
> +	struct pseries_wdt *pw = wd_to_pseries_wdt(wdd);

Use watchdog_get_drvdata().

> +	rc = plpar_hcall_norets(H_WATCHDOG, PSERIES_WDTF_OP_STOP, pw->num);
> +	if (rc != H_SUCCESS) {
> +		pr_crit("H_WATCHDOG: %ld: failed to stop timer %lu",
> +			rc, pw->num);

Ditto.

> +static int pseries_wdt_probe(struct platform_device *pdev)
> +{
> +	struct device *dev = &pdev->dev;
> +	struct pseries_wdt *pw;
> +	int err = 0;

The initialization is pointless.  `err` is going to be overriden soon.

> +	pw = devm_kzalloc(dev, sizeof *pw, GFP_KERNEL);
> +	if (pw == NULL)

To be concise, use !pw.

> +	/* XXX Is it appropriate to call devm_kfree() on registration error? */
> +	err = devm_watchdog_register_device(dev, &pw->wd);
> +	if (err) {
> +		devm_kfree(dev, pw);

No.  devm_* delegate the responsibility to device.  It doesn't need to call
devm_kfree().

> +	/*
> +	 * XXX Should we print something to the console about the new
> +	 * pseudo-device?  If so, what?
> +	 */
> +	pr_info("watchdog%d probed\n", pw->wd.id);

Up to it.  Use dev_info() or dev_dbg() here if it really needs to print
something.

> +static int pseries_wdt_remove(struct platform_device *pdev)
> +{
> +	struct pseries_wdt *pw = platform_get_drvdata(pdev);
> +
> +	/* XXX Should we say something about removing the pseudo-device? */
> +	pr_info("watchdog%d removed\n", pw->wd.id);

Ditto.

> +	/*
> +	 * XXX Calling watchdog_unregister_device() here causes the kernel
> +	 * to panic later.  What is the proper way to clean up a watchdog
> +	 * device at module unload time?
> +	 */
> +#if 0
> +	watchdog_unregister_device(&pw->wd);
> +#endif

It doesn't need to call watchdog_unregister_device().  The life cycle is
already delegated to the device if it calls devm_watchdog_register_device().

> +static int pseries_wdt_suspend(struct platform_device *pdev, pm_message_t state)
> +{
> +	struct pseries_wdt *w;
> +
> +	w = platform_get_drvdata(pdev);

To be concise, inline the assignment.  I.e.
struct pseries_wdt *w = platform_get_drvdata(pdev);

> +	return pseries_wdt_stop(&w->wd);

Taking other watchdog drivers as examples[2][3], doesn't it need to check by
watchdog_active()?

[2]: https://elixir.bootlin.com/linux/v5.18-rc2/source/drivers/watchdog/mtk_wdt.c#L399
[3]: https://elixir.bootlin.com/linux/v5.18-rc2/source/drivers/watchdog/imx_sc_wdt.c#L220

> +static int pseries_wdt_resume(struct platform_device *pdev)
> +{
> +	struct pseries_wdt *w;
> +
> +	w = platform_get_drvdata(pdev);

Ditto.

> +	return pseries_wdt_start(&w->wd);

Share the same concern for pseries_wdt_suspend().

> +static struct platform_driver pseries_wdt_driver = {
> +	.probe = pseries_wdt_probe,
> +	.remove	= pseries_wdt_remove,
> +	.suspend = pseries_wdt_suspend,
> +	.resume = pseries_wdt_resume,
> +	.driver = {
> +		.name = DRV_NAME,
> +		.owner = THIS_MODULE,
> +	},
> +};

Taking other watchdog drivers as examples[4][5], their .suspend and .resume ops
bound to the struct device_driver instead of struct platform_driver.

I have no idea what could be the difference.  Maybe others on the mailing list
could help to answer.

[4]: https://elixir.bootlin.com/linux/v5.18-rc2/source/drivers/watchdog/mtk_wdt.c#L437
[5]: https://elixir.bootlin.com/linux/v5.18-rc2/source/drivers/watchdog/imx_sc_wdt.c#L250

> +static int __init pseries_wdt_init_module(void)
> +{
> +	unsigned long ret[PLPAR_HCALL_BUFSIZE] = { 0 };
> +	unsigned long cap;
> +	long rc;
> +	int err;
> +
> +	rc = plpar_hcall(H_WATCHDOG, ret, PSERIES_WDTF_OP_QUERY);
> +	if (rc != H_SUCCESS) {
> +		if (rc == H_FUNCTION) {
> +			pr_info("hypervisor does not support H_WATCHDOG");
> +			return -ENODEV;
> +		}
> +		pr_err("H_WATCHDOG: %ld: capability query failed", rc);
> +		return -EIO;
> +	}
> +	cap = ret[0];
> +	min_timeout = roundup(PSERIES_WDTQ_MIN_TIMEOUT(cap), 1000) / 1000;
> +	pr_info("hypervisor supports %lu timer(s), %lums minimum timeout",
> +		PSERIES_WDTQ_MAX_NUMBER(cap), PSERIES_WDTQ_MIN_TIMEOUT(cap));

Could these bits be in pseries_wdt_probe()?

> +
> +	err = platform_driver_register(&pseries_wdt_driver);
> +	if (err)
> +		return err;
> +
> +	/*
> +	 * For the moment we only expose the first timer to userspace.
> +	 * In the future we could expose more.
> +	 */
> +	pseries_wdt_pdev = platform_device_register_simple(DRV_NAME,
> +							   -1, NULL, 0);
> +	if (IS_ERR(pseries_wdt_pdev)) {
> +		platform_driver_unregister(&pseries_wdt_driver);
> +		return PTR_ERR(pseries_wdt_pdev);
> +	}
> +
> +	return 0;
> +}
> +
> +static void __exit pseries_wdt_cleanup_module(void)
> +{
> +	platform_device_unregister(pseries_wdt_pdev);
> +	platform_driver_unregister(&pseries_wdt_driver);
> +}
> +
> +module_init(pseries_wdt_init_module);
> +module_exit(pseries_wdt_cleanup_module);

If the plpar_hcall() check and `min_timeout` initialzation could be in
pseries_wdt_probe(), the whole blocks can be replaced by
module_platform_driver().
diff mbox series

Patch

diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index c4e82a8d863f..f3e6db5bed74 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -1941,6 +1941,14 @@  config WATCHDOG_RTAS
 	  To compile this driver as a module, choose M here. The module
 	  will be called wdrtas.
 
+config PSERIES_WDT
+	tristate "POWER Architecture Platform Watchdog Timer"
+	depends on PPC_PSERIES
+	select WATCHDOG_CORE
+	help
+	  Driver for virtual watchdog timers provided by PAPR
+	  hypervisors (e.g. pHyp, KVM).
+
 # S390 Architecture
 
 config DIAG288_WATCHDOG
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index f7da867e8782..3ae1f7d9f1ec 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -185,6 +185,7 @@  obj-$(CONFIG_MEN_A21_WDT) += mena21_wdt.o
 
 # PPC64 Architecture
 obj-$(CONFIG_WATCHDOG_RTAS) += wdrtas.o
+obj-$(CONFIG_PSERIES_WDT) += pseries-wdt.o
 
 # S390 Architecture
 obj-$(CONFIG_DIAG288_WATCHDOG) += diag288_wdt.o
diff --git a/drivers/watchdog/pseries-wdt.c b/drivers/watchdog/pseries-wdt.c
new file mode 100644
index 000000000000..0d22ddf12a7f
--- /dev/null
+++ b/drivers/watchdog/pseries-wdt.c
@@ -0,0 +1,337 @@ 
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#define DRV_NAME "pseries-wdt"
+#define pr_fmt(fmt) DRV_NAME ": " fmt
+
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/platform_device.h>
+#include <linux/watchdog.h>
+
+/*
+ * The PAPR's MSB->LSB bit ordering is is 0->63.  These macros
+ * simplify defining bitfields as described in the PAPR without
+ * needing to transpose values to the more C-like 63->0 ordering.
+ */
+#define SETFIELD(_v, _b, _e)	\
+    (((unsigned long)(_v) << PPC_BITLSHIFT(_e)) & PPC_BITMASK(_b, _e))
+#define GETFIELD(_v, _b, _e)	\
+    (((unsigned long)(_v) & PPC_BITMASK(_b, _e)) >> PPC_BITLSHIFT(_e))
+
+/*
+ * H_WATCHDOG Hypercall Input
+ *
+ * R4: "flags":
+ *
+ *     A 64-bit value structured as follows:
+ *
+ *         Bits 0-46: Reserved (must be zero).
+ */
+#define PSERIES_WDTF_RESERVED	PPC_BITMASK(0, 46)
+
+/*
+ *         Bit 47: "leaveOtherWatchdogsRunningOnTimeout"
+ *
+ *             0  Stop outstanding watchdogs on timeout.
+ *             1  Leave outstanding watchdogs running on timeout.
+ */
+#define PSERIES_WDTF_LEAVE_OTHER	PPC_BIT(47)
+
+/*
+ *         Bits 48-55: "operation"
+ *
+ *             0x01  Start Watchdog
+ *             0x02  Stop Watchdog
+ *             0x03  Query Watchdog Capabilities
+ *             0x04  Query Watchdog LPM Requirement
+ */
+#define PSERIES_WDTF_OP(op)		SETFIELD((op), 48, 55)
+#define PSERIES_WDTF_OP_START		PSERIES_WDTF_OP(0x1)
+#define PSERIES_WDTF_OP_STOP		PSERIES_WDTF_OP(0x2)
+#define PSERIES_WDTF_OP_QUERY		PSERIES_WDTF_OP(0x3)
+#define PSERIES_WDTF_OP_QUERY_LPM	PSERIES_WDTF_OP(0x4)
+
+/*
+ *         Bits 56-63: "timeoutAction"
+ *
+ *             0x01  Hard poweroff
+ *             0x02  Hard restart
+ *             0x03  Dump restart
+ */
+#define PSERIES_WDTF_ACTION(ac)			SETFIELD(ac, 56, 63)
+#define PSERIES_WDTF_ACTION_HARD_POWEROFF	PSERIES_WDTF_ACTION(0x1)
+#define PSERIES_WDTF_ACTION_HARD_RESTART	PSERIES_WDTF_ACTION(0x2)
+#define PSERIES_WDTF_ACTION_DUMP_RESTART	PSERIES_WDTF_ACTION(0x3)
+
+/*
+ * R5: "watchdogNumber":
+ *
+ *     The target watchdog.  Watchdog numbers are 1-based.  The
+ *     maximum supported watchdog number may be obtained via the
+ *     "Query Watchdog Capabilities" operation.
+ *
+ *     This input is ignored for the "Query Watchdog Capabilities"
+ *     operation.
+ *
+ * R6: "timeoutInMs":
+ *
+ *     The timeout in milliseconds.  The minimum supported timeout may
+ *     be obtained via the "Query Watchdog Capabilities" operation.
+ *
+ *     This input is ignored for the "Stop Watchdog", "Query Watchdog
+ *     Capabilities", and "Query LPM Requirement" operations.
+ */
+
+/*
+ * H_WATCHDOG Hypercall Output
+ *
+ * R3: Return code
+ *
+ *     H_SUCCESS    The operation completed.
+ *
+ *     H_BUSY	    The hypervisor is too busy; retry the operation.
+ *
+ *     H_PARAMETER  The given "flags" are somehow invalid.  Either the
+ *                  "operation" or "timeoutAction" is invalid, or a
+ *                  reserved bit is set.
+ *
+ *     H_P2         The given "watchdogNumber" is zero or exceeds the
+ *                  supported maximum value.
+ *
+ *     H_P3         The given "timeoutInMs" is below the supported
+ *                  minimum value.
+ *
+ *     H_NOOP       The given "watchdogNumber" is already stopped.
+ *
+ *     H_HARDWARE   The operation failed for ineffable reasons.
+ *
+ *     H_FUNCTION   The H_WATCHDOG hypercall is not supported by this
+ *                  hypervisor.
+ *
+ * R4:
+ *
+ * - For the "Query Watchdog Capabilities" operation, a 64-bit
+ *   structure defined as:
+ *
+ *       Bits  0-15: The minimum supported timeout in milliseconds.
+ *       Bits 16-31: The number of watchdogs supported.
+ *       Bits 32-63: Reserved.
+ */
+#define PSERIES_WDTQ_MIN_TIMEOUT(cap)	GETFIELD((cap), 0, 15)
+#define PSERIES_WDTQ_MAX_NUMBER(cap)	GETFIELD((cap), 16, 31)
+#define PSERIES_WDTQ_RESERVED		PPC_BITMASK(32, 63)
+
+/*
+ * - For the "Query Watchdog LPM Requirement" operation:
+ *
+ *       1  The given "watchdogNumber" must be stopped prior to
+ *          suspending.
+ *
+ *       2  The given "watchdogNumber" does not have to be stopped
+ *          prior to suspending.
+ */
+#define PSERIES_WDTQL_MUST_STOP       	1
+#define PSERIES_WDTQL_NEED_NOT_STOP	2
+
+static struct platform_device *pseries_wdt_pdev;
+static unsigned long action = PSERIES_WDTF_ACTION_HARD_RESTART;
+static unsigned int min_timeout = 0;
+
+static bool nowayout = WATCHDOG_NOWAYOUT;
+module_param(nowayout, bool, S_IRUGO);
+MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started "
+	"(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+
+#define DEFAULT_TIMEOUT	60
+static unsigned int timeout = DEFAULT_TIMEOUT;
+module_param(timeout, uint, S_IRUGO);
+MODULE_PARM_DESC(timeout, "Initial watchdog timeout in seconds "
+	"(default=" __MODULE_STRING(WATCHDOG_TIMEOUT) ")");
+
+struct pseries_wdt {
+	struct watchdog_device wd;
+	unsigned long num;		/* NB: Watchdog numbers are 1-based */
+};
+#define wd_to_pseries_wdt(ptr)	container_of(ptr, struct pseries_wdt, wd)
+
+static int pseries_wdt_start(struct watchdog_device *wdd)
+{
+	struct pseries_wdt *pw = wd_to_pseries_wdt(wdd);
+	unsigned long flags, msecs;
+	long rc;
+
+	flags = PSERIES_WDTF_OP_START | action;
+	msecs = wdd->timeout * 1000UL;
+	rc = plpar_hcall_norets(H_WATCHDOG, flags, pw->num, msecs);
+	if (rc != H_SUCCESS) {
+		pr_crit("H_WATCHDOG: %ld: failed to start timer %lu",
+			rc, pw->num);
+		return -EIO;	/* XXX What is the right errno here? */
+	}
+	return 0;
+}
+
+static int pseries_wdt_stop(struct watchdog_device *wdd)
+{
+	struct pseries_wdt *pw = wd_to_pseries_wdt(wdd);
+	long rc;
+
+	rc = plpar_hcall_norets(H_WATCHDOG, PSERIES_WDTF_OP_STOP, pw->num);
+	if (rc != H_SUCCESS) {
+		pr_crit("H_WATCHDOG: %ld: failed to stop timer %lu",
+			rc, pw->num);
+		return -EIO;
+	}
+	return 0;
+}
+
+static struct watchdog_info pseries_wdt_info = {
+	.identity = DRV_NAME,
+	.options = WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE | WDIOF_SETTIMEOUT \
+	    | WDIOF_PRETIMEOUT,
+};
+
+static const struct watchdog_ops pseries_wdt_ops = {
+	.owner = THIS_MODULE,
+	.ping = pseries_wdt_start,
+	.start = pseries_wdt_start,
+	.stop = pseries_wdt_stop,
+};
+
+static int pseries_wdt_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct pseries_wdt *pw;
+	int err = 0;
+
+	pw = devm_kzalloc(dev, sizeof *pw, GFP_KERNEL);
+	if (pw == NULL)
+		return -ENOMEM;
+
+	pw->num = 1;
+
+	pw->wd.info = &pseries_wdt_info;
+	pw->wd.ops = &pseries_wdt_ops;
+	pw->wd.min_timeout = min_timeout;
+	watchdog_init_timeout(&pw->wd, timeout, NULL);
+	watchdog_set_nowayout(&pw->wd, nowayout);
+	watchdog_stop_on_reboot(&pw->wd);
+	watchdog_stop_on_unregister(&pw->wd);
+	watchdog_set_drvdata(&pw->wd, pw);
+
+	/* XXX Is it appropriate to call devm_kfree() on registration error? */
+	err = devm_watchdog_register_device(dev, &pw->wd);
+	if (err) {
+		devm_kfree(dev, pw);
+		return err;
+	}
+
+	platform_set_drvdata(pdev, pw);
+
+	/*
+	 * XXX Should we print something to the console about the new
+	 * pseudo-device?  If so, what?
+	 */
+	pr_info("watchdog%d probed\n", pw->wd.id);
+	return 0;
+}
+
+static int pseries_wdt_remove(struct platform_device *pdev)
+{
+	struct pseries_wdt *pw = platform_get_drvdata(pdev);
+
+	/* XXX Should we say something about removing the pseudo-device? */
+	pr_info("watchdog%d removed\n", pw->wd.id);
+
+	/*
+	 * XXX Calling watchdog_unregister_device() here causes the kernel
+	 * to panic later.  What is the proper way to clean up a watchdog
+	 * device at module unload time?
+	 */
+#if 0
+	watchdog_unregister_device(&pw->wd);
+#endif
+	return 0;
+}
+
+static int pseries_wdt_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	struct pseries_wdt *w;
+
+	w = platform_get_drvdata(pdev);
+	return pseries_wdt_stop(&w->wd);
+}
+
+static int pseries_wdt_resume(struct platform_device *pdev)
+{
+	struct pseries_wdt *w;
+
+	w = platform_get_drvdata(pdev);
+	return pseries_wdt_start(&w->wd);
+}
+
+static struct platform_driver pseries_wdt_driver = {
+	.probe = pseries_wdt_probe,
+	.remove	= pseries_wdt_remove,
+	.suspend = pseries_wdt_suspend,
+	.resume = pseries_wdt_resume,
+	.driver = {
+		.name = DRV_NAME,
+		.owner = THIS_MODULE,
+	},
+};
+
+static int __init pseries_wdt_init_module(void)
+{
+	unsigned long ret[PLPAR_HCALL_BUFSIZE] = { 0 };
+	unsigned long cap;
+	long rc;
+	int err;
+
+	rc = plpar_hcall(H_WATCHDOG, ret, PSERIES_WDTF_OP_QUERY);
+	if (rc != H_SUCCESS) {
+		if (rc == H_FUNCTION) {
+			pr_info("hypervisor does not support H_WATCHDOG");
+			return -ENODEV;
+		}
+		pr_err("H_WATCHDOG: %ld: capability query failed", rc);
+		return -EIO;
+	}
+	cap = ret[0];
+	min_timeout = roundup(PSERIES_WDTQ_MIN_TIMEOUT(cap), 1000) / 1000;
+	pr_info("hypervisor supports %lu timer(s), %lums minimum timeout",
+		PSERIES_WDTQ_MAX_NUMBER(cap), PSERIES_WDTQ_MIN_TIMEOUT(cap));
+
+	err = platform_driver_register(&pseries_wdt_driver);
+	if (err)
+		return err;
+
+	/*
+	 * For the moment we only expose the first timer to userspace.
+	 * In the future we could expose more.
+	 */
+	pseries_wdt_pdev = platform_device_register_simple(DRV_NAME,
+							   -1, NULL, 0);
+	if (IS_ERR(pseries_wdt_pdev)) {
+		platform_driver_unregister(&pseries_wdt_driver);
+		return PTR_ERR(pseries_wdt_pdev);
+	}
+
+	return 0;
+}
+
+static void __exit pseries_wdt_cleanup_module(void)
+{
+	platform_device_unregister(pseries_wdt_pdev);
+	platform_driver_unregister(&pseries_wdt_driver);
+}
+
+module_init(pseries_wdt_init_module);
+module_exit(pseries_wdt_cleanup_module);
+
+MODULE_AUTHOR("Alexey Kardashevskiy <aik@ozlabs.ru>");
+MODULE_AUTHOR("Scott Cheloha <cheloha@linux.ibm.com>");
+MODULE_DESCRIPTION("POWER Architecture Platform Watchdog Timer Driver");
+MODULE_LICENSE("GPL");