diff mbox series

[v1,4/4] watchdog/pseries-wdt: initial support for PAPR H_WATCHDOG timers

Message ID 20220520183552.33426-5-cheloha@linux.ibm.com
State Superseded
Headers show
Series pseries-wdt: initial support for PAPR virtual watchdog timers | expand

Commit Message

Scott Cheloha May 20, 2022, 6:35 p.m. UTC
PAPR v2.12 defines a new hypercall, H_WATCHDOG.  The hypercall permits
guest control of one or more virtual watchdog timers.  The timers have
millisecond granularity.  The guest is terminated when a timer
expires.

This patch adds a watchdog driver for these timers, "pseries-wdt".

pseries_wdt_probe() currently assumes the existence of only one
platform device and always assigns it watchdogNumber 1.  If we ever
expose more than one timer to userspace we will need to devise a way
to assign a distinct watchdogNumber to each platform device at device
registration time.

Signed-off-by: Scott Cheloha <cheloha@linux.ibm.com>
---
 .../watchdog/watchdog-parameters.rst          |  12 +
 drivers/watchdog/Kconfig                      |   8 +
 drivers/watchdog/Makefile                     |   1 +
 drivers/watchdog/pseries-wdt.c                | 337 ++++++++++++++++++
 4 files changed, 358 insertions(+)
 create mode 100644 drivers/watchdog/pseries-wdt.c

Comments

Alexey Kardashevskiy May 25, 2022, 6:35 a.m. UTC | #1
On 5/21/22 04:35, Scott Cheloha wrote:
> PAPR v2.12 defines a new hypercall, H_WATCHDOG.  The hypercall permits
> guest control of one or more virtual watchdog timers.  The timers have
> millisecond granularity.  The guest is terminated when a timer
> expires.
> 
> This patch adds a watchdog driver for these timers, "pseries-wdt".
> 
> pseries_wdt_probe() currently assumes the existence of only one
> platform device and always assigns it watchdogNumber 1.  If we ever
> expose more than one timer to userspace we will need to devise a way
> to assign a distinct watchdogNumber to each platform device at device
> registration time.


This one should go before 4/4 in the series for bisectability.

What is platform_device_register_simple("pseries-wdt",...) going to do 
without the driver?

> 
> Signed-off-by: Scott Cheloha <cheloha@linux.ibm.com>
> ---
>   .../watchdog/watchdog-parameters.rst          |  12 +
>   drivers/watchdog/Kconfig                      |   8 +
>   drivers/watchdog/Makefile                     |   1 +
>   drivers/watchdog/pseries-wdt.c                | 337 ++++++++++++++++++
>   4 files changed, 358 insertions(+)
>   create mode 100644 drivers/watchdog/pseries-wdt.c
> 
> diff --git a/Documentation/watchdog/watchdog-parameters.rst b/Documentation/watchdog/watchdog-parameters.rst
> index 223c99361a30..4ffe725e796c 100644
> --- a/Documentation/watchdog/watchdog-parameters.rst
> +++ b/Documentation/watchdog/watchdog-parameters.rst
> @@ -425,6 +425,18 @@ pnx833x_wdt:
>   
>   -------------------------------------------------
>   
> +pseries-wdt:
> +    action:
> +	Action taken when watchdog expires: 1 (power off), 2 (restart),
> +	3 (dump and restart). (default=2)
> +    timeout:
> +	Initial watchdog timeout in seconds. (default=60)
> +    nowayout:
> +	Watchdog cannot be stopped once started.
> +	(default=kernel config parameter)
> +
> +-------------------------------------------------
> +
>   rc32434_wdt:
>       timeout:
>   	Watchdog timeout value, in seconds (default=20)
> diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
> index c4e82a8d863f..06b412603f3e 100644
> --- a/drivers/watchdog/Kconfig
> +++ b/drivers/watchdog/Kconfig
> @@ -1932,6 +1932,14 @@ config MEN_A21_WDT
>   
>   # PPC64 Architecture
>   
> +config PSERIES_WDT
> +	tristate "POWER Architecture Platform Watchdog Timer"
> +	depends on PPC_PSERIES
> +	select WATCHDOG_CORE
> +	help
> +	  Driver for virtual watchdog timers provided by PAPR
> +	  hypervisors (e.g. PowerVM, KVM).
> +
>   config WATCHDOG_RTAS
>   	tristate "RTAS watchdog"
>   	depends on PPC_RTAS
> diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
> index f7da867e8782..f35660409f17 100644
> --- a/drivers/watchdog/Makefile
> +++ b/drivers/watchdog/Makefile
> @@ -184,6 +184,7 @@ obj-$(CONFIG_BOOKE_WDT) += booke_wdt.o
>   obj-$(CONFIG_MEN_A21_WDT) += mena21_wdt.o
>   
>   # PPC64 Architecture
> +obj-$(CONFIG_PSERIES_WDT) += pseries-wdt.o
>   obj-$(CONFIG_WATCHDOG_RTAS) += wdrtas.o
>   
>   # S390 Architecture
> diff --git a/drivers/watchdog/pseries-wdt.c b/drivers/watchdog/pseries-wdt.c
> new file mode 100644
> index 000000000000..f41bc4d3b7a2
> --- /dev/null
> +++ b/drivers/watchdog/pseries-wdt.c
> @@ -0,0 +1,337 @@
> +// SPDX-License-Identifier: GPL-2.0-or-later
> +/*
> + * Copyright (c) 2022 International Business Machines, Inc.
> + */
> +
> +#include <linux/bitops.h>
> +#include <linux/kernel.h>
> +#include <linux/limits.h>
> +#include <linux/mod_devicetable.h>
> +#include <linux/module.h>
> +#include <linux/moduleparam.h>
> +#include <linux/platform_device.h>
> +#include <linux/watchdog.h>
> +
> +#define DRV_NAME "pseries-wdt"
> +
> +/*
> + * The PAPR's MSB->LSB bit ordering is 0->63.  These macros simplify
> + * defining bitfields as described in the PAPR without needing to
> + * transpose values to the more C-like 63->0 ordering.
> + */
> +#define SETFIELD(_v, _b, _e)	\
> +	(((unsigned long)(_v) << PPC_BITLSHIFT(_e)) & PPC_BITMASK((_b), (_e)))
> +#define GETFIELD(_v, _b, _e)	\
> +	(((unsigned long)(_v) & PPC_BITMASK((_b), (_e))) >> PPC_BITLSHIFT(_e))
> +
> +/*
> + * H_WATCHDOG Hypercall Input
> + *
> + * R4: "flags":
> + *
> + *     A 64-bit value structured as follows:
> + *
> + *         Bits 0-46: Reserved (must be zero).
> + */
> +#define PSERIES_WDTF_RESERVED	PPC_BITMASK(0, 46)
> +
> +/*
> + *         Bit 47: "leaveOtherWatchdogsRunningOnTimeout"
> + *
> + *             0  Stop outstanding watchdogs on timeout.
> + *             1  Leave outstanding watchdogs running on timeout.
> + */
> +#define PSERIES_WDTF_LEAVE_OTHER	PPC_BIT(47)
> +
> +/*
> + *         Bits 48-55: "operation"
> + *
> + *             0x01  Start Watchdog
> + *             0x02  Stop Watchdog
> + *             0x03  Query Watchdog Capabilities
> + *             0x04  Query Watchdog LPM Requirement
> + */
> +#define PSERIES_WDTF_OP(op)		SETFIELD((op), 48, 55)
> +#define PSERIES_WDTF_OP_START		PSERIES_WDTF_OP(0x1)
> +#define PSERIES_WDTF_OP_STOP		PSERIES_WDTF_OP(0x2)
> +#define PSERIES_WDTF_OP_QUERY		PSERIES_WDTF_OP(0x3)
> +#define PSERIES_WDTF_OP_QUERY_LPM	PSERIES_WDTF_OP(0x4)
> +
> +/*
> + *         Bits 56-63: "timeoutAction"
> + *
> + *             0x01  Hard poweroff
> + *             0x02  Hard restart
> + *             0x03  Dump restart
> + */
> +#define PSERIES_WDTF_ACTION(ac)			SETFIELD(ac, 56, 63)
> +#define PSERIES_WDTF_ACTION_HARD_POWEROFF	PSERIES_WDTF_ACTION(0x1)
> +#define PSERIES_WDTF_ACTION_HARD_RESTART	PSERIES_WDTF_ACTION(0x2)
> +#define PSERIES_WDTF_ACTION_DUMP_RESTART	PSERIES_WDTF_ACTION(0x3)

> +
> +/*
> + * R5: "watchdogNumber":
> + *
> + *     The target watchdog.  Watchdog numbers are 1-based.  The
> + *     maximum supported watchdog number may be obtained via the
> + *     "Query Watchdog Capabilities" operation.
> + *
> + *     This input is ignored for the "Query Watchdog Capabilities"
> + *     operation.
> + *
> + * R6: "timeoutInMs":
> + *
> + *     The timeout in milliseconds.  The minimum supported timeout may
> + *     be obtained via the "Query Watchdog Capabilities" operation.
> + *
> + *     This input is ignored for the "Stop Watchdog", "Query Watchdog
> + *     Capabilities", and "Query Watchdog LPM Requirement" operations.
> + */
> +
> +/*
> + * H_WATCHDOG Hypercall Output
> + *
> + * R3: Return code
> + *
> + *     H_SUCCESS    The operation completed.
> + *
> + *     H_BUSY	    The hypervisor is too busy; retry the operation.
> + *
> + *     H_PARAMETER  The given "flags" are somehow invalid.  Either the
> + *                  "operation" or "timeoutAction" is invalid, or a
> + *                  reserved bit is set.
> + *
> + *     H_P2         The given "watchdogNumber" is zero or exceeds the
> + *                  supported maximum value.
> + *
> + *     H_P3         The given "timeoutInMs" is below the supported
> + *                  minimum value.
> + *
> + *     H_NOOP       The given "watchdogNumber" is already stopped.
> + *
> + *     H_HARDWARE   The operation failed for ineffable reasons.
> + *
> + *     H_FUNCTION   The H_WATCHDOG hypercall is not supported by this
> + *                  hypervisor.
> + *
> + * R4:
> + *
> + * - For the "Query Watchdog Capabilities" operation, a 64-bit
> + *   value structured as follows:
> + *
> + *       Bits  0-15: The minimum supported timeout in milliseconds.
> + *       Bits 16-31: The number of watchdogs supported.
> + *       Bits 32-63: Reserved.
> + */
> +#define PSERIES_WDTQ_MIN_TIMEOUT(cap)	GETFIELD((cap), 0, 15)
> +#define PSERIES_WDTQ_MAX_NUMBER(cap)	GETFIELD((cap), 16, 31)
> +#define PSERIES_WDTQ_RESERVED		PPC_BITMASK(32, 63)

PSERIES_WDTQ_RESERVED is not needed as there is a comment above.

> +
> +/*
> + * - For the "Query Watchdog LPM Requirement" operation:
> + *
> + *       1  The given "watchdogNumber" must be stopped prior to
> + *          suspending.
> + *
> + *       2  The given "watchdogNumber" does not have to be stopped
> + *          prior to suspending.
> + */
> +#define PSERIES_WDTQL_MUST_STOP		1
> +#define PSERIES_WDTQL_NEED_NOT_STOP	2
> +
> +static unsigned long action = PSERIES_WDTF_ACTION_HARD_RESTART;
> +
> +static int action_get(char *buf, const struct kernel_param *kp)
> +{
> +	int val;
> +
> +	switch (action) {
> +	case PSERIES_WDTF_ACTION_HARD_POWEROFF:
> +		val = 1;
> +		break;
> +	case PSERIES_WDTF_ACTION_HARD_RESTART:
> +		val = 2;
> +		break;
> +	case PSERIES_WDTF_ACTION_DUMP_RESTART:
> +		val = 3;
> +		break;
> +	default:
> +		return -EINVAL;
> +	}
> +	return sprintf(buf, "%d\n", val);
> +}
> +
> +static int action_set(const char *val, const struct kernel_param *kp)
> +{
> +	int choice;
> +
> +	if (kstrtoint(val, 10, &choice))
> +		return -EINVAL;
> +	switch (choice) {
> +	case 1:
> +		action = PSERIES_WDTF_ACTION_HARD_POWEROFF;
> +		return 0;
> +	case 2:
> +		action = PSERIES_WDTF_ACTION_HARD_RESTART;
> +		return 0;
> +	case 3:
> +		action = PSERIES_WDTF_ACTION_DUMP_RESTART;
> +		return 0;
> +	}
> +	return -EINVAL;
> +}
> +
> +static const struct kernel_param_ops action_ops = {
> +	.get = action_get,
> +	.set = action_set,
> +};
> +module_param_cb(action, &action_ops, NULL, 0444);


0644 here and below?

> +MODULE_PARM_DESC(action, "Action taken when watchdog expires (default=2)");
> +
> +static bool nowayout = WATCHDOG_NOWAYOUT;
> +module_param(nowayout, bool, 0444);
> +MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
> +		 __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
> +
> +#define WATCHDOG_TIMEOUT 60
> +static unsigned int timeout = WATCHDOG_TIMEOUT;
> +module_param(timeout, uint, 0444);
> +MODULE_PARM_DESC(timeout, "Initial watchdog timeout in seconds (default="
> +		 __MODULE_STRING(WATCHDOG_TIMEOUT) ")");
> +
> +struct pseries_wdt {
> +	struct watchdog_device wd;
> +	unsigned long num;		/* Watchdog numbers are 1-based */
> +};
> +
> +static int pseries_wdt_start(struct watchdog_device *wdd)
> +{
> +	struct device *dev = wdd->parent;
> +	struct pseries_wdt *pw = watchdog_get_drvdata(wdd);
> +	unsigned long flags, msecs;
> +	long rc;
> +
> +	flags = action | PSERIES_WDTF_OP_START;
> +	msecs = wdd->timeout * 1000UL;
> +	rc = plpar_hcall_norets(H_WATCHDOG, flags, pw->num, msecs);
> +	if (rc != H_SUCCESS) {
> +		dev_crit(dev, "H_WATCHDOG: %ld: failed to start timer %lu",
> +			 rc, pw->num);
> +		return -EIO;
> +	}
> +	return 0;
> +}
> +
> +static int pseries_wdt_stop(struct watchdog_device *wdd)
> +{
> +	struct device *dev = wdd->parent;
> +	struct pseries_wdt *pw = watchdog_get_drvdata(wdd);
> +	long rc;
> +
> +	rc = plpar_hcall_norets(H_WATCHDOG, PSERIES_WDTF_OP_STOP, pw->num);
> +	if (rc != H_SUCCESS && rc != H_NOOP) {
> +		dev_crit(dev, "H_WATCHDOG: %ld: failed to stop timer %lu",
> +			 rc, pw->num);
> +		return -EIO;
> +	}
> +	return 0;
> +}
> +
> +static struct watchdog_info pseries_wdt_info = {
> +	.identity = DRV_NAME,
> +	.options = WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE | WDIOF_SETTIMEOUT
> +	    | WDIOF_PRETIMEOUT,
> +};
> +
> +static const struct watchdog_ops pseries_wdt_ops = {
> +	.owner = THIS_MODULE,
> +	.start = pseries_wdt_start,
> +	.stop = pseries_wdt_stop,
> +};
> +
> +static int pseries_wdt_probe(struct platform_device *pdev)
> +{
> +	unsigned long ret[PLPAR_HCALL_BUFSIZE] = { 0 };
> +	unsigned long cap, min_timeout_ms;
> +	long rc;
> +	struct pseries_wdt *pw;
> +	int err;
> +
> +	rc = plpar_hcall(H_WATCHDOG, ret, PSERIES_WDTF_OP_QUERY);
> +	if (rc != H_SUCCESS)
> +		return rc == H_FUNCTION ? -ENODEV : -EIO;

Nit:

if (rc == H_FUNCTION)
	return -ENODEV;
if (rc != H_SUCCESS)
	return -EIO;

?

> +	cap = ret[0];
> +
> +	pw = devm_kzalloc(&pdev->dev, sizeof(*pw), GFP_KERNEL);
> +	if (!pw)
> +		return -ENOMEM;
> +
> +	/*
> +	 * Assume watchdogNumber 1 for now.  If we ever support
> +	 * multiple timers we will need to devise a way to choose a
> +	 * distinct watchdogNumber for each platform device at device
> +	 * registration time.
> +	 */
> +	pw->num = 1;
> +
> +	pw->wd.parent = &pdev->dev;
> +	pw->wd.info = &pseries_wdt_info;
> +	pw->wd.ops = &pseries_wdt_ops;
> +	min_timeout_ms = PSERIES_WDTQ_MIN_TIMEOUT(cap);
> +	pw->wd.min_timeout = roundup(min_timeout_ms, 1000) / 1000;
> +	pw->wd.max_timeout = UINT_MAX;
> +	watchdog_init_timeout(&pw->wd, timeout, NULL);


If PSERIES_WDTF_OP_QUERY returns 2min and this driver's default is 1min, 
watchdog_init_timeout() returns an error, don't we want to handle it 
here? Thanks,


> +	watchdog_set_nowayout(&pw->wd, nowayout);
> +	watchdog_stop_on_reboot(&pw->wd);
> +	watchdog_stop_on_unregister(&pw->wd);
> +	watchdog_set_drvdata(&pw->wd, pw);
> +
> +	err = devm_watchdog_register_device(&pdev->dev, &pw->wd);
> +	if (err)
> +		return err;
> +
> +	platform_set_drvdata(pdev, &pw->wd);
> +
> +	return 0;
> +}
> +
> +static int pseries_wdt_suspend(struct platform_device *pdev, pm_message_t state)
> +{
> +	struct watchdog_device *wd = platform_get_drvdata(pdev);
> +
> +	if (watchdog_active(wd))
> +		return pseries_wdt_stop(wd);
> +	return 0;
> +}
> +
> +static int pseries_wdt_resume(struct platform_device *pdev)
> +{
> +	struct watchdog_device *wd = platform_get_drvdata(pdev);
> +
> +	if (watchdog_active(wd))
> +		return pseries_wdt_start(wd);
> +	return 0;
> +}
> +
> +static const struct platform_device_id pseries_wdt_id[] = {
> +	{ .name = "pseries-wdt" },
> +	{}
> +};
> +MODULE_DEVICE_TABLE(platform, pseries_wdt_id);
> +
> +static struct platform_driver pseries_wdt_driver = {
> +	.driver = {
> +		.name = DRV_NAME,
> +		.owner = THIS_MODULE,
> +	},
> +	.id_table = pseries_wdt_id,
> +	.probe = pseries_wdt_probe,
> +	.resume = pseries_wdt_resume,
> +	.suspend = pseries_wdt_suspend,
> +};
> +module_platform_driver(pseries_wdt_driver);
> +
> +MODULE_AUTHOR("Alexey Kardashevskiy <aik@ozlabs.ru>");
> +MODULE_AUTHOR("Scott Cheloha <cheloha@linux.ibm.com>");
> +MODULE_DESCRIPTION("POWER Architecture Platform Watchdog Driver");
> +MODULE_LICENSE("GPL");
Guenter Roeck May 25, 2022, 7:52 a.m. UTC | #2
On 5/24/22 23:35, Alexey Kardashevskiy wrote:
> 
> 
> On 5/21/22 04:35, Scott Cheloha wrote:
>> PAPR v2.12 defines a new hypercall, H_WATCHDOG.  The hypercall permits
>> guest control of one or more virtual watchdog timers.  The timers have
>> millisecond granularity.  The guest is terminated when a timer
>> expires.
>>
>> This patch adds a watchdog driver for these timers, "pseries-wdt".
>>
>> pseries_wdt_probe() currently assumes the existence of only one
>> platform device and always assigns it watchdogNumber 1.  If we ever
>> expose more than one timer to userspace we will need to devise a way
>> to assign a distinct watchdogNumber to each platform device at device
>> registration time.
> 
> 
> This one should go before 4/4 in the series for bisectability.
> 
> What is platform_device_register_simple("pseries-wdt",...) going to do without the driver?
> 
>>
>> Signed-off-by: Scott Cheloha <cheloha@linux.ibm.com>
>> ---
>>   .../watchdog/watchdog-parameters.rst          |  12 +
>>   drivers/watchdog/Kconfig                      |   8 +
>>   drivers/watchdog/Makefile                     |   1 +
>>   drivers/watchdog/pseries-wdt.c                | 337 ++++++++++++++++++
>>   4 files changed, 358 insertions(+)
>>   create mode 100644 drivers/watchdog/pseries-wdt.c
>>
>> diff --git a/Documentation/watchdog/watchdog-parameters.rst b/Documentation/watchdog/watchdog-parameters.rst
>> index 223c99361a30..4ffe725e796c 100644
>> --- a/Documentation/watchdog/watchdog-parameters.rst
>> +++ b/Documentation/watchdog/watchdog-parameters.rst
>> @@ -425,6 +425,18 @@ pnx833x_wdt:
>>   -------------------------------------------------
>> +pseries-wdt:
>> +    action:
>> +    Action taken when watchdog expires: 1 (power off), 2 (restart),
>> +    3 (dump and restart). (default=2)
>> +    timeout:
>> +    Initial watchdog timeout in seconds. (default=60)
>> +    nowayout:
>> +    Watchdog cannot be stopped once started.
>> +    (default=kernel config parameter)
>> +
>> +-------------------------------------------------
>> +
>>   rc32434_wdt:
>>       timeout:
>>       Watchdog timeout value, in seconds (default=20)
>> diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
>> index c4e82a8d863f..06b412603f3e 100644
>> --- a/drivers/watchdog/Kconfig
>> +++ b/drivers/watchdog/Kconfig
>> @@ -1932,6 +1932,14 @@ config MEN_A21_WDT
>>   # PPC64 Architecture
>> +config PSERIES_WDT
>> +    tristate "POWER Architecture Platform Watchdog Timer"
>> +    depends on PPC_PSERIES
>> +    select WATCHDOG_CORE
>> +    help
>> +      Driver for virtual watchdog timers provided by PAPR
>> +      hypervisors (e.g. PowerVM, KVM).
>> +
>>   config WATCHDOG_RTAS
>>       tristate "RTAS watchdog"
>>       depends on PPC_RTAS
>> diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
>> index f7da867e8782..f35660409f17 100644
>> --- a/drivers/watchdog/Makefile
>> +++ b/drivers/watchdog/Makefile
>> @@ -184,6 +184,7 @@ obj-$(CONFIG_BOOKE_WDT) += booke_wdt.o
>>   obj-$(CONFIG_MEN_A21_WDT) += mena21_wdt.o
>>   # PPC64 Architecture
>> +obj-$(CONFIG_PSERIES_WDT) += pseries-wdt.o
>>   obj-$(CONFIG_WATCHDOG_RTAS) += wdrtas.o
>>   # S390 Architecture
>> diff --git a/drivers/watchdog/pseries-wdt.c b/drivers/watchdog/pseries-wdt.c
>> new file mode 100644
>> index 000000000000..f41bc4d3b7a2
>> --- /dev/null
>> +++ b/drivers/watchdog/pseries-wdt.c
>> @@ -0,0 +1,337 @@
>> +// SPDX-License-Identifier: GPL-2.0-or-later
>> +/*
>> + * Copyright (c) 2022 International Business Machines, Inc.
>> + */
>> +
>> +#include <linux/bitops.h>
>> +#include <linux/kernel.h>
>> +#include <linux/limits.h>
>> +#include <linux/mod_devicetable.h>
>> +#include <linux/module.h>
>> +#include <linux/moduleparam.h>
>> +#include <linux/platform_device.h>
>> +#include <linux/watchdog.h>
>> +
>> +#define DRV_NAME "pseries-wdt"
>> +
>> +/*
>> + * The PAPR's MSB->LSB bit ordering is 0->63.  These macros simplify
>> + * defining bitfields as described in the PAPR without needing to
>> + * transpose values to the more C-like 63->0 ordering.
>> + */
>> +#define SETFIELD(_v, _b, _e)    \
>> +    (((unsigned long)(_v) << PPC_BITLSHIFT(_e)) & PPC_BITMASK((_b), (_e)))
>> +#define GETFIELD(_v, _b, _e)    \
>> +    (((unsigned long)(_v) & PPC_BITMASK((_b), (_e))) >> PPC_BITLSHIFT(_e))
>> +
>> +/*
>> + * H_WATCHDOG Hypercall Input
>> + *
>> + * R4: "flags":
>> + *
>> + *     A 64-bit value structured as follows:
>> + *
>> + *         Bits 0-46: Reserved (must be zero).
>> + */
>> +#define PSERIES_WDTF_RESERVED    PPC_BITMASK(0, 46)
>> +
>> +/*
>> + *         Bit 47: "leaveOtherWatchdogsRunningOnTimeout"
>> + *
>> + *             0  Stop outstanding watchdogs on timeout.
>> + *             1  Leave outstanding watchdogs running on timeout.
>> + */
>> +#define PSERIES_WDTF_LEAVE_OTHER    PPC_BIT(47)
>> +
>> +/*
>> + *         Bits 48-55: "operation"
>> + *
>> + *             0x01  Start Watchdog
>> + *             0x02  Stop Watchdog
>> + *             0x03  Query Watchdog Capabilities
>> + *             0x04  Query Watchdog LPM Requirement
>> + */
>> +#define PSERIES_WDTF_OP(op)        SETFIELD((op), 48, 55)
>> +#define PSERIES_WDTF_OP_START        PSERIES_WDTF_OP(0x1)
>> +#define PSERIES_WDTF_OP_STOP        PSERIES_WDTF_OP(0x2)
>> +#define PSERIES_WDTF_OP_QUERY        PSERIES_WDTF_OP(0x3)
>> +#define PSERIES_WDTF_OP_QUERY_LPM    PSERIES_WDTF_OP(0x4)
>> +
>> +/*
>> + *         Bits 56-63: "timeoutAction"
>> + *
>> + *             0x01  Hard poweroff
>> + *             0x02  Hard restart
>> + *             0x03  Dump restart
>> + */
>> +#define PSERIES_WDTF_ACTION(ac)            SETFIELD(ac, 56, 63)
>> +#define PSERIES_WDTF_ACTION_HARD_POWEROFF    PSERIES_WDTF_ACTION(0x1)
>> +#define PSERIES_WDTF_ACTION_HARD_RESTART    PSERIES_WDTF_ACTION(0x2)
>> +#define PSERIES_WDTF_ACTION_DUMP_RESTART    PSERIES_WDTF_ACTION(0x3)
> 
>> +
>> +/*
>> + * R5: "watchdogNumber":
>> + *
>> + *     The target watchdog.  Watchdog numbers are 1-based.  The
>> + *     maximum supported watchdog number may be obtained via the
>> + *     "Query Watchdog Capabilities" operation.
>> + *
>> + *     This input is ignored for the "Query Watchdog Capabilities"
>> + *     operation.
>> + *
>> + * R6: "timeoutInMs":
>> + *
>> + *     The timeout in milliseconds.  The minimum supported timeout may
>> + *     be obtained via the "Query Watchdog Capabilities" operation.
>> + *
>> + *     This input is ignored for the "Stop Watchdog", "Query Watchdog
>> + *     Capabilities", and "Query Watchdog LPM Requirement" operations.
>> + */
>> +
>> +/*
>> + * H_WATCHDOG Hypercall Output
>> + *
>> + * R3: Return code
>> + *
>> + *     H_SUCCESS    The operation completed.
>> + *
>> + *     H_BUSY        The hypervisor is too busy; retry the operation.
>> + *
>> + *     H_PARAMETER  The given "flags" are somehow invalid.  Either the
>> + *                  "operation" or "timeoutAction" is invalid, or a
>> + *                  reserved bit is set.
>> + *
>> + *     H_P2         The given "watchdogNumber" is zero or exceeds the
>> + *                  supported maximum value.
>> + *
>> + *     H_P3         The given "timeoutInMs" is below the supported
>> + *                  minimum value.
>> + *
>> + *     H_NOOP       The given "watchdogNumber" is already stopped.
>> + *
>> + *     H_HARDWARE   The operation failed for ineffable reasons.
>> + *
>> + *     H_FUNCTION   The H_WATCHDOG hypercall is not supported by this
>> + *                  hypervisor.
>> + *
>> + * R4:
>> + *
>> + * - For the "Query Watchdog Capabilities" operation, a 64-bit
>> + *   value structured as follows:
>> + *
>> + *       Bits  0-15: The minimum supported timeout in milliseconds.
>> + *       Bits 16-31: The number of watchdogs supported.
>> + *       Bits 32-63: Reserved.
>> + */
>> +#define PSERIES_WDTQ_MIN_TIMEOUT(cap)    GETFIELD((cap), 0, 15)
>> +#define PSERIES_WDTQ_MAX_NUMBER(cap)    GETFIELD((cap), 16, 31)
>> +#define PSERIES_WDTQ_RESERVED        PPC_BITMASK(32, 63)
> 
> PSERIES_WDTQ_RESERVED is not needed as there is a comment above.
> 
>> +
>> +/*
>> + * - For the "Query Watchdog LPM Requirement" operation:
>> + *
>> + *       1  The given "watchdogNumber" must be stopped prior to
>> + *          suspending.
>> + *
>> + *       2  The given "watchdogNumber" does not have to be stopped
>> + *          prior to suspending.
>> + */
>> +#define PSERIES_WDTQL_MUST_STOP        1
>> +#define PSERIES_WDTQL_NEED_NOT_STOP    2
>> +
>> +static unsigned long action = PSERIES_WDTF_ACTION_HARD_RESTART;
>> +
>> +static int action_get(char *buf, const struct kernel_param *kp)
>> +{
>> +    int val;
>> +
>> +    switch (action) {
>> +    case PSERIES_WDTF_ACTION_HARD_POWEROFF:
>> +        val = 1;
>> +        break;
>> +    case PSERIES_WDTF_ACTION_HARD_RESTART:
>> +        val = 2;
>> +        break;
>> +    case PSERIES_WDTF_ACTION_DUMP_RESTART:
>> +        val = 3;
>> +        break;
>> +    default:
>> +        return -EINVAL;
>> +    }
>> +    return sprintf(buf, "%d\n", val);
>> +}
>> +
>> +static int action_set(const char *val, const struct kernel_param *kp)
>> +{
>> +    int choice;
>> +
>> +    if (kstrtoint(val, 10, &choice))
>> +        return -EINVAL;
>> +    switch (choice) {
>> +    case 1:
>> +        action = PSERIES_WDTF_ACTION_HARD_POWEROFF;
>> +        return 0;
>> +    case 2:
>> +        action = PSERIES_WDTF_ACTION_HARD_RESTART;
>> +        return 0;
>> +    case 3:
>> +        action = PSERIES_WDTF_ACTION_DUMP_RESTART;
>> +        return 0;
>> +    }
>> +    return -EINVAL;
>> +}
>> +
>> +static const struct kernel_param_ops action_ops = {
>> +    .get = action_get,
>> +    .set = action_set,
>> +};
>> +module_param_cb(action, &action_ops, NULL, 0444);
> 
> 
> 0644 here and below?
> 

That would make the module parameters have to be runtime
configurable, which does not make sense at least for
the two parameters below. I don't know though if it is
really valuable to have all the above code instead of just
storing the action numbers and doing the conversion to action
once in the probe function. The above code really only
makes sense if the action is changeable during runtime and more
is done that just converting it to another value.

>> +MODULE_PARM_DESC(action, "Action taken when watchdog expires (default=2)");
>> +
>> +static bool nowayout = WATCHDOG_NOWAYOUT;
>> +module_param(nowayout, bool, 0444);
>> +MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
>> +         __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
>> +
>> +#define WATCHDOG_TIMEOUT 60
>> +static unsigned int timeout = WATCHDOG_TIMEOUT;
>> +module_param(timeout, uint, 0444);
>> +MODULE_PARM_DESC(timeout, "Initial watchdog timeout in seconds (default="
>> +         __MODULE_STRING(WATCHDOG_TIMEOUT) ")");
>> +
>> +struct pseries_wdt {
>> +    struct watchdog_device wd;
>> +    unsigned long num;        /* Watchdog numbers are 1-based */
>> +};
>> +
>> +static int pseries_wdt_start(struct watchdog_device *wdd)
>> +{
>> +    struct device *dev = wdd->parent;
>> +    struct pseries_wdt *pw = watchdog_get_drvdata(wdd);
>> +    unsigned long flags, msecs;
>> +    long rc;
>> +
>> +    flags = action | PSERIES_WDTF_OP_START;
>> +    msecs = wdd->timeout * 1000UL;
>> +    rc = plpar_hcall_norets(H_WATCHDOG, flags, pw->num, msecs);
>> +    if (rc != H_SUCCESS) {
>> +        dev_crit(dev, "H_WATCHDOG: %ld: failed to start timer %lu",
>> +             rc, pw->num);
>> +        return -EIO;
>> +    }
>> +    return 0;
>> +}
>> +
>> +static int pseries_wdt_stop(struct watchdog_device *wdd)
>> +{
>> +    struct device *dev = wdd->parent;
>> +    struct pseries_wdt *pw = watchdog_get_drvdata(wdd);
>> +    long rc;
>> +
>> +    rc = plpar_hcall_norets(H_WATCHDOG, PSERIES_WDTF_OP_STOP, pw->num);
>> +    if (rc != H_SUCCESS && rc != H_NOOP) {
>> +        dev_crit(dev, "H_WATCHDOG: %ld: failed to stop timer %lu",
>> +             rc, pw->num);
>> +        return -EIO;
>> +    }
>> +    return 0;
>> +}
>> +
>> +static struct watchdog_info pseries_wdt_info = {
>> +    .identity = DRV_NAME,
>> +    .options = WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE | WDIOF_SETTIMEOUT
>> +        | WDIOF_PRETIMEOUT,
>> +};
>> +
>> +static const struct watchdog_ops pseries_wdt_ops = {
>> +    .owner = THIS_MODULE,
>> +    .start = pseries_wdt_start,
>> +    .stop = pseries_wdt_stop,
>> +};
>> +
>> +static int pseries_wdt_probe(struct platform_device *pdev)
>> +{
>> +    unsigned long ret[PLPAR_HCALL_BUFSIZE] = { 0 };
>> +    unsigned long cap, min_timeout_ms;
>> +    long rc;
>> +    struct pseries_wdt *pw;
>> +    int err;
>> +
>> +    rc = plpar_hcall(H_WATCHDOG, ret, PSERIES_WDTF_OP_QUERY);
>> +    if (rc != H_SUCCESS)
>> +        return rc == H_FUNCTION ? -ENODEV : -EIO;
> 
> Nit:
> 
> if (rc == H_FUNCTION)
>      return -ENODEV;
> if (rc != H_SUCCESS)
>      return -EIO;
> 
> ?
> 
>> +    cap = ret[0];
>> +
>> +    pw = devm_kzalloc(&pdev->dev, sizeof(*pw), GFP_KERNEL);
>> +    if (!pw)
>> +        return -ENOMEM;
>> +
>> +    /*
>> +     * Assume watchdogNumber 1 for now.  If we ever support
>> +     * multiple timers we will need to devise a way to choose a
>> +     * distinct watchdogNumber for each platform device at device
>> +     * registration time.
>> +     */
>> +    pw->num = 1;
>> +
>> +    pw->wd.parent = &pdev->dev;
>> +    pw->wd.info = &pseries_wdt_info;
>> +    pw->wd.ops = &pseries_wdt_ops;
>> +    min_timeout_ms = PSERIES_WDTQ_MIN_TIMEOUT(cap);
>> +    pw->wd.min_timeout = roundup(min_timeout_ms, 1000) / 1000;

Any reason for not using min_hw_heartbeat_ms (or, for that matter,
at least DIV_ROUND_UP) ?

>> +    pw->wd.max_timeout = UINT_MAX;

This will overflow on builds with sizeof(int) == sizeof(long).


>> +    watchdog_init_timeout(&pw->wd, timeout, NULL);
> 
> 
> If PSERIES_WDTF_OP_QUERY returns 2min and this driver's default is 1min, watchdog_init_timeout() returns an error, don't we want to handle it here? Thanks,
> 

Normally one would set pw->wd.timeout to WATCHDOG_TIMEOUT
and pre-initialize the module parameter with 0. That would
take care of the problem and at the same time permit the
use of the timeout-sec devicetree property on dt systems
(which I guess isn't a concern here).

> 
>> +    watchdog_set_nowayout(&pw->wd, nowayout);
>> +    watchdog_stop_on_reboot(&pw->wd);
>> +    watchdog_stop_on_unregister(&pw->wd);
>> +    watchdog_set_drvdata(&pw->wd, pw);
>> +
>> +    err = devm_watchdog_register_device(&pdev->dev, &pw->wd);
>> +    if (err)
>> +        return err;
>> +
>> +    platform_set_drvdata(pdev, &pw->wd);
>> +
>> +    return 0;
>> +}
>> +
>> +static int pseries_wdt_suspend(struct platform_device *pdev, pm_message_t state)
>> +{
>> +    struct watchdog_device *wd = platform_get_drvdata(pdev);
>> +
>> +    if (watchdog_active(wd))
>> +        return pseries_wdt_stop(wd);
>> +    return 0;
>> +}
>> +
>> +static int pseries_wdt_resume(struct platform_device *pdev)
>> +{
>> +    struct watchdog_device *wd = platform_get_drvdata(pdev);
>> +
>> +    if (watchdog_active(wd))
>> +        return pseries_wdt_start(wd);
>> +    return 0;
>> +}
>> +
>> +static const struct platform_device_id pseries_wdt_id[] = {
>> +    { .name = "pseries-wdt" },
>> +    {}
>> +};
>> +MODULE_DEVICE_TABLE(platform, pseries_wdt_id);
>> +
>> +static struct platform_driver pseries_wdt_driver = {
>> +    .driver = {
>> +        .name = DRV_NAME,
>> +        .owner = THIS_MODULE,
>> +    },
>> +    .id_table = pseries_wdt_id,
>> +    .probe = pseries_wdt_probe,
>> +    .resume = pseries_wdt_resume,
>> +    .suspend = pseries_wdt_suspend,
>> +};
>> +module_platform_driver(pseries_wdt_driver);
>> +
>> +MODULE_AUTHOR("Alexey Kardashevskiy <aik@ozlabs.ru>");
>> +MODULE_AUTHOR("Scott Cheloha <cheloha@linux.ibm.com>");
>> +MODULE_DESCRIPTION("POWER Architecture Platform Watchdog Driver");
>> +MODULE_LICENSE("GPL");
>
Scott Cheloha June 1, 2022, 2:48 p.m. UTC | #3
On Wed, May 25, 2022 at 04:35:11PM +1000, Alexey Kardashevskiy wrote:
> 
> On 5/21/22 04:35, Scott Cheloha wrote:
> > PAPR v2.12 defines a new hypercall, H_WATCHDOG.  The hypercall permits
> > guest control of one or more virtual watchdog timers.  The timers have
> > millisecond granularity.  The guest is terminated when a timer
> > expires.
> > 
> > This patch adds a watchdog driver for these timers, "pseries-wdt".
> > 
> > pseries_wdt_probe() currently assumes the existence of only one
> > platform device and always assigns it watchdogNumber 1.  If we ever
> > expose more than one timer to userspace we will need to devise a way
> > to assign a distinct watchdogNumber to each platform device at device
> > registration time.
> 
> This one should go before 4/4 in the series for bisectability.
> 
> What is platform_device_register_simple("pseries-wdt",...) going to do
> without the driver?

This is a chicken-and-egg problem without an obvious solution.  A
device without a driver is a body without a soul.  A driver without a
device is a ghost without a machine.

... or something like that, don't quote me :)

Absent some very compelling reasoning, I would like to keep the
current order.  It feels logical to me to keep the powerpc/pseries
patches adjacent and prior to the watchdog driver patch.

> > Signed-off-by: Scott Cheloha <cheloha@linux.ibm.com>
> > ---
> >   .../watchdog/watchdog-parameters.rst          |  12 +
> >   drivers/watchdog/Kconfig                      |   8 +
> >   drivers/watchdog/Makefile                     |   1 +
> >   drivers/watchdog/pseries-wdt.c                | 337 ++++++++++++++++++
> >   4 files changed, 358 insertions(+)
> >   create mode 100644 drivers/watchdog/pseries-wdt.c
> > 
> > diff --git a/Documentation/watchdog/watchdog-parameters.rst b/Documentation/watchdog/watchdog-parameters.rst
> > index 223c99361a30..4ffe725e796c 100644
> > --- a/Documentation/watchdog/watchdog-parameters.rst
> > +++ b/Documentation/watchdog/watchdog-parameters.rst
> > @@ -425,6 +425,18 @@ pnx833x_wdt:
> >   -------------------------------------------------
> > +pseries-wdt:
> > +    action:
> > +	Action taken when watchdog expires: 1 (power off), 2 (restart),
> > +	3 (dump and restart). (default=2)
> > +    timeout:
> > +	Initial watchdog timeout in seconds. (default=60)
> > +    nowayout:
> > +	Watchdog cannot be stopped once started.
> > +	(default=kernel config parameter)
> > +
> > +-------------------------------------------------
> > +
> >   rc32434_wdt:
> >       timeout:
> >   	Watchdog timeout value, in seconds (default=20)
> > diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
> > index c4e82a8d863f..06b412603f3e 100644
> > --- a/drivers/watchdog/Kconfig
> > +++ b/drivers/watchdog/Kconfig
> > @@ -1932,6 +1932,14 @@ config MEN_A21_WDT
> >   # PPC64 Architecture
> > +config PSERIES_WDT
> > +	tristate "POWER Architecture Platform Watchdog Timer"
> > +	depends on PPC_PSERIES
> > +	select WATCHDOG_CORE
> > +	help
> > +	  Driver for virtual watchdog timers provided by PAPR
> > +	  hypervisors (e.g. PowerVM, KVM).
> > +
> >   config WATCHDOG_RTAS
> >   	tristate "RTAS watchdog"
> >   	depends on PPC_RTAS
> > diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
> > index f7da867e8782..f35660409f17 100644
> > --- a/drivers/watchdog/Makefile
> > +++ b/drivers/watchdog/Makefile
> > @@ -184,6 +184,7 @@ obj-$(CONFIG_BOOKE_WDT) += booke_wdt.o
> >   obj-$(CONFIG_MEN_A21_WDT) += mena21_wdt.o
> >   # PPC64 Architecture
> > +obj-$(CONFIG_PSERIES_WDT) += pseries-wdt.o
> >   obj-$(CONFIG_WATCHDOG_RTAS) += wdrtas.o
> >   # S390 Architecture
> > diff --git a/drivers/watchdog/pseries-wdt.c b/drivers/watchdog/pseries-wdt.c
> > new file mode 100644
> > index 000000000000..f41bc4d3b7a2
> > --- /dev/null
> > +++ b/drivers/watchdog/pseries-wdt.c
> > @@ -0,0 +1,337 @@
> > +// SPDX-License-Identifier: GPL-2.0-or-later
> > +/*
> > + * Copyright (c) 2022 International Business Machines, Inc.
> > + */
> > +
> > +#include <linux/bitops.h>
> > +#include <linux/kernel.h>
> > +#include <linux/limits.h>
> > +#include <linux/mod_devicetable.h>
> > +#include <linux/module.h>
> > +#include <linux/moduleparam.h>
> > +#include <linux/platform_device.h>
> > +#include <linux/watchdog.h>
> > +
> > +#define DRV_NAME "pseries-wdt"
> > +
> > +/*
> > + * The PAPR's MSB->LSB bit ordering is 0->63.  These macros simplify
> > + * defining bitfields as described in the PAPR without needing to
> > + * transpose values to the more C-like 63->0 ordering.
> > + */
> > +#define SETFIELD(_v, _b, _e)	\
> > +	(((unsigned long)(_v) << PPC_BITLSHIFT(_e)) & PPC_BITMASK((_b), (_e)))
> > +#define GETFIELD(_v, _b, _e)	\
> > +	(((unsigned long)(_v) & PPC_BITMASK((_b), (_e))) >> PPC_BITLSHIFT(_e))
> > +
> > +/*
> > + * H_WATCHDOG Hypercall Input
> > + *
> > + * R4: "flags":
> > + *
> > + *     A 64-bit value structured as follows:
> > + *
> > + *         Bits 0-46: Reserved (must be zero).
> > + */
> > +#define PSERIES_WDTF_RESERVED	PPC_BITMASK(0, 46)
> > +
> > +/*
> > + *         Bit 47: "leaveOtherWatchdogsRunningOnTimeout"
> > + *
> > + *             0  Stop outstanding watchdogs on timeout.
> > + *             1  Leave outstanding watchdogs running on timeout.
> > + */
> > +#define PSERIES_WDTF_LEAVE_OTHER	PPC_BIT(47)
> > +
> > +/*
> > + *         Bits 48-55: "operation"
> > + *
> > + *             0x01  Start Watchdog
> > + *             0x02  Stop Watchdog
> > + *             0x03  Query Watchdog Capabilities
> > + *             0x04  Query Watchdog LPM Requirement
> > + */
> > +#define PSERIES_WDTF_OP(op)		SETFIELD((op), 48, 55)
> > +#define PSERIES_WDTF_OP_START		PSERIES_WDTF_OP(0x1)
> > +#define PSERIES_WDTF_OP_STOP		PSERIES_WDTF_OP(0x2)
> > +#define PSERIES_WDTF_OP_QUERY		PSERIES_WDTF_OP(0x3)
> > +#define PSERIES_WDTF_OP_QUERY_LPM	PSERIES_WDTF_OP(0x4)
> > +
> > +/*
> > + *         Bits 56-63: "timeoutAction"
> > + *
> > + *             0x01  Hard poweroff
> > + *             0x02  Hard restart
> > + *             0x03  Dump restart
> > + */
> > +#define PSERIES_WDTF_ACTION(ac)			SETFIELD(ac, 56, 63)
> > +#define PSERIES_WDTF_ACTION_HARD_POWEROFF	PSERIES_WDTF_ACTION(0x1)
> > +#define PSERIES_WDTF_ACTION_HARD_RESTART	PSERIES_WDTF_ACTION(0x2)
> > +#define PSERIES_WDTF_ACTION_DUMP_RESTART	PSERIES_WDTF_ACTION(0x3)
> 
> > +
> > +/*
> > + * R5: "watchdogNumber":
> > + *
> > + *     The target watchdog.  Watchdog numbers are 1-based.  The
> > + *     maximum supported watchdog number may be obtained via the
> > + *     "Query Watchdog Capabilities" operation.
> > + *
> > + *     This input is ignored for the "Query Watchdog Capabilities"
> > + *     operation.
> > + *
> > + * R6: "timeoutInMs":
> > + *
> > + *     The timeout in milliseconds.  The minimum supported timeout may
> > + *     be obtained via the "Query Watchdog Capabilities" operation.
> > + *
> > + *     This input is ignored for the "Stop Watchdog", "Query Watchdog
> > + *     Capabilities", and "Query Watchdog LPM Requirement" operations.
> > + */
> > +
> > +/*
> > + * H_WATCHDOG Hypercall Output
> > + *
> > + * R3: Return code
> > + *
> > + *     H_SUCCESS    The operation completed.
> > + *
> > + *     H_BUSY	    The hypervisor is too busy; retry the operation.
> > + *
> > + *     H_PARAMETER  The given "flags" are somehow invalid.  Either the
> > + *                  "operation" or "timeoutAction" is invalid, or a
> > + *                  reserved bit is set.
> > + *
> > + *     H_P2         The given "watchdogNumber" is zero or exceeds the
> > + *                  supported maximum value.
> > + *
> > + *     H_P3         The given "timeoutInMs" is below the supported
> > + *                  minimum value.
> > + *
> > + *     H_NOOP       The given "watchdogNumber" is already stopped.
> > + *
> > + *     H_HARDWARE   The operation failed for ineffable reasons.
> > + *
> > + *     H_FUNCTION   The H_WATCHDOG hypercall is not supported by this
> > + *                  hypervisor.
> > + *
> > + * R4:
> > + *
> > + * - For the "Query Watchdog Capabilities" operation, a 64-bit
> > + *   value structured as follows:
> > + *
> > + *       Bits  0-15: The minimum supported timeout in milliseconds.
> > + *       Bits 16-31: The number of watchdogs supported.
> > + *       Bits 32-63: Reserved.
> > + */
> > +#define PSERIES_WDTQ_MIN_TIMEOUT(cap)	GETFIELD((cap), 0, 15)
> > +#define PSERIES_WDTQ_MAX_NUMBER(cap)	GETFIELD((cap), 16, 31)
> > +#define PSERIES_WDTQ_RESERVED		PPC_BITMASK(32, 63)
> 
> PSERIES_WDTQ_RESERVED is not needed as there is a comment above.

The *_RESERVED macros are from your draft patch.  I will remove them,
though, we aren't actually using them.

I will trim up the big comment for v2, too.  It's just going to rot
otherwise.  You need to read the PAPR to review the hypercalls,
anyway.

> > +
> > +/*
> > + * - For the "Query Watchdog LPM Requirement" operation:
> > + *
> > + *       1  The given "watchdogNumber" must be stopped prior to
> > + *          suspending.
> > + *
> > + *       2  The given "watchdogNumber" does not have to be stopped
> > + *          prior to suspending.
> > + */
> > +#define PSERIES_WDTQL_MUST_STOP		1
> > +#define PSERIES_WDTQL_NEED_NOT_STOP	2
> > +
> > +static unsigned long action = PSERIES_WDTF_ACTION_HARD_RESTART;
> > +
> > +static int action_get(char *buf, const struct kernel_param *kp)
> > +{
> > +	int val;
> > +
> > +	switch (action) {
> > +	case PSERIES_WDTF_ACTION_HARD_POWEROFF:
> > +		val = 1;
> > +		break;
> > +	case PSERIES_WDTF_ACTION_HARD_RESTART:
> > +		val = 2;
> > +		break;
> > +	case PSERIES_WDTF_ACTION_DUMP_RESTART:
> > +		val = 3;
> > +		break;
> > +	default:
> > +		return -EINVAL;
> > +	}
> > +	return sprintf(buf, "%d\n", val);
> > +}
> > +
> > +static int action_set(const char *val, const struct kernel_param *kp)
> > +{
> > +	int choice;
> > +
> > +	if (kstrtoint(val, 10, &choice))
> > +		return -EINVAL;
> > +	switch (choice) {
> > +	case 1:
> > +		action = PSERIES_WDTF_ACTION_HARD_POWEROFF;
> > +		return 0;
> > +	case 2:
> > +		action = PSERIES_WDTF_ACTION_HARD_RESTART;
> > +		return 0;
> > +	case 3:
> > +		action = PSERIES_WDTF_ACTION_DUMP_RESTART;
> > +		return 0;
> > +	}
> > +	return -EINVAL;
> > +}
> > +
> > +static const struct kernel_param_ops action_ops = {
> > +	.get = action_get,
> > +	.set = action_set,
> > +};
> > +module_param_cb(action, &action_ops, NULL, 0444);
> 
> 0644 here and below?

I do not want these changing at runtime.  0444 is the right thing.

> > +MODULE_PARM_DESC(action, "Action taken when watchdog expires (default=2)");
> > +
> > +static bool nowayout = WATCHDOG_NOWAYOUT;
> > +module_param(nowayout, bool, 0444);
> > +MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
> > +		 __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
> > +
> > +#define WATCHDOG_TIMEOUT 60
> > +static unsigned int timeout = WATCHDOG_TIMEOUT;
> > +module_param(timeout, uint, 0444);
> > +MODULE_PARM_DESC(timeout, "Initial watchdog timeout in seconds (default="
> > +		 __MODULE_STRING(WATCHDOG_TIMEOUT) ")");
> > +
> > +struct pseries_wdt {
> > +	struct watchdog_device wd;
> > +	unsigned long num;		/* Watchdog numbers are 1-based */
> > +};
> > +
> > +static int pseries_wdt_start(struct watchdog_device *wdd)
> > +{
> > +	struct device *dev = wdd->parent;
> > +	struct pseries_wdt *pw = watchdog_get_drvdata(wdd);
> > +	unsigned long flags, msecs;
> > +	long rc;
> > +
> > +	flags = action | PSERIES_WDTF_OP_START;
> > +	msecs = wdd->timeout * 1000UL;
> > +	rc = plpar_hcall_norets(H_WATCHDOG, flags, pw->num, msecs);
> > +	if (rc != H_SUCCESS) {
> > +		dev_crit(dev, "H_WATCHDOG: %ld: failed to start timer %lu",
> > +			 rc, pw->num);
> > +		return -EIO;
> > +	}
> > +	return 0;
> > +}
> > +
> > +static int pseries_wdt_stop(struct watchdog_device *wdd)
> > +{
> > +	struct device *dev = wdd->parent;
> > +	struct pseries_wdt *pw = watchdog_get_drvdata(wdd);
> > +	long rc;
> > +
> > +	rc = plpar_hcall_norets(H_WATCHDOG, PSERIES_WDTF_OP_STOP, pw->num);
> > +	if (rc != H_SUCCESS && rc != H_NOOP) {
> > +		dev_crit(dev, "H_WATCHDOG: %ld: failed to stop timer %lu",
> > +			 rc, pw->num);
> > +		return -EIO;
> > +	}
> > +	return 0;
> > +}
> > +
> > +static struct watchdog_info pseries_wdt_info = {
> > +	.identity = DRV_NAME,
> > +	.options = WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE | WDIOF_SETTIMEOUT
> > +	    | WDIOF_PRETIMEOUT,
> > +};
> > +
> > +static const struct watchdog_ops pseries_wdt_ops = {
> > +	.owner = THIS_MODULE,
> > +	.start = pseries_wdt_start,
> > +	.stop = pseries_wdt_stop,
> > +};
> > +
> > +static int pseries_wdt_probe(struct platform_device *pdev)
> > +{
> > +	unsigned long ret[PLPAR_HCALL_BUFSIZE] = { 0 };
> > +	unsigned long cap, min_timeout_ms;
> > +	long rc;
> > +	struct pseries_wdt *pw;
> > +	int err;
> > +
> > +	rc = plpar_hcall(H_WATCHDOG, ret, PSERIES_WDTF_OP_QUERY);
> > +	if (rc != H_SUCCESS)
> > +		return rc == H_FUNCTION ? -ENODEV : -EIO;
> 
> Nit:
> 
> if (rc == H_FUNCTION)
> 	return -ENODEV;
> if (rc != H_SUCCESS)
> 	return -EIO;
> 
> ?

Sure, changed in v2.

> > +	cap = ret[0];
> > +
> > +	pw = devm_kzalloc(&pdev->dev, sizeof(*pw), GFP_KERNEL);
> > +	if (!pw)
> > +		return -ENOMEM;
> > +
> > +	/*
> > +	 * Assume watchdogNumber 1 for now.  If we ever support
> > +	 * multiple timers we will need to devise a way to choose a
> > +	 * distinct watchdogNumber for each platform device at device
> > +	 * registration time.
> > +	 */
> > +	pw->num = 1;
> > +
> > +	pw->wd.parent = &pdev->dev;
> > +	pw->wd.info = &pseries_wdt_info;
> > +	pw->wd.ops = &pseries_wdt_ops;
> > +	min_timeout_ms = PSERIES_WDTQ_MIN_TIMEOUT(cap);
> > +	pw->wd.min_timeout = roundup(min_timeout_ms, 1000) / 1000;
> > +	pw->wd.max_timeout = UINT_MAX;
> > +	watchdog_init_timeout(&pw->wd, timeout, NULL);
> 
> 
> If PSERIES_WDTF_OP_QUERY returns 2min and this driver's default is 1min,
> watchdog_init_timeout() returns an error, don't we want to handle it here?

Fixed in v2.

-Scott
Guenter Roeck June 1, 2022, 3:45 p.m. UTC | #4
On 6/1/22 08:07, Scott Cheloha wrote:
[ ... ]
>>>> +static unsigned long action = PSERIES_WDTF_ACTION_HARD_RESTART;
>>>> +
>>>> +static int action_get(char *buf, const struct kernel_param *kp)
>>>> +{
>>>> +    int val;
>>>> +
>>>> +    switch (action) {
>>>> +    case PSERIES_WDTF_ACTION_HARD_POWEROFF:
>>>> +        val = 1;
>>>> +        break;
>>>> +    case PSERIES_WDTF_ACTION_HARD_RESTART:
>>>> +        val = 2;
>>>> +        break;
>>>> +    case PSERIES_WDTF_ACTION_DUMP_RESTART:
>>>> +        val = 3;
>>>> +        break;
>>>> +    default:
>>>> +        return -EINVAL;
>>>> +    }
>>>> +    return sprintf(buf, "%d\n", val);
>>>> +}
>>>> +
>>>> +static int action_set(const char *val, const struct kernel_param *kp)
>>>> +{
>>>> +    int choice;
>>>> +
>>>> +    if (kstrtoint(val, 10, &choice))
>>>> +        return -EINVAL;
>>>> +    switch (choice) {
>>>> +    case 1:
>>>> +        action = PSERIES_WDTF_ACTION_HARD_POWEROFF;
>>>> +        return 0;
>>>> +    case 2:
>>>> +        action = PSERIES_WDTF_ACTION_HARD_RESTART;
>>>> +        return 0;
>>>> +    case 3:
>>>> +        action = PSERIES_WDTF_ACTION_DUMP_RESTART;
>>>> +        return 0;
>>>> +    }
>>>> +    return -EINVAL;
>>>> +}
>>>> +
>>>> +static const struct kernel_param_ops action_ops = {
>>>> +    .get = action_get,
>>>> +    .set = action_set,
>>>> +};
>>>> +module_param_cb(action, &action_ops, NULL, 0444);
>>>
>>>
>>> 0644 here and below?
>>>
>>
>> That would make the module parameters have to be runtime
>> configurable, which does not make sense at least for
>> the two parameters below.
> 
> Agreed.
> 
>> I don't know though if it is really valuable to have all the
>> above code instead of just
>> storing the action numbers and doing the conversion to action
>> once in the probe function. The above code really only
>> makes sense if the action is changeable during runtime and more
>> is done that just converting it to another value.
> 
> Having a setter that runs exactly once during module attach is
> obvious.  We need a corresponding .get() method to convert on the way
> out anyway.
> 

Why would a get method be needed if the module parameter is just kept as-is ?

> I don't see any upside to moving the action_set() code into
> pseries_wdt_probe() aside from maybe shaving a few SLOC.  The module
> is already very compact.
> 

I disagree. The get method is unnecessary. The module parameter values (1..3)
add unnecessary complexity. It could as well be 0..2, making it easier to convert.
The actual action could be stored in struct pseries_wdt, or converted using something
like

u8 pseries_actions[] = {
	PSERIES_WDTF_ACTION_HARD_POWEROFF,
	PSERIES_WDTF_ACTION_HARD_RESTART,
	PSERIES_WDTF_ACTION_DUMP_RESTART
};

	flags = pseries_actions[action] | PSERIES_WDTF_OP_START;

or, alternatively, in probe

	if (action > 2)
		return -EINVAL;
	pw->action = pseries_actions[action];
and, in the start function,
	flags = pw->action | PSERIES_WDTF_OP_START;

That not only reduces code size but also improves readability.
get/set methods are useful, but should be limited to cases where they
are really needed, ie do something besides converting values. You could argue
that you want to be able to change the reboot method on the fly, by making
the module parameter writeable, but then the .set method should actually
change the method accordingly and not just convert values. And even then
I'd argue that it would be better not to convert the 'action' value itself
but to keep it at 0, 1, 2 (or 1, 2, 3 if you prefer) and use param_get_uint
(or param_get_ulong) for the get method.

Regarding max_timeout, we have calculations such as

	unsigned int t = wdd->timeout * 1000;

in the assumption that timeouts larger than UINT_MAX/1000 seconds (or ~50 days)
don't really make much sense. watchdog_timeout_invalid() will also return -EINVAL
if the provided timeout value is larger than UINT_MAX / 1000.

Guenter
Alexey Kardashevskiy June 2, 2022, 4:27 a.m. UTC | #5
On 6/2/22 00:48, Scott Cheloha wrote:
> On Wed, May 25, 2022 at 04:35:11PM +1000, Alexey Kardashevskiy wrote:
>>
>> On 5/21/22 04:35, Scott Cheloha wrote:
>>> PAPR v2.12 defines a new hypercall, H_WATCHDOG.  The hypercall permits
>>> guest control of one or more virtual watchdog timers.  The timers have
>>> millisecond granularity.  The guest is terminated when a timer
>>> expires.
>>>
>>> This patch adds a watchdog driver for these timers, "pseries-wdt".
>>>
>>> pseries_wdt_probe() currently assumes the existence of only one
>>> platform device and always assigns it watchdogNumber 1.  If we ever
>>> expose more than one timer to userspace we will need to devise a way
>>> to assign a distinct watchdogNumber to each platform device at device
>>> registration time.
>>
>> This one should go before 4/4 in the series for bisectability.
>>
>> What is platform_device_register_simple("pseries-wdt",...) going to do
>> without the driver?
> 
> This is a chicken-and-egg problem without an obvious solution.  A
> device without a driver is a body without a soul.  A driver without a
> device is a ghost without a machine.
> 
> ... or something like that, don't quote me :)
> 
> Absent some very compelling reasoning, I would like to keep the
> current order.  It feels logical to me to keep the powerpc/pseries
> patches adjacent and prior to the watchdog driver patch.
> 
>>> Signed-off-by: Scott Cheloha <cheloha@linux.ibm.com>
>>> ---
>>>    .../watchdog/watchdog-parameters.rst          |  12 +
>>>    drivers/watchdog/Kconfig                      |   8 +
>>>    drivers/watchdog/Makefile                     |   1 +
>>>    drivers/watchdog/pseries-wdt.c                | 337 ++++++++++++++++++
>>>    4 files changed, 358 insertions(+)
>>>    create mode 100644 drivers/watchdog/pseries-wdt.c
>>>
>>> diff --git a/Documentation/watchdog/watchdog-parameters.rst b/Documentation/watchdog/watchdog-parameters.rst
>>> index 223c99361a30..4ffe725e796c 100644
>>> --- a/Documentation/watchdog/watchdog-parameters.rst
>>> +++ b/Documentation/watchdog/watchdog-parameters.rst
>>> @@ -425,6 +425,18 @@ pnx833x_wdt:
>>>    -------------------------------------------------
>>> +pseries-wdt:
>>> +    action:
>>> +	Action taken when watchdog expires: 1 (power off), 2 (restart),
>>> +	3 (dump and restart). (default=2)
>>> +    timeout:
>>> +	Initial watchdog timeout in seconds. (default=60)
>>> +    nowayout:
>>> +	Watchdog cannot be stopped once started.
>>> +	(default=kernel config parameter)
>>> +
>>> +-------------------------------------------------
>>> +
>>>    rc32434_wdt:
>>>        timeout:
>>>    	Watchdog timeout value, in seconds (default=20)
>>> diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
>>> index c4e82a8d863f..06b412603f3e 100644
>>> --- a/drivers/watchdog/Kconfig
>>> +++ b/drivers/watchdog/Kconfig
>>> @@ -1932,6 +1932,14 @@ config MEN_A21_WDT
>>>    # PPC64 Architecture
>>> +config PSERIES_WDT
>>> +	tristate "POWER Architecture Platform Watchdog Timer"
>>> +	depends on PPC_PSERIES
>>> +	select WATCHDOG_CORE
>>> +	help
>>> +	  Driver for virtual watchdog timers provided by PAPR
>>> +	  hypervisors (e.g. PowerVM, KVM).
>>> +
>>>    config WATCHDOG_RTAS
>>>    	tristate "RTAS watchdog"
>>>    	depends on PPC_RTAS
>>> diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
>>> index f7da867e8782..f35660409f17 100644
>>> --- a/drivers/watchdog/Makefile
>>> +++ b/drivers/watchdog/Makefile
>>> @@ -184,6 +184,7 @@ obj-$(CONFIG_BOOKE_WDT) += booke_wdt.o
>>>    obj-$(CONFIG_MEN_A21_WDT) += mena21_wdt.o
>>>    # PPC64 Architecture
>>> +obj-$(CONFIG_PSERIES_WDT) += pseries-wdt.o
>>>    obj-$(CONFIG_WATCHDOG_RTAS) += wdrtas.o
>>>    # S390 Architecture
>>> diff --git a/drivers/watchdog/pseries-wdt.c b/drivers/watchdog/pseries-wdt.c
>>> new file mode 100644
>>> index 000000000000..f41bc4d3b7a2
>>> --- /dev/null
>>> +++ b/drivers/watchdog/pseries-wdt.c
>>> @@ -0,0 +1,337 @@
>>> +// SPDX-License-Identifier: GPL-2.0-or-later
>>> +/*
>>> + * Copyright (c) 2022 International Business Machines, Inc.
>>> + */
>>> +
>>> +#include <linux/bitops.h>
>>> +#include <linux/kernel.h>
>>> +#include <linux/limits.h>
>>> +#include <linux/mod_devicetable.h>
>>> +#include <linux/module.h>
>>> +#include <linux/moduleparam.h>
>>> +#include <linux/platform_device.h>
>>> +#include <linux/watchdog.h>
>>> +
>>> +#define DRV_NAME "pseries-wdt"
>>> +
>>> +/*
>>> + * The PAPR's MSB->LSB bit ordering is 0->63.  These macros simplify
>>> + * defining bitfields as described in the PAPR without needing to
>>> + * transpose values to the more C-like 63->0 ordering.
>>> + */
>>> +#define SETFIELD(_v, _b, _e)	\
>>> +	(((unsigned long)(_v) << PPC_BITLSHIFT(_e)) & PPC_BITMASK((_b), (_e)))
>>> +#define GETFIELD(_v, _b, _e)	\
>>> +	(((unsigned long)(_v) & PPC_BITMASK((_b), (_e))) >> PPC_BITLSHIFT(_e))
>>> +
>>> +/*
>>> + * H_WATCHDOG Hypercall Input
>>> + *
>>> + * R4: "flags":
>>> + *
>>> + *     A 64-bit value structured as follows:
>>> + *
>>> + *         Bits 0-46: Reserved (must be zero).
>>> + */
>>> +#define PSERIES_WDTF_RESERVED	PPC_BITMASK(0, 46)
>>> +
>>> +/*
>>> + *         Bit 47: "leaveOtherWatchdogsRunningOnTimeout"
>>> + *
>>> + *             0  Stop outstanding watchdogs on timeout.
>>> + *             1  Leave outstanding watchdogs running on timeout.
>>> + */
>>> +#define PSERIES_WDTF_LEAVE_OTHER	PPC_BIT(47)
>>> +
>>> +/*
>>> + *         Bits 48-55: "operation"
>>> + *
>>> + *             0x01  Start Watchdog
>>> + *             0x02  Stop Watchdog
>>> + *             0x03  Query Watchdog Capabilities
>>> + *             0x04  Query Watchdog LPM Requirement
>>> + */
>>> +#define PSERIES_WDTF_OP(op)		SETFIELD((op), 48, 55)
>>> +#define PSERIES_WDTF_OP_START		PSERIES_WDTF_OP(0x1)
>>> +#define PSERIES_WDTF_OP_STOP		PSERIES_WDTF_OP(0x2)
>>> +#define PSERIES_WDTF_OP_QUERY		PSERIES_WDTF_OP(0x3)
>>> +#define PSERIES_WDTF_OP_QUERY_LPM	PSERIES_WDTF_OP(0x4)
>>> +
>>> +/*
>>> + *         Bits 56-63: "timeoutAction"
>>> + *
>>> + *             0x01  Hard poweroff
>>> + *             0x02  Hard restart
>>> + *             0x03  Dump restart
>>> + */
>>> +#define PSERIES_WDTF_ACTION(ac)			SETFIELD(ac, 56, 63)
>>> +#define PSERIES_WDTF_ACTION_HARD_POWEROFF	PSERIES_WDTF_ACTION(0x1)
>>> +#define PSERIES_WDTF_ACTION_HARD_RESTART	PSERIES_WDTF_ACTION(0x2)
>>> +#define PSERIES_WDTF_ACTION_DUMP_RESTART	PSERIES_WDTF_ACTION(0x3)
>>
>>> +
>>> +/*
>>> + * R5: "watchdogNumber":
>>> + *
>>> + *     The target watchdog.  Watchdog numbers are 1-based.  The
>>> + *     maximum supported watchdog number may be obtained via the
>>> + *     "Query Watchdog Capabilities" operation.
>>> + *
>>> + *     This input is ignored for the "Query Watchdog Capabilities"
>>> + *     operation.
>>> + *
>>> + * R6: "timeoutInMs":
>>> + *
>>> + *     The timeout in milliseconds.  The minimum supported timeout may
>>> + *     be obtained via the "Query Watchdog Capabilities" operation.
>>> + *
>>> + *     This input is ignored for the "Stop Watchdog", "Query Watchdog
>>> + *     Capabilities", and "Query Watchdog LPM Requirement" operations.
>>> + */
>>> +
>>> +/*
>>> + * H_WATCHDOG Hypercall Output
>>> + *
>>> + * R3: Return code
>>> + *
>>> + *     H_SUCCESS    The operation completed.
>>> + *
>>> + *     H_BUSY	    The hypervisor is too busy; retry the operation.
>>> + *
>>> + *     H_PARAMETER  The given "flags" are somehow invalid.  Either the
>>> + *                  "operation" or "timeoutAction" is invalid, or a
>>> + *                  reserved bit is set.
>>> + *
>>> + *     H_P2         The given "watchdogNumber" is zero or exceeds the
>>> + *                  supported maximum value.
>>> + *
>>> + *     H_P3         The given "timeoutInMs" is below the supported
>>> + *                  minimum value.
>>> + *
>>> + *     H_NOOP       The given "watchdogNumber" is already stopped.
>>> + *
>>> + *     H_HARDWARE   The operation failed for ineffable reasons.
>>> + *
>>> + *     H_FUNCTION   The H_WATCHDOG hypercall is not supported by this
>>> + *                  hypervisor.
>>> + *
>>> + * R4:
>>> + *
>>> + * - For the "Query Watchdog Capabilities" operation, a 64-bit
>>> + *   value structured as follows:
>>> + *
>>> + *       Bits  0-15: The minimum supported timeout in milliseconds.
>>> + *       Bits 16-31: The number of watchdogs supported.
>>> + *       Bits 32-63: Reserved.
>>> + */
>>> +#define PSERIES_WDTQ_MIN_TIMEOUT(cap)	GETFIELD((cap), 0, 15)
>>> +#define PSERIES_WDTQ_MAX_NUMBER(cap)	GETFIELD((cap), 16, 31)
>>> +#define PSERIES_WDTQ_RESERVED		PPC_BITMASK(32, 63)
>>
>> PSERIES_WDTQ_RESERVED is not needed as there is a comment above.
> 
> The *_RESERVED macros are from your draft patch.  I will remove them,
> though, we aren't actually using them.


Well, I was thinking of testing the reserved bits to be all zero but the 
interface may grow so yeah, drop it.


> I will trim up the big comment for v2, too.  It's just going to rot
> otherwise.  You need to read the PAPR to review the hypercalls,
> anyway.
> 
>>> +
>>> +/*
>>> + * - For the "Query Watchdog LPM Requirement" operation:
>>> + *
>>> + *       1  The given "watchdogNumber" must be stopped prior to
>>> + *          suspending.
>>> + *
>>> + *       2  The given "watchdogNumber" does not have to be stopped
>>> + *          prior to suspending.
>>> + */
>>> +#define PSERIES_WDTQL_MUST_STOP		1
>>> +#define PSERIES_WDTQL_NEED_NOT_STOP	2
>>> +
>>> +static unsigned long action = PSERIES_WDTF_ACTION_HARD_RESTART;
>>> +
>>> +static int action_get(char *buf, const struct kernel_param *kp)
>>> +{
>>> +	int val;
>>> +
>>> +	switch (action) {
>>> +	case PSERIES_WDTF_ACTION_HARD_POWEROFF:
>>> +		val = 1;
>>> +		break;
>>> +	case PSERIES_WDTF_ACTION_HARD_RESTART:
>>> +		val = 2;
>>> +		break;
>>> +	case PSERIES_WDTF_ACTION_DUMP_RESTART:
>>> +		val = 3;
>>> +		break;
>>> +	default:
>>> +		return -EINVAL;
>>> +	}
>>> +	return sprintf(buf, "%d\n", val);
>>> +}
>>> +
>>> +static int action_set(const char *val, const struct kernel_param *kp)
>>> +{
>>> +	int choice;
>>> +
>>> +	if (kstrtoint(val, 10, &choice))
>>> +		return -EINVAL;
>>> +	switch (choice) {
>>> +	case 1:
>>> +		action = PSERIES_WDTF_ACTION_HARD_POWEROFF;
>>> +		return 0;
>>> +	case 2:
>>> +		action = PSERIES_WDTF_ACTION_HARD_RESTART;
>>> +		return 0;
>>> +	case 3:
>>> +		action = PSERIES_WDTF_ACTION_DUMP_RESTART;
>>> +		return 0;
>>> +	}
>>> +	return -EINVAL;
>>> +}
>>> +
>>> +static const struct kernel_param_ops action_ops = {
>>> +	.get = action_get,
>>> +	.set = action_set,
>>> +};
>>> +module_param_cb(action, &action_ops, NULL, 0444);
>>
>> 0644 here and below?
> 
> I do not want these changing at runtime.  0444 is the right thing.


If the driver is compiled into vmlinux, then I'll have to have stuff in 
/etc/modprobe.d (or kernel cmdline) and reboot the kernel, annoying for 
testing/debugging. Not a big deal but I just do not see what 0444 buys 
compared to 0644 or (mostly used for watchdogs) 0.
diff mbox series

Patch

diff --git a/Documentation/watchdog/watchdog-parameters.rst b/Documentation/watchdog/watchdog-parameters.rst
index 223c99361a30..4ffe725e796c 100644
--- a/Documentation/watchdog/watchdog-parameters.rst
+++ b/Documentation/watchdog/watchdog-parameters.rst
@@ -425,6 +425,18 @@  pnx833x_wdt:
 
 -------------------------------------------------
 
+pseries-wdt:
+    action:
+	Action taken when watchdog expires: 1 (power off), 2 (restart),
+	3 (dump and restart). (default=2)
+    timeout:
+	Initial watchdog timeout in seconds. (default=60)
+    nowayout:
+	Watchdog cannot be stopped once started.
+	(default=kernel config parameter)
+
+-------------------------------------------------
+
 rc32434_wdt:
     timeout:
 	Watchdog timeout value, in seconds (default=20)
diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig
index c4e82a8d863f..06b412603f3e 100644
--- a/drivers/watchdog/Kconfig
+++ b/drivers/watchdog/Kconfig
@@ -1932,6 +1932,14 @@  config MEN_A21_WDT
 
 # PPC64 Architecture
 
+config PSERIES_WDT
+	tristate "POWER Architecture Platform Watchdog Timer"
+	depends on PPC_PSERIES
+	select WATCHDOG_CORE
+	help
+	  Driver for virtual watchdog timers provided by PAPR
+	  hypervisors (e.g. PowerVM, KVM).
+
 config WATCHDOG_RTAS
 	tristate "RTAS watchdog"
 	depends on PPC_RTAS
diff --git a/drivers/watchdog/Makefile b/drivers/watchdog/Makefile
index f7da867e8782..f35660409f17 100644
--- a/drivers/watchdog/Makefile
+++ b/drivers/watchdog/Makefile
@@ -184,6 +184,7 @@  obj-$(CONFIG_BOOKE_WDT) += booke_wdt.o
 obj-$(CONFIG_MEN_A21_WDT) += mena21_wdt.o
 
 # PPC64 Architecture
+obj-$(CONFIG_PSERIES_WDT) += pseries-wdt.o
 obj-$(CONFIG_WATCHDOG_RTAS) += wdrtas.o
 
 # S390 Architecture
diff --git a/drivers/watchdog/pseries-wdt.c b/drivers/watchdog/pseries-wdt.c
new file mode 100644
index 000000000000..f41bc4d3b7a2
--- /dev/null
+++ b/drivers/watchdog/pseries-wdt.c
@@ -0,0 +1,337 @@ 
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2022 International Business Machines, Inc.
+ */
+
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/limits.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/platform_device.h>
+#include <linux/watchdog.h>
+
+#define DRV_NAME "pseries-wdt"
+
+/*
+ * The PAPR's MSB->LSB bit ordering is 0->63.  These macros simplify
+ * defining bitfields as described in the PAPR without needing to
+ * transpose values to the more C-like 63->0 ordering.
+ */
+#define SETFIELD(_v, _b, _e)	\
+	(((unsigned long)(_v) << PPC_BITLSHIFT(_e)) & PPC_BITMASK((_b), (_e)))
+#define GETFIELD(_v, _b, _e)	\
+	(((unsigned long)(_v) & PPC_BITMASK((_b), (_e))) >> PPC_BITLSHIFT(_e))
+
+/*
+ * H_WATCHDOG Hypercall Input
+ *
+ * R4: "flags":
+ *
+ *     A 64-bit value structured as follows:
+ *
+ *         Bits 0-46: Reserved (must be zero).
+ */
+#define PSERIES_WDTF_RESERVED	PPC_BITMASK(0, 46)
+
+/*
+ *         Bit 47: "leaveOtherWatchdogsRunningOnTimeout"
+ *
+ *             0  Stop outstanding watchdogs on timeout.
+ *             1  Leave outstanding watchdogs running on timeout.
+ */
+#define PSERIES_WDTF_LEAVE_OTHER	PPC_BIT(47)
+
+/*
+ *         Bits 48-55: "operation"
+ *
+ *             0x01  Start Watchdog
+ *             0x02  Stop Watchdog
+ *             0x03  Query Watchdog Capabilities
+ *             0x04  Query Watchdog LPM Requirement
+ */
+#define PSERIES_WDTF_OP(op)		SETFIELD((op), 48, 55)
+#define PSERIES_WDTF_OP_START		PSERIES_WDTF_OP(0x1)
+#define PSERIES_WDTF_OP_STOP		PSERIES_WDTF_OP(0x2)
+#define PSERIES_WDTF_OP_QUERY		PSERIES_WDTF_OP(0x3)
+#define PSERIES_WDTF_OP_QUERY_LPM	PSERIES_WDTF_OP(0x4)
+
+/*
+ *         Bits 56-63: "timeoutAction"
+ *
+ *             0x01  Hard poweroff
+ *             0x02  Hard restart
+ *             0x03  Dump restart
+ */
+#define PSERIES_WDTF_ACTION(ac)			SETFIELD(ac, 56, 63)
+#define PSERIES_WDTF_ACTION_HARD_POWEROFF	PSERIES_WDTF_ACTION(0x1)
+#define PSERIES_WDTF_ACTION_HARD_RESTART	PSERIES_WDTF_ACTION(0x2)
+#define PSERIES_WDTF_ACTION_DUMP_RESTART	PSERIES_WDTF_ACTION(0x3)
+
+/*
+ * R5: "watchdogNumber":
+ *
+ *     The target watchdog.  Watchdog numbers are 1-based.  The
+ *     maximum supported watchdog number may be obtained via the
+ *     "Query Watchdog Capabilities" operation.
+ *
+ *     This input is ignored for the "Query Watchdog Capabilities"
+ *     operation.
+ *
+ * R6: "timeoutInMs":
+ *
+ *     The timeout in milliseconds.  The minimum supported timeout may
+ *     be obtained via the "Query Watchdog Capabilities" operation.
+ *
+ *     This input is ignored for the "Stop Watchdog", "Query Watchdog
+ *     Capabilities", and "Query Watchdog LPM Requirement" operations.
+ */
+
+/*
+ * H_WATCHDOG Hypercall Output
+ *
+ * R3: Return code
+ *
+ *     H_SUCCESS    The operation completed.
+ *
+ *     H_BUSY	    The hypervisor is too busy; retry the operation.
+ *
+ *     H_PARAMETER  The given "flags" are somehow invalid.  Either the
+ *                  "operation" or "timeoutAction" is invalid, or a
+ *                  reserved bit is set.
+ *
+ *     H_P2         The given "watchdogNumber" is zero or exceeds the
+ *                  supported maximum value.
+ *
+ *     H_P3         The given "timeoutInMs" is below the supported
+ *                  minimum value.
+ *
+ *     H_NOOP       The given "watchdogNumber" is already stopped.
+ *
+ *     H_HARDWARE   The operation failed for ineffable reasons.
+ *
+ *     H_FUNCTION   The H_WATCHDOG hypercall is not supported by this
+ *                  hypervisor.
+ *
+ * R4:
+ *
+ * - For the "Query Watchdog Capabilities" operation, a 64-bit
+ *   value structured as follows:
+ *
+ *       Bits  0-15: The minimum supported timeout in milliseconds.
+ *       Bits 16-31: The number of watchdogs supported.
+ *       Bits 32-63: Reserved.
+ */
+#define PSERIES_WDTQ_MIN_TIMEOUT(cap)	GETFIELD((cap), 0, 15)
+#define PSERIES_WDTQ_MAX_NUMBER(cap)	GETFIELD((cap), 16, 31)
+#define PSERIES_WDTQ_RESERVED		PPC_BITMASK(32, 63)
+
+/*
+ * - For the "Query Watchdog LPM Requirement" operation:
+ *
+ *       1  The given "watchdogNumber" must be stopped prior to
+ *          suspending.
+ *
+ *       2  The given "watchdogNumber" does not have to be stopped
+ *          prior to suspending.
+ */
+#define PSERIES_WDTQL_MUST_STOP		1
+#define PSERIES_WDTQL_NEED_NOT_STOP	2
+
+static unsigned long action = PSERIES_WDTF_ACTION_HARD_RESTART;
+
+static int action_get(char *buf, const struct kernel_param *kp)
+{
+	int val;
+
+	switch (action) {
+	case PSERIES_WDTF_ACTION_HARD_POWEROFF:
+		val = 1;
+		break;
+	case PSERIES_WDTF_ACTION_HARD_RESTART:
+		val = 2;
+		break;
+	case PSERIES_WDTF_ACTION_DUMP_RESTART:
+		val = 3;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return sprintf(buf, "%d\n", val);
+}
+
+static int action_set(const char *val, const struct kernel_param *kp)
+{
+	int choice;
+
+	if (kstrtoint(val, 10, &choice))
+		return -EINVAL;
+	switch (choice) {
+	case 1:
+		action = PSERIES_WDTF_ACTION_HARD_POWEROFF;
+		return 0;
+	case 2:
+		action = PSERIES_WDTF_ACTION_HARD_RESTART;
+		return 0;
+	case 3:
+		action = PSERIES_WDTF_ACTION_DUMP_RESTART;
+		return 0;
+	}
+	return -EINVAL;
+}
+
+static const struct kernel_param_ops action_ops = {
+	.get = action_get,
+	.set = action_set,
+};
+module_param_cb(action, &action_ops, NULL, 0444);
+MODULE_PARM_DESC(action, "Action taken when watchdog expires (default=2)");
+
+static bool nowayout = WATCHDOG_NOWAYOUT;
+module_param(nowayout, bool, 0444);
+MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started (default="
+		 __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
+
+#define WATCHDOG_TIMEOUT 60
+static unsigned int timeout = WATCHDOG_TIMEOUT;
+module_param(timeout, uint, 0444);
+MODULE_PARM_DESC(timeout, "Initial watchdog timeout in seconds (default="
+		 __MODULE_STRING(WATCHDOG_TIMEOUT) ")");
+
+struct pseries_wdt {
+	struct watchdog_device wd;
+	unsigned long num;		/* Watchdog numbers are 1-based */
+};
+
+static int pseries_wdt_start(struct watchdog_device *wdd)
+{
+	struct device *dev = wdd->parent;
+	struct pseries_wdt *pw = watchdog_get_drvdata(wdd);
+	unsigned long flags, msecs;
+	long rc;
+
+	flags = action | PSERIES_WDTF_OP_START;
+	msecs = wdd->timeout * 1000UL;
+	rc = plpar_hcall_norets(H_WATCHDOG, flags, pw->num, msecs);
+	if (rc != H_SUCCESS) {
+		dev_crit(dev, "H_WATCHDOG: %ld: failed to start timer %lu",
+			 rc, pw->num);
+		return -EIO;
+	}
+	return 0;
+}
+
+static int pseries_wdt_stop(struct watchdog_device *wdd)
+{
+	struct device *dev = wdd->parent;
+	struct pseries_wdt *pw = watchdog_get_drvdata(wdd);
+	long rc;
+
+	rc = plpar_hcall_norets(H_WATCHDOG, PSERIES_WDTF_OP_STOP, pw->num);
+	if (rc != H_SUCCESS && rc != H_NOOP) {
+		dev_crit(dev, "H_WATCHDOG: %ld: failed to stop timer %lu",
+			 rc, pw->num);
+		return -EIO;
+	}
+	return 0;
+}
+
+static struct watchdog_info pseries_wdt_info = {
+	.identity = DRV_NAME,
+	.options = WDIOF_KEEPALIVEPING | WDIOF_MAGICCLOSE | WDIOF_SETTIMEOUT
+	    | WDIOF_PRETIMEOUT,
+};
+
+static const struct watchdog_ops pseries_wdt_ops = {
+	.owner = THIS_MODULE,
+	.start = pseries_wdt_start,
+	.stop = pseries_wdt_stop,
+};
+
+static int pseries_wdt_probe(struct platform_device *pdev)
+{
+	unsigned long ret[PLPAR_HCALL_BUFSIZE] = { 0 };
+	unsigned long cap, min_timeout_ms;
+	long rc;
+	struct pseries_wdt *pw;
+	int err;
+
+	rc = plpar_hcall(H_WATCHDOG, ret, PSERIES_WDTF_OP_QUERY);
+	if (rc != H_SUCCESS)
+		return rc == H_FUNCTION ? -ENODEV : -EIO;
+	cap = ret[0];
+
+	pw = devm_kzalloc(&pdev->dev, sizeof(*pw), GFP_KERNEL);
+	if (!pw)
+		return -ENOMEM;
+
+	/*
+	 * Assume watchdogNumber 1 for now.  If we ever support
+	 * multiple timers we will need to devise a way to choose a
+	 * distinct watchdogNumber for each platform device at device
+	 * registration time.
+	 */
+	pw->num = 1;
+
+	pw->wd.parent = &pdev->dev;
+	pw->wd.info = &pseries_wdt_info;
+	pw->wd.ops = &pseries_wdt_ops;
+	min_timeout_ms = PSERIES_WDTQ_MIN_TIMEOUT(cap);
+	pw->wd.min_timeout = roundup(min_timeout_ms, 1000) / 1000;
+	pw->wd.max_timeout = UINT_MAX;
+	watchdog_init_timeout(&pw->wd, timeout, NULL);
+	watchdog_set_nowayout(&pw->wd, nowayout);
+	watchdog_stop_on_reboot(&pw->wd);
+	watchdog_stop_on_unregister(&pw->wd);
+	watchdog_set_drvdata(&pw->wd, pw);
+
+	err = devm_watchdog_register_device(&pdev->dev, &pw->wd);
+	if (err)
+		return err;
+
+	platform_set_drvdata(pdev, &pw->wd);
+
+	return 0;
+}
+
+static int pseries_wdt_suspend(struct platform_device *pdev, pm_message_t state)
+{
+	struct watchdog_device *wd = platform_get_drvdata(pdev);
+
+	if (watchdog_active(wd))
+		return pseries_wdt_stop(wd);
+	return 0;
+}
+
+static int pseries_wdt_resume(struct platform_device *pdev)
+{
+	struct watchdog_device *wd = platform_get_drvdata(pdev);
+
+	if (watchdog_active(wd))
+		return pseries_wdt_start(wd);
+	return 0;
+}
+
+static const struct platform_device_id pseries_wdt_id[] = {
+	{ .name = "pseries-wdt" },
+	{}
+};
+MODULE_DEVICE_TABLE(platform, pseries_wdt_id);
+
+static struct platform_driver pseries_wdt_driver = {
+	.driver = {
+		.name = DRV_NAME,
+		.owner = THIS_MODULE,
+	},
+	.id_table = pseries_wdt_id,
+	.probe = pseries_wdt_probe,
+	.resume = pseries_wdt_resume,
+	.suspend = pseries_wdt_suspend,
+};
+module_platform_driver(pseries_wdt_driver);
+
+MODULE_AUTHOR("Alexey Kardashevskiy <aik@ozlabs.ru>");
+MODULE_AUTHOR("Scott Cheloha <cheloha@linux.ibm.com>");
+MODULE_DESCRIPTION("POWER Architecture Platform Watchdog Driver");
+MODULE_LICENSE("GPL");