diff mbox series

[v3,6/8] vfio: Invoke runtime PM API for IOCTL request

Message ID 20220425092615.10133-7-abhsahu@nvidia.com
State New
Headers show
Series vfio/pci: power management changes | expand

Commit Message

Abhishek Sahu April 25, 2022, 9:26 a.m. UTC
The vfio/pci driver will have runtime power management support where the
user can put the device low power state and then PCI devices can go into
the D3cold state. If the device is in low power state and user issues any
IOCTL, then the device should be moved out of low power state first. Once
the IOCTL is serviced, then it can go into low power state again. The
runtime PM framework manages this with help of usage count. One option
was to add the runtime PM related API's inside vfio/pci driver but some
IOCTL (like VFIO_DEVICE_FEATURE) can follow a different path and more
IOCTL can be added in the future. Also, the runtime PM will be
added for vfio/pci based drivers variant currently but the other vfio
based drivers can use the same in the future. So, this patch adds the
runtime calls runtime related API in the top level IOCTL function itself.

For the vfio drivers which do not have runtime power management support
currently, the runtime PM API's won't be invoked. Only for vfio/pci
based drivers currently, the runtime PM API's will be invoked to increment
and decrement the usage count. Taking this usage count incremented while
servicing IOCTL will make sure that user won't put the device into low
power state when any other IOCTL is being serviced in parallel.

Signed-off-by: Abhishek Sahu <abhsahu@nvidia.com>
---
 drivers/vfio/vfio.c | 44 +++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 41 insertions(+), 3 deletions(-)

Comments

Alex Williamson May 4, 2022, 7:42 p.m. UTC | #1
On Mon, 25 Apr 2022 14:56:13 +0530
Abhishek Sahu <abhsahu@nvidia.com> wrote:

> The vfio/pci driver will have runtime power management support where the
> user can put the device low power state and then PCI devices can go into
> the D3cold state. If the device is in low power state and user issues any
> IOCTL, then the device should be moved out of low power state first. Once
> the IOCTL is serviced, then it can go into low power state again. The
> runtime PM framework manages this with help of usage count. One option
> was to add the runtime PM related API's inside vfio/pci driver but some
> IOCTL (like VFIO_DEVICE_FEATURE) can follow a different path and more
> IOCTL can be added in the future. Also, the runtime PM will be
> added for vfio/pci based drivers variant currently but the other vfio
> based drivers can use the same in the future. So, this patch adds the
> runtime calls runtime related API in the top level IOCTL function itself.
> 
> For the vfio drivers which do not have runtime power management support
> currently, the runtime PM API's won't be invoked. Only for vfio/pci
> based drivers currently, the runtime PM API's will be invoked to increment
> and decrement the usage count. Taking this usage count incremented while
> servicing IOCTL will make sure that user won't put the device into low
> power state when any other IOCTL is being serviced in parallel.
> 
> Signed-off-by: Abhishek Sahu <abhsahu@nvidia.com>
> ---
>  drivers/vfio/vfio.c | 44 +++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 41 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> index a4555014bd1e..4e65a127744e 100644
> --- a/drivers/vfio/vfio.c
> +++ b/drivers/vfio/vfio.c
> @@ -32,6 +32,7 @@
>  #include <linux/vfio.h>
>  #include <linux/wait.h>
>  #include <linux/sched/signal.h>
> +#include <linux/pm_runtime.h>
>  #include "vfio.h"
>  
>  #define DRIVER_VERSION	"0.3"
> @@ -1536,6 +1537,30 @@ static const struct file_operations vfio_group_fops = {
>  	.release	= vfio_group_fops_release,
>  };
>  
> +/*
> + * Wrapper around pm_runtime_resume_and_get().
> + * Return 0, if driver power management callbacks are not present i.e. the driver is not

Mind the gratuitous long comment line here.

> + * using runtime power management.
> + * Return 1 upon success, otherwise -errno

Changing semantics vs the thing we're wrapping, why not provide a
wrapper for the `put` as well to avoid?  The only cases where we return
zero are just as easy to detect on the other side.

> + */
> +static inline int vfio_device_pm_runtime_get(struct device *dev)

Given some of Jason's recent series, this should probably just accept a
vfio_device.

> +{
> +#ifdef CONFIG_PM
> +	int ret;
> +
> +	if (!dev->driver || !dev->driver->pm)
> +		return 0;
> +
> +	ret = pm_runtime_resume_and_get(dev);
> +	if (ret < 0)
> +		return ret;
> +
> +	return 1;
> +#else
> +	return 0;
> +#endif
> +}
> +
>  /*
>   * VFIO Device fd
>   */
> @@ -1845,15 +1870,28 @@ static long vfio_device_fops_unl_ioctl(struct file *filep,
>  				       unsigned int cmd, unsigned long arg)
>  {
>  	struct vfio_device *device = filep->private_data;
> +	int pm_ret, ret = 0;
> +
> +	pm_ret = vfio_device_pm_runtime_get(device->dev);
> +	if (pm_ret < 0)
> +		return pm_ret;

I wonder if we might simply want to mask pm errors behind -EIO, maybe
with a rate limited dev_info().  My concern would be that we might mask
errnos that userspace has come to expect for certain ioctls.  Thanks,

Alex

>  
>  	switch (cmd) {
>  	case VFIO_DEVICE_FEATURE:
> -		return vfio_ioctl_device_feature(device, (void __user *)arg);
> +		ret = vfio_ioctl_device_feature(device, (void __user *)arg);
> +		break;
>  	default:
>  		if (unlikely(!device->ops->ioctl))
> -			return -EINVAL;
> -		return device->ops->ioctl(device, cmd, arg);
> +			ret = -EINVAL;
> +		else
> +			ret = device->ops->ioctl(device, cmd, arg);
> +		break;
>  	}
> +
> +	if (pm_ret)
> +		pm_runtime_put(device->dev);
> +
> +	return ret;
>  }
>  
>  static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
Abhishek Sahu May 5, 2022, 9:40 a.m. UTC | #2
On 5/5/2022 1:12 AM, Alex Williamson wrote:
> On Mon, 25 Apr 2022 14:56:13 +0530
> Abhishek Sahu <abhsahu@nvidia.com> wrote:
> 
>> The vfio/pci driver will have runtime power management support where the
>> user can put the device low power state and then PCI devices can go into
>> the D3cold state. If the device is in low power state and user issues any
>> IOCTL, then the device should be moved out of low power state first. Once
>> the IOCTL is serviced, then it can go into low power state again. The
>> runtime PM framework manages this with help of usage count. One option
>> was to add the runtime PM related API's inside vfio/pci driver but some
>> IOCTL (like VFIO_DEVICE_FEATURE) can follow a different path and more
>> IOCTL can be added in the future. Also, the runtime PM will be
>> added for vfio/pci based drivers variant currently but the other vfio
>> based drivers can use the same in the future. So, this patch adds the
>> runtime calls runtime related API in the top level IOCTL function itself.
>>
>> For the vfio drivers which do not have runtime power management support
>> currently, the runtime PM API's won't be invoked. Only for vfio/pci
>> based drivers currently, the runtime PM API's will be invoked to increment
>> and decrement the usage count. Taking this usage count incremented while
>> servicing IOCTL will make sure that user won't put the device into low
>> power state when any other IOCTL is being serviced in parallel.
>>
>> Signed-off-by: Abhishek Sahu <abhsahu@nvidia.com>
>> ---
>>  drivers/vfio/vfio.c | 44 +++++++++++++++++++++++++++++++++++++++++---
>>  1 file changed, 41 insertions(+), 3 deletions(-)
>>
>> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
>> index a4555014bd1e..4e65a127744e 100644
>> --- a/drivers/vfio/vfio.c
>> +++ b/drivers/vfio/vfio.c
>> @@ -32,6 +32,7 @@
>>  #include <linux/vfio.h>
>>  #include <linux/wait.h>
>>  #include <linux/sched/signal.h>
>> +#include <linux/pm_runtime.h>
>>  #include "vfio.h"
>>  
>>  #define DRIVER_VERSION	"0.3"
>> @@ -1536,6 +1537,30 @@ static const struct file_operations vfio_group_fops = {
>>  	.release	= vfio_group_fops_release,
>>  };
>>  
>> +/*
>> + * Wrapper around pm_runtime_resume_and_get().
>> + * Return 0, if driver power management callbacks are not present i.e. the driver is not
> 
> Mind the gratuitous long comment line here.
> 
 
 Thanks Alex.
 
 That was a miss. I will fix this.
 
>> + * using runtime power management.
>> + * Return 1 upon success, otherwise -errno
> 
> Changing semantics vs the thing we're wrapping, why not provide a
> wrapper for the `put` as well to avoid?  The only cases where we return
> zero are just as easy to detect on the other side.
> 

 Yes. Using wrapper function for put is better option.
 I will make the changes.

>> + */
>> +static inline int vfio_device_pm_runtime_get(struct device *dev)
> 
> Given some of Jason's recent series, this should probably just accept a
> vfio_device.
> 

 Sorry. I didn't get this part.

 Do I need to change it to

 static inline int vfio_device_pm_runtime_get(struct vfio_device *device)
 {
    struct device *dev = device->dev;
    ...
 }

>> +{
>> +#ifdef CONFIG_PM
>> +	int ret;
>> +
>> +	if (!dev->driver || !dev->driver->pm)
>> +		return 0;
>> +
>> +	ret = pm_runtime_resume_and_get(dev);
>> +	if (ret < 0)
>> +		return ret;
>> +
>> +	return 1;
>> +#else
>> +	return 0;
>> +#endif
>> +}
>> +
>>  /*
>>   * VFIO Device fd
>>   */
>> @@ -1845,15 +1870,28 @@ static long vfio_device_fops_unl_ioctl(struct file *filep,
>>  				       unsigned int cmd, unsigned long arg)
>>  {
>>  	struct vfio_device *device = filep->private_data;
>> +	int pm_ret, ret = 0;
>> +
>> +	pm_ret = vfio_device_pm_runtime_get(device->dev);
>> +	if (pm_ret < 0)
>> +		return pm_ret;
> 
> I wonder if we might simply want to mask pm errors behind -EIO, maybe
> with a rate limited dev_info().  My concern would be that we might mask
> errnos that userspace has come to expect for certain ioctls.  Thanks,
> 
> Alex
> 

  I need to do something like following. Correct ?

  ret = vfio_device_pm_runtime_get(device);
  if (ret < 0) {
     dev_info_ratelimited(device->dev, "vfio: runtime resume failed %d\n", ret);
     return -EIO;
  }
  
  Regards,
  Abhishek
 
>>  
>>  	switch (cmd) {
>>  	case VFIO_DEVICE_FEATURE:
>> -		return vfio_ioctl_device_feature(device, (void __user *)arg);
>> +		ret = vfio_ioctl_device_feature(device, (void __user *)arg);
>> +		break;
>>  	default:
>>  		if (unlikely(!device->ops->ioctl))
>> -			return -EINVAL;
>> -		return device->ops->ioctl(device, cmd, arg);
>> +			ret = -EINVAL;
>> +		else
>> +			ret = device->ops->ioctl(device, cmd, arg);
>> +		break;
>>  	}
>> +
>> +	if (pm_ret)
>> +		pm_runtime_put(device->dev);
>> +
>> +	return ret;
>>  }
>>  
>>  static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,
>
Alex Williamson May 9, 2022, 10:30 p.m. UTC | #3
On Thu, 5 May 2022 15:10:43 +0530
Abhishek Sahu <abhsahu@nvidia.com> wrote:

> On 5/5/2022 1:12 AM, Alex Williamson wrote:
> > On Mon, 25 Apr 2022 14:56:13 +0530
> > Abhishek Sahu <abhsahu@nvidia.com> wrote:
> >   
> >> The vfio/pci driver will have runtime power management support where the
> >> user can put the device low power state and then PCI devices can go into
> >> the D3cold state. If the device is in low power state and user issues any
> >> IOCTL, then the device should be moved out of low power state first. Once
> >> the IOCTL is serviced, then it can go into low power state again. The
> >> runtime PM framework manages this with help of usage count. One option
> >> was to add the runtime PM related API's inside vfio/pci driver but some
> >> IOCTL (like VFIO_DEVICE_FEATURE) can follow a different path and more
> >> IOCTL can be added in the future. Also, the runtime PM will be
> >> added for vfio/pci based drivers variant currently but the other vfio
> >> based drivers can use the same in the future. So, this patch adds the
> >> runtime calls runtime related API in the top level IOCTL function itself.
> >>
> >> For the vfio drivers which do not have runtime power management support
> >> currently, the runtime PM API's won't be invoked. Only for vfio/pci
> >> based drivers currently, the runtime PM API's will be invoked to increment
> >> and decrement the usage count. Taking this usage count incremented while
> >> servicing IOCTL will make sure that user won't put the device into low
> >> power state when any other IOCTL is being serviced in parallel.
> >>
> >> Signed-off-by: Abhishek Sahu <abhsahu@nvidia.com>
> >> ---
> >>  drivers/vfio/vfio.c | 44 +++++++++++++++++++++++++++++++++++++++++---
> >>  1 file changed, 41 insertions(+), 3 deletions(-)
> >>
> >> diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
> >> index a4555014bd1e..4e65a127744e 100644
> >> --- a/drivers/vfio/vfio.c
> >> +++ b/drivers/vfio/vfio.c
> >> @@ -32,6 +32,7 @@
> >>  #include <linux/vfio.h>
> >>  #include <linux/wait.h>
> >>  #include <linux/sched/signal.h>
> >> +#include <linux/pm_runtime.h>
> >>  #include "vfio.h"
> >>  
> >>  #define DRIVER_VERSION	"0.3"
> >> @@ -1536,6 +1537,30 @@ static const struct file_operations vfio_group_fops = {
> >>  	.release	= vfio_group_fops_release,
> >>  };
> >>  
> >> +/*
> >> + * Wrapper around pm_runtime_resume_and_get().
> >> + * Return 0, if driver power management callbacks are not present i.e. the driver is not  
> > 
> > Mind the gratuitous long comment line here.
> >   
>  
>  Thanks Alex.
>  
>  That was a miss. I will fix this.
>  
> >> + * using runtime power management.
> >> + * Return 1 upon success, otherwise -errno  
> > 
> > Changing semantics vs the thing we're wrapping, why not provide a
> > wrapper for the `put` as well to avoid?  The only cases where we return
> > zero are just as easy to detect on the other side.
> >   
> 
>  Yes. Using wrapper function for put is better option.
>  I will make the changes.
> 
> >> + */
> >> +static inline int vfio_device_pm_runtime_get(struct device *dev)  
> > 
> > Given some of Jason's recent series, this should probably just accept a
> > vfio_device.
> >   
> 
>  Sorry. I didn't get this part.
> 
>  Do I need to change it to
> 
>  static inline int vfio_device_pm_runtime_get(struct vfio_device *device)
>  {
>     struct device *dev = device->dev;
>     ...
>  }

Yes.

> >> +{
> >> +#ifdef CONFIG_PM
> >> +	int ret;
> >> +
> >> +	if (!dev->driver || !dev->driver->pm)
> >> +		return 0;

I'm also wondering how we could ever get here with dev->driver == NULL.
If that were actually possible, the above would at best be racy.  It
also really seems like there ought to be a better test than the
driver->pm pointer to check if runtime pm is enabled, but I haven't
spotted it yet.

> >> +
> >> +	ret = pm_runtime_resume_and_get(dev);
> >> +	if (ret < 0)
> >> +		return ret;
> >> +
> >> +	return 1;
> >> +#else
> >> +	return 0;
> >> +#endif
> >> +}
> >> +
> >>  /*
> >>   * VFIO Device fd
> >>   */
> >> @@ -1845,15 +1870,28 @@ static long vfio_device_fops_unl_ioctl(struct file *filep,
> >>  				       unsigned int cmd, unsigned long arg)
> >>  {
> >>  	struct vfio_device *device = filep->private_data;
> >> +	int pm_ret, ret = 0;
> >> +
> >> +	pm_ret = vfio_device_pm_runtime_get(device->dev);
> >> +	if (pm_ret < 0)
> >> +		return pm_ret;  
> > 
> > I wonder if we might simply want to mask pm errors behind -EIO, maybe
> > with a rate limited dev_info().  My concern would be that we might mask
> > errnos that userspace has come to expect for certain ioctls.  Thanks,
> > 
> > Alex
> >   
> 
>   I need to do something like following. Correct ?
> 
>   ret = vfio_device_pm_runtime_get(device);
>   if (ret < 0) {
>      dev_info_ratelimited(device->dev, "vfio: runtime resume failed %d\n", ret);
>      return -EIO;
>   }

Yeah, though I'd welcome other thoughts here.  I don't necessarily like
the idea of squashing the errno, but at the same time, if
pm_runtime_resume_and_get() returns -EINVAL on user ioctl, that's not
really describing an invalid parameter relative to the ioctl itself.
Thanks,

Alex
diff mbox series

Patch

diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c
index a4555014bd1e..4e65a127744e 100644
--- a/drivers/vfio/vfio.c
+++ b/drivers/vfio/vfio.c
@@ -32,6 +32,7 @@ 
 #include <linux/vfio.h>
 #include <linux/wait.h>
 #include <linux/sched/signal.h>
+#include <linux/pm_runtime.h>
 #include "vfio.h"
 
 #define DRIVER_VERSION	"0.3"
@@ -1536,6 +1537,30 @@  static const struct file_operations vfio_group_fops = {
 	.release	= vfio_group_fops_release,
 };
 
+/*
+ * Wrapper around pm_runtime_resume_and_get().
+ * Return 0, if driver power management callbacks are not present i.e. the driver is not
+ * using runtime power management.
+ * Return 1 upon success, otherwise -errno
+ */
+static inline int vfio_device_pm_runtime_get(struct device *dev)
+{
+#ifdef CONFIG_PM
+	int ret;
+
+	if (!dev->driver || !dev->driver->pm)
+		return 0;
+
+	ret = pm_runtime_resume_and_get(dev);
+	if (ret < 0)
+		return ret;
+
+	return 1;
+#else
+	return 0;
+#endif
+}
+
 /*
  * VFIO Device fd
  */
@@ -1845,15 +1870,28 @@  static long vfio_device_fops_unl_ioctl(struct file *filep,
 				       unsigned int cmd, unsigned long arg)
 {
 	struct vfio_device *device = filep->private_data;
+	int pm_ret, ret = 0;
+
+	pm_ret = vfio_device_pm_runtime_get(device->dev);
+	if (pm_ret < 0)
+		return pm_ret;
 
 	switch (cmd) {
 	case VFIO_DEVICE_FEATURE:
-		return vfio_ioctl_device_feature(device, (void __user *)arg);
+		ret = vfio_ioctl_device_feature(device, (void __user *)arg);
+		break;
 	default:
 		if (unlikely(!device->ops->ioctl))
-			return -EINVAL;
-		return device->ops->ioctl(device, cmd, arg);
+			ret = -EINVAL;
+		else
+			ret = device->ops->ioctl(device, cmd, arg);
+		break;
 	}
+
+	if (pm_ret)
+		pm_runtime_put(device->dev);
+
+	return ret;
 }
 
 static ssize_t vfio_device_fops_read(struct file *filep, char __user *buf,