diff mbox

[V3,3/3] mfd: palmas: Add support for optional wakeup

Message ID 1411067086-16613-4-git-send-email-nm@ti.com
State New
Headers show

Commit Message

Nishanth Menon Sept. 18, 2014, 7:04 p.m. UTC
With the recent pinctrl-single changes, omaps can treat wake-up events
from deeper power states as interrupts.

This is to handle the case where the system needs two interrupt
sources when the SoC is in deep sleep (one to exit from a deep power mode
such as sleep, and the other from the module handling the actual event
during system active state). This is not the same as threaded interrupts,
as the wakeup interrupt source is used only as part of the deeper power
saving mode.

Let's add support for the optional second interrupt for wake-up
events. The SoC can then wake up and handle the event using its
regular handler.

This is similar in approach to commit 2a0b965cfb6e ("serial: omap: Add
support for optional wake-up")

Signed-off-by: Nishanth Menon <nm@ti.com>
---

V3: updates based on Thomas's comments.
V2: http://marc.info/?t=140995045000003&r=1&w=2

 drivers/mfd/palmas.c       |   62 ++++++++++++++++++++++++++++++++++++++++++++
 include/linux/mfd/palmas.h |    2 ++
 2 files changed, 64 insertions(+)

Comments

Thomas Gleixner Sept. 19, 2014, 12:57 a.m. UTC | #1
On Thu, 18 Sep 2014, Nishanth Menon wrote:
> +static irqreturn_t palmas_wake_irq(int irq, void *_palmas)
> +{
> +	/*
> +	 * Return Not handled so that interrupt is disabled.

And how is that interrupt disabled by returning IRQ_NONE? You mean it
gets disabled after it got reraised 100000 times and the spurious
detector kills it?

> +	 * Level event ensures that the event is eventually handled
> +	 * by the appropriate chip handler already registered

Eventually handled? So eventually it's not handled?

> +	 */
> +	return IRQ_NONE;
> +}
> +
>  int palmas_ext_control_req_config(struct palmas *palmas,
>  	enum palmas_external_requestor_id id,  int ext_ctrl, bool enable)
>  {
> @@ -409,6 +420,7 @@ static void palmas_dt_to_pdata(struct i2c_client *i2c,
>  		pdata->mux_from_pdata = 1;
>  		pdata->pad2 = prop;
>  	}
> +	pdata->wakeirq = irq_of_parse_and_map(node, 1);
>  
>  	/* The default for this register is all masked */
>  	ret = of_property_read_u32(node, "ti,power-ctrl", &prop);
> @@ -521,6 +533,7 @@ static int palmas_i2c_probe(struct i2c_client *i2c,
>  	i2c_set_clientdata(i2c, palmas);
>  	palmas->dev = &i2c->dev;
>  	palmas->irq = i2c->irq;
> +	palmas->wakeirq = pdata->wakeirq;
>  
>  	match = of_match_device(of_palmas_match_tbl, &i2c->dev);
>  
> @@ -587,6 +600,25 @@ static int palmas_i2c_probe(struct i2c_client *i2c,
>  	if (ret < 0)
>  		goto err_i2c;
>  
> +	if (!palmas->wakeirq)
> +		goto no_wake_irq;
> +
> +	ret = devm_request_irq(palmas->dev, palmas->wakeirq,
> +			       palmas_wake_irq,
> +			       pdata->irq_flags,
> +			       dev_name(palmas->dev),
> +			       &palmas);
> +	if (ret < 0) {
> +		dev_err(palmas->dev, "Invalid wakeirq(%d) (res: %d), skiping\n",
> +			palmas->wakeirq, ret);
> +		palmas->wakeirq = 0;
> +	} else {
> +		/* We use wakeirq only during suspend-resume path */
> +		device_set_wakeup_capable(palmas->dev, true);
> +		disable_irq_nosync(palmas->wakeirq);

Urgh. Why nosync? And why do you want to do that at all?

	irq_set_status_flags(irq, IRQ_NOAUTOEN);

Is what you want to set before requesting the irq.


> +	}
> +
> +no_wake_irq:
>  no_irq:
>  	slave = PALMAS_BASE_TO_SLAVE(PALMAS_PU_PD_OD_BASE);
>  	addr = PALMAS_BASE_TO_REG(PALMAS_PU_PD_OD_BASE,
> @@ -706,6 +738,34 @@ static int palmas_i2c_remove(struct i2c_client *i2c)
>  	return 0;
>  }
>  
> +static int palmas_i2c_suspend(struct i2c_client *i2c,  pm_message_t mesg)
> +{
> +	struct palmas *palmas = i2c_get_clientdata(i2c);
> +	struct device *dev = &i2c->dev;
> +
> +	if (!palmas->wakeirq)
> +		return 0;
> +
> +	if (device_may_wakeup(dev))
> +		enable_irq(palmas->wakeirq);
> +
> +	return 0;
> +}
> +
> +static int palmas_i2c_resume(struct i2c_client *i2c)
> +{
> +	struct palmas *palmas = i2c_get_clientdata(i2c);
> +	struct device *dev = &i2c->dev;
> +
> +	if (!palmas->wakeirq)
> +		return 0;
> +
> +	if (device_may_wakeup(dev))
> +		disable_irq_nosync(palmas->wakeirq);

Again, why nosync?
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nishanth Menon Sept. 19, 2014, 3:03 a.m. UTC | #2
On 17:57-20140918, Thomas Gleixner wrote:
> On Thu, 18 Sep 2014, Nishanth Menon wrote:
> > +static irqreturn_t palmas_wake_irq(int irq, void *_palmas)
> > +{
> > +	/*
> > +	 * Return Not handled so that interrupt is disabled.
> 
> And how is that interrupt disabled by returning IRQ_NONE? You mean it
> gets disabled after it got reraised 100000 times and the spurious
> detector kills it?

No, that does not happen due to the hardware involved:
http://fpaste.org/134757/09428214/ (there are still fixes needed to
various drivers to completely achieve low power states).

Explanation below:
> 
> > +	 * Level event ensures that the event is eventually handled
> > +	 * by the appropriate chip handler already registered
> 
> Eventually handled? So eventually it's not handled?

I had previously tried explaining this in v1:
https://patchwork.kernel.org/patch/4743321/

I agree it is a little convoluted, so will try again: mainly because
of the hardware entities involved. Two different hardware blocks generate
interrupts. A GPIO hardware block handles palmas interrupts when the SoC
is ON. However, with the GPIO block active, we cannot achieve low power
suspend (deep sleep) for the SoC, so we switch off all GPIOs and SoC
hardware blocks as part of the sequence of going to low power
suspend (mem), and depend on the hardware controlling the pins of the
SoC to generate the wakeup event (wakeirq).

wakeirq is provided by drivers/pinctrl/pinctrl-single.c (implementation
to handle pad generated interrupt source).

wakeirq is generated by the hardware at the pin (we call it control
module in TI SoC), this generates an interrupt on level change. Palmas
generates level interrupt, and the level is cleared when interrupt
source is cleared.

At suspend - we enable_irq(wakeirq) - this arms the pin for palmas
interrupt to generate an interrupt when level changes.

We start the wake sequence in deep sleep (only thing alive is that
control module, every thing else, including GPIO block is powered
off).

On generating a wakeup event (in the example log, I used palmas power
button), palmas generates a level event, the transition triggers two things:
a) control module generates wakeirq (detecting the level shift)
b) wakeirq causes wakeup of SoC from deep sleep.

wakeirq won't be generated again by the hardware because pinctrl handles
the wakeirq interrupt event in the control module.

At resume - we disable_irq wakeirq -which in turn disables the pin for
generating interrupts if level ever changes again.

GPIO block is restored as part of resume path, and we generate the
handler for palmas regular interrupt service which in turn goes and
detects the real event and handles it.

I suppose I can improve the commit message to elaborate this better?
Will that help?
> 
> > +	 */
> > +	return IRQ_NONE;
> > +}
> > +
> >  int palmas_ext_control_req_config(struct palmas *palmas,
> >  	enum palmas_external_requestor_id id,  int ext_ctrl, bool enable)
> >  {
> > @@ -409,6 +420,7 @@ static void palmas_dt_to_pdata(struct i2c_client *i2c,
> >  		pdata->mux_from_pdata = 1;
> >  		pdata->pad2 = prop;
> >  	}
> > +	pdata->wakeirq = irq_of_parse_and_map(node, 1);
> >  
> >  	/* The default for this register is all masked */
> >  	ret = of_property_read_u32(node, "ti,power-ctrl", &prop);
> > @@ -521,6 +533,7 @@ static int palmas_i2c_probe(struct i2c_client *i2c,
> >  	i2c_set_clientdata(i2c, palmas);
> >  	palmas->dev = &i2c->dev;
> >  	palmas->irq = i2c->irq;
> > +	palmas->wakeirq = pdata->wakeirq;
> >  
> >  	match = of_match_device(of_palmas_match_tbl, &i2c->dev);
> >  
> > @@ -587,6 +600,25 @@ static int palmas_i2c_probe(struct i2c_client *i2c,
> >  	if (ret < 0)
> >  		goto err_i2c;
> >  
> > +	if (!palmas->wakeirq)
> > +		goto no_wake_irq;
> > +
> > +	ret = devm_request_irq(palmas->dev, palmas->wakeirq,
> > +			       palmas_wake_irq,
> > +			       pdata->irq_flags,
> > +			       dev_name(palmas->dev),
> > +			       &palmas);
> > +	if (ret < 0) {
> > +		dev_err(palmas->dev, "Invalid wakeirq(%d) (res: %d), skiping\n",
> > +			palmas->wakeirq, ret);
> > +		palmas->wakeirq = 0;
> > +	} else {
> > +		/* We use wakeirq only during suspend-resume path */
> > +		device_set_wakeup_capable(palmas->dev, true);
> > +		disable_irq_nosync(palmas->wakeirq);
> 
> Urgh. Why nosysnc? And why do you want to do that at all?
> 
> 	irq_set_status_flags(irq, IRQ_NOAUTOEN);
> 
> Is what you want to set before requesting the irq.

Aaah, OK. thanks on the suggestion. will do that.
> 
> 
> > +	}
> > +
> > +no_wake_irq:
> >  no_irq:
> >  	slave = PALMAS_BASE_TO_SLAVE(PALMAS_PU_PD_OD_BASE);
> >  	addr = PALMAS_BASE_TO_REG(PALMAS_PU_PD_OD_BASE,
> > @@ -706,6 +738,34 @@ static int palmas_i2c_remove(struct i2c_client *i2c)
> >  	return 0;
> >  }
> >  
> > +static int palmas_i2c_suspend(struct i2c_client *i2c,  pm_message_t mesg)
> > +{
> > +	struct palmas *palmas = i2c_get_clientdata(i2c);
> > +	struct device *dev = &i2c->dev;
> > +
> > +	if (!palmas->wakeirq)
> > +		return 0;
> > +
> > +	if (device_may_wakeup(dev))
> > +		enable_irq(palmas->wakeirq);
> > +
> > +	return 0;
> > +}
> > +
> > +static int palmas_i2c_resume(struct i2c_client *i2c)
> > +{
> > +	struct palmas *palmas = i2c_get_clientdata(i2c);
> > +	struct device *dev = &i2c->dev;
> > +
> > +	if (!palmas->wakeirq)
> > +		return 0;
> > +
> > +	if (device_may_wakeup(dev))
> > +		disable_irq_nosync(palmas->wakeirq);
> 
> Again, why nosync?
true - nosync is not necessary here. disable_irq is however necessary
as we are not interested in wakeup events for level changes.

We just use the enable/disable to control when we'd want to arm the pin
for waking up from suspend state.
Thomas Gleixner Sept. 19, 2014, 3:37 p.m. UTC | #3
On Thu, 18 Sep 2014, Nishanth Menon wrote:
> On 17:57-20140918, Thomas Gleixner wrote:
> 
> I suppose I can improve the commit message to elaborate this better?
> Will that help?

You also want to improve the comment in the empty handler.

> > 
> > > +	 */
> > > +	return IRQ_NONE;

And it still does not explain WHY you think that returning IRQ_NONE is
the right thing to do here. You actually handle the interrupt, right?
Just because the handler is an NOP does not mean you did not handle
it.

> > > +static int palmas_i2c_suspend(struct i2c_client *i2c,  pm_message_t mesg)
> > > +{
> > > +	struct palmas *palmas = i2c_get_clientdata(i2c);
> > > +	struct device *dev = &i2c->dev;
> > > +
> > > +	if (!palmas->wakeirq)
> > > +		return 0;
> > > +
> > > +	if (device_may_wakeup(dev))
> > > +		enable_irq(palmas->wakeirq);
> > > +
> > > +	return 0;
> > > +}
> > > +
> > > +static int palmas_i2c_resume(struct i2c_client *i2c)
> > > +{
> > > +	struct palmas *palmas = i2c_get_clientdata(i2c);
> > > +	struct device *dev = &i2c->dev;
> > > +
> > > +	if (!palmas->wakeirq)
> > > +		return 0;
> > > +
> > > +	if (device_may_wakeup(dev))
> > > +		disable_irq_nosync(palmas->wakeirq);
> > 
> > Again, why nosync?
> true - nosync is not necessary at here. disable_irq is however necessary
> as we are not interested in wakeup events for level changes.
> 
> We just use the enable/disable to control when we'd want to arm the pin
> for waking up from suspend state.

And what is issuing the call to enable/disable_irq_wake()? 

So if that interrupt is not marked properly then you can easily bring
your device into a "won't resume" state

       start suspend
       enable wakeirq
       disable_device_irqs()
	   if (!iswakeup_irq())
	      disable_irq() // does not mask due to lazy masking

       ....
       wakeirq fires
          if (irq_is_disabled())
	     mask_irq();

       transition into suspend

Now your pinctrl irq is masked at the HW level and won't wake the
machine up ever again.

So now looking at that pinctrl irq chip thing, which seems to be
designed to handle these kinds of wakeups. That thing looks massively
wrong as well, simply because it enforces the use of
enable_irq/disable_irq().

So because the sole purpose of this chip is to handle the separate
wakeup style interrupt, it should actually NOT enable the interrupt in
the irq_unmask callback.

Simply because during normal operation nothing is interested in the
interrupt and any operation which might enable it (including request
irq) is just making the system handle completely pointless interrupts
and hoops and loops juggling with enable/disable irq.

So the right thing here is to have an empty unmask function and do the
actual unmask only in the irq_set_wake() callback. mask of course
needs to do what it says. The point is, that the following sequence of
code will just work w/o generating an interrupt on the wakeirq line
outside of the wake enabled context.

dev_init()
	request_wakeirq();

suspend()
	if (may_wake())
	   enable_irq_wake();

resume()
	if (may_wake())
	   disable_irq_wake();

The other omap drivers using this have the same issue ... And of
course they are subtly different.

The uart one handles the actual device interrupt, which is violating
the general rule of possible interrupt reentrancy in the pm-runtime
case if the two interrupts are affine to two different cores. Yes,
it's protected by a lock and works by chance ....

The mmc one issues a disable_irq_nosync() in the wakeup irq handler
itself.

WHY does one driver need that and the other does not? You are not even
able to come up with a common scheme for OMAP. I don't want to see the
mess others are going to create when this stuff becomes more used.

Thanks,

	tglx
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Nishanth Menon Sept. 19, 2014, 4:19 p.m. UTC | #4
On 08:37-20140919, Thomas Gleixner wrote:
> On Thu, 18 Sep 2014, Nishanth Menon wrote:
> > On 17:57-20140918, Thomas Gleixner wrote:
> > 
> > I suppose I can improve the commit message to elaborate this better?
> > Will that help?
> 
> You also want to improve the comment in the empty handler.
OK. will do the same. Thanks for suggesting.

> 
> > > 
> > > > +	 */
> > > > +	return IRQ_NONE;
> 
> And it still does not explain WHY you think that returning IRQ_NONE is
> the right thing to do here. You actually handle the interrupt, right?
> Just because the handler is an NOP does not mean you did not handle
> it.

Hmm.. My motivation for IRQ_NONE was because this specific handler does
not handle the interrupt. Now, from this discussion, I understand that I
should rather use IRQ_HANDLED since the event is indeed handled (just
not here).

Thank you for correcting my understanding. Will update in my next rev
(once we solve the following discussion)..

> 
> > > > +static int palmas_i2c_suspend(struct i2c_client *i2c,  pm_message_t mesg)
> > > > +{
> > > > +	struct palmas *palmas = i2c_get_clientdata(i2c);
> > > > +	struct device *dev = &i2c->dev;
> > > > +
> > > > +	if (!palmas->wakeirq)
> > > > +		return 0;
> > > > +
> > > > +	if (device_may_wakeup(dev))
> > > > +		enable_irq(palmas->wakeirq);
> > > > +
> > > > +	return 0;
> > > > +}
> > > > +
> > > > +static int palmas_i2c_resume(struct i2c_client *i2c)
> > > > +{
> > > > +	struct palmas *palmas = i2c_get_clientdata(i2c);
> > > > +	struct device *dev = &i2c->dev;
> > > > +
> > > > +	if (!palmas->wakeirq)
> > > > +		return 0;
> > > > +
> > > > +	if (device_may_wakeup(dev))
> > > > +		disable_irq_nosync(palmas->wakeirq);
> > > 
> > > Again, why nosync?
> > true - nosync is not necessary at here. disable_irq is however necessary
> > as we are not interested in wakeup events for level changes.
> > 
> > We just use the enable/disable to control when we'd want to arm the pin
> > for waking up from suspend state.
> 
> And what is issuing the call to enable/disable_irq_wake()? 
> 
> So if that interrupt is not marked proper then you can bring your
> device into a wont resume state easily
> 
>        start suspend
>        enable wakeirq
>        disable_device_irqs()
> 	   if (!iswakeup_irq())
> 	      disable_irq() // does not mask due to lazy masking
> 
>        ....
>        wakeirq fires
>           if (irq_is_disabled())
> 	     mask_irq();
> 
>        transition into suspend
> 
> Now your pinctrl irq is masked at the HW level and wont wake the
> machine up ever again.
True.

> 
> So now looking at that pinctrl irq chip thing, which seems to be
> designed to handle these kind of wakeups. That thing looks massivly
> wrong as well, simply because it enforces to use
> enable_irq/disable_irq().
> 
> So because the sole purpose of this chip is to handle the separate
> wakeup style interrupt, it should actually NOT enable the interrupt in
> the irq_unmask callback.
> 
> Simply because during normal operation nothing is interested in the
> interrupt and any operation which might enable it (including request
> irq) is just making the system handle completely pointless interrupts
> and hoops and loops juggling with enable/disable irq.
> 
> So the right thing here is to have an empty unmask function and do the
> actual unmask only in the irq_set_wake() callback. mask of course
> needs to do what it says. The point is, that the following sequence of
> code will just work w/o generating an interrupt on the wakeirq line
> outside of the wake enabled context.
> 
> dev_init()
> 	request_wakeirq();
> 
> suspend()
> 	if (may_wake())
> 	   enable_irq_wake();
> 
> resume()
> 	if (may_wake())
> 	   disable_irq_wake();
> 
> The other omap drivers using this have the same issue ... And of
> course they are subtly different.
> 
> The uart one handles the actual device interrupt, which is violating
> the general rule of possible interrupt reentrancy in the pm-runtime
> case if the two interrupts are affine to two different cores. Yes,
> it's protected by a lock and works by chance ....
> 
> The mmc one issues a disable_irq_nosync() in the wakeup irq handler
> itself.
> 
> WHY does one driver need that and the other does not? You are not even
> able to come up with a common scheme for OMAP. I don't want to see the
> mess others are going to create when this stuff becomes more used.
> 
> Thanks,
> 
> 	tglx

I think I understand your concern - I request Tony to comment about
this. I mean, I can try and hook things like uart in other drivers
(like https://patchwork.kernel.org/patch/4759171/ ), but w.r.t overall
generic usage guideline wise, I would prefer Tony to comment.
Thomas Gleixner Sept. 19, 2014, 5:36 p.m. UTC | #5
On Fri, 19 Sep 2014, Nishanth Menon wrote:
> On 08:37-20140919, Thomas Gleixner wrote:
> > The other omap drivers using this have the same issue ... And of
> > course they are subtly different.
> > 
> > The uart one handles the actual device interrupt, which is violating
> > the general rule of possible interrupt reentrancy in the pm-runtime
> > case if the two interrupts are affine to two different cores. Yes,
> > it's protected by a lock and works by chance ....
> > 
> > The mmc one issues a disable_irq_nosync() in the wakeup irq handler
> > itself.
> > 
> > WHY does one driver need that and the other does not? You are not even
> > able to come up with a common scheme for OMAP. I don't want to see the
> > mess others are going to create when this stuff becomes more used.
> > 
> > Thanks,
> > 
> > 	tglx
> 
> I think I understand your concern - I request Tony to comment about
> this. I mean, I can try and hook things like uart in other drivers
> (like https://patchwork.kernel.org/patch/4759171/ ), but w.r.t overall
> generic usage guideline wise, I would prefer Tony to comment.

No, the uart and that i2c thing are just wrong. Assume the following

device irq affine to cpu0
wakeup irq affine to cpu1

CPU 0				CPU 1

runtime suspend

 enable_wake(wakeup irq);

wakeup interrupt is raised	device interrupt is raised

  dev_handler(device)		dev_handler(device)

It might work due to locking, but it is nevertheless wrong. Interrupt
handlers for devices are guaranteed not to be reentrant. And this
brilliant stuff simply violates that guarantee. So, no. It's wrong
even if it happens to work by chance.

Thanks,

	tglx
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tony Lindgren Sept. 19, 2014, 7:16 p.m. UTC | #6
* Thomas Gleixner <tglx@linutronix.de> [140919 10:37]:
> On Fri, 19 Sep 2014, Nishanth Menon wrote:
> > On 08:37-20140919, Thomas Gleixner wrote:
> > > The other omap drivers using this have the same issue ... And of
> > > course they are subtly different.
> > > 
> > > The uart one handles the actual device interrupt, which is violating
> > > the general rule of possible interrupt reentrancy in the pm-runtime
> > > case if the two interrupts are affine to two different cores. Yes,
> > > it's protected by a lock and works by chance ....
> > > 
> > > The mmc one issues a disable_irq_nosync() in the wakeup irq handler
> > > itself.
> > > 
> > > WHY does one driver need that and the other does not? You are not even
> > > able to come up with a common scheme for OMAP. I don't want to see the
> > > mess others are going to create when this stuff becomes more used.
> > > 
> > > Thanks,
> > > 
> > > 	tglx
> > 
> > I think I understand your concern - I request Tony to comment about
> > this. I mean, I can try and hook things like uart in other drivers
> > (like https://patchwork.kernel.org/patch/4759171/ ), but w.r.t overall
> > generic usage guideline wise, I would prefer Tony to comment.
> 
> No, the uart and that i2c thing are just wrong. Assume the following
> 
> device irq affine to cpu0
> wakeup irq affine to cpu1
> 
> CPU 0				CPU 1
> 
> runtime suspend
> 
>  enable_wake(wakeup irq);
> 
> wakeup interrupt is raised	device interrupt is raised
> 
>   dev_handler(device)		dev_handler(device)
> 
> It might work due to locking, but it is nevertheless wrong. Interrupt
> handlers for devices are guaranteed not to be reentrant. And this
> brilliant stuff simply violates that guarantee. So, no. It's wrong
> even if it happens to work by chance.

Hmm yeah that's a good point indeed.

From hardware point of view the wake-up events behave like interrupts
and could also be used as the only interrupt in some messed up cases.
That avoids all kinds of custom APIs from driver point.

The re-entrancy problem we've most likely had ever since we enabled
the PRCM interrupts, and maybe that's why I did not even consider
that part. I think before that we were calling the driver interrupt
after waking up from the PM code..

Anyways, how about the following to deal with the re-entrancy problem:

1. The wake-up interrupt handler must have a separate interrupt
   handler that just calls tasklet_schedule()

2. The device interrupt handler also just calls tasklet_schedule()

3. The tasklet then does pm_runtime_get, handles the registers, and
   so on.

Or would we still have a re-entrancy problem somewhere else with
that?

Regards,

Tony
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Thomas Gleixner Sept. 19, 2014, 7:46 p.m. UTC | #7
On Fri, 19 Sep 2014, Tony Lindgren wrote:
> * Thomas Gleixner <tglx@linutronix.de> [140919 10:37]:
> From hardware point of view the wake-up events behave like interrupts
> and could also be used as the only interrupt in some messed up cases.
> That avoids all kinds of custom APIs from driver point.
> 
> The re-entrancy problem we've most likely had ever since we enabled
> the PRCM interrupts, and maybe that's why I did not even consider
> that part. I think before that we were calling the driver interrupt
> after waking up from the PM code..
> 
> Anyways, how about the following to deal with the re-entrancy problem:
> 
> 1. The wake-up interrupt handler must have a separate interrupt
>    handler that just calls tasklet_schedule()
> 
> 2. The device interrupt handler also just calls tasklet_schedule()
> 
> 3. The tasklet then does pm_runtime_get, handles the registers, and
>    so on.
> 
> Or would we still have a re-entrancy problem somewhere else with
> that?

Why on earth are you wanting tasklets in there? That's just silly,
really.

The wakeup handler is supposed to bring the thing out of deep sleep
and nothing else. All you want it to do is to mask itself and save the
information that the real device irq is pending.

A stub handler for the wakeup irq is enough. We can have that in the
irq/pm core and all it would do is simply:

irqreturn_t handle_jinxed_wakeup_irq(unsigned irq, void *dev_id)
{
	unsigned device_irq = get_dev_irq(dev_id);

	force_mask(irq);
	set_irq_pending(device_irq);
	return HANDLED;	
}

So on resume_device_irqs() the real device interrupt gets reenabled
and unmasked (if it was masked) and the interrupt gets resent either
in hardware (level or retrigger) or by the software resend mechanism.

That completely avoids tasklets, reentrant irq handlers and all other
crap which might be required.

Thanks,

	tglx
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tony Lindgren Sept. 19, 2014, 7:57 p.m. UTC | #8
* Thomas Gleixner <tglx@linutronix.de> [140919 12:47]:
> On Fri, 19 Sep 2014, Tony Lindgren wrote:
> > * Thomas Gleixner <tglx@linutronix.de> [140919 10:37]:
> > From hardware point of view the wake-up events behave like interrupts
> > and could also be used as the only interrupt in some messed up cases.
> > That avoids all kinds of custom APIs from driver point.
> > 
> > The re-entrancy problem we've most likely had ever since we enabled
> > the PRCM interrupts, and maybe that's why I did not even consider
> > that part. I think before that we were calling the driver interrupt
> > after waking up from the PM code..
> > 
> > Anyways, how about the following to deal with the re-entrancy problem:
> > 
> > 1. The wake-up interrupt handler must have a separate interrupt
> >    handler that just calls tasklet_schedule()
> > 
> > 2. The device interrupt handler also just calls tasklet_schedule()
> > 
> > 3. The tasklet then does pm_runtime_get, handles the registers, and
> >    so on.
> > 
> > Or would we still have a re-entrancy problem somewhere else with
> > that?
> 
> Why on earth are you wanting tasklets in there? That's just silly,
> really.

Lack of a framework on driver side to cope with this in a generic
way? :p
 
> The wakeup handler is supposed to bring the thing out of deep sleep
> and nothing else. All you want it to do is to mask itself and save the
> information that the real device irq is pending.

Yes that is enough.
 
> A stub handler for the wakeup irq is enough. We can have that in the
> irq/pm core and all it would do is simply:
> 
> irqreturn_t handle_jinxed_wakeup_irq(unsigned irq, void *dev_id)
> {
> 	unsigned device_irq = get_dev_irq(dev_id);
> 
> 	force_mask(irq);
> 	set_irq_pending(device_irq);
> 	return HANDLED;	
> }
> 
> So on resume_device_irqs() the real device interrupt gets reenabled
> and unmasked (if it was masked) and the interrupt gets resent either
> in hardware (level or retrigger) or by the software resend mechanism.
> 
> That completely avoids tasklets, reentrant irq handlers and all other
> crap which might be required.

Okie dokie, that sounds good to me. Having this generic for the
drivers would be excellent, that's the missing piece.

Thanks,

Tony
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Thomas Gleixner Sept. 20, 2014, 2:07 a.m. UTC | #9
On Fri, 19 Sep 2014, Tony Lindgren wrote:
> * Thomas Gleixner <tglx@linutronix.de> [140919 12:47]:
> > Why on earth are you wanting tasklets in there? That's just silly,
> > really.
> 
> Lack of a framework on driver side to cope with this in a generic
> way? :p

So instead of creating such a thing we rather have a completely ass
backward workaround which spreads itself all over the tree?

You SoC folks really need a quarterly sanity check.

Thanks,

	tglx
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tony Lindgren Sept. 20, 2014, 2:07 p.m. UTC | #10
* Thomas Gleixner <tglx@linutronix.de> [140919 19:08]:
> On Fri, 19 Sep 2014, Tony Lindgren wrote:
> > * Thomas Gleixner <tglx@linutronix.de> [140919 12:47]:
> > > Why on earth are you wanting tasklets in there? That's just silly,
> > > really.
> > 
> > Lack of a framework on driver side to cope with this in a generic
> > way? :p
> 
> So instead of creating such a thing we rather have a completely ass
> backward workaround which spreads itself all over the tree?

I'm not kidding, you're the first one who was able to come up with
such a simple solution and also describe why it should be done
that way.
 
> You SoC folks really need a quarterly sanity check.

Probably time for me to file a bios bug on that yeah :)

Regards,

Tony
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tony Lindgren Nov. 6, 2014, 8:46 p.m. UTC | #11
Thomas,

Any comments on the patch below? Let me know if you want to keep the
devm stuff out of kernel/irq/manage.c.

* Tony Lindgren <tony@atomide.com> [141001 20:45]:
> Hi Thomas,
> 
> * Thomas Gleixner <tglx@linutronix.de> [140919 12:47]:
> > 
> > The wakeup handler is supposed to bring the thing out of deep sleep
> > and nothing else. All you want it to do is to mask itself and save the
> > information that the real device irq is pending.
> > 
> > A stub handler for the wakeup irq is enough. We can have that in the
> > irq/pm core and all it would do is simply:
> 
> Here's a patch along the lines of what you described, hopefully that's
> fairly close to what you had in mind.
> 
> I also did play with the replaying of the interrupts but I don't think
> that's needed. Well at least not for the omap case. I added some
> comments about that to the code.
> 
> So far I've tested with the omap-serial and omap_hsmmc drivers. The
> serial driver does not have any status as the device is powered off.
> So replaying of the interrupt does not help there, we need to wait for
> the next event anyways.
> 
> Then with omap_hsmmc the SDIO interrupt on dat1 line is level
> sensitive and is noticed after the MMC controller is powered on
> again. So no replaying of the device interrupt needed here either.
> 
> I still have not tested the MMC remux lines to GPIO for wake-up
> events that's also needed for some omaps.
> 
> Regards,
> 
> Tony
> 
> 8<-----------
> From: Tony Lindgren <tony@atomide.com>
> Date: Wed, 1 Oct 2014 14:56:35 -0700
> Subject: [PATCH] genirq: Add support for wake-up interrupts to fix irq reentry issues in drivers
> 
> As pointed out by Thomas Gleixner, at least omap wake-up interrupts
> have an issue with re-entrant interrupts because the wake-up interrupts
> are now handled as a secondary interrupt controller. Further, the
> wake-up interrupt just needs to wake the system, at least for omaps. So we
> should just make the wake-up interrupt handling generic.
> 
> Note that at least initially we are keeping things simple by assuming the
> wake-up interrupt is level sensitive, and the device pm_runtime_resume()
> can deal with the situation, and no replaying of the lost device interrupts
> is needed.
> 
> After tinkering with replaying of the lost device interrupts, my opinion is
> that it should be avoided because of the issues listed in the comments of
> this patch.
> 
> Signed-off-by: Tony Lindgren <tony@atomide.com>
> 
> --- a/include/linux/interrupt.h
> +++ b/include/linux/interrupt.h
> @@ -139,11 +139,15 @@ extern int __must_check
>  request_percpu_irq(unsigned int irq, irq_handler_t handler,
>  		   const char *devname, void __percpu *percpu_dev_id);
>  
> +struct device;
> +
> +extern int __must_check
> +request_wake_irq(struct device *dev, unsigned int wakeirq,
> +		 unsigned long irqflags);
> +
>  extern void free_irq(unsigned int, void *);
>  extern void free_percpu_irq(unsigned int, void __percpu *);
>  
> -struct device;
> -
>  extern int __must_check
>  devm_request_threaded_irq(struct device *dev, unsigned int irq,
>  			  irq_handler_t handler, irq_handler_t thread_fn,
> @@ -163,6 +167,10 @@ devm_request_any_context_irq(struct device *dev, unsigned int irq,
>  		 irq_handler_t handler, unsigned long irqflags,
>  		 const char *devname, void *dev_id);
>  
> +extern int __must_check
> +devm_request_wake_irq(struct device *dev, unsigned int wakeirq,
> +		      unsigned long irqflags);
> +
>  extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id);
>  
>  /*
> --- a/kernel/irq/manage.c
> +++ b/kernel/irq/manage.c
> @@ -14,6 +14,7 @@
>  #include <linux/module.h>
>  #include <linux/random.h>
>  #include <linux/interrupt.h>
> +#include <linux/pm_runtime.h>
>  #include <linux/slab.h>
>  #include <linux/sched.h>
>  #include <linux/sched/rt.h>
> @@ -1578,6 +1579,111 @@ int request_any_context_irq(unsigned int irq, irq_handler_t handler,
>  }
>  EXPORT_SYMBOL_GPL(request_any_context_irq);
>  
> +/**
> + *	handle_wakeirq_thread - call device runtime pm calls on wake-up interrupt
> + *	@wakeirq: device specific wake-up interrupt
> + *	@dev_id: struct device entry
> + */
> +static irqreturn_t handle_wakeirq_thread(int wakeirq, void *dev_id)
> +{
> +	struct device *dev = dev_id;
> +	irqreturn_t ret = IRQ_NONE;
> +
> +	if (pm_runtime_suspended(dev)) {
> +		pm_runtime_mark_last_busy(dev);
> +		pm_request_resume(dev);
> +		ret = IRQ_HANDLED;
> +	}
> +
> +	return ret;
> +}
> +
> +/**
> + *	setup_wakeirq - allocate a wake-up interrupt for a device
> + *	@dev: device to wake up
> + *	@wakeirq: interrupt that wakes up the device
> + *	@wakeflags: flags to pass to the interrupt handler
> + *	@devm: use devm
> + *
> + *	Note that the wake-up interrupt starts disabled. The wake-up interrupt
> + *	is typically enabled from the device pm_runtime_suspend() and disabled
> + *	again in the device pm_runtime_resume(). For runtime PM, the wake-up
> + *	interrupt should be always enabled, and for device suspend and resume,
> + *	the wake-up interrupt should be enabled depending on the device specific
> + *	configuration for device_can_wakeup().
> + *
> + *	Note also that we are not resending the lost device interrupts.
> + *	We assume that the wake-up interrupt just needs to wake-up the device,
> + *	and then device pm_runtime_resume() can deal with the situation.
> + *
> + *	There are at least the following reasons to not resend the lost device
> + *	interrupts automatically based on the wake-up interrupt:
> + *
> + *	1. There can be interrupt reentry issues calling the device interrupt
> + *	   based on the wake-up interrupt if done in the device driver. It
> + *	   could be done with check_irq_resend() after checking the device
> + *	   interrupt mask if we really wanted to though.
> + *
> + *	2. The device interrupt handler would need to be set up properly with
> + *	   pm_runtime_irq_safe(). Ideally you don't want to call pm_runtime
> + *	   calls from the device interrupt handler at all.
> + *
> + *	3. The IRQ subsystem may not know if it's safe to call the device
> + *	   interrupt unless the driver updates the interrupt status with
> + *	   disable_irq() and enable_irq() in addition to just disabling the
> + *	   interrupt at the hardware level in the device registers.
> + *
> + *	So if replaying the lost device interrupts is absolutely needed from the
> + *	hardware point of view, it's probably best to set up a completely
> + *	separate wake-up interrupt handler for the wake-up interrupt in the
> + *	device driver because of the reasons above.
> + */
> +static int setup_wakeirq(struct device *dev, unsigned int wakeirq,
> +			 unsigned long wakeflags, bool devm)
> +{
> +	int ret;
> +
> +	if (!(dev && wakeirq)) {
> +		pr_err("Missing device or wakeirq for %s irq %d\n",
> +		       dev_name(dev), wakeirq);
> +		return -EINVAL;
> +	}
> +
> +	if (!(wakeflags &
> +	      (IRQF_TRIGGER_LOW | IRQF_TRIGGER_HIGH | IRQF_ONESHOT))) {
> +		pr_err("Invalid wakeirq for %s irq %d, must be level oneshot\n",
> +		       dev_name(dev), wakeirq);
> +		return -EINVAL;
> +	}
> +
> +	irq_set_status_flags(wakeirq, _IRQ_NOAUTOEN);
> +
> +	if (devm)
> +		ret = devm_request_threaded_irq(dev, wakeirq, NULL,
> +						handle_wakeirq_thread,
> +						wakeflags, dev_name(dev), dev);
> +	else
> +		ret = request_threaded_irq(wakeirq, NULL,
> +					   handle_wakeirq_thread,
> +					   wakeflags, dev_name(dev), dev);
> +
> +	return ret;
> +}
> +
> +int request_wake_irq(struct device *dev, unsigned int wakeirq,
> +		     unsigned long wakeflags)
> +{
> +	return setup_wakeirq(dev, wakeirq, wakeflags, false);
> +}
> +EXPORT_SYMBOL(request_wake_irq);
> +
> +int devm_request_wake_irq(struct device *dev, unsigned int wakeirq,
> +			  unsigned long wakeflags)
> +{
> +	return setup_wakeirq(dev, wakeirq, wakeflags, false);
> +}
> +EXPORT_SYMBOL(devm_request_wake_irq);
> +
>  void enable_percpu_irq(unsigned int irq, unsigned int type)
>  {
>  	unsigned int cpu = smp_processor_id();
> --
> To unsubscribe from this list: send the line "unsubscribe linux-omap" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Thomas Gleixner Nov. 13, 2014, 10:03 a.m. UTC | #12
Tony,

On Thu, 6 Nov 2014, Tony Lindgren wrote:
> 
> Any comments on the patch below? Let me know if you want to keep the
> devm stuff out of kernel/irq/manage.c.

Sorry, this slipped through the cracks.
 
> > +static int setup_wakeirq(struct device *dev, unsigned int wakeirq,
> > +			 unsigned long wakeflags, bool devm)
> > +{
> > +	int ret;
> > +
> > +	if (!(dev && wakeirq)) {
> > +		pr_err("Missing device or wakeirq for %s irq %d\n",
> > +		       dev_name(dev), wakeirq);
> > +		return -EINVAL;
> > +	}
> > +
> > +	if (!(wakeflags &
> > +	      (IRQF_TRIGGER_LOW | IRQF_TRIGGER_HIGH | IRQF_ONESHOT))) {
> > +		pr_err("Invalid wakeirq for %s irq %d, must be level oneshot\n",
> > +		       dev_name(dev), wakeirq);

This looks odd.

Why do you want to enforce LEVEL and ONESHOT?  I can see the point for
ONESHOT, but I'm wondering about the requirement for level.

Now if you really want to enforce level AND oneshot, your check is
wrong as it will not trigger on

      wakeflags = IRQF_TRIGGER_LOW;
      wakeflags = IRQF_TRIGGER_HIGH;
      wakeflags = IRQF_ONESHOT;

Not what you really want, right?

> > +int request_wake_irq(struct device *dev, unsigned int wakeirq,
> > +		     unsigned long wakeflags)
> > +{
> > +	return setup_wakeirq(dev, wakeirq, wakeflags, false);
> > +}
> > +EXPORT_SYMBOL(request_wake_irq);

  _GPL please

> > +
> > +int devm_request_wake_irq(struct device *dev, unsigned int wakeirq,
> > +			  unsigned long wakeflags)
> > +{
> > +	return setup_wakeirq(dev, wakeirq, wakeflags, false);

Shouldn't that have devm = true?

Thanks,

	tglx
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Thomas Gleixner Nov. 13, 2014, 10:25 p.m. UTC | #13
On Thu, 13 Nov 2014, Tony Lindgren wrote:
> Oops thanks for catching that. As the devres stuff is separate, I've
> updated the patch to keep it that way by adding a minimal manage.h.
> This avoids including internals.h in devres.c. Does that seem usable
> for you?

What's wrong with internals.h? devres.c is core code, so it is not
affected by the ban on including internals.h :)
 
> +/**
> + *	init_disabled_wakeirq - initialize a wake-up interrupt for a device
> + *	@dev: device to wake up on the wake-up interrupt
> + *	@wakeirq: wake-up interrupt for the device
> + *	@wakeflags: wake-up interrupt flags
> + *
> + *	Note that the wake-up interrupt starts disabled. The wake-up interrupt
> + *	is typically enabled from the device pm_runtime_suspend() and disabled
> + *	again in the device pm_runtime_resume(). For runtime PM, the wake-up
> + *	interrupt should be always enabled, and for device suspend and resume,
> + *	the wake-up interrupt should be enabled depending on the device specific
> + *	configuration for device_can_wakeup().
> + *
> + *	Note also that we are not resending the lost device interrupts.
> + *	We assume that the wake-up interrupt just needs to wake-up the device,
> + *	and then device pm_runtime_resume() can deal with the situation.
> + *
> + *	There are at least the following reasons to not resend the lost device
> + *	interrupts automatically based on the wake-up interrupt:
> + *
> + *	1. There can be interrupt reentry issues calling the device interrupt
> + *	   based on the wake-up interrupt if done in the device driver. It
> + *	   could be done with check_irq_resend() after checking the device
> + *	   interrupt mask if we really wanted to though.
> + *
> + *	2. The device interrupt handler would need to be set up properly with
> + *	   pm_runtime_irq_safe(). Ideally you don't want to call pm_runtime
> + *	   calls from the device interrupt handler at all.
> + *
> + *	3. The IRQ subsystem may not know if it's safe to call the device
> + *	   interrupt unless the driver updates the interrupt status with
> + *	   disable_irq() and enable_irq() in addition to just disabling the
> + *	   interrupt at the hardware level in the device registers.
> + *
> + *	So if replaying the lost device interrupts is absolutely needed from the
> + *	hardware point of view, it's probably best to set up a completely
> + *	separate wake-up interrupt handler for the wake-up interrupt in the
> + *	device driver because of the reasons above.

Can we please kill this last paragraph? I'm already seeing the
gazillion of "I think it is required to do so for my soooo special
chip" implementations in random drivers which all get it wrong again.

So I'd rather provide a mechanism upfront which lets the driver know
that the wakeup interrupt originated from that device, i.e. let the
wake up handler call

     pm_wakeup_irq(dev);

which calls:

      pm_runtime_mark_last_busy(dev);
      pm_request_resume(dev);

and aside of that tells the device via a flag or preferably a
sequence counter that the wakeup irq has been triggered. So affected
devices can handle it based on that information w/o implementing the
next broken variant of wakeup irq handlers.

That also allows to remove the wakeflags check for level/edge.

> + */
> +int init_disabled_wakeirq(struct device *dev, unsigned int wakeirq,
> +			  unsigned long wakeflags)
> +{
> +	if (!(dev && wakeirq)) {

This is the second time I stumbled over this. While it is correct it
would be simpler to parse 

      if (!dev || !wakeirq) {

At least for my review damaged brain :)

> +		pr_err("Missing device or wakeirq for %s irq %d\n",
> +		       dev_name(dev), wakeirq);
> +		return -EINVAL;
> +	}
> +
> +	if (!(wakeflags & IRQF_ONESHOT)) {
> +		pr_err("Invalid wakeirq for %s irq %d, must be oneshot\n",
> +		       dev_name(dev), wakeirq);
> +		return -EINVAL;
> +	}

Is there a reason why we force the wakeirq into a threaded handler?

Thanks,

	tglx
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Tony Lindgren Nov. 13, 2014, 11:45 p.m. UTC | #14
* Thomas Gleixner <tglx@linutronix.de> [141113 14:27]:
> On Thu, 13 Nov 2014, Tony Lindgren wrote:
> > Oops thanks for catching that. As the devres stuff is separate, I've
> > updated the patch to keep it that way by adding a minimal manage.h.
> > This avoids including internals.h in devres.c. Does that seem usable
> > for you?
> 
> What's wrong with internals.h? devres.c is core code, so it is not
> affected of the ban to include internals.h :)

No problem, just that we need to bring in few other includes and
devres.c is currently free of any core irq stuff :) I can switch to
internals.h no problem if you prefer that.
  
> > + *	So if replaying the lost device interrupts is absolutely needed from the
> > + *	hardware point of view, it's probably best to set up a completely
> > + *	separate wake-up interrupt handler for the wake-up interrupt in the
> > + *	device driver because of the reasons above.
> 
> Can we please kill this last paragraph? I'm already seeing the
> gazillion of "I think it is required to do so for my soooo special
> chip" implementations in random drivers which all get it wrong again.

OK :)
 
> So I'd rather provide a mechanism upfront which lets the driver know
> that the wakeup interrupt originated from that device, i.e. let the
> wake up handler call
> 
>      pm_wakeup_irq(dev);
> 
> which calls:
> 
>       pm_runtime_mark_last_busy(dev);
>       pm_request_resume(dev);
> 
> and aside of that tells the device via a flag or preferrably a
> sequence counter that the wakeup irq has been triggered. So affected
> devices can handle it based on that information w/o implementing the
> next broken variant of wakeup irq handlers.

OK I'll take a look if we can just set some pm_runtime flag and use
the pm_runtime counters for that.

> That also allows to remove the wakeflags check for level/edge.
> 
> > + */
> > +int init_disabled_wakeirq(struct device *dev, unsigned int wakeirq,
> > +			  unsigned long wakeflags)
> > +{
> > +	if (!(dev && wakeirq)) {
> 
> This is the second time I stumbled over this. While it is correct it
> would be simpler to parse 
> 
>       if (!dev || !wakeirq) {
> 
> At least for my review damaged brain :)

Heh !!true.
 
> > +		pr_err("Missing device or wakeirq for %s irq %d\n",
> > +		       dev_name(dev), wakeirq);
> > +		return -EINVAL;
> > +	}
> > +
> > +	if (!(wakeflags & IRQF_ONESHOT)) {
> > +		pr_err("Invalid wakeirq for %s irq %d, must be oneshot\n",
> > +		       dev_name(dev), wakeirq);
> > +		return -EINVAL;
> > +	}
> 
> Is there a reason why we force the wakeirq into a threaded handler?

Yes the drivers may need to restore hardware state in the pm_runtime
calls and who knows what else drivers will be doing. So that too might
be a good reason to just set a flag in pm_runtime land.

Anyways, thanks for your comments. I'll post a complete series after
looking into the wake-up counters a bit.

Regards,

Tony
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Felipe Balbi Nov. 14, 2014, 4:19 p.m. UTC | #15
Hi,

On Thu, Nov 13, 2014 at 09:40:31AM -0800, Tony Lindgren wrote:

[snip]

> From: Tony Lindgren <tony@atomide.com>
> Date: Tue, 11 Nov 2014 07:53:55 -0800
> Subject: [PATCH] genirq: Add support for wake-up interrupts to fix irq reentry issues in drivers
> 
> As pointed out by Thomas Gleixner, at least omap wake-up interrupts
> have an issue with re-entrant interrupts because the wake-up interrupts
> are now handled as a secondary interrupt controller. Further, the
> wake-up interrupt just needs to wake the system at least for omaps. So we
> should make the wake-up interrupt handling generic.
> 
> Note that at least initially we are keeping things simple by assuming the
> wake-up interrupt is level sensitive, and the device pm_runtime_resume()
> can deal with the situation, and no replaying of the lost device interrupts
> is needed.
> 
> After tinkering with replaying of the lost device interrupts, my opinion is
> that it should be avoided because of the issues listed in the comments of
> this patch.
> 
> Let's also add a minimal manage.h to allow us keeping the separation
> of devm functions and without having to include internals.h in devres.c.
> 
> Signed-off-by: Tony Lindgren <tony@atomide.com>
> 
> --- a/include/linux/interrupt.h
> +++ b/include/linux/interrupt.h
> @@ -139,11 +139,15 @@ extern int __must_check
>  request_percpu_irq(unsigned int irq, irq_handler_t handler,
>  		   const char *devname, void __percpu *percpu_dev_id);
>  
> +struct device;
> +
> +extern int __must_check
> +request_wake_irq(struct device *dev, unsigned int wakeirq,
> +		 unsigned long irqflags);
> +
>  extern void free_irq(unsigned int, void *);
>  extern void free_percpu_irq(unsigned int, void __percpu *);
>  
> -struct device;
> -
>  extern int __must_check
>  devm_request_threaded_irq(struct device *dev, unsigned int irq,
>  			  irq_handler_t handler, irq_handler_t thread_fn,
> @@ -163,6 +167,10 @@ devm_request_any_context_irq(struct device *dev, unsigned int irq,
>  		 irq_handler_t handler, unsigned long irqflags,
>  		 const char *devname, void *dev_id);
>  
> +extern int __must_check
> +devm_request_wake_irq(struct device *dev, unsigned int wakeirq,
> +		      unsigned long irqflags);
> +
>  extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id);
>  
>  /*
> --- a/kernel/irq/devres.c
> +++ b/kernel/irq/devres.c
> @@ -3,6 +3,8 @@
>  #include <linux/device.h>
>  #include <linux/gfp.h>
>  
> +#include "manage.h"
> +
>  /*
>   * Device resource management aware IRQ request/free implementation.
>   */
> @@ -118,6 +120,30 @@ int devm_request_any_context_irq(struct device *dev, unsigned int irq,
>  EXPORT_SYMBOL(devm_request_any_context_irq);
>  
>  /**
> + *	devm_request_wake_irq - request a wake-up interrupt for a device
> + *	@dev: device to wake on the wake-up interrupt
> + *	@wakeirq: wake-up interrupt for the device
> + *	@wakeflags: wake-up interrupt flags
> + *
> + *	The wake-up interrupt starts disabled and is typically enabled
> + *	when needed by the device driver runtime PM calls.
> + */
> +int devm_request_wake_irq(struct device *dev, unsigned int wakeirq,
> +			  unsigned long wakeflags)
> +{
> +	int ret;
> +
> +	ret = init_disabled_wakeirq(dev, wakeirq, wakeflags);
> +	if (ret)
> +		return ret;
> +
> +	return devm_request_threaded_irq(dev, wakeirq, NULL,
> +					 handle_wakeirq_thread,
> +					 wakeflags, dev_name(dev), dev);
> +}
> +EXPORT_SYMBOL_GPL(devm_request_wake_irq);
> +
> +/**
>   *	devm_free_irq - free an interrupt
>   *	@dev: device to free interrupt for
>   *	@irq: Interrupt line to free
> --- a/kernel/irq/manage.c
> +++ b/kernel/irq/manage.c
> @@ -14,12 +14,14 @@
>  #include <linux/module.h>
>  #include <linux/random.h>
>  #include <linux/interrupt.h>
> +#include <linux/pm_runtime.h>
>  #include <linux/slab.h>
>  #include <linux/sched.h>
>  #include <linux/sched/rt.h>
>  #include <linux/task_work.h>
>  
>  #include "internals.h"
> +#include "manage.h"
>  
>  #ifdef CONFIG_IRQ_FORCED_THREADING
>  __read_mostly bool force_irqthreads;
> @@ -1564,6 +1566,112 @@ int request_any_context_irq(unsigned int irq, irq_handler_t handler,
>  }
>  EXPORT_SYMBOL_GPL(request_any_context_irq);
>  
> +/**
> + *	handle_wakeirq_thread - call device runtime pm calls on wake-up interrupt
> + *	@wakeirq: device specific wake-up interrupt
> + *	@dev_id: struct device entry
> + */
> +irqreturn_t handle_wakeirq_thread(int wakeirq, void *dev_id)
> +{
> +	struct device *dev = dev_id;
> +	irqreturn_t ret = IRQ_NONE;
> +
> +	if (pm_runtime_suspended(dev)) {
> +		pm_runtime_mark_last_busy(dev);
> +		pm_request_resume(dev);

this assumes that every driver's ->resume() callback has a:

	if (pending)
		handle_pending_irqs();

which might not be very nice. I'd rather follow what Thomas suggested
and always pass device irq so this can mark it pending. Keep in mind
that we *don't* need a pm_runtime_get_sync() in every IRQ handler
because of that. Adding it is but the easiest way to get things working
and, quite frankly, very silly.

what we want is rather:

	irqreturn_t my_handler(int irq, void *dev_id)
	{
		struct device *dev = dev_id;

		if (pm_runtime_suspended(dev)) {
			pending_irqs_to_be_handled_from_runtime_resume = true;
			pm_runtime_get(dev);
			clear_irq_source(dev);
			return IRQ_HANDLED;
		}
	}

or something similar.

> +		ret = IRQ_HANDLED;
> +	}

you're not masking the wake irq here which means that when this handler
returns, wake irq will be unmasked by core IRQ subsystem leaving it
unmasked after ->resume().

> +	return ret;
> +}
> +
> +/**
> + *	init_disabled_wakeirq - initialize a wake-up interrupt for a device
> + *	@dev: device to wake up on the wake-up interrupt
> + *	@wakeirq: wake-up interrupt for the device
> + *	@wakeflags: wake-up interrupt flags
> + *
> + *	Note that the wake-up interrupt starts disabled. The wake-up interrupt
> + *	is typically enabled from the device pm_runtime_suspend() and disabled
> + *	again in the device pm_runtime_resume(). For runtime PM, the wake-up
> + *	interrupt should be always enabled, and for device suspend and resume,
> + *	the wake-up interrupt should be enabled depending on the device specific
> + *	configuration for device_can_wakeup().
> + *
> + *	Note also that we are not resending the lost device interrupts.
> + *	We assume that the wake-up interrupt just needs to wake-up the device,
> + *	and then device pm_runtime_resume() can deal with the situation.
> + *
> + *	There are at least the following reasons to not resend the lost device
> + *	interrupts automatically based on the wake-up interrupt:
> + *
> + *	1. There can be interrupt reentry issues calling the device interrupt
> + *	   based on the wake-up interrupt if done in the device driver. It
> + *	   could be done with check_irq_resend() after checking the device
> + *	   interrupt mask if we really wanted to though.
> + *
> + *	2. The device interrupt handler would need to be set up properly with
> + *	   pm_runtime_irq_safe(). Ideally you don't want to call pm_runtime
> + *	   calls from the device interrupt handler at all.
> + *
> + *	3. The IRQ subsystem may not know if it's safe to call the device
> + *	   interrupt unless the driver updates the interrupt status with
> + *	   disable_irq() and enable_irq() in addition to just disabling the
> + *	   interrupt at the hardware level in the device registers.
> + *
> + *	So if replaying the lost device interrupts is absolutely needed from the
> + *	hardware point of view, it's probably best to set up a completely
> + *	separate wake-up interrupt handler for the wake-up interrupt in the
> + *	device driver because of the reasons above.
> + */
> +int init_disabled_wakeirq(struct device *dev, unsigned int wakeirq,
> +			  unsigned long wakeflags)
> +{
> +	if (!(dev && wakeirq)) {
> +		pr_err("Missing device or wakeirq for %s irq %d\n",
> +		       dev_name(dev), wakeirq);
> +		return -EINVAL;
> +	}
> +
> +	if (!(wakeflags & IRQF_ONESHOT)) {
> +		pr_err("Invalid wakeirq for %s irq %d, must be oneshot\n",
> +		       dev_name(dev), wakeirq);
> +		return -EINVAL;
> +	}

you *know* you'll pass a NULL top half handler, why don't you just force
IRQF_ONESHOT instead of erroring out ? Just add:

	wakeflags |= IRQF_ONESHOT;

and get it over with :-)

> +	if (wakeflags & (IRQF_TRIGGER_RISING | IRQF_TRIGGER_FALLING))
> +		pr_warn("Not replaying device IRQs for %s on wakeirq%d\n",
> +			dev_name(dev), wakeirq);
> +
> +	irq_set_status_flags(wakeirq, _IRQ_NOAUTOEN);
> +
> +	return 0;
> +}
> +
> +/**
> + *	request_wake_irq - request a wake-up interrupt for a device
> + *	@dev: device to wake on the wake-up interrupt
> + *	@wakeirq: wake-up interrupt for the device
> + *	@wakeflags: wake-up interrupt flags
> + *
> + *	The wake-up interrupt starts disabled and is typically enabled
> + *	when needed by the device driver runtime PM calls.
> + */
> +int request_wake_irq(struct device *dev, unsigned int wakeirq,
> +		     unsigned long wakeflags)
> +{
> +	int ret;
> +
> +	ret = init_disabled_wakeirq(dev, wakeirq, wakeflags);
> +	if (ret)
> +		return ret;
> +
> +	return request_threaded_irq(wakeirq, NULL,
> +				    handle_wakeirq_thread,
> +				    wakeflags, dev_name(dev), dev);
> +}
> +EXPORT_SYMBOL_GPL(request_wake_irq);
> +
>  void enable_percpu_irq(unsigned int irq, unsigned int type)
>  {
>  	unsigned int cpu = smp_processor_id();
> --- /dev/null
> +++ b/kernel/irq/manage.h
> @@ -0,0 +1,11 @@
> +/*
> + * IRQ subsystem internal management functions and variables:
> + *
> + * Do not ever include this file from anything else than
> + * kernel/irq/. Do not even think about using any information outside
> + * of this file for your non core code.
> + */
> +
> +irqreturn_t handle_wakeirq_thread(int wakeirq, void *dev_id);
> +int init_disabled_wakeirq(struct device *dev, unsigned int wakeirq,
> +			  unsigned long wakeflags);
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/
Tony Lindgren Nov. 14, 2014, 5:08 p.m. UTC | #16
* Felipe Balbi <balbi@ti.com> [141114 08:20]:
> On Thu, Nov 13, 2014 at 09:40:31AM -0800, Tony Lindgren wrote:
> > +/**
> > + *	handle_wakeirq_thread - call device runtime pm calls on wake-up interrupt
> > + *	@wakeirq: device specific wake-up interrupt
> > + *	@dev_id: struct device entry
> > + */
> > +irqreturn_t handle_wakeirq_thread(int wakeirq, void *dev_id)
> > +{
> > +	struct device *dev = dev_id;
> > +	irqreturn_t ret = IRQ_NONE;
> > +
> > +	if (pm_runtime_suspended(dev)) {
> > +		pm_runtime_mark_last_busy(dev);
> > +		pm_request_resume(dev);
> 
> this assumes that every driver's ->resume() callback has a:
> 
> 	if (pending)
> 		handle_pending_irqs();
> 
> which might not be very nice. I'd rather follow what Thomas suggested
> and always pass device irq so this can mark it pending. Keep in mind
> that we *don't* need a pm_runtime_get_sync() in every IRQ handler
> because of that. Adding it is but the easiest way to get things working
> and, quite frankly, very silly.
> 
> what we want is rather:
> 
> 	irqreturn_t my_handler(int irq, void *dev_id)
> 	{
> 		struct device *dev = dev_id;
> 
> 		if (pm_runtime_suspended(dev)) {
> 			pending_irqs_to_be_handled_from_runtime_resume = true;
> 			pm_runtime_get(dev);
> 			clear_irq_source(dev);
> 			return IRQ_HANDLED;
> 		}
> 	}
> 
> or something similar.

Yeah I'll take a look.
 
> > +		ret = IRQ_HANDLED;
> > +	}
> 
> you're not masking the wake irq here which means that when this handler
> returns, wake irq will be unmasked by core IRQ subsystem leaving it
> unmasked after ->resume().

It currently assumes the consumer driver takes care of it. But I get
your point, we should be able to automate this further.

And right now there's also a dependency on dev->power.irq_safe so
RPM_ASYNC is not set. And this all should ideally work even with runtime
PM not set as it's also needed for resume from suspend.
 
> you *know* you'll pass a NULL top half handler, why don't you just force
> IRQF_ONESHOT instead of erroring out ? Just add:
> 
> 	wakeflags |= IRQF_ONESHOT;
> 
> and get it over with :-)

Good point :)
 
Regards,

Tony
--
To unsubscribe from this list: send the line "unsubscribe devicetree" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Felipe Balbi Nov. 14, 2014, 5:21 p.m. UTC | #17
On Fri, Nov 14, 2014 at 09:08:17AM -0800, Tony Lindgren wrote:
> * Felipe Balbi <balbi@ti.com> [141114 08:20]:
> > On Thu, Nov 13, 2014 at 09:40:31AM -0800, Tony Lindgren wrote:
> > > +/**
> > > + *	handle_wakeirq_thread - call device runtime pm calls on wake-up interrupt
> > > + *	@wakeirq: device specific wake-up interrupt
> > > + *	@dev_id: struct device entry
> > > + */
> > > +irqreturn_t handle_wakeirq_thread(int wakeirq, void *dev_id)
> > > +{
> > > +	struct device *dev = dev_id;
> > > +	irqreturn_t ret = IRQ_NONE;
> > > +
> > > +	if (pm_runtime_suspended(dev)) {
> > > +		pm_runtime_mark_last_busy(dev);
> > > +		pm_request_resume(dev);
> > 
> > this assumes that every driver's ->resume() callback has a:
> > 
> > 	if (pending)
> > 		handle_pending_irqs();
> > 
> > which might not be very nice. I'd rather follow what Thomas suggested
> > and always pass device irq so this can mark it pending. Keep in mind
> > that we *don't* need a pm_runtime_get_sync() in every IRQ handler
> > because of that. Adding it is but the easiest way to get things working
> > and, quite frankly, very silly.
> > 
> > what we want is rather:
> > 
> > 	irqreturn_t my_handler(int irq, void *dev_id)
> > 	{
> > 		struct device *dev = dev_id;
> > 
> > 		if (pm_runtime_suspended(dev)) {
> > 			pending_irqs_to_be_handled_from_runtime_resume = true;
> > 			pm_runtime_get(dev);
> > 			clear_irq_source(dev);
> > 			return IRQ_HANDLED;
> > 		}
> > 	}
> > 
> > or something similar.
> 
> Yeah I'll take a look.

note that at the end of the day, the outcome will be pretty similar, but
with the added benefit that current users of pm_runtime_irq_safe() can
be updated as time allows, rather than in one go.

> > > +		ret = IRQ_HANDLED;
> > > +	}
> > 
> > you're not masking the wake irq here which means that when this handler
> > returns, wake irq will be unmasked by core IRQ subsystem leaving it
> > unmasked after ->resume().
> 
> It currently assumes the consumer driver takes care of it. But I get
> your point, we should be able to automate this further.

right, consumer calls disable_irq() and that's fine, should be there
anyway, but currently you still have a window where wakeirq will be
unmasked, if you look at irq_finalize_oneshot(), it's easy to see that
it will unmask wakeirq after ->thread_fn() runs:

686 static void irq_finalize_oneshot(struct irq_desc *desc,
687                                  struct irqaction *action)
688 {

[...]

726         if (!desc->threads_oneshot && !irqd_irq_disabled(&desc->irq_data) &&
727		irqd_irq_masked(&desc->irq_data))
728			unmask_threaded_irq(desc);
729 
730 out_unlock:
731	raw_spin_unlock_irq(&desc->lock);
732	chip_bus_sync_unlock(desc);
733 }

[...]

800 static irqreturn_t irq_thread_fn(struct irq_desc *desc,
801                 struct irqaction *action)
802 {
803         irqreturn_t ret;
804 
805         ret = action->thread_fn(action->irq, action->dev_id);
806         irq_finalize_oneshot(desc, action);
807         return ret;
808 }

so, ->thread_fn() returns and wakeirq is unmasked. You don't know when
your ->runtime_resume() will be scheduled, which means that wakeirq
could be unmasked for quite a while and it could refire depending on PCB
layout.

The problem should be minimal, but it's there anyway. Also, you know
that once the runtime is resumed, you don't want wakeirq to be unmasked,
so why not just mask it from handle_wake_irq() ?

Another thing, this assumes that drivers are using pm_runtime and,
furthermore, it assumes that drivers' ->runtime_resume() will properly
handle pending IRQs. This is definitely not the case for most drivers.

Note that quite a few of them aren't either using pm_runtime or have
blank/NULL runtime callbacks.

Due to these, I think Thomas' suggestion of setting device IRQ pending
is the best solution. That takes care of all cases. If drivers are using
pm_runtime, then they are required to check if device is still
pm_runtime_suspended from IRQ handler, for those who aren't, they can
assume device is ready to handle IRQs once the IRQ handler is called.

> And right now there's also a dependency on dev->power.irq_safe so
> RPM_ASYNC is not set. And this all should ideally work even with runtime
> PM not set as it's also needed for resume from suspend.

exactly.
diff mbox

Patch

diff --git a/drivers/mfd/palmas.c b/drivers/mfd/palmas.c
index 28cb048..de7d204 100644
--- a/drivers/mfd/palmas.c
+++ b/drivers/mfd/palmas.c
@@ -24,6 +24,7 @@ 
 #include <linux/mfd/core.h>
 #include <linux/mfd/palmas.h>
 #include <linux/of_device.h>
+#include <linux/of_irq.h>
 
 static const struct regmap_config palmas_regmap_config[PALMAS_NUM_CLIENTS] = {
 	{
@@ -326,6 +327,16 @@  static struct regmap_irq_chip tps65917_irq_chip = {
 			PALMAS_INT1_MASK),
 };
 
+static irqreturn_t palmas_wake_irq(int irq, void *_palmas)
+{
+	/*
+	 * Return Not handled so that interrupt is disabled.
+	 * Level event ensures that the event is eventually handled
+	 * by the appropriate chip handler already registered
+	 */
+	return IRQ_NONE;
+}
+
 int palmas_ext_control_req_config(struct palmas *palmas,
 	enum palmas_external_requestor_id id,  int ext_ctrl, bool enable)
 {
@@ -409,6 +420,7 @@  static void palmas_dt_to_pdata(struct i2c_client *i2c,
 		pdata->mux_from_pdata = 1;
 		pdata->pad2 = prop;
 	}
+	pdata->wakeirq = irq_of_parse_and_map(node, 1);
 
 	/* The default for this register is all masked */
 	ret = of_property_read_u32(node, "ti,power-ctrl", &prop);
@@ -521,6 +533,7 @@  static int palmas_i2c_probe(struct i2c_client *i2c,
 	i2c_set_clientdata(i2c, palmas);
 	palmas->dev = &i2c->dev;
 	palmas->irq = i2c->irq;
+	palmas->wakeirq = pdata->wakeirq;
 
 	match = of_match_device(of_palmas_match_tbl, &i2c->dev);
 
@@ -587,6 +600,25 @@  static int palmas_i2c_probe(struct i2c_client *i2c,
 	if (ret < 0)
 		goto err_i2c;
 
+	if (!palmas->wakeirq)
+		goto no_wake_irq;
+
+	ret = devm_request_irq(palmas->dev, palmas->wakeirq,
+			       palmas_wake_irq,
+			       pdata->irq_flags,
+			       dev_name(palmas->dev),
+			       &palmas);
+	if (ret < 0) {
+		dev_err(palmas->dev, "Invalid wakeirq(%d) (res: %d), skiping\n",
+			palmas->wakeirq, ret);
+		palmas->wakeirq = 0;
+	} else {
+		/* We use wakeirq only during suspend-resume path */
+		device_set_wakeup_capable(palmas->dev, true);
+		disable_irq_nosync(palmas->wakeirq);
+	}
+
+no_wake_irq:
 no_irq:
 	slave = PALMAS_BASE_TO_SLAVE(PALMAS_PU_PD_OD_BASE);
 	addr = PALMAS_BASE_TO_REG(PALMAS_PU_PD_OD_BASE,
@@ -706,6 +738,34 @@  static int palmas_i2c_remove(struct i2c_client *i2c)
 	return 0;
 }
 
+static int palmas_i2c_suspend(struct i2c_client *i2c,  pm_message_t mesg)
+{
+	struct palmas *palmas = i2c_get_clientdata(i2c);
+	struct device *dev = &i2c->dev;
+
+	if (!palmas->wakeirq)
+		return 0;
+
+	if (device_may_wakeup(dev))
+		enable_irq(palmas->wakeirq);
+
+	return 0;
+}
+
+static int palmas_i2c_resume(struct i2c_client *i2c)
+{
+	struct palmas *palmas = i2c_get_clientdata(i2c);
+	struct device *dev = &i2c->dev;
+
+	if (!palmas->wakeirq)
+		return 0;
+
+	if (device_may_wakeup(dev))
+		disable_irq_nosync(palmas->wakeirq);
+
+	return 0;
+}
+
 static const struct i2c_device_id palmas_i2c_id[] = {
 	{ "palmas", },
 	{ "twl6035", },
@@ -721,6 +781,8 @@  static struct i2c_driver palmas_i2c_driver = {
 		   .of_match_table = of_palmas_match_tbl,
 		   .owner = THIS_MODULE,
 	},
+	.suspend = palmas_i2c_suspend,
+	.resume = palmas_i2c_resume,
 	.probe = palmas_i2c_probe,
 	.remove = palmas_i2c_remove,
 	.id_table = palmas_i2c_id,
diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h
index fb0390a..e8cf4c2 100644
--- a/include/linux/mfd/palmas.h
+++ b/include/linux/mfd/palmas.h
@@ -75,6 +75,7 @@  struct palmas {
 	/* IRQ Data */
 	int irq;
 	u32 irq_mask;
+	int wakeirq;
 	struct mutex irq_lock;
 	struct regmap_irq_chip_data *irq_data;
 
@@ -377,6 +378,7 @@  struct palmas_clk_platform_data {
 
 struct palmas_platform_data {
 	int irq_flags;
+	int wakeirq;
 	int gpio_base;
 
 	/* bit value to be loaded to the POWER_CTRL register */