diff mbox

[2/5] xen/arm: Keep count of inflight interrupts

Message ID 1372115067-17071-3-git-send-email-julien.grall@citrix.com
State Changes Requested
Headers show

Commit Message

Julien Grall June 24, 2013, 11:04 p.m. UTC
From: Stefano Stabellini <stefano.stabellini@citrix.com>

For guest's timers (both virtual and physical), Xen will inject virtual
interrupt. Linux handles timer interrupt as:
  1) Receive the interrupt and ack it
  2) Handle the current event timer
  3) Set the next event timer
  4) EOI the interrupt

It's unlikely possible to reinject another interrupt before
the previous one is EOIed because the next deadline is shorter than the time
to execute code until EOI it.

Signed-off-by: Stefano Stabellini <stefano.stabellini@citrix.com>
Signed-off-by: Julien Grall <julien.grall@linaro.org>
---
 xen/arch/arm/gic.c           |   35 +++++++++++++++++++++++------------
 xen/arch/arm/vgic.c          |    1 +
 xen/include/asm-arm/domain.h |    2 ++
 3 files changed, 26 insertions(+), 12 deletions(-)

Comments

Ian Campbell June 25, 2013, 4:12 p.m. UTC | #1
On Tue, 2013-06-25 at 00:04 +0100, Julien Grall wrote:
> From: Stefano Stabellini <stefano.stabellini@citrix.com>
> 
> For guest's timers (both virtual and physical), Xen will inject virtual
> interrupt. Linux handles timer interrupt as:

We should be wary of developing things based on the way which Linux
happens to do things. On x86 we have several time modes, which can be
selected based upon guest behaviour (described in
docs/man/xl.cfg.pod.5). Do we need to do something similar here?

>   1) Receive the interrupt and ack it
>   2) Handle the current event timer
>   3) Set the next event timer
>   4) EOI the interrupt
> 
> It's unlikely possible to reinject another interrupt before

I can't parse this sentence. "unlikely to be possible" perhaps? but I'm
not sure if that is what you meant.

> the previous one is EOIed because the next deadline is shorter than the time
> to execute code until EOI it.

If we get into this situation once is there any danger that we will get
into it repeatedly and overflow the count?

Overall I'm not convinced this is the right approach to get the
behaviour we want. Isn't this interrupt level triggered, with the level
being determined by a comparison of two registers? IOW can't we
determine whether to retrigger the interrupt or not by examining the
state of our emulated versions of those registers? A generic mechanism
to callback into the appropriate emulator on EOI plus a little bit of
logic in the vtimer code is all it ought to take.

> 
> Signed-off-by: Stefano Stabellini <stefano.stabellini@citrix.com>
> Signed-off-by: Julien Grall <julien.grall@linaro.org>
> ---
>  xen/arch/arm/gic.c           |   35 +++++++++++++++++++++++------------
>  xen/arch/arm/vgic.c          |    1 +
>  xen/include/asm-arm/domain.h |    2 ++
>  3 files changed, 26 insertions(+), 12 deletions(-)
> 
> diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
> index 0fee3f2..21575df 100644
> --- a/xen/arch/arm/gic.c
> +++ b/xen/arch/arm/gic.c
> @@ -817,7 +817,7 @@ static void maintenance_interrupt(int irq, void *dev_id, struct cpu_user_regs *r
>  
>      while ((i = find_next_bit((const long unsigned int *) &eisr,
>                                64, i)) < 64) {
> -        struct pending_irq *p;
> +        struct pending_irq *p, *n;
>          int cpu, eoi;
>  
>          cpu = -1;
> @@ -826,13 +826,32 @@ static void maintenance_interrupt(int irq, void *dev_id, struct cpu_user_regs *r
>          spin_lock_irq(&gic.lock);
>          lr = GICH[GICH_LR + i];
>          virq = lr & GICH_LR_VIRTUAL_MASK;
> +
> +        p = irq_to_pending(v, virq);
> +        if ( p->desc != NULL ) {
> +            p->desc->status &= ~IRQ_INPROGRESS;
> +            /* Assume only one pcpu needs to EOI the irq */
> +            cpu = p->desc->arch.eoi_cpu;
> +            eoi = 1;
> +            pirq = p->desc->irq;
> +        }
> +        if ( !atomic_dec_and_test(&p->inflight_cnt) )
> +        {
> +            /* Physical IRQ can't be reinject */
> +            WARN_ON(p->desc != NULL);
> +            gic_set_lr(i, p->irq, GICH_LR_PENDING, p->priority);
> +            spin_unlock_irq(&gic.lock);
> +            i++;
> +            continue;
> +        }
> +
>          GICH[GICH_LR + i] = 0;
>          clear_bit(i, &this_cpu(lr_mask));
>  
>          if ( !list_empty(&v->arch.vgic.lr_pending) ) {
> -            p = list_entry(v->arch.vgic.lr_pending.next, typeof(*p), lr_queue);
> -            gic_set_lr(i, p->irq, GICH_LR_PENDING, p->priority);
> -            list_del_init(&p->lr_queue);
> +            n = list_entry(v->arch.vgic.lr_pending.next, typeof(*n), lr_queue);
> +            gic_set_lr(i, n->irq, GICH_LR_PENDING, n->priority);
> +            list_del_init(&n->lr_queue);
>              set_bit(i, &this_cpu(lr_mask));
>          } else {
>              gic_inject_irq_stop();
> @@ -840,14 +859,6 @@ static void maintenance_interrupt(int irq, void *dev_id, struct cpu_user_regs *r
>          spin_unlock_irq(&gic.lock);
>  
>          spin_lock_irq(&v->arch.vgic.lock);
> -        p = irq_to_pending(v, virq);
> -        if ( p->desc != NULL ) {
> -            p->desc->status &= ~IRQ_INPROGRESS;
> -            /* Assume only one pcpu needs to EOI the irq */
> -            cpu = p->desc->arch.eoi_cpu;
> -            eoi = 1;
> -            pirq = p->desc->irq;
> -        }
>          list_del_init(&p->inflight);
>          spin_unlock_irq(&v->arch.vgic.lock);
>  
> diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
> index 7eaccb7..2d91dce 100644
> --- a/xen/arch/arm/vgic.c
> +++ b/xen/arch/arm/vgic.c
> @@ -672,6 +672,7 @@ void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int irq, int virtual)
>  
>      spin_lock_irqsave(&v->arch.vgic.lock, flags);
>  
> +    atomic_inc(&n->inflight_cnt);
>      /* vcpu offline or irq already pending */
>      if (test_bit(_VPF_down, &v->pause_flags) || !list_empty(&n->inflight))
>      {
> diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
> index 339b6e6..fa0b776 100644
> --- a/xen/include/asm-arm/domain.h
> +++ b/xen/include/asm-arm/domain.h
> @@ -8,6 +8,7 @@
>  #include <asm/p2m.h>
>  #include <asm/vfp.h>
>  #include <public/hvm/params.h>
> +#include <asm/atomic.h>
>  
>  /* Represents state corresponding to a block of 32 interrupts */
>  struct vgic_irq_rank {
> @@ -21,6 +22,7 @@ struct vgic_irq_rank {
>  struct pending_irq
>  {
>      int irq;
> +    atomic_t inflight_cnt;
>      struct irq_desc *desc; /* only set it the irq corresponds to a physical irq */
>      uint8_t priority;
>      /* inflight is used to append instances of pending_irq to
Stefano Stabellini June 25, 2013, 4:58 p.m. UTC | #2
On Tue, 25 Jun 2013, Ian Campbell wrote:
> On Tue, 2013-06-25 at 00:04 +0100, Julien Grall wrote:
> > From: Stefano Stabellini <stefano.stabellini@citrix.com>
> > 
> > For guest's timers (both virtual and physical), Xen will inject virtual
> > interrupt. Linux handles timer interrupt as:
> 
> We should be wary of developing things based on the way which Linux
> happens to do things. On x86 we have several time modes, which can be
> selected based upon guest behaviour (described in
> docs/man/xl.cfg.pod.5). Do we need to do something similar here?
> 
> >   1) Receive the interrupt and ack it
> >   2) Handle the current event timer
> >   3) Set the next event timer
> >   4) EOI the interrupt
> > 
> > It's unlikely possible to reinject another interrupt before
> 
> I can't parse this sentence. "unlikely to be possible" perhaps? but I'm
> not sure if that is what you meant.
> 
> > the previous one is EOIed because the next deadline is shorter than the time
> > to execute code until EOI it.
> 
> If we get into this situation once is there any danger that we will get
> into it repeatedly and overflow the count?
> 
> Overall I'm not convinced this is the right approach to get the
> behaviour we want. Isn't this interrupt level triggered, with the level
> being determined by a comparison of two registers? IOW can't we
> determine whether to retrigger the interrupt or not by examining the
> state of our emulated versions of those registers? A generic mechanism
> to callback into the appropriate emulator on EOI plus a little bit of
> logic in the vtimer code is all it ought to take.

AFAICT this what could happen:

- vtimer fires
- xen mask the vtimer
- xen adds the vtimer to the LR for the guest
- xen EOIs the vtimer
- the guest receive the vtimer interrupt
- the guest set the next deadline in the past
- the guest enables the vtimer
## an unexpected vtimer interrupt is received by Xen but the current
## one is still being serviced 
- the guest eoi the vtimer

as a result the guest looses an interrupt.
Julien, is that correct?

If that is the case, this can only happen once, right?  In that case
rather than an atomic_t we could just have a bit in the status field I
proposed before. It should be enough for us to keep track of the case
when the irq is supposed to stay high even after the guest EOIs it. (Of
course that means that we need to re-inject it into the guest).


> > Signed-off-by: Stefano Stabellini <stefano.stabellini@citrix.com>
> > Signed-off-by: Julien Grall <julien.grall@linaro.org>
> > ---
> >  xen/arch/arm/gic.c           |   35 +++++++++++++++++++++++------------
> >  xen/arch/arm/vgic.c          |    1 +
> >  xen/include/asm-arm/domain.h |    2 ++
> >  3 files changed, 26 insertions(+), 12 deletions(-)
> > 
> > diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
> > index 0fee3f2..21575df 100644
> > --- a/xen/arch/arm/gic.c
> > +++ b/xen/arch/arm/gic.c
> > @@ -817,7 +817,7 @@ static void maintenance_interrupt(int irq, void *dev_id, struct cpu_user_regs *r
> >  
> >      while ((i = find_next_bit((const long unsigned int *) &eisr,
> >                                64, i)) < 64) {
> > -        struct pending_irq *p;
> > +        struct pending_irq *p, *n;
> >          int cpu, eoi;
> >  
> >          cpu = -1;
> > @@ -826,13 +826,32 @@ static void maintenance_interrupt(int irq, void *dev_id, struct cpu_user_regs *r
> >          spin_lock_irq(&gic.lock);
> >          lr = GICH[GICH_LR + i];
> >          virq = lr & GICH_LR_VIRTUAL_MASK;
> > +
> > +        p = irq_to_pending(v, virq);
> > +        if ( p->desc != NULL ) {
> > +            p->desc->status &= ~IRQ_INPROGRESS;

Now that I am thinking about this, shouldn't this be protected by taking
the desc->lock?

> > +            /* Assume only one pcpu needs to EOI the irq */
> > +            cpu = p->desc->arch.eoi_cpu;
> > +            eoi = 1;
> > +            pirq = p->desc->irq;
> > +        }
> > +        if ( !atomic_dec_and_test(&p->inflight_cnt) )
> > +        {
> > +            /* Physical IRQ can't be reinject */
> > +            WARN_ON(p->desc != NULL);
> > +            gic_set_lr(i, p->irq, GICH_LR_PENDING, p->priority);
> > +            spin_unlock_irq(&gic.lock);
> > +            i++;
> > +            continue;
> > +        }
> > +
> >          GICH[GICH_LR + i] = 0;
> >          clear_bit(i, &this_cpu(lr_mask));
> >  
> >          if ( !list_empty(&v->arch.vgic.lr_pending) ) {
> > -            p = list_entry(v->arch.vgic.lr_pending.next, typeof(*p), lr_queue);
> > -            gic_set_lr(i, p->irq, GICH_LR_PENDING, p->priority);
> > -            list_del_init(&p->lr_queue);
> > +            n = list_entry(v->arch.vgic.lr_pending.next, typeof(*n), lr_queue);
> > +            gic_set_lr(i, n->irq, GICH_LR_PENDING, n->priority);
> > +            list_del_init(&n->lr_queue);
> >              set_bit(i, &this_cpu(lr_mask));
> >          } else {
> >              gic_inject_irq_stop();
> > @@ -840,14 +859,6 @@ static void maintenance_interrupt(int irq, void *dev_id, struct cpu_user_regs *r
> >          spin_unlock_irq(&gic.lock);
> >  
> >          spin_lock_irq(&v->arch.vgic.lock);
> > -        p = irq_to_pending(v, virq);
> > -        if ( p->desc != NULL ) {
> > -            p->desc->status &= ~IRQ_INPROGRESS;
> > -            /* Assume only one pcpu needs to EOI the irq */
> > -            cpu = p->desc->arch.eoi_cpu;
> > -            eoi = 1;
> > -            pirq = p->desc->irq;
> > -        }
> >          list_del_init(&p->inflight);
> >          spin_unlock_irq(&v->arch.vgic.lock);
> >
Julien Grall June 25, 2013, 5:46 p.m. UTC | #3
On 06/25/2013 05:58 PM, Stefano Stabellini wrote:

> On Tue, 25 Jun 2013, Ian Campbell wrote:
>> On Tue, 2013-06-25 at 00:04 +0100, Julien Grall wrote:
>>> From: Stefano Stabellini <stefano.stabellini@citrix.com>
>>>
>>> For guest's timers (both virtual and physical), Xen will inject virtual
>>> interrupt. Linux handles timer interrupt as:
>>
>> We should be wary of developing things based on the way which Linux
>> happens to do things. On x86 we have several time modes, which can be
>> selected based upon guest behaviour (described in
>> docs/man/xl.cfg.pod.5). Do we need to do something similar here?
>>
>>>   1) Receive the interrupt and ack it
>>>   2) Handle the current event timer
>>>   3) Set the next event timer
>>>   4) EOI the interrupt
>>>
>>> It's unlikely possible to reinject another interrupt before
>>
>> I can't parse this sentence. "unlikely to be possible" perhaps? but I'm
>> not sure if that is what you meant.
>>
>>> the previous one is EOIed because the next deadline is shorter than the time
>>> to execute code until EOI it.
>>
>> If we get into this situation once is there any danger that we will get
>> into it repeatedly and overflow the count?
>>
>> Overall I'm not convinced this is the right approach to get the
>> behaviour we want. Isn't this interrupt level triggered, with the level
>> being determined by a comparison of two registers? IOW can't we
>> determine whether to retrigger the interrupt or not by examining the
>> state of our emulated versions of those registers? A generic mechanism
>> to callback into the appropriate emulator on EOI plus a little bit of
>> logic in the vtimer code is all it ought to take.
> 
> AFAICT this what could happen:
> 
> - vtimer fires
> - xen mask the vtimer
> - xen adds the vtimer to the LR for the guest
> - xen EOIs the vtimer
> - the guest receive the vtimer interrupt
> - the guest set the next deadline in the past
> - the guest enables the vtimer
> ## an unexpected vtimer interrupt is received by Xen but the current
> ## one is still being serviced 
> - the guest eoi the vtimer
> 
> as a result the guest looses an interrupt.
> Julien, is that correct?

Yes.

> If that is the case, this can only happen once, right?  In that case
> rather than an atomic_t we could just have a bit in the status field I
> proposed before. It should be enough for us to keep track of the case
> when the irq is supposed to stay high even after the guest EOIs it. (Of
> course that means that we need to re-inject it into the guest).


For the timer yes. I wonder, what happen if Xen receive an SGI (for
instance SCHEDULE ipi) twice, does Xen must re-inject it multiple times?

>>> Signed-off-by: Stefano Stabellini <stefano.stabellini@citrix.com>
>>> Signed-off-by: Julien Grall <julien.grall@linaro.org>
>>> ---
>>>  xen/arch/arm/gic.c           |   35 +++++++++++++++++++++++------------
>>>  xen/arch/arm/vgic.c          |    1 +
>>>  xen/include/asm-arm/domain.h |    2 ++
>>>  3 files changed, 26 insertions(+), 12 deletions(-)
>>>
>>> diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
>>> index 0fee3f2..21575df 100644
>>> --- a/xen/arch/arm/gic.c
>>> +++ b/xen/arch/arm/gic.c
>>> @@ -817,7 +817,7 @@ static void maintenance_interrupt(int irq, void *dev_id, struct cpu_user_regs *r
>>>  
>>>      while ((i = find_next_bit((const long unsigned int *) &eisr,
>>>                                64, i)) < 64) {
>>> -        struct pending_irq *p;
>>> +        struct pending_irq *p, *n;
>>>          int cpu, eoi;
>>>  
>>>          cpu = -1;
>>> @@ -826,13 +826,32 @@ static void maintenance_interrupt(int irq, void *dev_id, struct cpu_user_regs *r
>>>          spin_lock_irq(&gic.lock);
>>>          lr = GICH[GICH_LR + i];
>>>          virq = lr & GICH_LR_VIRTUAL_MASK;
>>> +
>>> +        p = irq_to_pending(v, virq);
>>> +        if ( p->desc != NULL ) {
>>> +            p->desc->status &= ~IRQ_INPROGRESS;
> 
> Now that I am thinking about this, shouldn't this be protected by taking
> the desc->lock?

Right. I don't think desc->lock is enough if we want to protect from
release_irq.
If this function is called, it will wait until IRQ_INPROGRESS is
removed. How about moving ~IRQ_INPROGRESS at then end of the block and
add a dmb() before?

>>> +            /* Assume only one pcpu needs to EOI the irq */
>>> +            cpu = p->desc->arch.eoi_cpu;
>>> +            eoi = 1;
>>> +            pirq = p->desc->irq;
>>> +        }
>>> +        if ( !atomic_dec_and_test(&p->inflight_cnt) )
>>> +        {
>>> +            /* Physical IRQ can't be reinject */
>>> +            WARN_ON(p->desc != NULL);
>>> +            gic_set_lr(i, p->irq, GICH_LR_PENDING, p->priority);
>>> +            spin_unlock_irq(&gic.lock);
>>> +            i++;
>>> +            continue;
>>> +        }
>>> +
>>>          GICH[GICH_LR + i] = 0;
>>>          clear_bit(i, &this_cpu(lr_mask));
>>>  
>>>          if ( !list_empty(&v->arch.vgic.lr_pending) ) {
>>> -            p = list_entry(v->arch.vgic.lr_pending.next, typeof(*p), lr_queue);
>>> -            gic_set_lr(i, p->irq, GICH_LR_PENDING, p->priority);
>>> -            list_del_init(&p->lr_queue);
>>> +            n = list_entry(v->arch.vgic.lr_pending.next, typeof(*n), lr_queue);
>>> +            gic_set_lr(i, n->irq, GICH_LR_PENDING, n->priority);
>>> +            list_del_init(&n->lr_queue);
>>>              set_bit(i, &this_cpu(lr_mask));
>>>          } else {
>>>              gic_inject_irq_stop();
>>> @@ -840,14 +859,6 @@ static void maintenance_interrupt(int irq, void *dev_id, struct cpu_user_regs *r
>>>          spin_unlock_irq(&gic.lock);
>>>  
>>>          spin_lock_irq(&v->arch.vgic.lock);
>>> -        p = irq_to_pending(v, virq);
>>> -        if ( p->desc != NULL ) {
>>> -            p->desc->status &= ~IRQ_INPROGRESS;
>>> -            /* Assume only one pcpu needs to EOI the irq */
>>> -            cpu = p->desc->arch.eoi_cpu;
>>> -            eoi = 1;
>>> -            pirq = p->desc->irq;
>>> -        }
>>>          list_del_init(&p->inflight);
>>>          spin_unlock_irq(&v->arch.vgic.lock);
>>>  
>
Stefano Stabellini June 25, 2013, 6:38 p.m. UTC | #4
On Tue, 25 Jun 2013, Julien Grall wrote:
> On 06/25/2013 05:58 PM, Stefano Stabellini wrote:
> 
> > On Tue, 25 Jun 2013, Ian Campbell wrote:
> >> On Tue, 2013-06-25 at 00:04 +0100, Julien Grall wrote:
> >>> From: Stefano Stabellini <stefano.stabellini@citrix.com>
> >>>
> >>> For guest's timers (both virtual and physical), Xen will inject virtual
> >>> interrupt. Linux handles timer interrupt as:
> >>
> >> We should be wary of developing things based on the way which Linux
> >> happens to do things. On x86 we have several time modes, which can be
> >> selected based upon guest behaviour (described in
> >> docs/man/xl.cfg.pod.5). Do we need to do something similar here?
> >>
> >>>   1) Receive the interrupt and ack it
> >>>   2) Handle the current event timer
> >>>   3) Set the next event timer
> >>>   4) EOI the interrupt
> >>>
> >>> It's unlikely possible to reinject another interrupt before
> >>
> >> I can't parse this sentence. "unlikely to be possible" perhaps? but I'm
> >> not sure if that is what you meant.
> >>
> >>> the previous one is EOIed because the next deadline is shorter than the time
> >>> to execute code until EOI it.
> >>
> >> If we get into this situation once is there any danger that we will get
> >> into it repeatedly and overflow the count?
> >>
> >> Overall I'm not convinced this is the right approach to get the
> >> behaviour we want. Isn't this interrupt level triggered, with the level
> >> being determined by a comparison of two registers? IOW can't we
> >> determine whether to retrigger the interrupt or not by examining the
> >> state of our emulated versions of those registers? A generic mechanism
> >> to callback into the appropriate emulator on EOI plus a little bit of
> >> logic in the vtimer code is all it ought to take.
> > 
> > AFAICT this what could happen:
> > 
> > - vtimer fires
> > - xen mask the vtimer
> > - xen adds the vtimer to the LR for the guest
> > - xen EOIs the vtimer
> > - the guest receive the vtimer interrupt
> > - the guest set the next deadline in the past
> > - the guest enables the vtimer
> > ## an unexpected vtimer interrupt is received by Xen but the current
> > ## one is still being serviced 
> > - the guest eoi the vtimer
> > 
> > as a result the guest looses an interrupt.
> > Julien, is that correct?
> 
> Yes.

Could you please add something like that to the commit message?


> > If that is the case, this can only happen once, right?  In that case
> > rather than an atomic_t we could just have a bit in the status field I
> > proposed before. It should be enough for us to keep track of the case
> > when the irq is supposed to stay high even after the guest EOIs it. (Of
> > course that means that we need to re-inject it into the guest).
> 
> 
> For the timer yes. I wonder, what happen if Xen receive an SGI (for
> instance SCHEDULE ipi) twice, does Xen must re-inject it multiple times?

I don't think it can happen with anything but the vtimer: in order for
the scenario above to happen it takes an irq that is EOId in the
hardware before the guest EOIs it.

SGIs are completely "emulated", there is not an hardware irq
corresponding to them. From the Xen point of view the SGI is inflight
exactly and only from the moment the first vcpu sends it, to the point
when the receiving vcpu EOIs it.


> >>> Signed-off-by: Stefano Stabellini <stefano.stabellini@citrix.com>
> >>> Signed-off-by: Julien Grall <julien.grall@linaro.org>
> >>> ---
> >>>  xen/arch/arm/gic.c           |   35 +++++++++++++++++++++++------------
> >>>  xen/arch/arm/vgic.c          |    1 +
> >>>  xen/include/asm-arm/domain.h |    2 ++
> >>>  3 files changed, 26 insertions(+), 12 deletions(-)
> >>>
> >>> diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
> >>> index 0fee3f2..21575df 100644
> >>> --- a/xen/arch/arm/gic.c
> >>> +++ b/xen/arch/arm/gic.c
> >>> @@ -817,7 +817,7 @@ static void maintenance_interrupt(int irq, void *dev_id, struct cpu_user_regs *r
> >>>  
> >>>      while ((i = find_next_bit((const long unsigned int *) &eisr,
> >>>                                64, i)) < 64) {
> >>> -        struct pending_irq *p;
> >>> +        struct pending_irq *p, *n;
> >>>          int cpu, eoi;
> >>>  
> >>>          cpu = -1;
> >>> @@ -826,13 +826,32 @@ static void maintenance_interrupt(int irq, void *dev_id, struct cpu_user_regs *r
> >>>          spin_lock_irq(&gic.lock);
> >>>          lr = GICH[GICH_LR + i];
> >>>          virq = lr & GICH_LR_VIRTUAL_MASK;
> >>> +
> >>> +        p = irq_to_pending(v, virq);
> >>> +        if ( p->desc != NULL ) {
> >>> +            p->desc->status &= ~IRQ_INPROGRESS;
> > 
> > Now that I am thinking about this, shouldn't this be protected by taking
> > the desc->lock?
> 
> Right. I don't think desc->lock is enough if we want to protect from
> release_irq.
> If this function is called, it will wait until IRQ_INPROGRESS is
> removed. How about moving ~IRQ_INPROGRESS at then end of the block and
> add a dmb() before?

that should work
Ian Campbell June 26, 2013, 10:58 a.m. UTC | #5
On Tue, 2013-06-25 at 17:58 +0100, Stefano Stabellini wrote:
> On Tue, 25 Jun 2013, Ian Campbell wrote:
> > On Tue, 2013-06-25 at 00:04 +0100, Julien Grall wrote:
> > > From: Stefano Stabellini <stefano.stabellini@citrix.com>
> > > 
> > > For guest's timers (both virtual and physical), Xen will inject virtual
> > > interrupt. Linux handles timer interrupt as:
> > 
> > We should be wary of developing things based on the way which Linux
> > happens to do things. On x86 we have several time modes, which can be
> > selected based upon guest behaviour (described in
> > docs/man/xl.cfg.pod.5). Do we need to do something similar here?
> > 
> > >   1) Receive the interrupt and ack it
> > >   2) Handle the current event timer
> > >   3) Set the next event timer
> > >   4) EOI the interrupt
> > > 
> > > It's unlikely possible to reinject another interrupt before
> > 
> > I can't parse this sentence. "unlikely to be possible" perhaps? but I'm
> > not sure if that is what you meant.
> > 
> > > the previous one is EOIed because the next deadline is shorter than the time
> > > to execute code until EOI it.
> > 
> > If we get into this situation once is there any danger that we will get
> > into it repeatedly and overflow the count?
> > 
> > Overall I'm not convinced this is the right approach to get the
> > behaviour we want. Isn't this interrupt level triggered, with the level
> > being determined by a comparison of two registers? IOW can't we
> > determine whether to retrigger the interrupt or not by examining the
> > state of our emulated versions of those registers? A generic mechanism
> > to callback into the appropriate emulator on EOI plus a little bit of
> > logic in the vtimer code is all it ought to take.
> 
> AFAICT this what could happen:
> 
> - vtimer fires
> - xen mask the vtimer
> - xen adds the vtimer to the LR for the guest
> - xen EOIs the vtimer

This step is Xen deprioritises the interrupt (by writing to the GICD_DIR
register)...

> - the guest receive the vtimer interrupt
> - the guest set the next deadline in the past
> - the guest enables the vtimer
> ## an unexpected vtimer interrupt is received by Xen but the current
> ## one is still being serviced 
> - the guest eoi the vtimer

... and the actual Xen EOI only happens here in response to the
maintenance interrupt resulting from the guests EOI.

(or maybe I've misunderstood what you mean by "EOI the vtimer")

Ian.
Ian Campbell June 26, 2013, 10:59 a.m. UTC | #6
On Tue, 2013-06-25 at 19:38 +0100, Stefano Stabellini wrote:
> > > If that is the case, this can only happen once, right?  In that case
> > > rather than an atomic_t we could just have a bit in the status field I
> > > proposed before. It should be enough for us to keep track of the case
> > > when the irq is supposed to stay high even after the guest EOIs it. (Of
> > > course that means that we need to re-inject it into the guest).
> > 
> > 
> > For the timer yes. I wonder, what happen if Xen receive an SGI (for
> > instance SCHEDULE ipi) twice, does Xen must re-inject it multiple times?
> 
> I don't think it can happen with anything but the vtimer: in order for
> the scenario above to happen it takes an irq that is EOId in the
> hardware before the guest EOIs it.
> 
> SGIs are completely "emulated", there is not an hardware irq
> corresponding to them. From the Xen point of view the SGI is inflight
> exactly and only from the moment the first vcpu sends it, to the point
> when the receiving vcpu EOIs it.

Are we talking about real SGIs (passed by Xen between physical
processors) or fake SGIs (emulated between virtual processors)?

Ian.
Stefano Stabellini June 26, 2013, 11:08 a.m. UTC | #7
On Wed, 26 Jun 2013, Ian Campbell wrote:
> On Tue, 2013-06-25 at 17:58 +0100, Stefano Stabellini wrote:
> > On Tue, 25 Jun 2013, Ian Campbell wrote:
> > > On Tue, 2013-06-25 at 00:04 +0100, Julien Grall wrote:
> > > > From: Stefano Stabellini <stefano.stabellini@citrix.com>
> > > > 
> > > > For guest's timers (both virtual and physical), Xen will inject virtual
> > > > interrupt. Linux handles timer interrupt as:
> > > 
> > > We should be wary of developing things based on the way which Linux
> > > happens to do things. On x86 we have several time modes, which can be
> > > selected based upon guest behaviour (described in
> > > docs/man/xl.cfg.pod.5). Do we need to do something similar here?
> > > 
> > > >   1) Receive the interrupt and ack it
> > > >   2) Handle the current event timer
> > > >   3) Set the next event timer
> > > >   4) EOI the interrupt
> > > > 
> > > > It's unlikely possible to reinject another interrupt before
> > > 
> > > I can't parse this sentence. "unlikely to be possible" perhaps? but I'm
> > > not sure if that is what you meant.
> > > 
> > > > the previous one is EOIed because the next deadline is shorter than the time
> > > > to execute code until EOI it.
> > > 
> > > If we get into this situation once is there any danger that we will get
> > > into it repeatedly and overflow the count?
> > > 
> > > Overall I'm not convinced this is the right approach to get the
> > > behaviour we want. Isn't this interrupt level triggered, with the level
> > > being determined by a comparison of two registers? IOW can't we
> > > determine whether to retrigger the interrupt or not by examining the
> > > state of our emulated versions of those registers? A generic mechanism
> > > to callback into the appropriate emulator on EOI plus a little bit of
> > > logic in the vtimer code is all it ought to take.
> > 
> > AFAICT this what could happen:
> > 
> > - vtimer fires
> > - xen mask the vtimer
> > - xen adds the vtimer to the LR for the guest
> > - xen EOIs the vtimer
> 
> This step is Xen deprioritises the interrupt (by writing to the GICD_DIR
> register)...
> 
> > - the guest receive the vtimer interrupt
> > - the guest set the next deadline in the past
> > - the guest enables the vtimer
> > ## an unexpected vtimer interrupt is received by Xen but the current
> > ## one is still being serviced 
> > - the guest eoi the vtimer
> 
> ... and the actual Xen EOI only happens here in response to the
> maintenance interrupt resulting from the guests EOI.
> 
> (or maybe I've misunderstood what you mean by "EOI the vtimer")

The vtimer is a Xen irq that injects an irq into the guest from the Xen
interrupt handler. It's not a guest irq.
See xen/arch/arm/time.c:vtimer_interrupt.
Stefano Stabellini June 26, 2013, 11:10 a.m. UTC | #8
On Wed, 26 Jun 2013, Ian Campbell wrote:
> On Tue, 2013-06-25 at 19:38 +0100, Stefano Stabellini wrote:
> > > > If that is the case, this can only happen once, right?  In that case
> > > > rather than an atomic_t we could just have a bit in the status field I
> > > > proposed before. It should be enough for us to keep track of the case
> > > > when the irq is supposed to stay high even after the guest EOIs it. (Of
> > > > course that means that we need to re-inject it into the guest).
> > > 
> > > 
> > > For the timer yes. I wonder, what happen if Xen receive an SGI (for
> > > instance SCHEDULE ipi) twice, does Xen must re-inject it multiple times?
> > 
> > I don't think it can happen with anything but the vtimer: in order for
> > the scenario above to happen it takes an irq that is EOId in the
> > hardware before the guest EOIs it.
> > 
> > SGIs are completely "emulated", there is not an hardware irq
> > corresponding to them. From the Xen point of view the SGI is inflight
> > exactly and only from the moment the first vcpu sends it, to the point
> > when the receiving vcpu EOIs it.
> 
> Are we talking about real SGIs (passed by Xen between physical
> processors) or fake SGIs (emulated between virtual processors)?

I was talking about fake SGIs
Ian Campbell June 26, 2013, 11:15 a.m. UTC | #9
On Wed, 2013-06-26 at 12:08 +0100, Stefano Stabellini wrote:
> On Wed, 26 Jun 2013, Ian Campbell wrote:
> > On Tue, 2013-06-25 at 17:58 +0100, Stefano Stabellini wrote:
> > > On Tue, 25 Jun 2013, Ian Campbell wrote:
> > > > On Tue, 2013-06-25 at 00:04 +0100, Julien Grall wrote:
> > > > > From: Stefano Stabellini <stefano.stabellini@citrix.com>
> > > > > 
> > > > > For guest's timers (both virtual and physical), Xen will inject virtual
> > > > > interrupt. Linux handles timer interrupt as:
> > > > 
> > > > We should be wary of developing things based on the way which Linux
> > > > happens to do things. On x86 we have several time modes, which can be
> > > > selected based upon guest behaviour (described in
> > > > docs/man/xl.cfg.pod.5). Do we need to do something similar here?
> > > > 
> > > > >   1) Receive the interrupt and ack it
> > > > >   2) Handle the current event timer
> > > > >   3) Set the next event timer
> > > > >   4) EOI the interrupt
> > > > > 
> > > > > It's unlikely possible to reinject another interrupt before
> > > > 
> > > > I can't parse this sentence. "unlikely to be possible" perhaps? but I'm
> > > > not sure if that is what you meant.
> > > > 
> > > > > the previous one is EOIed because the next deadline is shorter than the time
> > > > > to execute code until EOI it.
> > > > 
> > > > If we get into this situation once is there any danger that we will get
> > > > into it repeatedly and overflow the count?
> > > > 
> > > > Overall I'm not convinced this is the right approach to get the
> > > > behaviour we want. Isn't this interrupt level triggered, with the level
> > > > being determined by a comparison of two registers? IOW can't we
> > > > determine whether to retrigger the interrupt or not by examining the
> > > > state of our emulated versions of those registers? A generic mechanism
> > > > to callback into the appropriate emulator on EOI plus a little bit of
> > > > logic in the vtimer code is all it ought to take.
> > > 
> > > AFAICT this what could happen:
> > > 
> > > - vtimer fires
> > > - xen mask the vtimer
> > > - xen adds the vtimer to the LR for the guest
> > > - xen EOIs the vtimer
> > 
> > This step is Xen deprioritises the interrupt (by writing to the GICD_DIR
> > register)...
> > 
> > > - the guest receive the vtimer interrupt
> > > - the guest set the next deadline in the past
> > > - the guest enables the vtimer
> > > ## an unexpected vtimer interrupt is received by Xen but the current
> > > ## one is still being serviced 
> > > - the guest eoi the vtimer
> > 
> > ... and the actual Xen EOI only happens here in response to the
> > maintenance interrupt resulting from the guests EOI.
> > 
> > (or maybe I've misunderstood what you mean by "EOI the vtimer")
> 
> The vtimer is a Xen irq that injects an irq into the guest from the Xen
> interrupt handler. It's not a guest irq.
> See xen/arch/arm/time.c:vtimer_interrupt.

OK, so what does it mean to "EOI" that timer?

Ian.
Ian Campbell June 26, 2013, 11:16 a.m. UTC | #10
On Wed, 2013-06-26 at 12:10 +0100, Stefano Stabellini wrote:
> On Wed, 26 Jun 2013, Ian Campbell wrote:
> > On Tue, 2013-06-25 at 19:38 +0100, Stefano Stabellini wrote:
> > > > > If that is the case, this can only happen once, right?  In that case
> > > > > rather than an atomic_t we could just have a bit in the status field I
> > > > > proposed before. It should be enough for us to keep track of the case
> > > > > when the irq is supposed to stay high even after the guest EOIs it. (Of
> > > > > course that means that we need to re-inject it into the guest).
> > > > 
> > > > 
> > > > For the timer yes. I wonder, what happen if Xen receive an SGI (for
> > > > instance SCHEDULE ipi) twice, does Xen must re-inject it multiple times?
> > > 
> > > I don't think it can happen with anything but the vtimer: in order for
> > > the scenario above to happen it takes an irq that is EOId in the
> > > hardware before the guest EOIs it.
> > > 
> > > SGIs are completely "emulated", there is not an hardware irq
> > > corresponding to them. From the Xen point of view the SGI is inflight
> > > exactly and only from the moment the first vcpu sends it, to the point
> > > when the receiving vcpu EOIs it.
> > 
> > Are we talking about real SGIs (passed by Xen between physical
> > processors) or fake SGIs (emulated between virtual processors)?
> 
> I was talking about fake SGIs

I think Julien was talking about real ones (the Xen schedule IPI).

Ian.
Stefano Stabellini June 26, 2013, 11:23 a.m. UTC | #11
On Wed, 26 Jun 2013, Ian Campbell wrote:
> On Wed, 2013-06-26 at 12:08 +0100, Stefano Stabellini wrote:
> > On Wed, 26 Jun 2013, Ian Campbell wrote:
> > > On Tue, 2013-06-25 at 17:58 +0100, Stefano Stabellini wrote:
> > > > On Tue, 25 Jun 2013, Ian Campbell wrote:
> > > > > On Tue, 2013-06-25 at 00:04 +0100, Julien Grall wrote:
> > > > > > From: Stefano Stabellini <stefano.stabellini@citrix.com>
> > > > > > 
> > > > > > For guest's timers (both virtual and physical), Xen will inject virtual
> > > > > > interrupt. Linux handles timer interrupt as:
> > > > > 
> > > > > We should be wary of developing things based on the way which Linux
> > > > > happens to do things. On x86 we have several time modes, which can be
> > > > > selected based upon guest behaviour (described in
> > > > > docs/man/xl.cfg.pod.5). Do we need to do something similar here?
> > > > > 
> > > > > >   1) Receive the interrupt and ack it
> > > > > >   2) Handle the current event timer
> > > > > >   3) Set the next event timer
> > > > > >   4) EOI the interrupt
> > > > > > 
> > > > > > It's unlikely possible to reinject another interrupt before
> > > > > 
> > > > > I can't parse this sentence. "unlikely to be possible" perhaps? but I'm
> > > > > not sure if that is what you meant.
> > > > > 
> > > > > > the previous one is EOIed because the next deadline is shorter than the time
> > > > > > to execute code until EOI it.
> > > > > 
> > > > > If we get into this situation once is there any danger that we will get
> > > > > into it repeatedly and overflow the count?
> > > > > 
> > > > > Overall I'm not convinced this is the right approach to get the
> > > > > behaviour we want. Isn't this interrupt level triggered, with the level
> > > > > being determined by a comparison of two registers? IOW can't we
> > > > > determine whether to retrigger the interrupt or not by examining the
> > > > > state of our emulated versions of those registers? A generic mechanism
> > > > > to callback into the appropriate emulator on EOI plus a little bit of
> > > > > logic in the vtimer code is all it ought to take.
> > > > 
> > > > AFAICT this what could happen:
> > > > 
> > > > - vtimer fires
> > > > - xen mask the vtimer
> > > > - xen adds the vtimer to the LR for the guest
> > > > - xen EOIs the vtimer
> > > 
> > > This step is Xen deprioritises the interrupt (by writing to the GICD_DIR
> > > register)...
> > > 
> > > > - the guest receive the vtimer interrupt
> > > > - the guest set the next deadline in the past
> > > > - the guest enables the vtimer
> > > > ## an unexpected vtimer interrupt is received by Xen but the current
> > > > ## one is still being serviced 
> > > > - the guest eoi the vtimer
> > > 
> > > ... and the actual Xen EOI only happens here in response to the
> > > maintenance interrupt resulting from the guests EOI.
> > > 
> > > (or maybe I've misunderstood what you mean by "EOI the vtimer")
> > 
> > The vtimer is a Xen irq that injects an irq into the guest from the Xen
> > interrupt handler. It's not a guest irq.
> > See xen/arch/arm/time.c:vtimer_interrupt.
> 
> OK, so what does it mean to "EOI" that timer?
 
By "xen EOIs the vtimer" I meant Xen EOIs the vtimer interrupt in the
real hardware.
By "the guest eoi the vtimer" I meant the guest EOIs the virtual
interrupt that we injected into the guest.
Ian Campbell June 26, 2013, 11:41 a.m. UTC | #12
On Wed, 2013-06-26 at 12:23 +0100, Stefano Stabellini wrote:
> On Wed, 26 Jun 2013, Ian Campbell wrote:
> > On Wed, 2013-06-26 at 12:08 +0100, Stefano Stabellini wrote:
> > > On Wed, 26 Jun 2013, Ian Campbell wrote:
> > > > On Tue, 2013-06-25 at 17:58 +0100, Stefano Stabellini wrote:
> > > > > On Tue, 25 Jun 2013, Ian Campbell wrote:
> > > > > > On Tue, 2013-06-25 at 00:04 +0100, Julien Grall wrote:
> > > > > > > From: Stefano Stabellini <stefano.stabellini@citrix.com>
> > > > > > > 
> > > > > > > For guest's timers (both virtual and physical), Xen will inject virtual
> > > > > > > interrupt. Linux handles timer interrupt as:
> > > > > > 
> > > > > > We should be wary of developing things based on the way which Linux
> > > > > > happens to do things. On x86 we have several time modes, which can be
> > > > > > selected based upon guest behaviour (described in
> > > > > > docs/man/xl.cfg.pod.5). Do we need to do something similar here?
> > > > > > 
> > > > > > >   1) Receive the interrupt and ack it
> > > > > > >   2) Handle the current event timer
> > > > > > >   3) Set the next event timer
> > > > > > >   4) EOI the interrupt
> > > > > > > 
> > > > > > > It's unlikely possible to reinject another interrupt before
> > > > > > 
> > > > > > I can't parse this sentence. "unlikely to be possible" perhaps? but I'm
> > > > > > not sure if that is what you meant.
> > > > > > 
> > > > > > > the previous one is EOIed because the next deadline is shorter than the time
> > > > > > > to execute code until EOI it.
> > > > > > 
> > > > > > If we get into this situation once is there any danger that we will get
> > > > > > into it repeatedly and overflow the count?
> > > > > > 
> > > > > > Overall I'm not convinced this is the right approach to get the
> > > > > > behaviour we want. Isn't this interrupt level triggered, with the level
> > > > > > being determined by a comparison of two registers? IOW can't we
> > > > > > determine whether to retrigger the interrupt or not by examining the
> > > > > > state of our emulated versions of those registers? A generic mechanism
> > > > > > to callback into the appropriate emulator on EOI plus a little bit of
> > > > > > logic in the vtimer code is all it ought to take.
> > > > > 
> > > > > AFAICT this what could happen:
> > > > > 
> > > > > - vtimer fires
> > > > > - xen mask the vtimer
> > > > > - xen adds the vtimer to the LR for the guest
> > > > > - xen EOIs the vtimer
> > > > 
> > > > This step is Xen deprioritises the interrupt (by writing to the GICD_DIR
> > > > register)...
> > > > 
> > > > > - the guest receive the vtimer interrupt
> > > > > - the guest set the next deadline in the past
> > > > > - the guest enables the vtimer
> > > > > ## an unexpected vtimer interrupt is received by Xen but the current
> > > > > ## one is still being serviced 
> > > > > - the guest eoi the vtimer
> > > > 
> > > > ... and the actual Xen EOI only happens here in response to the
> > > > maintenance interrupt resulting from the guests EOI.
> > > > 
> > > > (or maybe I've misunderstood what you mean by "EOI the vtimer")
> > > 
> > > The vtimer is a Xen irq that injects an irq into the guest from the Xen
> > > interrupt handler. It's not a guest irq.
> > > See xen/arch/arm/time.c:vtimer_interrupt.
> > 
> > OK, so what does it mean to "EOI" that timer?
>  
> By "xen EOIs the vtimer" I meant Xen EOIs the vtimer interrupt in the
> real hardware.

Oh right, for some reason I thought this was driven off Xen timer
infrastructure, rather than being an actual interrupt.

In that case my comments about deprioritisation and actual EOI happening
later are correct, aren't they?

> By "the guest eoi the vtimer" I meant the guest EOIs the virtual
> interrupt that we injected into the guest.
Stefano Stabellini June 26, 2013, 11:50 a.m. UTC | #13
On Wed, 26 Jun 2013, Ian Campbell wrote:
> On Wed, 2013-06-26 at 12:23 +0100, Stefano Stabellini wrote:
> > On Wed, 26 Jun 2013, Ian Campbell wrote:
> > > On Wed, 2013-06-26 at 12:08 +0100, Stefano Stabellini wrote:
> > > > On Wed, 26 Jun 2013, Ian Campbell wrote:
> > > > > On Tue, 2013-06-25 at 17:58 +0100, Stefano Stabellini wrote:
> > > > > > On Tue, 25 Jun 2013, Ian Campbell wrote:
> > > > > > > On Tue, 2013-06-25 at 00:04 +0100, Julien Grall wrote:
> > > > > > > > From: Stefano Stabellini <stefano.stabellini@citrix.com>
> > > > > > > > 
> > > > > > > > For guest's timers (both virtual and physical), Xen will inject virtual
> > > > > > > > interrupt. Linux handles timer interrupt as:
> > > > > > > 
> > > > > > > We should be wary of developing things based on the way which Linux
> > > > > > > happens to do things. On x86 we have several time modes, which can be
> > > > > > > selected based upon guest behaviour (described in
> > > > > > > docs/man/xl.cfg.pod.5). Do we need to do something similar here?
> > > > > > > 
> > > > > > > >   1) Receive the interrupt and ack it
> > > > > > > >   2) Handle the current event timer
> > > > > > > >   3) Set the next event timer
> > > > > > > >   4) EOI the interrupt
> > > > > > > > 
> > > > > > > > It's unlikely possible to reinject another interrupt before
> > > > > > > 
> > > > > > > I can't parse this sentence. "unlikely to be possible" perhaps? but I'm
> > > > > > > not sure if that is what you meant.
> > > > > > > 
> > > > > > > > the previous one is EOIed because the next deadline is shorter than the time
> > > > > > > > to execute code until EOI it.
> > > > > > > 
> > > > > > > If we get into this situation once is there any danger that we will get
> > > > > > > into it repeatedly and overflow the count?
> > > > > > > 
> > > > > > > Overall I'm not convinced this is the right approach to get the
> > > > > > > behaviour we want. Isn't this interrupt level triggered, with the level
> > > > > > > being determined by a comparison of two registers? IOW can't we
> > > > > > > determine whether to retrigger the interrupt or not by examining the
> > > > > > > state of our emulated versions of those registers? A generic mechanism
> > > > > > > to callback into the appropriate emulator on EOI plus a little bit of
> > > > > > > logic in the vtimer code is all it ought to take.
> > > > > > 
> > > > > > AFAICT this what could happen:
> > > > > > 
> > > > > > - vtimer fires
> > > > > > - xen mask the vtimer
> > > > > > - xen adds the vtimer to the LR for the guest
> > > > > > - xen EOIs the vtimer
> > > > > 
> > > > > This step is Xen deprioritises the interrupt (by writing to the GICD_DIR
> > > > > register)...
> > > > > 
> > > > > > - the guest receive the vtimer interrupt
> > > > > > - the guest set the next deadline in the past
> > > > > > - the guest enables the vtimer
> > > > > > ## an unexpected vtimer interrupt is received by Xen but the current
> > > > > > ## one is still being serviced 
> > > > > > - the guest eoi the vtimer
> > > > > 
> > > > > ... and the actual Xen EOI only happens here in response to the
> > > > > maintenance interrupt resulting from the guests EOI.
> > > > > 
> > > > > (or maybe I've misunderstood what you mean by "EOI the vtimer")
> > > > 
> > > > The vtimer is a Xen irq that injects an irq into the guest from the Xen
> > > > interrupt handler. It's not a guest irq.
> > > > See xen/arch/arm/time.c:vtimer_interrupt.
> > > 
> > > OK, so what does it mean to "EOI" that timer?
> >  
> > By "xen EOIs the vtimer" I meant Xen EOIs the vtimer interrupt in the
> > real hardware.
> 
> Oh right, for some reason I thought this was driven off Xen timer
> infrastructure, rather than being an actual interrupt.
> 
> In that case my comments about deprioritisation and actual EOI happening
> later are correct, aren't they?

No, they are not. This is the full sequence of events, including
deprioritization and EOI:

- the vtimer interrupt fires
- xen deprioritizes the vtimer interrupt
- xen masks the vtimer interrupt
- xen adds the vtimer interrupt to the LR for the guest
- xen EOIs the vtimer interrupt
- the guest receives the vtimer interrupt
- the guest deprioritizes the vtimer interrupt
- the guest set the next deadline in the past
- the guest enables the vtimer
## an unexpected vtimer interrupt is received by Xen but the current
## one is still being serviced 
- the guest EOIs the vtimer interrupt
Ian Campbell June 26, 2013, 11:57 a.m. UTC | #14
On Wed, 2013-06-26 at 12:50 +0100, Stefano Stabellini wrote:
> On Wed, 26 Jun 2013, Ian Campbell wrote:

> > In that case my comments about deprioritisation and actual EOI happening
> > later are correct, aren't they?
> 
> No, they are not. This is the full sequence of events, including
> deprioritization and EOI:
> 
> - the vtimer interrupt fires
> - xen deprioritizes the vtimer interrupt
> - xen masks the vtimer interrupt
> - xen adds the vtimer interrupt to the LR for the guest
> - xen EOIs the vtimer interrupt
> - the guest receives the vtimer interrupt
> - the guest deprioritizes the vtimer interrupt
> - the guest set the next deadline in the past
> - the guest enables the vtimer
> ## an unexpected vtimer interrupt is received by Xen but the current
> ## one is still being serviced 
> - the guest EOIs the vtimer interrupt

Is this particular to the handling of the vtimer interrupt? In the
normal case an interrupt which is injected into the guest isn't EOIed by
Xen until the maintenance interrupt, isn't it? But the vtimer is special
because it isn't routed to the guest even though we do so via an
orthogonal mechanism?

Perhaps the solution here is a third type of interrupt alongside
"for-xen" and "for-a-specific-guest" which is "for-current-guest"?

Ian.
Stefano Stabellini June 26, 2013, 2:02 p.m. UTC | #15
On Wed, 26 Jun 2013, Ian Campbell wrote:
> On Wed, 2013-06-26 at 12:50 +0100, Stefano Stabellini wrote:
> > On Wed, 26 Jun 2013, Ian Campbell wrote:
> 
> > > In that case my comments about deprioritisation and actual EOI happening
> > > later are correct, aren't they?
> > 
> > No, they are not. This is the full sequence of events, including
> > deprioritization and EOI:
> > 
> > - the vtimer interrupt fires
> > - xen deprioritizes the vtimer interrupt
> > - xen masks the vtimer interrupt
> > - xen adds the vtimer interrupt to the LR for the guest
> > - xen EOIs the vtimer interrupt
> > - the guest receives the vtimer interrupt
> > - the guest deprioritizes the vtimer interrupt
> > - the guest set the next deadline in the past
> > - the guest enables the vtimer
> > ## an unexpected vtimer interrupt is received by Xen but the current
> > ## one is still being serviced 
> > - the guest EOIs the vtimer interrupt
> 
> Is this particular to the handling of the vtimer interrupt? 

Yes


> In the
> normal case an interrupt which is injected into the guest isn't EOIed by
> Xen until the maintenance interrupt, isn't it?

Right


> But the vtimer is special
> because it isn't routed to the guest even though we do so via an
> orthogonal mechanism?

Yes


> Perhaps the solution here is a third type of interrupt alongside
> "for-xen" and "for-a-specific-guest" which is "for-current-guest"?

We could try but I did implement this strategy first and I ran into some
serious problems. It could have been because the vtimer emulation in the
FastModel had issues.
diff mbox

Patch

diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
index 0fee3f2..21575df 100644
--- a/xen/arch/arm/gic.c
+++ b/xen/arch/arm/gic.c
@@ -817,7 +817,7 @@  static void maintenance_interrupt(int irq, void *dev_id, struct cpu_user_regs *r
 
     while ((i = find_next_bit((const long unsigned int *) &eisr,
                               64, i)) < 64) {
-        struct pending_irq *p;
+        struct pending_irq *p, *n;
         int cpu, eoi;
 
         cpu = -1;
@@ -826,13 +826,32 @@  static void maintenance_interrupt(int irq, void *dev_id, struct cpu_user_regs *r
         spin_lock_irq(&gic.lock);
         lr = GICH[GICH_LR + i];
         virq = lr & GICH_LR_VIRTUAL_MASK;
+
+        p = irq_to_pending(v, virq);
+        if ( p->desc != NULL ) {
+            p->desc->status &= ~IRQ_INPROGRESS;
+            /* Assume only one pcpu needs to EOI the irq */
+            cpu = p->desc->arch.eoi_cpu;
+            eoi = 1;
+            pirq = p->desc->irq;
+        }
+        if ( !atomic_dec_and_test(&p->inflight_cnt) )
+        {
+            /* Physical IRQ can't be reinject */
+            WARN_ON(p->desc != NULL);
+            gic_set_lr(i, p->irq, GICH_LR_PENDING, p->priority);
+            spin_unlock_irq(&gic.lock);
+            i++;
+            continue;
+        }
+
         GICH[GICH_LR + i] = 0;
         clear_bit(i, &this_cpu(lr_mask));
 
         if ( !list_empty(&v->arch.vgic.lr_pending) ) {
-            p = list_entry(v->arch.vgic.lr_pending.next, typeof(*p), lr_queue);
-            gic_set_lr(i, p->irq, GICH_LR_PENDING, p->priority);
-            list_del_init(&p->lr_queue);
+            n = list_entry(v->arch.vgic.lr_pending.next, typeof(*n), lr_queue);
+            gic_set_lr(i, n->irq, GICH_LR_PENDING, n->priority);
+            list_del_init(&n->lr_queue);
             set_bit(i, &this_cpu(lr_mask));
         } else {
             gic_inject_irq_stop();
@@ -840,14 +859,6 @@  static void maintenance_interrupt(int irq, void *dev_id, struct cpu_user_regs *r
         spin_unlock_irq(&gic.lock);
 
         spin_lock_irq(&v->arch.vgic.lock);
-        p = irq_to_pending(v, virq);
-        if ( p->desc != NULL ) {
-            p->desc->status &= ~IRQ_INPROGRESS;
-            /* Assume only one pcpu needs to EOI the irq */
-            cpu = p->desc->arch.eoi_cpu;
-            eoi = 1;
-            pirq = p->desc->irq;
-        }
         list_del_init(&p->inflight);
         spin_unlock_irq(&v->arch.vgic.lock);
 
diff --git a/xen/arch/arm/vgic.c b/xen/arch/arm/vgic.c
index 7eaccb7..2d91dce 100644
--- a/xen/arch/arm/vgic.c
+++ b/xen/arch/arm/vgic.c
@@ -672,6 +672,7 @@  void vgic_vcpu_inject_irq(struct vcpu *v, unsigned int irq, int virtual)
 
     spin_lock_irqsave(&v->arch.vgic.lock, flags);
 
+    atomic_inc(&n->inflight_cnt);
     /* vcpu offline or irq already pending */
     if (test_bit(_VPF_down, &v->pause_flags) || !list_empty(&n->inflight))
     {
diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
index 339b6e6..fa0b776 100644
--- a/xen/include/asm-arm/domain.h
+++ b/xen/include/asm-arm/domain.h
@@ -8,6 +8,7 @@ 
 #include <asm/p2m.h>
 #include <asm/vfp.h>
 #include <public/hvm/params.h>
+#include <asm/atomic.h>
 
 /* Represents state corresponding to a block of 32 interrupts */
 struct vgic_irq_rank {
@@ -21,6 +22,7 @@  struct vgic_irq_rank {
 struct pending_irq
 {
     int irq;
+    atomic_t inflight_cnt;
     struct irq_desc *desc; /* only set it the irq corresponds to a physical irq */
     uint8_t priority;
     /* inflight is used to append instances of pending_irq to