diff mbox

[Xen-devel,v6,10/10] xen/arm: gic_events_need_delivery and irq priorities

Message ID 1396450906-542-10-git-send-email-stefano.stabellini@eu.citrix.com
State New
Headers show

Commit Message

Stefano Stabellini April 2, 2014, 3:01 p.m. UTC
gic_events_need_delivery should only return positive if an outstanding
pending irq has an higher priority than the currently active irq and the
priority mask.
Rewrite the function by going through the priority ordered inflight and
lr_queue lists.

In gic_restore_pending_irqs replace lower priority pending (and not
active) irqs in GICH_LRs with higher priority irqs if no more GICH_LRs
are available.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>

---
Changes in v5:
- improve in code comments;
- use list_for_each_entry_reverse instead of writing my own list walker.

Changes in v4:
- in gic_events_need_delivery go through inflight_irqs and only consider
enabled irqs.
---
 xen/arch/arm/gic.c           |   77 ++++++++++++++++++++++++++++++++++++++----
 xen/include/asm-arm/domain.h |    5 +--
 xen/include/asm-arm/gic.h    |    3 ++
 3 files changed, 76 insertions(+), 9 deletions(-)

Comments

Ian Campbell April 3, 2014, 11:37 a.m. UTC | #1
On Wed, 2014-04-02 at 16:01 +0100, Stefano Stabellini wrote:
> gic_events_need_delivery should only return positive if an outstanding
> pending irq has an higher priority than the currently active irq and the
> priority mask.
> Rewrite the function by going through the priority ordered inflight and
> lr_queue lists.
> 
> In gic_restore_pending_irqs replace lower priority pending (and not
> active) irqs in GICH_LRs with higher priority irqs if no more GICH_LRs
> are available.
> 
> Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
> 
> ---
> Changes in v5:
> - improve in code comments;
> - use list_for_each_entry_reverse instead of writing my own list walker.
> 
> Changes in v4:
> - in gic_events_need_delivery go through inflight_irqs and only consider
> enabled irqs.
> ---
>  xen/arch/arm/gic.c           |   77 ++++++++++++++++++++++++++++++++++++++----
>  xen/include/asm-arm/domain.h |    5 +--
>  xen/include/asm-arm/gic.h    |    3 ++
>  3 files changed, 76 insertions(+), 9 deletions(-)
> 
> diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
> index 611990d..7faa0e9 100644
> --- a/xen/arch/arm/gic.c
> +++ b/xen/arch/arm/gic.c
> @@ -721,6 +721,7 @@ static void gic_clear_one_lr(struct vcpu *v, int i)

Looking at the complete function, a better name would include "try"
somewhere, since it doesn't unconditionally clear things. In fact it's
almost more of a "sync the h/w and s/w state" function, is it?

>      p = irq_to_pending(v, irq);
>      if ( lr & GICH_LR_ACTIVE )
>      {
> +        set_bit(GIC_IRQ_GUEST_ACTIVE, &p->status);

This appears to be unmotivated by the commit message. Is this just a
case of needing to more carefully manage the software ACTIVE state to
match the h/w?

>          /* HW interrupts cannot be ACTIVE and PENDING */
>          if ( p->desc == NULL &&
>               test_bit(GIC_IRQ_GUEST_ENABLED, &p->status) &&


> @@ -735,6 +736,7 @@ static void gic_clear_one_lr(struct vcpu *v, int i)
>          if ( p->desc != NULL )
>              p->desc->status &= ~IRQ_INPROGRESS;
>          clear_bit(GIC_IRQ_GUEST_VISIBLE, &p->status);
> +        clear_bit(GIC_IRQ_GUEST_ACTIVE, &p->status);

Same as above I guess?

>          p->lr = GIC_INVALID_LR;
>          if ( test_bit(GIC_IRQ_GUEST_PENDING, &p->status) &&
>                  test_bit(GIC_IRQ_GUEST_ENABLED, &p->status))
> @@ -763,22 +765,51 @@ void gic_clear_lrs(struct vcpu *v)
>  
>  static void gic_restore_pending_irqs(struct vcpu *v)
>  {
> -    int i;
> -    struct pending_irq *p, *t;
> +    int i = 0, lrs = nr_lrs;
> +    struct pending_irq *p, *t, *p_r;
>      unsigned long flags;
>  
> +    if ( list_empty(&v->arch.vgic.lr_pending) )
> +        return;

This is unlocked, according to the previous patch access to lr_pending
is supposed to be protected by vgic.lock.

If this lock free check is safe for some reason then please add a
comment.

> +
> +    spin_lock_irqsave(&v->arch.vgic.lock, flags);
> +
>      list_for_each_entry_safe ( p, t, &v->arch.vgic.lr_pending, lr_queue )
>      {
>          i = find_first_zero_bit(&this_cpu(lr_mask), nr_lrs);
> -        if ( i >= nr_lrs ) return;
> +        if ( i >= nr_lrs )

This where we find the LRs are full and we look for a lower priority
interrupt to evict from the LRs? Isn't this check ~= lr_all_full, which
would be more obvious, and avoid a find_first_zero_bit in the case where
things are full.

Also a comment explaining that we are looking to evict a lower priority
interrupt would be useful. Do we not need to check for LRs which can
simply be cleared? I suppose that must happen elsewhere.

> +        {
> +            list_for_each_entry_reverse( p_r,
> +                                         &v->arch.vgic.inflight_irqs,
> +                                         inflight )
> +            {
> +                if ( test_bit(GIC_IRQ_GUEST_VISIBLE, &p_r->status) &&
> +                     !test_bit(GIC_IRQ_GUEST_ACTIVE, &p_r->status) )
> +                    goto found;
> +                if ( &p_r->inflight == p->inflight.next )
> +                    goto out;

Can't we stop the search as soon as we find a higher priority interrupt
than the one we are trying to inject?

Is this nested loop O(n^2) in the number of inflight interrupts?

Is there a possible algorithm which involves picking the head from
whichever of inflight_irqs or pending_irqs has the highest priority,
until the LRs are full (or the lists are empty) and then putting the
remainder of pending back into the inflight list?

Or perhaps because we know that the two lists are sorted we can avoid a
complete search of inflight_irqs on every outer loop by remembering how
far we got last time and restarting there? i.e. if you gave up on an
interrupt with priority N+1 last time then there isn't much point in
checking for an interrupt with priority N this time around.

I'm also unsure why this function both uses find_first_zero_bit and
counts the lrs it has unassigned in the "lrs" variable. Is one or the
other not sufficient?

> +            }
> +            goto out;
> +
> +found:
> +            i = p_r->lr;
> +            p_r->lr = GIC_INVALID_LR;
> +            set_bit(GIC_IRQ_GUEST_PENDING, &p_r->status);
> +            clear_bit(GIC_IRQ_GUEST_VISIBLE, &p_r->status);
> +            gic_add_to_lr_pending(v, p_r);
> +        }
>  
> -        spin_lock_irqsave(&v->arch.vgic.lock, flags);
>          gic_set_lr(i, p, GICH_LR_PENDING);
>          list_del_init(&p->lr_queue);
>          set_bit(i, &this_cpu(lr_mask));
> -        spin_unlock_irqrestore(&v->arch.vgic.lock, flags);
> +
> +        lrs--;
> +        if ( lrs == 0 )
> +            break;
>      }
>  
> +out:
> +    spin_unlock_irqrestore(&v->arch.vgic.lock, flags);
>  }
>  
>  void gic_clear_pending_irqs(struct vcpu *v)
> @@ -794,8 +825,40 @@ void gic_clear_pending_irqs(struct vcpu *v)
>  
>  int gic_events_need_delivery(void)
>  {
> -    return (!list_empty(&current->arch.vgic.lr_pending) ||
> -            this_cpu(lr_mask));
> +    int mask_priority, lrs = nr_lrs;
> +    int max_priority = 0xff, active_priority = 0xff;
> +    struct vcpu *v = current;
> +    struct pending_irq *p;
> +    unsigned long flags;
> +
> +    mask_priority = (GICH[GICH_VMCR] >> GICH_VMCR_PRIORITY_SHIFT) & GICH_VMCR_PRIORITY_MASK;
> +
> +    spin_lock_irqsave(&v->arch.vgic.lock, flags);
> +
> +    /* TODO: We order the guest irqs by priority, but we don't change
> +     * the priority of host irqs. */
> +    list_for_each_entry( p, &v->arch.vgic.inflight_irqs, inflight )
> +    {
> +        if ( test_bit(GIC_IRQ_GUEST_ACTIVE, &p->status) )
> +        {
> +            if ( p->priority < active_priority )
> +                active_priority = p->priority;
> +        } else if ( test_bit(GIC_IRQ_GUEST_ENABLED, &p->status) ) {
> +            if ( p->priority < max_priority )
> +                max_priority = p->priority;
> +        }

Can't you do the comparison against mask_priority in this loop and
simply return true as soon as you find an interrupt which needs an
upcall?

Can't you also stop searching when p->priority > mask_priority, since
inflight_irqs is order you know all the rest of the list has lower
(numerically higher) priority,

> +        lrs--;
> +        if ( lrs == 0 )
> +            break;
> +    }
> +
> +    spin_unlock_irqrestore(&v->arch.vgic.lock, flags);
> +
> +    if ( max_priority < active_priority &&
> +         (max_priority >> 3) < mask_priority )

Why >> 3? Something to do with our emulation or the BPR?

> +        return 1;
> +    else
> +        return 0;
>  }

Ian.
Stefano Stabellini April 6, 2014, 7:27 p.m. UTC | #2
On Thu, 3 Apr 2014, Ian Campbell wrote:
> On Wed, 2014-04-02 at 16:01 +0100, Stefano Stabellini wrote:
> > gic_events_need_delivery should only return positive if an outstanding
> > pending irq has an higher priority than the currently active irq and the
> > priority mask.
> > Rewrite the function by going through the priority ordered inflight and
> > lr_queue lists.
> > 
> > In gic_restore_pending_irqs replace lower priority pending (and not
> > active) irqs in GICH_LRs with higher priority irqs if no more GICH_LRs
> > are available.
> > 
> > Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
> > 
> > ---
> > Changes in v5:
> > - improve in code comments;
> > - use list_for_each_entry_reverse instead of writing my own list walker.
> > 
> > Changes in v4:
> > - in gic_events_need_delivery go through inflight_irqs and only consider
> > enabled irqs.
> > ---
> >  xen/arch/arm/gic.c           |   77 ++++++++++++++++++++++++++++++++++++++----
> >  xen/include/asm-arm/domain.h |    5 +--
> >  xen/include/asm-arm/gic.h    |    3 ++
> >  3 files changed, 76 insertions(+), 9 deletions(-)
> > 
> > diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
> > index 611990d..7faa0e9 100644
> > --- a/xen/arch/arm/gic.c
> > +++ b/xen/arch/arm/gic.c
> > @@ -721,6 +721,7 @@ static void gic_clear_one_lr(struct vcpu *v, int i)
> 
> Looking at the complete function, a better name would include "try"
> somewhere, since it doesn't unconditionally clear things. In fact it's
> almost more of a "sync the h/w and s/w state" function, is it?

Yes, that is true. I'll rename it to gic_update_one_lr.


> >      p = irq_to_pending(v, irq);
> >      if ( lr & GICH_LR_ACTIVE )
> >      {
> > +        set_bit(GIC_IRQ_GUEST_ACTIVE, &p->status);
> 
> This appears to be unmotivated by the commit message. Is this just a
> case of needing to more carefully manage the software ACTIVE state to
> match the h/w?

The new GIC_IRQ_GUEST_ACTIVE bit is needed to track which one (if any)
is the currenly active irq. We need the information to calculate
priorities in gic_events_need_delivery.

I'll add a note to the commit message to clarify why we are introducing
it.


> >          /* HW interrupts cannot be ACTIVE and PENDING */
> >          if ( p->desc == NULL &&
> >               test_bit(GIC_IRQ_GUEST_ENABLED, &p->status) &&
> 
> 
> > @@ -735,6 +736,7 @@ static void gic_clear_one_lr(struct vcpu *v, int i)
> >          if ( p->desc != NULL )
> >              p->desc->status &= ~IRQ_INPROGRESS;
> >          clear_bit(GIC_IRQ_GUEST_VISIBLE, &p->status);
> > +        clear_bit(GIC_IRQ_GUEST_ACTIVE, &p->status);
> 
> Same as above I guess?
> 
> >          p->lr = GIC_INVALID_LR;
> >          if ( test_bit(GIC_IRQ_GUEST_PENDING, &p->status) &&
> >                  test_bit(GIC_IRQ_GUEST_ENABLED, &p->status))
> > @@ -763,22 +765,51 @@ void gic_clear_lrs(struct vcpu *v)
> >  
> >  static void gic_restore_pending_irqs(struct vcpu *v)
> >  {
> > -    int i;
> > -    struct pending_irq *p, *t;
> > +    int i = 0, lrs = nr_lrs;
> > +    struct pending_irq *p, *t, *p_r;
> >      unsigned long flags;
> >  
> > +    if ( list_empty(&v->arch.vgic.lr_pending) )
> > +        return;
> 
> This is unlocked, according to the previous patch access to lr_pending
> is supposed to be protected by vgic.lock.

Well spotted, I'll fix it.


> If this lock free check is safe for some reason then please add a
> comment.
> 
> > +
> > +    spin_lock_irqsave(&v->arch.vgic.lock, flags);
> > +
> >      list_for_each_entry_safe ( p, t, &v->arch.vgic.lr_pending, lr_queue )
> >      {
> >          i = find_first_zero_bit(&this_cpu(lr_mask), nr_lrs);
> > -        if ( i >= nr_lrs ) return;
> > +        if ( i >= nr_lrs )
> 
> This where we find the LRs are full and we look for a lower priority
> interrupt to evict from the LRs?

Yep.


> Isn't this check ~= lr_all_full, which
> would be more obvious, and avoid a find_first_zero_bit in the case where
> things are full.

Consider that we are inside a list_for_each_entry_safe loop, adding
lr_pending irqs to GICH_LR registers one by one: we need the
find_first_zero_bit.



> Also a comment explaining that we are looking to evict a lower priority
> interrupt would be useful.

Good point, I'll add a comment.


> Do we not need to check for LRs which can
> simply be cleared? I suppose that must happen elsewhere.

Yes, it happens on entry to the hypervisor.


> > +        {
> > +            list_for_each_entry_reverse( p_r,
> > +                                         &v->arch.vgic.inflight_irqs,
> > +                                         inflight )
> > +            {
> > +                if ( test_bit(GIC_IRQ_GUEST_VISIBLE, &p_r->status) &&
> > +                     !test_bit(GIC_IRQ_GUEST_ACTIVE, &p_r->status) )
> > +                    goto found;
> > +                if ( &p_r->inflight == p->inflight.next )
> > +                    goto out;
> 
> Can't we stop the search as soon as we find a higher priority interrupt
> than the one we are trying to inject?

Actually we are already doing that. Currently we stop when we find a
suitable lower priority irq, or when the next irq in inflight_irqs is
the same that we are already processing from lr_pending. Given that both
lr_pending and inflight_irqs are ordered by priority (a new irq is
inserted between the last irq with the same priority and the first with
lower priority) and that we are walking inflight_irqs in reverse, it is
the same as you are suggesting.  We are stopping when the next in line
is the irq we are already evaluating from lr_pending, therefore we know
for sure that all the following irqs in inflight_irqs have the same or
higher priority.


> Is this nested loop O(n^2) in the number of inflight interrupts?

We only run the outer loop nr_lrs times (see the lrs == 0 check).
Also:

- the lists are ordered by priority and we exploit that
- lr_pending is a subset of inflight
- we stop as soon as we exaust the lower priority irqs that we can evict
- nr_lrs is very limited and we can at most do nr_lrs substitutions

In my tests thanks to the list ordering it actually takes only one step
of the inner loop to find an irq to evict and the case when we need to
do any evicting at all is very rare.


> Is there a possible algorithm which involves picking the head from
> whichever of inflight_irqs or pending_irqs has the highest priority,
> until the LRs are full (or the lists are empty) and then putting the
> remainder of pending back into the inflight list?
> 
> Or perhaps because we know that the two lists are sorted we can avoid a
> complete search of inflight_irqs on every outer loop by remembering how
> far we got last time and restarting there? i.e. if you gave up on an
> interrupt with priority N+1 last time then there isn't much point in
> checking for an interrupt with priority N this time around.

This is exactly what my previous version of the patch did:

http://marc.info/?l=xen-devel&m=139523245301114

but you asked me to change it :-)


> I'm also unsure why this function both uses find_first_zero_bit and
> counts the lrs it has unassigned in the "lrs" variable. Is one or the
> other not sufficient?

find_first_zero_bit is used to find free LRs.
lrs is used to limit the iterations of the outer loop: no point in
trying to fill or substitute more than nr_lrs LRs as lr_pending is
already order by priority.


> > +            }
> > +            goto out;
> > +
> > +found:
> > +            i = p_r->lr;
> > +            p_r->lr = GIC_INVALID_LR;
> > +            set_bit(GIC_IRQ_GUEST_PENDING, &p_r->status);
> > +            clear_bit(GIC_IRQ_GUEST_VISIBLE, &p_r->status);
> > +            gic_add_to_lr_pending(v, p_r);
> > +        }
> >  
> > -        spin_lock_irqsave(&v->arch.vgic.lock, flags);
> >          gic_set_lr(i, p, GICH_LR_PENDING);
> >          list_del_init(&p->lr_queue);
> >          set_bit(i, &this_cpu(lr_mask));
> > -        spin_unlock_irqrestore(&v->arch.vgic.lock, flags);
> > +
> > +        lrs--;
> > +        if ( lrs == 0 )
> > +            break;
> >      }
> >  
> > +out:
> > +    spin_unlock_irqrestore(&v->arch.vgic.lock, flags);
> >  }
> >  
> >  void gic_clear_pending_irqs(struct vcpu *v)
> > @@ -794,8 +825,40 @@ void gic_clear_pending_irqs(struct vcpu *v)
> >  
> >  int gic_events_need_delivery(void)
> >  {
> > -    return (!list_empty(&current->arch.vgic.lr_pending) ||
> > -            this_cpu(lr_mask));
> > +    int mask_priority, lrs = nr_lrs;
> > +    int max_priority = 0xff, active_priority = 0xff;
> > +    struct vcpu *v = current;
> > +    struct pending_irq *p;
> > +    unsigned long flags;
> > +
> > +    mask_priority = (GICH[GICH_VMCR] >> GICH_VMCR_PRIORITY_SHIFT) & GICH_VMCR_PRIORITY_MASK;
> > +
> > +    spin_lock_irqsave(&v->arch.vgic.lock, flags);
> > +
> > +    /* TODO: We order the guest irqs by priority, but we don't change
> > +     * the priority of host irqs. */
> > +    list_for_each_entry( p, &v->arch.vgic.inflight_irqs, inflight )
> > +    {
> > +        if ( test_bit(GIC_IRQ_GUEST_ACTIVE, &p->status) )
> > +        {
> > +            if ( p->priority < active_priority )
> > +                active_priority = p->priority;
> > +        } else if ( test_bit(GIC_IRQ_GUEST_ENABLED, &p->status) ) {
> > +            if ( p->priority < max_priority )
> > +                max_priority = p->priority;
> > +        }
> 
> Can't you do the comparison against mask_priority in this loop and
> simply return true as soon as you find an interrupt which needs an
> upcall?

We need to know the active_priority before we can return. Only if an
outstanding irq exists with priority higher than active_priority (and
mask_priority) we can return true.

 
> Can't you also stop searching when p->priority > mask_priority, since
> inflight_irqs is order you know all the rest of the list has lower
> (numerically higher) priority,

This is a good point.  We can also stop as soon as we find the irq with
GIC_IRQ_GUEST_ACTIVE, because we know than going forward all the others
are going to have a lower priority.
I'll make the changes.


> > +        lrs--;
> > +        if ( lrs == 0 )
> > +            break;
> > +    }
> > +
> > +    spin_unlock_irqrestore(&v->arch.vgic.lock, flags);
> > +
> > +    if ( max_priority < active_priority &&
> > +         (max_priority >> 3) < mask_priority )
> 
> Why >> 3? Something to do with our emulation or the BPR?

Only 5 bits are implemented for the guest irq priority, both in GICH_LRs
and in GICH_VMCR. See gic_set_lr.


> > +        return 1;
> > +    else
> > +        return 0;
> >  }
> 
> Ian.
> 
>
Ian Campbell April 7, 2014, 9:43 a.m. UTC | #3
On Sun, 2014-04-06 at 20:27 +0100, Stefano Stabellini wrote:
> On Thu, 3 Apr 2014, Ian Campbell wrote:
> > On Wed, 2014-04-02 at 16:01 +0100, Stefano Stabellini wrote:
> > > gic_events_need_delivery should only return positive if an outstanding
> > > pending irq has an higher priority than the currently active irq and the
> > > priority mask.
> > > Rewrite the function by going through the priority ordered inflight and
> > > lr_queue lists.
> > > 
> > > In gic_restore_pending_irqs replace lower priority pending (and not
> > > active) irqs in GICH_LRs with higher priority irqs if no more GICH_LRs
> > > are available.
> > > 
> > > Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
> > > 
> > > ---
> > > Changes in v5:
> > > - improve in code comments;
> > > - use list_for_each_entry_reverse instead of writing my own list walker.
> > > 
> > > Changes in v4:
> > > - in gic_events_need_delivery go through inflight_irqs and only consider
> > > enabled irqs.
> > > ---
> > >  xen/arch/arm/gic.c           |   77 ++++++++++++++++++++++++++++++++++++++----
> > >  xen/include/asm-arm/domain.h |    5 +--
> > >  xen/include/asm-arm/gic.h    |    3 ++
> > >  3 files changed, 76 insertions(+), 9 deletions(-)
> > > 
> > > diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
> > > index 611990d..7faa0e9 100644
> > > --- a/xen/arch/arm/gic.c
> > > +++ b/xen/arch/arm/gic.c
> > > @@ -721,6 +721,7 @@ static void gic_clear_one_lr(struct vcpu *v, int i)
> > 
> > Looking at the complete function, a better name would include "try"
> > somewhere, since it doesn't unconditionally clear things. In fact it's
> > almost more of a "sync the h/w and s/w state" function, is it?
> 
> Yes, that is true. I'll rename it to gic_update_one_lr.

Thanks, I think that'll help clarify the stuff I asked about in patch #8
too.

> > >      p = irq_to_pending(v, irq);
> > >      if ( lr & GICH_LR_ACTIVE )
> > >      {
> > > +        set_bit(GIC_IRQ_GUEST_ACTIVE, &p->status);
> > 
> > This appears to be unmotivated by the commit message. Is this just a
> > case of needing to more carefully manage the software ACTIVE state to
> > match the h/w?
> 
> The new GIC_IRQ_GUEST_ACTIVE bit is needed to track which one (if any)
> is the currenly active irq. We need the information to calculate
> priorities in gic_events_need_delivery.
> 
> I'll add a note to the commit message to clarify why we are introducing
> it.

OK, hopefully I'll understand the comment ;-)

> > If this lock free check is safe for some reason then please add a
> > comment.
> > 
> > > +
> > > +    spin_lock_irqsave(&v->arch.vgic.lock, flags);
> > > +
> > >      list_for_each_entry_safe ( p, t, &v->arch.vgic.lr_pending, lr_queue )
> > >      {
> > >          i = find_first_zero_bit(&this_cpu(lr_mask), nr_lrs);
> > > -        if ( i >= nr_lrs ) return;
> > > +        if ( i >= nr_lrs )
> > 
> > This where we find the LRs are full and we look for a lower priority
> > interrupt to evict from the LRs?
> 
> Yep.
> 
> 
> > Isn't this check ~= lr_all_full, which
> > would be more obvious, and avoid a find_first_zero_bit in the case where
> > things are full.
> 
> Consider that we are inside a list_for_each_entry_safe loop, adding
> lr_pending irqs to GICH_LR registers one by one: we need the
> find_first_zero_bit.

Shouldn't/couldn't you remember the answer from last time and start the
search from there. Currently it seems to search the whole bitmask every
time. (I think what I'm suggesting is roughly equivalent too passing i
instead of nr_lrs, with suitable initialisation and ++ of i at the
appropriate points)

> > > +        {
> > > +            list_for_each_entry_reverse( p_r,
> > > +                                         &v->arch.vgic.inflight_irqs,
> > > +                                         inflight )
> > > +            {
> > > +                if ( test_bit(GIC_IRQ_GUEST_VISIBLE, &p_r->status) &&
> > > +                     !test_bit(GIC_IRQ_GUEST_ACTIVE, &p_r->status) )
> > > +                    goto found;
> > > +                if ( &p_r->inflight == p->inflight.next )
> > > +                    goto out;
> > 
> > Can't we stop the search as soon as we find a higher priority interrupt
> > than the one we are trying to inject?
> 
> Actually we are already doing that. Currently we stop when we find a
> suitable lower priority irq, or when the next irq in inflight_irqs is
> the same that we are already processing from lr_pending. Given that both
> lr_pending and inflight_irqs are ordered by priority (a new irq is
> inserted between the last irq with the same priority and the first with
> lower priority) and that we are walking inflight_irqs in reverse, it is
> the same as you are suggesting.  We are stopping when the next in line
> is the irq we are already evaluating from lr_pending, therefore we know
> for sure that all the following irqs in inflight_irqs have the same or
> higher priority.
> 
> 
> > Is this nested loop O(n^2) in the number of inflight interrupts?
> 
> We only run the outer loop nr_lrs times (see the lrs == 0 check).

The important thing here is how many times we run the inner loop for
each of those nr_lrs times.

> Also:
> 
> - the lists are ordered by priority and we exploit that
> - lr_pending is a subset of inflight
> - we stop as soon as we exaust the lower priority irqs that we can evict
> - nr_lrs is very limited and we can at most do nr_lrs substitutions
> 
> In my tests thanks to the list ordering it actually takes only one step
> of the inner loop to find an irq to evict and the case when we need to
> do any evicting at all is very rare.
> 
> 
> > Is there a possible algorithm which involves picking the head from
> > whichever of inflight_irqs or pending_irqs has the highest priority,
> > until the LRs are full (or the lists are empty) and then putting the
> > remainder of pending back into the inflight list?
> > 
> > Or perhaps because we know that the two lists are sorted we can avoid a
> > complete search of inflight_irqs on every outer loop by remembering how
> > far we got last time and restarting there? i.e. if you gave up on an
> > interrupt with priority N+1 last time then there isn't much point in
> > checking for an interrupt with priority N this time around.
> 
> This is exactly what my previous version of the patch did:
> 
> http://marc.info/?l=xen-devel&m=139523245301114
> 
> but you asked me to change it :-)

You mean by my asking to use the macros I ruled this approach out?
Oops ;-)

> > > +            }
> > > +            goto out;
> > > +
> > > +found:
> > > +            i = p_r->lr;
> > > +            p_r->lr = GIC_INVALID_LR;
> > > +            set_bit(GIC_IRQ_GUEST_PENDING, &p_r->status);
> > > +            clear_bit(GIC_IRQ_GUEST_VISIBLE, &p_r->status);
> > > +            gic_add_to_lr_pending(v, p_r);
> > > +        }
> > >  
> > > -        spin_lock_irqsave(&v->arch.vgic.lock, flags);
> > >          gic_set_lr(i, p, GICH_LR_PENDING);
> > >          list_del_init(&p->lr_queue);
> > >          set_bit(i, &this_cpu(lr_mask));
> > > -        spin_unlock_irqrestore(&v->arch.vgic.lock, flags);
> > > +
> > > +        lrs--;
> > > +        if ( lrs == 0 )
> > > +            break;
> > >      }
> > >  
> > > +out:
> > > +    spin_unlock_irqrestore(&v->arch.vgic.lock, flags);
> > >  }
> > >  
> > >  void gic_clear_pending_irqs(struct vcpu *v)
> > > @@ -794,8 +825,40 @@ void gic_clear_pending_irqs(struct vcpu *v)
> > >  
> > >  int gic_events_need_delivery(void)
> > >  {
> > > -    return (!list_empty(&current->arch.vgic.lr_pending) ||
> > > -            this_cpu(lr_mask));
> > > +    int mask_priority, lrs = nr_lrs;
> > > +    int max_priority = 0xff, active_priority = 0xff;
> > > +    struct vcpu *v = current;
> > > +    struct pending_irq *p;
> > > +    unsigned long flags;
> > > +
> > > +    mask_priority = (GICH[GICH_VMCR] >> GICH_VMCR_PRIORITY_SHIFT) & GICH_VMCR_PRIORITY_MASK;
> > > +
> > > +    spin_lock_irqsave(&v->arch.vgic.lock, flags);
> > > +
> > > +    /* TODO: We order the guest irqs by priority, but we don't change
> > > +     * the priority of host irqs. */
> > > +    list_for_each_entry( p, &v->arch.vgic.inflight_irqs, inflight )
> > > +    {
> > > +        if ( test_bit(GIC_IRQ_GUEST_ACTIVE, &p->status) )
> > > +        {
> > > +            if ( p->priority < active_priority )
> > > +                active_priority = p->priority;
> > > +        } else if ( test_bit(GIC_IRQ_GUEST_ENABLED, &p->status) ) {
> > > +            if ( p->priority < max_priority )
> > > +                max_priority = p->priority;
> > > +        }
> > 
> > Can't you do the comparison against mask_priority in this loop and
> > simply return true as soon as you find an interrupt which needs an
> > upcall?
> 
> We need to know the active_priority before we can return. Only if an
> outstanding irq exists with priority higher than active_priority (and
> mask_priority) we can return true.

"active_priority" here is the priority of the highest priority active
interrupt I think? You don't actually need to know what the highest is,
just that one exists which is higher than the threshold, don't you?

Likewise max_priority is the priority of highest priority interrupt
which the guest hasn't masked? Again don't you just need an existence
proof of one which exceeds the required threshold rather than the strict
highest?

> > Can't you also stop searching when p->priority > mask_priority, since
> > inflight_irqs is order you know all the rest of the list has lower
> > (numerically higher) priority,
> 
> This is a good point.  We can also stop as soon as we find the irq with
> GIC_IRQ_GUEST_ACTIVE, because we know than going forward all the others
> are going to have a lower priority.
> I'll make the changes.
> 
> 
> > > +        lrs--;
> > > +        if ( lrs == 0 )
> > > +            break;
> > > +    }
> > > +
> > > +    spin_unlock_irqrestore(&v->arch.vgic.lock, flags);
> > > +
> > > +    if ( max_priority < active_priority &&
> > > +         (max_priority >> 3) < mask_priority )
> > 
> > Why >> 3? Something to do with our emulation or the BPR?
> 
> Only 5 bits are implemented for the guest irq priority, both in GICH_LRs
> and in GICH_VMCR. See gic_set_lr.

Looking at the docs it seems like the LRs have lower precision than
GICD_IPRIORITYRn, how odd. So this is a hardware limit rather an
artefact of our implementation? (I notice there are 3 reserved bits in
the LR right where the missing priority bits would sit... /me strokes
beard). I guess this ties in with GICH_APR only being 32 bits.

I'd like to see the >>3 replaced by a #define both here and gic_set_lr
please.

> 
> 
> > > +        return 1;
> > > +    else
> > > +        return 0;
> > >  }
> > 
> > Ian.
> > 
> >
Stefano Stabellini April 7, 2014, 10:30 a.m. UTC | #4
On Mon, 7 Apr 2014, Ian Campbell wrote:
> > > Isn't this check ~= lr_all_full, which
> > > would be more obvious, and avoid a find_first_zero_bit in the case where
> > > things are full.
> > 
> > Consider that we are inside a list_for_each_entry_safe loop, adding
> > lr_pending irqs to GICH_LR registers one by one: we need the
> > find_first_zero_bit.
> 
> Shouldn't/couldn't you remember the answer from last time and start the
> search from there. Currently it seems to search the whole bitmask every
> time. (I think what I'm suggesting is roughly equivalent too passing i
> instead of nr_lrs, with suitable initialisation and ++ of i at the
> appropriate points)

Yeah, I could use find_next_zero_bit instead of find_first_zero_bit.


> > > > +        {
> > > > +            list_for_each_entry_reverse( p_r,
> > > > +                                         &v->arch.vgic.inflight_irqs,
> > > > +                                         inflight )
> > > > +            {
> > > > +                if ( test_bit(GIC_IRQ_GUEST_VISIBLE, &p_r->status) &&
> > > > +                     !test_bit(GIC_IRQ_GUEST_ACTIVE, &p_r->status) )
> > > > +                    goto found;
> > > > +                if ( &p_r->inflight == p->inflight.next )
> > > > +                    goto out;
> > > 
> > > Can't we stop the search as soon as we find a higher priority interrupt
> > > than the one we are trying to inject?
> > 
> > Actually we are already doing that. Currently we stop when we find a
> > suitable lower priority irq, or when the next irq in inflight_irqs is
> > the same that we are already processing from lr_pending. Given that both
> > lr_pending and inflight_irqs are ordered by priority (a new irq is
> > inserted between the last irq with the same priority and the first with
> > lower priority) and that we are walking inflight_irqs in reverse, it is
> > the same as you are suggesting.  We are stopping when the next in line
> > is the irq we are already evaluating from lr_pending, therefore we know
> > for sure that all the following irqs in inflight_irqs have the same or
> > higher priority.
> > 
> > 
> > > Is this nested loop O(n^2) in the number of inflight interrupts?
> > 
> > We only run the outer loop nr_lrs times (see the lrs == 0 check).
> 
> The important thing here is how many times we run the inner loop for
> each of those nr_lrs times.
> 
> > Also:
> > 
> > - the lists are ordered by priority and we exploit that
> > - lr_pending is a subset of inflight
> > - we stop as soon as we exaust the lower priority irqs that we can evict
> > - nr_lrs is very limited and we can at most do nr_lrs substitutions
> > 
> > In my tests thanks to the list ordering it actually takes only one step
> > of the inner loop to find an irq to evict and the case when we need to
> > do any evicting at all is very rare.
> > 
> > 
> > > Is there a possible algorithm which involves picking the head from
> > > whichever of inflight_irqs or pending_irqs has the highest priority,
> > > until the LRs are full (or the lists are empty) and then putting the
> > > remainder of pending back into the inflight list?
> > > 
> > > Or perhaps because we know that the two lists are sorted we can avoid a
> > > complete search of inflight_irqs on every outer loop by remembering how
> > > far we got last time and restarting there? i.e. if you gave up on an
> > > interrupt with priority N+1 last time then there isn't much point in
> > > checking for an interrupt with priority N this time around.
> > 
> > This is exactly what my previous version of the patch did:
> > 
> > http://marc.info/?l=xen-devel&m=139523245301114
> > 
> > but you asked me to change it :-)
> 
> You mean by my asking to use the macros I ruled this approach out?
> Oops ;-)

I think I can find a way to do both.


> > > > +            }
> > > > +            goto out;
> > > > +
> > > > +found:
> > > > +            i = p_r->lr;
> > > > +            p_r->lr = GIC_INVALID_LR;
> > > > +            set_bit(GIC_IRQ_GUEST_PENDING, &p_r->status);
> > > > +            clear_bit(GIC_IRQ_GUEST_VISIBLE, &p_r->status);
> > > > +            gic_add_to_lr_pending(v, p_r);
> > > > +        }
> > > >  
> > > > -        spin_lock_irqsave(&v->arch.vgic.lock, flags);
> > > >          gic_set_lr(i, p, GICH_LR_PENDING);
> > > >          list_del_init(&p->lr_queue);
> > > >          set_bit(i, &this_cpu(lr_mask));
> > > > -        spin_unlock_irqrestore(&v->arch.vgic.lock, flags);
> > > > +
> > > > +        lrs--;
> > > > +        if ( lrs == 0 )
> > > > +            break;
> > > >      }
> > > >  
> > > > +out:
> > > > +    spin_unlock_irqrestore(&v->arch.vgic.lock, flags);
> > > >  }
> > > >  
> > > >  void gic_clear_pending_irqs(struct vcpu *v)
> > > > @@ -794,8 +825,40 @@ void gic_clear_pending_irqs(struct vcpu *v)
> > > >  
> > > >  int gic_events_need_delivery(void)
> > > >  {
> > > > -    return (!list_empty(&current->arch.vgic.lr_pending) ||
> > > > -            this_cpu(lr_mask));
> > > > +    int mask_priority, lrs = nr_lrs;
> > > > +    int max_priority = 0xff, active_priority = 0xff;
> > > > +    struct vcpu *v = current;
> > > > +    struct pending_irq *p;
> > > > +    unsigned long flags;
> > > > +
> > > > +    mask_priority = (GICH[GICH_VMCR] >> GICH_VMCR_PRIORITY_SHIFT) & GICH_VMCR_PRIORITY_MASK;
> > > > +
> > > > +    spin_lock_irqsave(&v->arch.vgic.lock, flags);
> > > > +
> > > > +    /* TODO: We order the guest irqs by priority, but we don't change
> > > > +     * the priority of host irqs. */
> > > > +    list_for_each_entry( p, &v->arch.vgic.inflight_irqs, inflight )
> > > > +    {
> > > > +        if ( test_bit(GIC_IRQ_GUEST_ACTIVE, &p->status) )
> > > > +        {
> > > > +            if ( p->priority < active_priority )
> > > > +                active_priority = p->priority;
> > > > +        } else if ( test_bit(GIC_IRQ_GUEST_ENABLED, &p->status) ) {
> > > > +            if ( p->priority < max_priority )
> > > > +                max_priority = p->priority;
> > > > +        }
> > > 
> > > Can't you do the comparison against mask_priority in this loop and
> > > simply return true as soon as you find an interrupt which needs an
> > > upcall?
> > 
> > We need to know the active_priority before we can return. Only if an
> > outstanding irq exists with priority higher than active_priority (and
> > mask_priority) we can return true.
> 
> "active_priority" here is the priority of the highest priority active
> interrupt I think? You don't actually need to know what the highest is,
> just that one exists which is higher than the threshold, don't you?

No, we need to know the highest because only if max_priority is higher
than active_priority then the function should return true.


> Likewise max_priority is the priority of highest priority interrupt
> which the guest hasn't masked? Again don't you just need an existence
> proof of one which exceeds the required threshold rather than the strict
> highest?

We need to know that exceeds the required threshold AND that it is
higher than active_priority.


> > > Can't you also stop searching when p->priority > mask_priority, since
> > > inflight_irqs is order you know all the rest of the list has lower
> > > (numerically higher) priority,
> > 
> > This is a good point.  We can also stop as soon as we find the irq with
> > GIC_IRQ_GUEST_ACTIVE, because we know than going forward all the others
> > are going to have a lower priority.
> > I'll make the changes.
> > 
> > 
> > > > +        lrs--;
> > > > +        if ( lrs == 0 )
> > > > +            break;
> > > > +    }
> > > > +
> > > > +    spin_unlock_irqrestore(&v->arch.vgic.lock, flags);
> > > > +
> > > > +    if ( max_priority < active_priority &&
> > > > +         (max_priority >> 3) < mask_priority )
> > > 
> > > Why >> 3? Something to do with our emulation or the BPR?
> > 
> > Only 5 bits are implemented for the guest irq priority, both in GICH_LRs
> > and in GICH_VMCR. See gic_set_lr.
> 
> Looking at the docs it seems like the LRs have lower precision than
> GICD_IPRIORITYRn, how odd. So this is a hardware limit rather an
> artefact of our implementation? (I notice there are 3 reserved bits in
> the LR right where the missing priority bits would sit... /me strokes
> beard). I guess this ties in with GICH_APR only being 32 bits.
> 
> I'd like to see the >>3 replaced by a #define both here and gic_set_lr
> please.

OK, I'll add a new patch at the end of the series.


> > 
> > 
> > > > +        return 1;
> > > > +    else
> > > > +        return 0;
> > > >  }
> > > 
> > > Ian.
> > > 
> > > 
> 
>
diff mbox

Patch

diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
index 611990d..7faa0e9 100644
--- a/xen/arch/arm/gic.c
+++ b/xen/arch/arm/gic.c
@@ -721,6 +721,7 @@  static void gic_clear_one_lr(struct vcpu *v, int i)
     p = irq_to_pending(v, irq);
     if ( lr & GICH_LR_ACTIVE )
     {
+        set_bit(GIC_IRQ_GUEST_ACTIVE, &p->status);
         /* HW interrupts cannot be ACTIVE and PENDING */
         if ( p->desc == NULL &&
              test_bit(GIC_IRQ_GUEST_ENABLED, &p->status) &&
@@ -735,6 +736,7 @@  static void gic_clear_one_lr(struct vcpu *v, int i)
         if ( p->desc != NULL )
             p->desc->status &= ~IRQ_INPROGRESS;
         clear_bit(GIC_IRQ_GUEST_VISIBLE, &p->status);
+        clear_bit(GIC_IRQ_GUEST_ACTIVE, &p->status);
         p->lr = GIC_INVALID_LR;
         if ( test_bit(GIC_IRQ_GUEST_PENDING, &p->status) &&
                 test_bit(GIC_IRQ_GUEST_ENABLED, &p->status))
@@ -763,22 +765,51 @@  void gic_clear_lrs(struct vcpu *v)
 
 static void gic_restore_pending_irqs(struct vcpu *v)
 {
-    int i;
-    struct pending_irq *p, *t;
+    int i = 0, lrs = nr_lrs;
+    struct pending_irq *p, *t, *p_r;
     unsigned long flags;
 
+    if ( list_empty(&v->arch.vgic.lr_pending) )
+        return;
+
+    spin_lock_irqsave(&v->arch.vgic.lock, flags);
+
     list_for_each_entry_safe ( p, t, &v->arch.vgic.lr_pending, lr_queue )
     {
         i = find_first_zero_bit(&this_cpu(lr_mask), nr_lrs);
-        if ( i >= nr_lrs ) return;
+        if ( i >= nr_lrs )
+        {
+            list_for_each_entry_reverse( p_r,
+                                         &v->arch.vgic.inflight_irqs,
+                                         inflight )
+            {
+                if ( test_bit(GIC_IRQ_GUEST_VISIBLE, &p_r->status) &&
+                     !test_bit(GIC_IRQ_GUEST_ACTIVE, &p_r->status) )
+                    goto found;
+                if ( &p_r->inflight == p->inflight.next )
+                    goto out;
+            }
+            goto out;
+
+found:
+            i = p_r->lr;
+            p_r->lr = GIC_INVALID_LR;
+            set_bit(GIC_IRQ_GUEST_PENDING, &p_r->status);
+            clear_bit(GIC_IRQ_GUEST_VISIBLE, &p_r->status);
+            gic_add_to_lr_pending(v, p_r);
+        }
 
-        spin_lock_irqsave(&v->arch.vgic.lock, flags);
         gic_set_lr(i, p, GICH_LR_PENDING);
         list_del_init(&p->lr_queue);
         set_bit(i, &this_cpu(lr_mask));
-        spin_unlock_irqrestore(&v->arch.vgic.lock, flags);
+
+        lrs--;
+        if ( lrs == 0 )
+            break;
     }
 
+out:
+    spin_unlock_irqrestore(&v->arch.vgic.lock, flags);
 }
 
 void gic_clear_pending_irqs(struct vcpu *v)
@@ -794,8 +825,40 @@  void gic_clear_pending_irqs(struct vcpu *v)
 
 int gic_events_need_delivery(void)
 {
-    return (!list_empty(&current->arch.vgic.lr_pending) ||
-            this_cpu(lr_mask));
+    int mask_priority, lrs = nr_lrs;
+    int max_priority = 0xff, active_priority = 0xff;
+    struct vcpu *v = current;
+    struct pending_irq *p;
+    unsigned long flags;
+
+    mask_priority = (GICH[GICH_VMCR] >> GICH_VMCR_PRIORITY_SHIFT) & GICH_VMCR_PRIORITY_MASK;
+
+    spin_lock_irqsave(&v->arch.vgic.lock, flags);
+
+    /* TODO: We order the guest irqs by priority, but we don't change
+     * the priority of host irqs. */
+    list_for_each_entry( p, &v->arch.vgic.inflight_irqs, inflight )
+    {
+        if ( test_bit(GIC_IRQ_GUEST_ACTIVE, &p->status) )
+        {
+            if ( p->priority < active_priority )
+                active_priority = p->priority;
+        } else if ( test_bit(GIC_IRQ_GUEST_ENABLED, &p->status) ) {
+            if ( p->priority < max_priority )
+                max_priority = p->priority;
+        }
+        lrs--;
+        if ( lrs == 0 )
+            break;
+    }
+
+    spin_unlock_irqrestore(&v->arch.vgic.lock, flags);
+
+    if ( max_priority < active_priority &&
+         (max_priority >> 3) < mask_priority )
+        return 1;
+    else
+        return 0;
 }
 
 void gic_inject(void)
diff --git a/xen/include/asm-arm/domain.h b/xen/include/asm-arm/domain.h
index dcbeba1..696f36c 100644
--- a/xen/include/asm-arm/domain.h
+++ b/xen/include/asm-arm/domain.h
@@ -55,8 +55,9 @@  struct pending_irq
      *
      */
 #define GIC_IRQ_GUEST_PENDING  0
-#define GIC_IRQ_GUEST_VISIBLE  1
-#define GIC_IRQ_GUEST_ENABLED  2
+#define GIC_IRQ_GUEST_ACTIVE   1
+#define GIC_IRQ_GUEST_VISIBLE  2
+#define GIC_IRQ_GUEST_ENABLED  3
     unsigned long status;
     struct irq_desc *desc; /* only set it the irq corresponds to a physical irq */
     int irq;
diff --git a/xen/include/asm-arm/gic.h b/xen/include/asm-arm/gic.h
index 5a9dc77..5d8f7f1 100644
--- a/xen/include/asm-arm/gic.h
+++ b/xen/include/asm-arm/gic.h
@@ -129,6 +129,9 @@ 
 #define GICH_LR_CPUID_SHIFT     9
 #define GICH_VTR_NRLRGS         0x3f
 
+#define GICH_VMCR_PRIORITY_MASK   0x1f
+#define GICH_VMCR_PRIORITY_SHIFT  27
+
 /*
  * The minimum GICC_BPR is required to be in the range 0-3. We set
  * GICC_BPR to 0 but we must expect that it might be 3. This means we