[Xen-devel,RFC,23/49] ARM: new VGIC: Add IRQ sorting

Message ID 20180209143937.28866-24-andre.przywara@linaro.org
State New
Headers show
Series
  • New VGIC(-v2) implementation
Related show

Commit Message

Andre Przywara Feb. 9, 2018, 2:39 p.m.
Adds the sorting function to cover the case where you have more IRQs
to consider than you have LRs. We consider their priorities.
This pulls in Linux's list_sort.c, which is a merge sort implementation
for linked lists.

This is based on Linux commit 8e4447457965, written by Christoffer Dall.

Signed-off-by: Andre Przywara <andre.przywara@linaro.org>
---
 xen/arch/arm/vgic/vgic.c    |  59 +++++++++++++++
 xen/common/list_sort.c      | 170 ++++++++++++++++++++++++++++++++++++++++++++
 xen/include/xen/list_sort.h |  11 +++
 3 files changed, 240 insertions(+)
 create mode 100644 xen/common/list_sort.c
 create mode 100644 xen/include/xen/list_sort.h

Comments

Julien Grall Feb. 13, 2018, 12:30 p.m. | #1
Hi Andre,

On 09/02/18 14:39, Andre Przywara wrote:
> Adds the sorting function to cover the case where you have more IRQs
> to consider than you have LRs. We consider their priorities.
> This pulls in Linux' list_sort.c , which is a merge sort implementation
> for linked lists.
> 
> This is based on Linux commit 8e4447457965, written by Christoffer Dall.
> 
> Signed-off-by: Andre Przywara <andre.przywara@linaro.org>
> ---
>   xen/arch/arm/vgic/vgic.c    |  59 +++++++++++++++
>   xen/common/list_sort.c      | 170 ++++++++++++++++++++++++++++++++++++++++++++
>   xen/include/xen/list_sort.h |  11 +++

You need to CC "THE REST" maintainers for this code. It would also make 
sense to have a separate patch for adding list_sort.c

>   3 files changed, 240 insertions(+)
>   create mode 100644 xen/common/list_sort.c
>   create mode 100644 xen/include/xen/list_sort.h
> 
> diff --git a/xen/arch/arm/vgic/vgic.c b/xen/arch/arm/vgic/vgic.c
> index f517df6d00..a4efd1fd03 100644
> --- a/xen/arch/arm/vgic/vgic.c
> +++ b/xen/arch/arm/vgic/vgic.c
> @@ -16,6 +16,7 @@
>    */
>   
>   #include <asm/bug.h>
> +#include <xen/list_sort.h>
>   #include <xen/sched.h>
>   
>   #include <asm/arm_vgic.h>
> @@ -163,6 +164,64 @@ static struct vcpu *vgic_target_oracle(struct vgic_irq *irq)
>       return NULL;
>   }
>   
> +/*
> + * The order of items in the ap_lists defines how we'll pack things in LRs as
> + * well, the first items in the list being the first things populated in the
> + * LRs.
> + *
> + * A hard rule is that active interrupts can never be pushed out of the LRs
> + * (and therefore take priority) since we cannot reliably trap on deactivation
> + * of IRQs and therefore they have to be present in the LRs.
> + *
> + * Otherwise things should be sorted by the priority field and the GIC
> + * hardware support will take care of preemption of priority groups etc.
> + *
> + * Return negative if "a" sorts before "b", 0 to preserve order, and positive
> + * to sort "b" before "a".

Finally a good explanation of the return value of a sort function :). I 
always get confused what the return is supposed to be.

> + */
> +static int vgic_irq_cmp(void *priv, struct list_head *a, struct list_head *b)
> +{
> +    struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list);
> +    struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list);
> +    bool penda, pendb;
> +    int ret;
> +
> +    spin_lock(&irqa->irq_lock);
> +    spin_lock(&irqb->irq_lock);

I guess the locking order does not matter here because this is the only 
place where two IRQs lock have to be taken?

Also, this will be done with IRQs disabled, right? In that case, may I ask
for an ASSERT(!local_irq_is_enabled())? Or maybe in vgic_sort_ap_list.

> +
> +    if ( irqa->active || irqb->active )
> +    {
> +        ret = (int)irqb->active - (int)irqa->active;
> +        goto out;
> +    }
> +
> +    penda = irqa->enabled && irq_is_pending(irqa);
> +    pendb = irqb->enabled && irq_is_pending(irqb);
> +
> +    if ( !penda || !pendb )
> +    {
> +        ret = (int)pendb - (int)penda;
> +        goto out;
> +    }
> +
> +    /* Both pending and enabled, sort by priority */
> +    ret = irqa->priority - irqb->priority;
> +out:
> +    spin_unlock(&irqb->irq_lock);
> +    spin_unlock(&irqa->irq_lock);
> +    return ret;
> +}
> +
> +/* Must be called with the ap_list_lock held */
> +static void vgic_sort_ap_list(struct vcpu *vcpu)
> +{
> +    struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
> +
> +    ASSERT(spin_is_locked(&vgic_cpu->ap_list_lock));
> +
> +    list_sort(NULL, &vgic_cpu->ap_list_head, vgic_irq_cmp);
> +}
> +
>   /*
>    * Only valid injection if changing level for level-triggered IRQs or for a
>    * rising edge.
> diff --git a/xen/common/list_sort.c b/xen/common/list_sort.c
> new file mode 100644
> index 0000000000..9c5cc58e43
> --- /dev/null
> +++ b/xen/common/list_sort.c
> @@ -0,0 +1,170 @@
> +/*
> + * list_sort.c: merge sort implementation for linked lists
> + * Copied from the Linux kernel (lib/list_sort.c)
> + * (without specific copyright notice there)

I can see you moved from Linux to Xen coding style. Is there any other 
changes made?

> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
> + * more details.
> + *
> + * You should have received a copy of the GNU General Public License along with
> + * this program; If not, see <http://www.gnu.org/licenses/>.
> + */
> +#include <xen/lib.h>
> +#include <xen/list.h>
> +
> +#define MAX_LIST_LENGTH_BITS 20
> +
> +/*
> + * Returns a list organized in an intermediate format suited
> + * to chaining of merge() calls: null-terminated, no reserved or
> + * sentinel head node, "prev" links not maintained.
> + */
> +static struct list_head *merge(void *priv,
> +                               int (*cmp)(void *priv, struct list_head *a,
> +                                          struct list_head *b),
> +                               struct list_head *a, struct list_head *b)
> +{
> +    struct list_head head, *tail = &head;
> +
> +    while ( a && b )
> +    {
> +        /* if equal, take 'a' -- important for sort stability */
> +        if ( (*cmp)(priv, a, b) <= 0 )
> +        {
> +            tail->next = a;
> +            a = a->next;
> +        }
> +        else
> +        {
> +            tail->next = b;
> +            b = b->next;
> +        }
> +        tail = tail->next;
> +    }
> +    tail->next = a?:b;
> +    return head.next;
> +}
> +
> +/*
> + * Combine final list merge with restoration of standard doubly-linked
> + * list structure.  This approach duplicates code from merge(), but
> + * runs faster than the tidier alternatives of either a separate final
> + * prev-link restoration pass, or maintaining the prev links
> + * throughout.
> + */
> +static void merge_and_restore_back_links(void *priv,
> +                                         int (*cmp)(void *priv,
> +                                                    struct list_head *a,
> +                                                    struct list_head *b),
> +                                         struct list_head *head,
> +                                         struct list_head *a,
> +                                         struct list_head *b)
> +{
> +    struct list_head *tail = head;
> +    u8 count = 0;
> +
> +    while ( a && b )
> +    {
> +        /* if equal, take 'a' -- important for sort stability */
> +        if ( (*cmp)(priv, a, b) <= 0 )
> +        {
> +            tail->next = a;
> +            a->prev = tail;
> +            a = a->next;
> +        }
> +        else
> +        {
> +            tail->next = b;
> +            b->prev = tail;
> +            b = b->next;
> +        }
> +        tail = tail->next;
> +    }
> +    tail->next = a ? : b;
> +
> +    do
> +    {
> +        /*
> +         * In worst cases this loop may run many iterations.
> +         * Continue callbacks to the client even though no
> +         * element comparison is needed, so the client's cmp()
> +         * routine can invoke cond_resched() periodically.
> +         */
> +        if ( unlikely(!(++count)) )
> +            (*cmp)(priv, tail->next, tail->next);
> +
> +        tail->next->prev = tail;
> +        tail = tail->next;
> +    } while ( tail->next );
> +
> +    tail->next = head;
> +    head->prev = tail;
> +}
> +
> +/**
> + * list_sort - sort a list
> + * @priv: private data, opaque to list_sort(), passed to @cmp
> + * @head: the list to sort
> + * @cmp: the elements comparison function
> + *
> + * This function implements "merge sort", which has O(nlog(n))
> + * complexity.
> + *
> + * The comparison function @cmp must return a negative value if @a
> + * should sort before @b, and a positive value if @a should sort after
> + * @b. If @a and @b are equivalent, and their original relative
> + * ordering is to be preserved, @cmp must return 0.
> + */
> +void list_sort(void *priv, struct list_head *head,
> +               int (*cmp)(void *priv, struct list_head *a, struct list_head *b))
> +{
> +    struct list_head *part[MAX_LIST_LENGTH_BITS+1]; /* sorted partial lists
> +                        -- last slot is a sentinel */
> +    int lev;  /* index into part[] */
> +    int max_lev = 0;
> +    struct list_head *list;
> +
> +    if ( list_empty(head) )
> +        return;
> +
> +    memset(part, 0, sizeof(part));
> +
> +    head->prev->next = NULL;
> +    list = head->next;
> +
> +    while ( list )
> +    {
> +        struct list_head *cur = list;
> +        list = list->next;
> +        cur->next = NULL;
> +
> +        for ( lev = 0; part[lev]; lev++ )
> +        {
> +            cur = merge(priv, cmp, part[lev], cur);
> +            part[lev] = NULL;
> +        }
> +        if ( lev > max_lev )
> +        {
> +            if ( unlikely(lev >= ARRAY_SIZE(part)-1) )
> +            {
> +                dprintk(XENLOG_DEBUG, "list too long for efficiency\n");
> +                lev--;
> +            }
> +            max_lev = lev;
> +        }
> +        part[lev] = cur;
> +    }
> +
> +    for ( lev = 0; lev < max_lev; lev++ )
> +        if ( part[lev] )
> +            list = merge(priv, cmp, part[lev], list);
> +
> +    merge_and_restore_back_links(priv, cmp, head, part[max_lev], list);
> +}
> +EXPORT_SYMBOL(list_sort);
> diff --git a/xen/include/xen/list_sort.h b/xen/include/xen/list_sort.h
> new file mode 100644
> index 0000000000..a60c589d4b
> --- /dev/null
> +++ b/xen/include/xen/list_sort.h
> @@ -0,0 +1,11 @@
> +#ifndef _LINUX_LIST_SORT_H
> +#define _LINUX_LIST_SORT_H
> +
> +#include <xen/types.h>
> +
> +struct list_head;
> +
> +void list_sort(void *priv, struct list_head *head,
> +               int (*cmp)(void *priv, struct list_head *a,
> +                          struct list_head *b));
> +#endif
> 

Cheers,
Andre Przywara Feb. 13, 2018, 2:56 p.m. | #2
Hi,

Christoffer, Eric, Marc,
a question about locking order between multiple IRQs below. Could you
have a brief look, please?

On 13/02/18 12:30, Julien Grall wrote:
> Hi Andre,
> 
> On 09/02/18 14:39, Andre Przywara wrote:
>> Adds the sorting function to cover the case where you have more IRQs
>> to consider than you have LRs. We consider their priorities.
>> This pulls in Linux' list_sort.c , which is a merge sort implementation
>> for linked lists.
>>
>> This is based on Linux commit 8e4447457965, written by Christoffer Dall.
>>
>> Signed-off-by: Andre Przywara <andre.przywara@linaro.org>
>> ---
>>   xen/arch/arm/vgic/vgic.c    |  59 +++++++++++++++
>>   xen/common/list_sort.c      | 170
>> ++++++++++++++++++++++++++++++++++++++++++++
>>   xen/include/xen/list_sort.h |  11 +++
> 
> You need to CC "THE REST" maintainers for this code. It would also make
> sense to have a separate patch for adding list_sort.c

Yeah, will do.

>>   3 files changed, 240 insertions(+)
>>   create mode 100644 xen/common/list_sort.c
>>   create mode 100644 xen/include/xen/list_sort.h
>>
>> diff --git a/xen/arch/arm/vgic/vgic.c b/xen/arch/arm/vgic/vgic.c
>> index f517df6d00..a4efd1fd03 100644
>> --- a/xen/arch/arm/vgic/vgic.c
>> +++ b/xen/arch/arm/vgic/vgic.c
>> @@ -16,6 +16,7 @@
>>    */
>>     #include <asm/bug.h>
>> +#include <xen/list_sort.h>
>>   #include <xen/sched.h>
>>     #include <asm/arm_vgic.h>
>> @@ -163,6 +164,64 @@ static struct vcpu *vgic_target_oracle(struct
>> vgic_irq *irq)
>>       return NULL;
>>   }
>>   +/*
>> + * The order of items in the ap_lists defines how we'll pack things
>> in LRs as
>> + * well, the first items in the list being the first things populated
>> in the
>> + * LRs.
>> + *
>> + * A hard rule is that active interrupts can never be pushed out of
>> the LRs
>> + * (and therefore take priority) since we cannot reliably trap on
>> deactivation
>> + * of IRQs and therefore they have to be present in the LRs.
>> + *
>> + * Otherwise things should be sorted by the priority field and the GIC
>> + * hardware support will take care of preemption of priority groups etc.
>> + *
>> + * Return negative if "a" sorts before "b", 0 to preserve order, and
>> positive
>> + * to sort "b" before "a".
> 
> Finally a good explanation of the return value of a sort function :). I
> always get confused what the return is supposed to be.
> 
>> + */
>> +static int vgic_irq_cmp(void *priv, struct list_head *a, struct
>> list_head *b)
>> +{
>> +    struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list);
>> +    struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list);
>> +    bool penda, pendb;
>> +    int ret;
>> +
>> +    spin_lock(&irqa->irq_lock);
>> +    spin_lock(&irqb->irq_lock);
> 
> I guess the locking order does not matter here because this is the only
> place where two IRQs lock have to be taken?

Mmh, good question. I guess indeed in practice this will not be a problem:
- As you mentioned this should be the only(?) place where we take
multiple IRQ locks, but that sounds fragile.
- A certain IRQ should only be on one VCPU list at a given point in
time. So there would be no race with two instances of this compare
function trying to lock the same IRQ.

But that sounds a bit dodgy to rely on. It should be relatively
straightforward to fix this with a simple comparison, shouldn't it?
CC:ing Christoffer, Marc and Eric here to see if we should add this (in
KVM as well).

> Also, this will be done with irq disabled right? In that case, may I ask
> for an ASSERT(!local_irq_is_enabled())? Or maybe in vgic_sort_ap_list.

OK.

>> +
>> +    if ( irqa->active || irqb->active )
>> +    {
>> +        ret = (int)irqb->active - (int)irqa->active;
>> +        goto out;
>> +    }
>> +
>> +    penda = irqa->enabled && irq_is_pending(irqa);
>> +    pendb = irqb->enabled && irq_is_pending(irqb);
>> +
>> +    if ( !penda || !pendb )
>> +    {
>> +        ret = (int)pendb - (int)penda;
>> +        goto out;
>> +    }
>> +
>> +    /* Both pending and enabled, sort by priority */
>> +    ret = irqa->priority - irqb->priority;
>> +out:
>> +    spin_unlock(&irqb->irq_lock);
>> +    spin_unlock(&irqa->irq_lock);
>> +    return ret;
>> +}
>> +
>> +/* Must be called with the ap_list_lock held */
>> +static void vgic_sort_ap_list(struct vcpu *vcpu)
>> +{
>> +    struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
>> +
>> +    ASSERT(spin_is_locked(&vgic_cpu->ap_list_lock));
>> +
>> +    list_sort(NULL, &vgic_cpu->ap_list_head, vgic_irq_cmp);
>> +}
>> +
>>   /*
>>    * Only valid injection if changing level for level-triggered IRQs
>> or for a
>>    * rising edge.
>> diff --git a/xen/common/list_sort.c b/xen/common/list_sort.c
>> new file mode 100644
>> index 0000000000..9c5cc58e43
>> --- /dev/null
>> +++ b/xen/common/list_sort.c
>> @@ -0,0 +1,170 @@
>> +/*
>> + * list_sort.c: merge sort implementation for linked lists
>> + * Copied from the Linux kernel (lib/list_sort.c)
>> + * (without specific copyright notice there)
> 
> I can see you moved from Linux to Xen coding style. Is there any other
> changes made?

Just the list of include files, but I didn't touch any actual code.
Will mention this in the commit message for this separate patch.

Cheers,
Andre.

> 
>> + *
>> + * This program is free software; you can redistribute it and/or
>> modify it
>> + * under the terms and conditions of the GNU General Public License,
>> + * version 2, as published by the Free Software Foundation.
>> + *
>> + * This program is distributed in the hope it will be useful, but
>> WITHOUT
>> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
>> + * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
>> License for
>> + * more details.
>> + *
>> + * You should have received a copy of the GNU General Public License
>> along with
>> + * this program; If not, see <http://www.gnu.org/licenses/>.
>> + */
>> +#include <xen/lib.h>
>> +#include <xen/list.h>
>> +
>> +#define MAX_LIST_LENGTH_BITS 20
>> +
>> +/*
>> + * Returns a list organized in an intermediate format suited
>> + * to chaining of merge() calls: null-terminated, no reserved or
>> + * sentinel head node, "prev" links not maintained.
>> + */
>> +static struct list_head *merge(void *priv,
>> +                               int (*cmp)(void *priv, struct
>> list_head *a,
>> +                                          struct list_head *b),
>> +                               struct list_head *a, struct list_head *b)
>> +{
>> +    struct list_head head, *tail = &head;
>> +
>> +    while ( a && b )
>> +    {
>> +        /* if equal, take 'a' -- important for sort stability */
>> +        if ( (*cmp)(priv, a, b) <= 0 )
>> +        {
>> +            tail->next = a;
>> +            a = a->next;
>> +        }
>> +        else
>> +        {
>> +            tail->next = b;
>> +            b = b->next;
>> +        }
>> +        tail = tail->next;
>> +    }
>> +    tail->next = a?:b;
>> +    return head.next;
>> +}
>> +
>> +/*
>> + * Combine final list merge with restoration of standard doubly-linked
>> + * list structure.  This approach duplicates code from merge(), but
>> + * runs faster than the tidier alternatives of either a separate final
>> + * prev-link restoration pass, or maintaining the prev links
>> + * throughout.
>> + */
>> +static void merge_and_restore_back_links(void *priv,
>> +                                         int (*cmp)(void *priv,
>> +                                                    struct list_head *a,
>> +                                                    struct list_head
>> *b),
>> +                                         struct list_head *head,
>> +                                         struct list_head *a,
>> +                                         struct list_head *b)
>> +{
>> +    struct list_head *tail = head;
>> +    u8 count = 0;
>> +
>> +    while ( a && b )
>> +    {
>> +        /* if equal, take 'a' -- important for sort stability */
>> +        if ( (*cmp)(priv, a, b) <= 0 )
>> +        {
>> +            tail->next = a;
>> +            a->prev = tail;
>> +            a = a->next;
>> +        }
>> +        else
>> +        {
>> +            tail->next = b;
>> +            b->prev = tail;
>> +            b = b->next;
>> +        }
>> +        tail = tail->next;
>> +    }
>> +    tail->next = a ? : b;
>> +
>> +    do
>> +    {
>> +        /*
>> +         * In worst cases this loop may run many iterations.
>> +         * Continue callbacks to the client even though no
>> +         * element comparison is needed, so the client's cmp()
>> +         * routine can invoke cond_resched() periodically.
>> +         */
>> +        if ( unlikely(!(++count)) )
>> +            (*cmp)(priv, tail->next, tail->next);
>> +
>> +        tail->next->prev = tail;
>> +        tail = tail->next;
>> +    } while ( tail->next );
>> +
>> +    tail->next = head;
>> +    head->prev = tail;
>> +}
>> +
>> +/**
>> + * list_sort - sort a list
>> + * @priv: private data, opaque to list_sort(), passed to @cmp
>> + * @head: the list to sort
>> + * @cmp: the elements comparison function
>> + *
>> + * This function implements "merge sort", which has O(nlog(n))
>> + * complexity.
>> + *
>> + * The comparison function @cmp must return a negative value if @a
>> + * should sort before @b, and a positive value if @a should sort after
>> + * @b. If @a and @b are equivalent, and their original relative
>> + * ordering is to be preserved, @cmp must return 0.
>> + */
>> +void list_sort(void *priv, struct list_head *head,
>> +               int (*cmp)(void *priv, struct list_head *a, struct
>> list_head *b))
>> +{
>> +    struct list_head *part[MAX_LIST_LENGTH_BITS+1]; /* sorted partial
>> lists
>> +                        -- last slot is a sentinel */
>> +    int lev;  /* index into part[] */
>> +    int max_lev = 0;
>> +    struct list_head *list;
>> +
>> +    if ( list_empty(head) )
>> +        return;
>> +
>> +    memset(part, 0, sizeof(part));
>> +
>> +    head->prev->next = NULL;
>> +    list = head->next;
>> +
>> +    while ( list )
>> +    {
>> +        struct list_head *cur = list;
>> +        list = list->next;
>> +        cur->next = NULL;
>> +
>> +        for ( lev = 0; part[lev]; lev++ )
>> +        {
>> +            cur = merge(priv, cmp, part[lev], cur);
>> +            part[lev] = NULL;
>> +        }
>> +        if ( lev > max_lev )
>> +        {
>> +            if ( unlikely(lev >= ARRAY_SIZE(part)-1) )
>> +            {
>> +                dprintk(XENLOG_DEBUG, "list too long for efficiency\n");
>> +                lev--;
>> +            }
>> +            max_lev = lev;
>> +        }
>> +        part[lev] = cur;
>> +    }
>> +
>> +    for ( lev = 0; lev < max_lev; lev++ )
>> +        if ( part[lev] )
>> +            list = merge(priv, cmp, part[lev], list);
>> +
>> +    merge_and_restore_back_links(priv, cmp, head, part[max_lev], list);
>> +}
>> +EXPORT_SYMBOL(list_sort);
>> diff --git a/xen/include/xen/list_sort.h b/xen/include/xen/list_sort.h
>> new file mode 100644
>> index 0000000000..a60c589d4b
>> --- /dev/null
>> +++ b/xen/include/xen/list_sort.h
>> @@ -0,0 +1,11 @@
>> +#ifndef _LINUX_LIST_SORT_H
>> +#define _LINUX_LIST_SORT_H
>> +
>> +#include <xen/types.h>
>> +
>> +struct list_head;
>> +
>> +void list_sort(void *priv, struct list_head *head,
>> +               int (*cmp)(void *priv, struct list_head *a,
>> +                          struct list_head *b));
>> +#endif
>>
> 
> Cheers,
>
Julien Grall Feb. 13, 2018, 3 p.m. | #3
On 13/02/18 14:56, Andre Przywara wrote:
>>> diff --git a/xen/common/list_sort.c b/xen/common/list_sort.c
>>> new file mode 100644
>>> index 0000000000..9c5cc58e43
>>> --- /dev/null
>>> +++ b/xen/common/list_sort.c
>>> @@ -0,0 +1,170 @@
>>> +/*
>>> + * list_sort.c: merge sort implementation for linked lists
>>> + * Copied from the Linux kernel (lib/list_sort.c)
>>> + * (without specific copyright notice there)
>>
>> I can see you moved from Linux to Xen coding style. Is there any other
>> changes made?
> 
> Just the list of include files, but I didn't touch any actual code.
> Will mention this in the commit message for this separate patch.

Can you keep the list coding style in that case please?

Thank you.
Christoffer Dall Feb. 13, 2018, 4:21 p.m. | #4
On Tue, Feb 13, 2018 at 3:56 PM, Andre Przywara
<andre.przywara@linaro.org> wrote:
> Hi,
>
> Christoffer, Eric, Marc,
> a question about locking order between multiple IRQs below. Could you
> have a brief look, please?
>
> On 13/02/18 12:30, Julien Grall wrote:
>> Hi Andre,
>>
>> On 09/02/18 14:39, Andre Przywara wrote:
>>> Adds the sorting function to cover the case where you have more IRQs
>>> to consider than you have LRs. We consider their priorities.
>>> This pulls in Linux' list_sort.c , which is a merge sort implementation
>>> for linked lists.
>>>
>>> This is based on Linux commit 8e4447457965, written by Christoffer Dall.
>>>
>>> Signed-off-by: Andre Przywara <andre.przywara@linaro.org>
>>> ---
>>>   xen/arch/arm/vgic/vgic.c    |  59 +++++++++++++++
>>>   xen/common/list_sort.c      | 170
>>> ++++++++++++++++++++++++++++++++++++++++++++
>>>   xen/include/xen/list_sort.h |  11 +++
>>
>> You need to CC "THE REST" maintainers for this code. It would also make
>> sense to have a separate patch for adding list_sort.c
>
> Yeah, will do.
>
>>>   3 files changed, 240 insertions(+)
>>>   create mode 100644 xen/common/list_sort.c
>>>   create mode 100644 xen/include/xen/list_sort.h
>>>
>>> diff --git a/xen/arch/arm/vgic/vgic.c b/xen/arch/arm/vgic/vgic.c
>>> index f517df6d00..a4efd1fd03 100644
>>> --- a/xen/arch/arm/vgic/vgic.c
>>> +++ b/xen/arch/arm/vgic/vgic.c
>>> @@ -16,6 +16,7 @@
>>>    */
>>>     #include <asm/bug.h>
>>> +#include <xen/list_sort.h>
>>>   #include <xen/sched.h>
>>>     #include <asm/arm_vgic.h>
>>> @@ -163,6 +164,64 @@ static struct vcpu *vgic_target_oracle(struct
>>> vgic_irq *irq)
>>>       return NULL;
>>>   }
>>>   +/*
>>> + * The order of items in the ap_lists defines how we'll pack things
>>> in LRs as
>>> + * well, the first items in the list being the first things populated
>>> in the
>>> + * LRs.
>>> + *
>>> + * A hard rule is that active interrupts can never be pushed out of
>>> the LRs
>>> + * (and therefore take priority) since we cannot reliably trap on
>>> deactivation
>>> + * of IRQs and therefore they have to be present in the LRs.
>>> + *
>>> + * Otherwise things should be sorted by the priority field and the GIC
>>> + * hardware support will take care of preemption of priority groups etc.
>>> + *
>>> + * Return negative if "a" sorts before "b", 0 to preserve order, and
>>> positive
>>> + * to sort "b" before "a".
>>
>> Finally a good explanation of the return value of a sort function :). I
>> always get confused what the return is supposed to be.
>>
>>> + */
>>> +static int vgic_irq_cmp(void *priv, struct list_head *a, struct
>>> list_head *b)
>>> +{
>>> +    struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list);
>>> +    struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list);
>>> +    bool penda, pendb;
>>> +    int ret;
>>> +
>>> +    spin_lock(&irqa->irq_lock);
>>> +    spin_lock(&irqb->irq_lock);
>>
>> I guess the locking order does not matter here because this is the only
>> place where two IRQs lock have to be taken?
>
> Mmh, good question. I guess indeed in practice this will not be a problem:
> - As you mentioned this should be the only(?) place where we take
> multiple IRQ locks, but that sounds fragile.
> - A certain IRQ should only be on one VCPU list at a given point in
> time. So there would be no race with two instances of this compare
> function trying to lock the same IRQ.
>
> But that sounds a bit dodgy to rely on. It should be relatively straight
> forward to fix this with a simple comparison, shouldn't it?
> CC:ing Christoffer, Marc and Eric here to see if we should add this (in
> KVM as well).
>

The only concern about holding two locks at the same time is the risk
of another thread attempting to hold a number of locks at the same
time in a different order, leading to a deadlock (either directly or
via a circular dependency).

As you point out, the only place where we take two irq locks at the
same time is in vgic_irq_cmp().  Now, the concern can be reduced to
calling this function more than once in parallel, operating on the
same set of struct irqs.

An IRQ can only be on a single AP list at any time, and we call
vgic_irq_cmp() from exactly one place in the KVM code, which holds the
ap_list_lock, and our locking order defines that the ap_list_lock must
be taken before irq locks.  This means that vgic_irq_cmp() can only
execute in parallel on different AP lists and therefore not operate on
the same set of struct irqs.

There is no need to change anything in the implementation.

Thanks,
-Christoffer

Patch

diff --git a/xen/arch/arm/vgic/vgic.c b/xen/arch/arm/vgic/vgic.c
index f517df6d00..a4efd1fd03 100644
--- a/xen/arch/arm/vgic/vgic.c
+++ b/xen/arch/arm/vgic/vgic.c
@@ -16,6 +16,7 @@ 
  */
 
 #include <asm/bug.h>
+#include <xen/list_sort.h>
 #include <xen/sched.h>
 
 #include <asm/arm_vgic.h>
@@ -163,6 +164,64 @@  static struct vcpu *vgic_target_oracle(struct vgic_irq *irq)
     return NULL;
 }
 
+/*
+ * The order of items in the ap_lists defines how we'll pack things in LRs as
+ * well, the first items in the list being the first things populated in the
+ * LRs.
+ *
+ * A hard rule is that active interrupts can never be pushed out of the LRs
+ * (and therefore take priority) since we cannot reliably trap on deactivation
+ * of IRQs and therefore they have to be present in the LRs.
+ *
+ * Otherwise things should be sorted by the priority field and the GIC
+ * hardware support will take care of preemption of priority groups etc.
+ *
+ * Return negative if "a" sorts before "b", 0 to preserve order, and positive
+ * to sort "b" before "a".
+ */
+static int vgic_irq_cmp(void *priv, struct list_head *a, struct list_head *b)
+{
+    struct vgic_irq *irqa = container_of(a, struct vgic_irq, ap_list);
+    struct vgic_irq *irqb = container_of(b, struct vgic_irq, ap_list);
+    bool penda, pendb;
+    int ret;
+
+    spin_lock(&irqa->irq_lock);
+    spin_lock(&irqb->irq_lock);
+
+    if ( irqa->active || irqb->active )
+    {
+        ret = (int)irqb->active - (int)irqa->active;
+        goto out;
+    }
+
+    penda = irqa->enabled && irq_is_pending(irqa);
+    pendb = irqb->enabled && irq_is_pending(irqb);
+
+    if ( !penda || !pendb )
+    {
+        ret = (int)pendb - (int)penda;
+        goto out;
+    }
+
+    /* Both pending and enabled, sort by priority */
+    ret = irqa->priority - irqb->priority;
+out:
+    spin_unlock(&irqb->irq_lock);
+    spin_unlock(&irqa->irq_lock);
+    return ret;
+}
+
+/* Must be called with the ap_list_lock held */
+static void vgic_sort_ap_list(struct vcpu *vcpu)
+{
+    struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;
+
+    ASSERT(spin_is_locked(&vgic_cpu->ap_list_lock));
+
+    list_sort(NULL, &vgic_cpu->ap_list_head, vgic_irq_cmp);
+}
+
 /*
  * Only valid injection if changing level for level-triggered IRQs or for a
  * rising edge.
diff --git a/xen/common/list_sort.c b/xen/common/list_sort.c
new file mode 100644
index 0000000000..9c5cc58e43
--- /dev/null
+++ b/xen/common/list_sort.c
@@ -0,0 +1,170 @@ 
+/*
+ * list_sort.c: merge sort implementation for linked lists
+ * Copied from the Linux kernel (lib/list_sort.c)
+ * (without specific copyright notice there)
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <xen/lib.h>
+#include <xen/list.h>
+
+#define MAX_LIST_LENGTH_BITS 20
+
+/*
+ * Returns a list organized in an intermediate format suited
+ * to chaining of merge() calls: null-terminated, no reserved or
+ * sentinel head node, "prev" links not maintained.
+ */
+static struct list_head *merge(void *priv,
+                               int (*cmp)(void *priv, struct list_head *a,
+                                          struct list_head *b),
+                               struct list_head *a, struct list_head *b)
+{
+    /* "head" is a stack-local dummy; only head.next is ever read back. */
+    struct list_head head, *tail = &head;
+
+    while ( a && b )
+    {
+        /* if equal, take 'a' -- important for sort stability */
+        if ( (*cmp)(priv, a, b) <= 0 )
+        {
+            tail->next = a;
+            a = a->next;
+        }
+        else
+        {
+            tail->next = b;
+            b = b->next;
+        }
+        tail = tail->next;
+    }
+    /* Append whichever input still has elements (GCC's "x ?: y" extension). */
+    tail->next = a?:b;
+    return head.next;
+}
+
+/*
+ * Combine final list merge with restoration of standard doubly-linked
+ * list structure.  This approach duplicates code from merge(), but
+ * runs faster than the tidier alternatives of either a separate final
+ * prev-link restoration pass, or maintaining the prev links
+ * throughout.
+ */
+static void merge_and_restore_back_links(void *priv,
+                                         int (*cmp)(void *priv,
+                                                    struct list_head *a,
+                                                    struct list_head *b),
+                                         struct list_head *head,
+                                         struct list_head *a,
+                                         struct list_head *b)
+{
+    struct list_head *tail = head;
+    uint8_t count = 0;      /* Xen style: uint8_t rather than Linux's u8. */
+
+    while ( a && b )
+    {
+        /* if equal, take 'a' -- important for sort stability */
+        if ( (*cmp)(priv, a, b) <= 0 )
+        {
+            tail->next = a;
+            a->prev = tail;
+            a = a->next;
+        }
+        else
+        {
+            tail->next = b;
+            b->prev = tail;
+            b = b->next;
+        }
+        tail = tail->next;
+    }
+    tail->next = a ?: b;
+
+    do
+    {
+        /*
+         * In worst cases this loop may run many iterations.  Every 256
+         * elements (uint8_t wrap-around) call back into the client's
+         * cmp() routine with two identical arguments, even though no
+         * comparison is needed, so it can perform periodic housekeeping.
+         * (Linux uses this hook for cond_resched(); Xen has no direct
+         * equivalent, so callers may simply ignore a == b invocations.)
+         */
+        if ( unlikely(!(++count)) )
+            (*cmp)(priv, tail->next, tail->next);
+
+        tail->next->prev = tail;
+        tail = tail->next;
+    } while ( tail->next );
+
+    /* Close the ring: link the last element back to the list head. */
+    tail->next = head;
+    head->prev = tail;
+}
+
+/**
+ * list_sort - sort a list
+ * @priv: private data, opaque to list_sort(), passed to @cmp
+ * @head: the list to sort
+ * @cmp: the elements comparison function
+ *
+ * This function implements "merge sort", which has O(nlog(n))
+ * complexity.
+ *
+ * The comparison function @cmp must return a negative value if @a
+ * should sort before @b, and a positive value if @a should sort after
+ * @b. If @a and @b are equivalent, and their original relative
+ * ordering is to be preserved, @cmp must return 0.
+ */
+void list_sort(void *priv, struct list_head *head,
+               int (*cmp)(void *priv, struct list_head *a, struct list_head *b))
+{
+    /* Sorted partial lists; part[i] holds 2^i elements, last slot is a
+     * sentinel (always NULL) so the merge loop below terminates. */
+    struct list_head *part[MAX_LIST_LENGTH_BITS + 1];
+    int lev;  /* index into part[] */
+    int max_lev = 0;
+    struct list_head *list;
+
+    if ( list_empty(head) )
+        return;
+
+    memset(part, 0, sizeof(part));
+
+    /* Detach the elements into a NULL-terminated singly-linked chain. */
+    head->prev->next = NULL;
+    list = head->next;
+
+    while ( list )
+    {
+        struct list_head *cur = list;
+        list = list->next;
+        cur->next = NULL;
+
+        /* Carry-propagate: merge equal-sized partial lists, binary-counter
+         * style, so each element is merged O(log n) times. */
+        for ( lev = 0; part[lev]; lev++ )
+        {
+            cur = merge(priv, cmp, part[lev], cur);
+            part[lev] = NULL;
+        }
+        if ( lev > max_lev )
+        {
+            if ( unlikely(lev >= ARRAY_SIZE(part)-1) )
+            {
+                /* > 2^20 elements: degrade gracefully by merging into the
+                 * top slot instead of overflowing part[]. */
+                dprintk(XENLOG_DEBUG, "list too long for efficiency\n");
+                lev--;
+            }
+            max_lev = lev;
+        }
+        part[lev] = cur;
+    }
+
+    for ( lev = 0; lev < max_lev; lev++ )
+        if ( part[lev] )
+            list = merge(priv, cmp, part[lev], list);
+
+    merge_and_restore_back_links(priv, cmp, head, part[max_lev], list);
+}
diff --git a/xen/include/xen/list_sort.h b/xen/include/xen/list_sort.h
new file mode 100644
index 0000000000..a60c589d4b
--- /dev/null
+++ b/xen/include/xen/list_sort.h
@@ -0,0 +1,11 @@ 
+/*
+ * Interface for the linked-list merge sort in xen/common/list_sort.c
+ * (imported from Linux's lib/list_sort.c).
+ */
+#ifndef __XEN_LIST_SORT_H__
+#define __XEN_LIST_SORT_H__
+
+#include <xen/types.h>
+
+struct list_head;
+
+/*
+ * Sort the list at @head in place using a stable merge sort.
+ * @cmp returns negative/zero/positive as in the usual comparator
+ * convention; @priv is passed through to @cmp unchanged.
+ */
+void list_sort(void *priv, struct list_head *head,
+               int (*cmp)(void *priv, struct list_head *a,
+                          struct list_head *b));
+#endif /* __XEN_LIST_SORT_H__ */