diff mbox series

[Xen-devel,v3,5/8] ARM: VGIC: factor out vgic_connect_hw_irq()

Message ID 20180124181058.6157-6-andre.przywara@linaro.org
State Superseded
Headers show
Series ARM: VGIC/GIC separation cleanups | expand

Commit Message

Andre Przywara Jan. 24, 2018, 6:10 p.m. UTC
At the moment we happily access VGIC internal data structures like
the rank and struct pending_irq in gic.c, which should be VGIC agnostic.

Factor out a new function vgic_connect_hw_irq(), which allows a virtual
IRQ to be connected to a hardware IRQ (using the hw bit in the LR).

This removes said accesses to VGIC data structures and improves abstraction.

Signed-off-by: Andre Przywara <andre.przywara@linaro.org>
Acked-by: Stefano Stabellini <sstabellini@kernel.org>
---
 xen/arch/arm/gic-vgic.c    | 31 +++++++++++++++++++++++++++++++
 xen/arch/arm/gic.c         | 42 ++++++------------------------------------
 xen/include/asm-arm/vgic.h |  2 ++
 3 files changed, 39 insertions(+), 36 deletions(-)

Comments

Julien Grall Jan. 30, 2018, 1:19 p.m. UTC | #1
Hi Andre,

On 24/01/18 18:10, Andre Przywara wrote:
> At the moment we happily access VGIC internal data structures like
> the rank and struct pending_irq in gic.c, which should be VGIC agnostic.
> 
> Factor out a new function vgic_connect_hw_irq(), which allows a virtual
> IRQ to be connected to a hardware IRQ (using the hw bit in the LR).
> 
> This removes said accesses to VGIC data structures and improves abstraction.

You are modifying the locking of the 2 functions. But I don't see how 
this is safe. Can you explain it?

> 
> Signed-off-by: Andre Przywara <andre.przywara@linaro.org>
> Acked-by: Stefano Stabellini <sstabellini@kernel.org>
> ---
>   xen/arch/arm/gic-vgic.c    | 31 +++++++++++++++++++++++++++++++
>   xen/arch/arm/gic.c         | 42 ++++++------------------------------------
>   xen/include/asm-arm/vgic.h |  2 ++
>   3 files changed, 39 insertions(+), 36 deletions(-)
> 
> diff --git a/xen/arch/arm/gic-vgic.c b/xen/arch/arm/gic-vgic.c
> index 0a2958c5db..d44e4dacd3 100644
> --- a/xen/arch/arm/gic-vgic.c
> +++ b/xen/arch/arm/gic-vgic.c
> @@ -411,6 +411,37 @@ void gic_dump_vgic_info(struct vcpu *v)
>           printk("Pending irq=%d\n", p->irq);
>   }
>   
> +int vgic_connect_hw_irq(struct domain *d, struct vcpu *v, unsigned int virq,
> +                        struct irq_desc *desc)
> +{
> +    unsigned long flags;
> +    /* Use vcpu0 to retrieve the pending_irq struct. Given that we only
> +     * route SPIs to guests, it doesn't make any difference. */

Can you fix the coding style of the comment?

> +    struct vcpu *v_target = vgic_get_target_vcpu(d->vcpu[0], virq);
> +    struct vgic_irq_rank *rank = vgic_rank_irq(v_target, virq);
> +    struct pending_irq *p = irq_to_pending(v_target, virq);
> +    int ret = 0;
> +
> +    /* We are taking to rank lock to prevent parallel connections. */
> +    vgic_lock_rank(v_target, rank, flags);
> +
> +    if ( desc )
> +    {
> +        /* The VIRQ should not be already enabled by the guest */
> +        if ( !p->desc &&
> +             !test_bit(GIC_IRQ_GUEST_ENABLED, &p->status) )
> +            p->desc = desc;
> +        else
> +            ret = -EBUSY;
> +    }
> +    else
> +        p->desc = NULL;
> +
> +    vgic_unlock_rank(v_target, rank, flags);
> +
> +    return ret;
> +}
> +
>   /*
>    * Local variables:
>    * mode: C
> diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
> index 4cb74d449e..d46a6d54b3 100644
> --- a/xen/arch/arm/gic.c
> +++ b/xen/arch/arm/gic.c
> @@ -128,27 +128,12 @@ void gic_route_irq_to_xen(struct irq_desc *desc, unsigned int priority)
>   int gic_route_irq_to_guest(struct domain *d, unsigned int virq,
>                              struct irq_desc *desc, unsigned int priority)
>   {
> -    unsigned long flags;
> -    /* Use vcpu0 to retrieve the pending_irq struct. Given that we only
> -     * route SPIs to guests, it doesn't make any difference. */
> -    struct vcpu *v_target = vgic_get_target_vcpu(d->vcpu[0], virq);
> -    struct vgic_irq_rank *rank = vgic_rank_irq(v_target, virq);
> -    struct pending_irq *p = irq_to_pending(v_target, virq);
> -    int res = -EBUSY;
> -
>       ASSERT(spin_is_locked(&desc->lock));
>       /* Caller has already checked that the IRQ is an SPI */
>       ASSERT(virq >= 32);
>       ASSERT(virq < vgic_num_irqs(d));
>       ASSERT(!is_lpi(virq));
>   
> -    vgic_lock_rank(v_target, rank, flags);
> -
> -    if ( p->desc ||
> -         /* The VIRQ should not be already enabled by the guest */
> -         test_bit(GIC_IRQ_GUEST_ENABLED, &p->status) )
> -        goto out;
> -
>       desc->handler = gic_hw_ops->gic_guest_irq_type;
>       set_bit(_IRQ_GUEST, &desc->status);

This looks wrong to me. You don't want to execute any of the code below 
if you are not able to route the pIRQ. For instance because the vIRQ has 
already a desc assigned.

>   
> @@ -156,31 +141,19 @@ int gic_route_irq_to_guest(struct domain *d, unsigned int virq,
>           gic_set_irq_type(desc, desc->arch.type);
>       gic_set_irq_priority(desc, priority);
>   
> -    p->desc = desc;
> -    res = 0;
> -
> -out:
> -    vgic_unlock_rank(v_target, rank, flags);
> -
> -    return res;
> +    return vgic_connect_hw_irq(d, NULL, virq, desc);
>   }
>   
>   /* This function only works with SPIs for now */
>   int gic_remove_irq_from_guest(struct domain *d, unsigned int virq,
>                                 struct irq_desc *desc)
>   {
> -    struct vcpu *v_target = vgic_get_target_vcpu(d->vcpu[0], virq);
> -    struct vgic_irq_rank *rank = vgic_rank_irq(v_target, virq);
> -    struct pending_irq *p = irq_to_pending(v_target, virq);
> -    unsigned long flags;
> +    int ret;
>   
>       ASSERT(spin_is_locked(&desc->lock));
>       ASSERT(test_bit(_IRQ_GUEST, &desc->status));
> -    ASSERT(p->desc == desc);

You dropped this assert but I don't see any replacement in the new code? 
We really want to make sure the caller will not do something dumb here 
(like passing a different desc).

>       ASSERT(!is_lpi(virq));
>   
> -    vgic_lock_rank(v_target, rank, flags);
> -
>       if ( d->is_dying )
>       {
>           desc->handler->shutdown(desc);
> @@ -198,19 +171,16 @@ int gic_remove_irq_from_guest(struct domain *d, unsigned int virq,
>            */
>           if ( test_bit(_IRQ_INPROGRESS, &desc->status) ||
>                !test_bit(_IRQ_DISABLED, &desc->status) )
> -        {
> -            vgic_unlock_rank(v_target, rank, flags);
>               return -EBUSY;
> -        }
>       }
>   
> +    ret = vgic_connect_hw_irq(d, NULL, virq, NULL);
> +    if ( ret )
> +        return ret;
> +
>       clear_bit(_IRQ_GUEST, &desc->status);
>       desc->handler = &no_irq_type;
>   
> -    p->desc = NULL;
> -
> -    vgic_unlock_rank(v_target, rank, flags);
> -
>       return 0;
>   }
>   
> diff --git a/xen/include/asm-arm/vgic.h b/xen/include/asm-arm/vgic.h
> index 22c8502c95..f4240df371 100644
> --- a/xen/include/asm-arm/vgic.h
> +++ b/xen/include/asm-arm/vgic.h
> @@ -219,6 +219,8 @@ int vgic_v2_init(struct domain *d, int *mmio_count);
>   int vgic_v3_init(struct domain *d, int *mmio_count);
>   
>   bool vgic_evtchn_irq_pending(struct vcpu *v);
> +int vgic_connect_hw_irq(struct domain *d, struct vcpu *v, unsigned int virq,
> +                        struct irq_desc *desc);
>   
>   extern int domain_vgic_register(struct domain *d, int *mmio_count);
>   extern int vcpu_vgic_free(struct vcpu *v);
> 

Cheers,
Andre Przywara Jan. 31, 2018, 3:54 p.m. UTC | #2
Hi,

Yeah! Locking discussions! Have fun below ;-)

On 30/01/18 13:19, Julien Grall wrote:
> Hi Andre,
> 
> On 24/01/18 18:10, Andre Przywara wrote:
>> At the moment we happily access VGIC internal data structures like
>> the rank and struct pending_irq in gic.c, which should be VGIC agnostic.
>>
>> Factor out a new function vgic_connect_hw_irq(), which allows a virtual
>> IRQ to be connected to a hardware IRQ (using the hw bit in the LR).
>>
>> This removes said accesses to VGIC data structures and improves
>> abstraction.
> 
> You are modifying the locking of the 2 functions. But I don't see how
> this is safe. Can you explain it?

Are you worried about anything particular? I will explain my reasoning
below, but feel free to point me to the cause of your gripes.

>>
>> Signed-off-by: Andre Przywara <andre.przywara@linaro.org>
>> Acked-by: Stefano Stabellini <sstabellini@kernel.org>
>> ---
>>   xen/arch/arm/gic-vgic.c    | 31 +++++++++++++++++++++++++++++++
>>   xen/arch/arm/gic.c         | 42
>> ++++++------------------------------------
>>   xen/include/asm-arm/vgic.h |  2 ++
>>   3 files changed, 39 insertions(+), 36 deletions(-)
>>
>> diff --git a/xen/arch/arm/gic-vgic.c b/xen/arch/arm/gic-vgic.c
>> index 0a2958c5db..d44e4dacd3 100644
>> --- a/xen/arch/arm/gic-vgic.c
>> +++ b/xen/arch/arm/gic-vgic.c
>> @@ -411,6 +411,37 @@ void gic_dump_vgic_info(struct vcpu *v)
>>           printk("Pending irq=%d\n", p->irq);
>>   }
>>   +int vgic_connect_hw_irq(struct domain *d, struct vcpu *v, unsigned
>> int virq,
>> +                        struct irq_desc *desc)
>> +{
>> +    unsigned long flags;
>> +    /* Use vcpu0 to retrieve the pending_irq struct. Given that we only
>> +     * route SPIs to guests, it doesn't make any difference. */
> 
> Can you fix the coding style of the comment?

Sure, I wasn't sure if the original style was on purpose.

>> +    struct vcpu *v_target = vgic_get_target_vcpu(d->vcpu[0], virq);
>> +    struct vgic_irq_rank *rank = vgic_rank_irq(v_target, virq);
>> +    struct pending_irq *p = irq_to_pending(v_target, virq);
>> +    int ret = 0;
>> +
>> +    /* We are taking to rank lock to prevent parallel connections. */
>> +    vgic_lock_rank(v_target, rank, flags);

So the rank lock here protects against assigning different (or the
same?) hardware IRQs to some particular virtual IRQ. The rank lock is
not strictly correct, but a not-too-bad replacement for the missing
per-IRQ lock. To this degree it protects reading and assigning p->desc
in struct pending_irq.

>> +
>> +    if ( desc )
>> +    {
>> +        /* The VIRQ should not be already enabled by the guest */
>> +        if ( !p->desc &&
>> +             !test_bit(GIC_IRQ_GUEST_ENABLED, &p->status) )
>> +            p->desc = desc;
>> +        else
>> +            ret = -EBUSY;
>> +    }
>> +    else
>> +        p->desc = NULL;
>> +
>> +    vgic_unlock_rank(v_target, rank, flags);
>> +
>> +    return ret;
>> +}
>> +
>>   /*
>>    * Local variables:
>>    * mode: C
>> diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
>> index 4cb74d449e..d46a6d54b3 100644
>> --- a/xen/arch/arm/gic.c
>> +++ b/xen/arch/arm/gic.c
>> @@ -128,27 +128,12 @@ void gic_route_irq_to_xen(struct irq_desc *desc,
>> unsigned int priority)
>>   int gic_route_irq_to_guest(struct domain *d, unsigned int virq,
>>                              struct irq_desc *desc, unsigned int
>> priority)
>>   {
>> -    unsigned long flags;
>> -    /* Use vcpu0 to retrieve the pending_irq struct. Given that we only
>> -     * route SPIs to guests, it doesn't make any difference. */
>> -    struct vcpu *v_target = vgic_get_target_vcpu(d->vcpu[0], virq);
>> -    struct vgic_irq_rank *rank = vgic_rank_irq(v_target, virq);
>> -    struct pending_irq *p = irq_to_pending(v_target, virq);
>> -    int res = -EBUSY;
>> -
>>       ASSERT(spin_is_locked(&desc->lock));
>>       /* Caller has already checked that the IRQ is an SPI */
>>       ASSERT(virq >= 32);
>>       ASSERT(virq < vgic_num_irqs(d));
>>       ASSERT(!is_lpi(virq));
>>   -    vgic_lock_rank(v_target, rank, flags);
>> -
>> -    if ( p->desc ||
>> -         /* The VIRQ should not be already enabled by the guest */
>> -         test_bit(GIC_IRQ_GUEST_ENABLED, &p->status) )
>> -        goto out;
>> -
>>       desc->handler = gic_hw_ops->gic_guest_irq_type;

Assigning desc->handler is protected by the desc lock, which we check is
held above. This is independent from the virtual IRQ locking. This is
only leaving aside the issue you correctly point out below ...

>>       set_bit(_IRQ_GUEST, &desc->status);
> 
> This looks wrong to me. You don't want to execute any of the code below
> if you are not able to route the pIRQ. For instance because the vIRQ has
> already a desc assigned.

Ah, good point. Indeed I didn't consider the failure path. Should be
easily fixed, though. Thanks for catching this.

>>   @@ -156,31 +141,19 @@ int gic_route_irq_to_guest(struct domain *d,
>> unsigned int virq,
>>           gic_set_irq_type(desc, desc->arch.type);
>>       gic_set_irq_priority(desc, priority);
>>   -    p->desc = desc;
>> -    res = 0;
>> -
>> -out:
>> -    vgic_unlock_rank(v_target, rank, flags);
>> -
>> -    return res;
>> +    return vgic_connect_hw_irq(d, NULL, virq, desc);
>>   }
>>     /* This function only works with SPIs for now */
>>   int gic_remove_irq_from_guest(struct domain *d, unsigned int virq,
>>                                 struct irq_desc *desc)
>>   {
>> -    struct vcpu *v_target = vgic_get_target_vcpu(d->vcpu[0], virq);
>> -    struct vgic_irq_rank *rank = vgic_rank_irq(v_target, virq);
>> -    struct pending_irq *p = irq_to_pending(v_target, virq);
>> -    unsigned long flags;
>> +    int ret;
>>         ASSERT(spin_is_locked(&desc->lock));
>>       ASSERT(test_bit(_IRQ_GUEST, &desc->status));
>> -    ASSERT(p->desc == desc);
> 
> You dropped this assert but I don't see any replacement in the new code?
> We really want to make sure the caller will not do something dumb here
> (like passing a different desc).

So the first thing here is that I can't have anything dereferencing
struct pending_irq here. Secondly the rank lock (protecting the p->
structure) is only taken below, so nothing prevents this from changing
between the ASSERT and the lock, AFAICS.
And to be honest, I don't really get the purpose of this ASSERT: the
desc pointer is taken from the pending_irq in the caller, but without
any locks. So if I am not mistaken, it could race with a
gic_route_irq_to_xen(), and that would lead to the ASSERT triggering,
just because of this race and not because of the code being broken
ultimately.
I *could* get the irq_desc by calling the new vgic_get_hw_irq_desc() -
again. Not sure if that is useful, though.
Another possibility would be to rethink this whole functionality:
The only caller (release_guest_irq() in irq.c) gets a virtual IRQ
number, then finds the associated irq_desc, only to lock it. Then it
passes both the virtual IRQ number and the irq_desc to this function,
where both are rechecked. The reason for this redundancy seems to be
some locking order (irq_desc first?), however I can't find any
documentation about this.
So I wonder if we could just pass on only the virtual IRQ number, and
let it up to this function here to safely retrieve the right irq_desc.

>>       ASSERT(!is_lpi(virq));
>>   -    vgic_lock_rank(v_target, rank, flags);

I couldn't find what this lock protects here, so early at least. Until
the actual "p->desc = NULL;" line below nothing needs to be protected by
this lock, it's all already covered by the desc lock.
We only need the lock to eventually atomically remove the connection
between the h/w and the virtual IRQ, which is done in
vgic_connect_hw_irq() now.

Cheers,
Andre.

>> -
>>       if ( d->is_dying )
>>       {
>>           desc->handler->shutdown(desc);
>> @@ -198,19 +171,16 @@ int gic_remove_irq_from_guest(struct domain *d,
>> unsigned int virq,
>>            */
>>           if ( test_bit(_IRQ_INPROGRESS, &desc->status) ||
>>                !test_bit(_IRQ_DISABLED, &desc->status) )
>> -        {
>> -            vgic_unlock_rank(v_target, rank, flags);
>>               return -EBUSY;
>> -        }
>>       }
>>   +    ret = vgic_connect_hw_irq(d, NULL, virq, NULL);
>> +    if ( ret )
>> +        return ret;
>> +
>>       clear_bit(_IRQ_GUEST, &desc->status);
>>       desc->handler = &no_irq_type;
>>   -    p->desc = NULL;
>> -
>> -    vgic_unlock_rank(v_target, rank, flags);
>> -
>>       return 0;
>>   }
>>   diff --git a/xen/include/asm-arm/vgic.h b/xen/include/asm-arm/vgic.h
>> index 22c8502c95..f4240df371 100644
>> --- a/xen/include/asm-arm/vgic.h
>> +++ b/xen/include/asm-arm/vgic.h
>> @@ -219,6 +219,8 @@ int vgic_v2_init(struct domain *d, int *mmio_count);
>>   int vgic_v3_init(struct domain *d, int *mmio_count);
>>     bool vgic_evtchn_irq_pending(struct vcpu *v);
>> +int vgic_connect_hw_irq(struct domain *d, struct vcpu *v, unsigned
>> int virq,
>> +                        struct irq_desc *desc);
>>     extern int domain_vgic_register(struct domain *d, int *mmio_count);
>>   extern int vcpu_vgic_free(struct vcpu *v);
>>
> 
> Cheers,
>
Julien Grall Jan. 31, 2018, 4:30 p.m. UTC | #3
On 31/01/18 15:54, Andre Przywara wrote:
> Hi,
> 
> Yeah! Locking discussions! Have fun below ;-)
> 
> On 30/01/18 13:19, Julien Grall wrote:
>> Hi Andre,
>>
>> On 24/01/18 18:10, Andre Przywara wrote:
>>> At the moment we happily access VGIC internal data structures like
>>> the rank and struct pending_irq in gic.c, which should be VGIC agnostic.
>>>
>>> Factor out a new function vgic_connect_hw_irq(), which allows a virtual
>>> IRQ to be connected to a hardware IRQ (using the hw bit in the LR).
>>>
>>> This removes said accesses to VGIC data structures and improves
>>> abstraction.
>>
>> You are modifying the locking of the 2 functions. But I don't see how
>> this is safe. Can you explain it?
> 
> Are you worried about anything particular? I will explain my reasoning
> below, but feel free to point me to the cause of your gripes.

In general, it is quite nice to explain roughly in the commit message 
why the new locking order is ok. It would avoid reviewers to spend time 
guessing why it is fine.

In that particular case I am concerned about any potential concurrent 
access on anything related to a vIRQ.

[...]

>>>        set_bit(_IRQ_GUEST, &desc->status);
>>
>> This looks wrong to me. You don't want to execute any of the code below
>> if you are not able to route the pIRQ. For instance because the vIRQ has
>> already a desc assigned.
> 
> Ah, good point. Indeed I didn't consider the failure path. Should be
> easily fixed, though. Thanks for catching this.
> 
>>>    @@ -156,31 +141,19 @@ int gic_route_irq_to_guest(struct domain *d,
>>> unsigned int virq,
>>>            gic_set_irq_type(desc, desc->arch.type);
>>>        gic_set_irq_priority(desc, priority);
>>>    -    p->desc = desc;
>>> -    res = 0;
>>> -
>>> -out:
>>> -    vgic_unlock_rank(v_target, rank, flags);
>>> -
>>> -    return res;
>>> +    return vgic_connect_hw_irq(d, NULL, virq, desc);
>>>    }
>>>      /* This function only works with SPIs for now */
>>>    int gic_remove_irq_from_guest(struct domain *d, unsigned int virq,
>>>                                  struct irq_desc *desc)
>>>    {
>>> -    struct vcpu *v_target = vgic_get_target_vcpu(d->vcpu[0], virq);
>>> -    struct vgic_irq_rank *rank = vgic_rank_irq(v_target, virq);
>>> -    struct pending_irq *p = irq_to_pending(v_target, virq);
>>> -    unsigned long flags;
>>> +    int ret;
>>>          ASSERT(spin_is_locked(&desc->lock));
>>>        ASSERT(test_bit(_IRQ_GUEST, &desc->status));
>>> -    ASSERT(p->desc == desc);
>>
>> You dropped this assert but I don't see any replacement in the new code?
>> We really want to make sure the caller will not do something dumb here
>> (like passing a different desc).
> 
> So the first thing here is that I can't have anything dereferencing
> struct pending_irq here. Secondly the rank lock (protecting the p->
> structure) is only taken below, so nothing prevents this from changing
> between the ASSERT and the lock, AFAICS.

You could move the ASSERT within the lock, right?

> And to be honest, I don't really get the purpose of this ASSERT: the
> desc pointer is taken from the pending_irq in the caller, but without
> any locks. So if I am not mistaken, it could race with a
> gic_route_irq_to_xen(), and that would lead to the ASSERT triggering,
> just because of this race and not because of the code being broken
> ultimately.
> I *could* get the irq_desc by calling the new vgic_get_hw_irq_desc() -
> again. Not sure if that is useful, though.
> Another possibility would be to rethink this whole functionality:
> The only caller (release_guest_irq() in irq.c) gets a virtual IRQ
> number, then finds the associated irq_desc, only to lock it. Then it
> passes both the virtual IRQ number and the irq_desc to this function,
> where both are rechecked. The reason for this redundancy seems to be
> some locking order (irq_desc first?), however I can't find any
> documentation about this.
> So I wonder if we could just pass on only the virtual IRQ number, and
> let it up to this function here to safely retrieve the right irq_desc.

While I agree that the ASSERT without any lock is dangerous, it could at 
least catch someone passing the wrong irq_desc. Something will really go 
wrong if you disable pIRQ A but the irq_desc was belonging to pIRQ B.

And I agree that the code does not prevent that today. But it at least 
limit the scope of the problem.

So I think the code should be:

gic_remove_irq_from_guest(.....)
{

    vgic_lock_rank(...)
    if ( p->desc != desc )
    {
	vgic_unlock_rank(...)
         return -1;
    }

    do the vGIC removal

    vgic_unlock_rank(...)

    return 0;
}

> 
>>>        ASSERT(!is_lpi(virq));
>>>    -    vgic_lock_rank(v_target, rank, flags);
> 
> I couldn't find what this lock protects here, so early at least. Until
> the actual "p->desc = NULL;" line below nothing needs to be protected by
> this lock, it's all already covered by the desc lock.
> We only need the lock to eventually atomically remove the connection
> between the h/w and the virtual IRQ, which is done in
> vgic_connect_hw_irq() now.

See above.
Andre Przywara Feb. 1, 2018, 12:07 p.m. UTC | #4
Hi,

On 31/01/18 16:30, Julien Grall wrote:
> 
> 
> On 31/01/18 15:54, Andre Przywara wrote:
>> Hi,
>>
>> Yeah! Locking discussions! Have fun below ;-)
>>
>> On 30/01/18 13:19, Julien Grall wrote:
>>> Hi Andre,
>>>
>>> On 24/01/18 18:10, Andre Przywara wrote:
>>>> At the moment we happily access VGIC internal data structures like
>>>> the rank and struct pending_irq in gic.c, which should be VGIC
>>>> agnostic.
>>>>
>>>> Factor out a new function vgic_connect_hw_irq(), which allows a virtual
>>>> IRQ to be connected to a hardware IRQ (using the hw bit in the LR).
>>>>
>>>> This removes said accesses to VGIC data structures and improves
>>>> abstraction.
>>>
>>> You are modifying the locking of the 2 functions. But I don't see how
>>> this is safe. Can you explain it?
>>
>> Are you worried about anything particular? I will explain my reasoning
>> below, but feel free to point me to the cause of your gripes.
> 
> In general, it is quite nice to explain roughly in the commit message
> why the new locking order is ok. It would avoid reviewers to spend time
> guessing why it is fine.
> 
> In that particular case I am concerned about any potential concurrent
> access on anything related to a vIRQ.
> 
> [...]
> 
>>>>        set_bit(_IRQ_GUEST, &desc->status);
>>>
>>> This looks wrong to me. You don't want to execute any of the code below
>>> if you are not able to route the pIRQ. For instance because the vIRQ has
>>> already a desc assigned.
>>
>> Ah, good point. Indeed I didn't consider the failure path. Should be
>> easily fixed, though. Thanks for catching this.
>>
>>>>    @@ -156,31 +141,19 @@ int gic_route_irq_to_guest(struct domain *d,
>>>> unsigned int virq,
>>>>            gic_set_irq_type(desc, desc->arch.type);
>>>>        gic_set_irq_priority(desc, priority);
>>>>    -    p->desc = desc;
>>>> -    res = 0;
>>>> -
>>>> -out:
>>>> -    vgic_unlock_rank(v_target, rank, flags);
>>>> -
>>>> -    return res;
>>>> +    return vgic_connect_hw_irq(d, NULL, virq, desc);
>>>>    }
>>>>      /* This function only works with SPIs for now */
>>>>    int gic_remove_irq_from_guest(struct domain *d, unsigned int virq,
>>>>                                  struct irq_desc *desc)
>>>>    {
>>>> -    struct vcpu *v_target = vgic_get_target_vcpu(d->vcpu[0], virq);
>>>> -    struct vgic_irq_rank *rank = vgic_rank_irq(v_target, virq);
>>>> -    struct pending_irq *p = irq_to_pending(v_target, virq);
>>>> -    unsigned long flags;
>>>> +    int ret;
>>>>          ASSERT(spin_is_locked(&desc->lock));
>>>>        ASSERT(test_bit(_IRQ_GUEST, &desc->status));
>>>> -    ASSERT(p->desc == desc);
>>>
>>> You dropped this assert but I don't see any replacement in the new code?
>>> We really want to make sure the caller will not do something dumb here
>>> (like passing a different desc).
>>
>> So the first thing here is that I can't have anything dereferencing
>> struct pending_irq here. Secondly the rank lock (protecting the p->
>> structure) is only taken below, so nothing prevents this from changing
>> between the ASSERT and the lock, AFAICS.
> 
> You could move the ASSERT within the lock, right?

Move: yes, ASSERT: no.
My understanding is that an ASSERT detects logical coding errors, where
a functions passes an invalid argument. Hopefully we trigger this code
path while running a debug build during testing, and can fix this. The
ASSERT stays in place to prevent new users of this function to do wrong
things and to spot regressions.

Now in this particular case I sense a *race* that is not properly
protected, I guess due to locking order. My understanding of the code
flow (please correct me if I am wrong here):
- Someone calls release_guest_irq(), passing in a domain and a *virtual*
IRQ number.
- release_guest_irq() looks up the associated irq_desc (no vIRQ locks
involved!), does some sanity checks, locks that *desc* structure and
calls  gic_remove_irq_from_guest().
- gic_remove_irq_from_guest() now locks that virtual IRQ (somewhat)
using the rank lock. That should have happened before, but I _guess_
that would violate the locking order (desc first, then rank).
- Now it checks whether the virtual IRQ and the passed irq_desc (still)
match. However an ASSERT is not the right way of doing this, since the
caller logic is correct. BUT there could be a race, because the virtual
IRQ could have changed between the original irq_desc lookup in the
caller and the rank lock being taken here. Maybe not in practise due to
subtle (and fragile?) other conditions, but by just looking at the
functions.

>> And to be honest, I don't really get the purpose of this ASSERT: the
>> desc pointer is taken from the pending_irq in the caller, but without
>> any locks. So if I am not mistaken, it could race with a
>> gic_route_irq_to_xen(), and that would lead to the ASSERT triggering,
>> just because of this race and not because of the code being broken
>> ultimately.
>> I *could* get the irq_desc by calling the new vgic_get_hw_irq_desc() -
>> again. Not sure if that is useful, though.
>> Another possibility would be to rethink this whole functionality:
>> The only caller (release_guest_irq() in irq.c) gets a virtual IRQ
>> number, then finds the associated irq_desc, only to lock it. Then it
>> passes both the virtual IRQ number and the irq_desc to this function,
>> where both are rechecked. The reason for this redundancy seems to be
>> some locking order (irq_desc first?), however I can't find any
>> documentation about this.
>> So I wonder if we could just pass on only the virtual IRQ number, and
>> let it up to this function here to safely retrieve the right irq_desc.
> 
> While I agree that the ASSERT without any lock is dangerous, it could at
> least catch someone passing the wrong irq_desc. Something will really go
> wrong if you disable pIRQ A but the irq_desc was belonging to pIRQ B.
> 
> And I agree that the code does not prevent that today. But it at least
> limit the scope of the problem.
> 
> So I think the code should be:
> 
> gic_remove_irq_from_guest(.....)
> {
> 
>    vgic_lock_rank(...)
>    if ( p->desc != desc )
>    {
>     vgic_unlock_rank(...)
>         return -1;
>    }
> 
>    do the vGIC removal
> 
>    vgic_unlock_rank(...)
> 
>    return 0;
> }

Yeah, that makes some sense. Only problem is that series tries to remove
any dependency on the VGIC, namely p->desc and the rank lock, here.
So p->desc would translate into vgic_get_hw_irq_desc(), but the lock
can't be used anymore, at least not for whole of this function.
I need to think about a solution for this.

Cheers,
Andre.

>>>>        ASSERT(!is_lpi(virq));
>>>>    -    vgic_lock_rank(v_target, rank, flags);
>>
>> I couldn't find what this lock protects here, so early at least. Until
>> the actual "p->desc = NULL;" line below nothing needs to be protected by
>> this lock, it's all already covered by the desc lock.
>> We only need the lock to eventually atomically remove the connection
>> between the h/w and the virtual IRQ, which is done in
>> vgic_connect_hw_irq() now.
> 
> See above.
>
diff mbox series

Patch

diff --git a/xen/arch/arm/gic-vgic.c b/xen/arch/arm/gic-vgic.c
index 0a2958c5db..d44e4dacd3 100644
--- a/xen/arch/arm/gic-vgic.c
+++ b/xen/arch/arm/gic-vgic.c
@@ -411,6 +411,37 @@  void gic_dump_vgic_info(struct vcpu *v)
         printk("Pending irq=%d\n", p->irq);
 }
 
+int vgic_connect_hw_irq(struct domain *d, struct vcpu *v, unsigned int virq,
+                        struct irq_desc *desc)
+{
+    unsigned long flags;
+    /* Use vcpu0 to retrieve the pending_irq struct. Given that we only
+     * route SPIs to guests, it doesn't make any difference. */
+    struct vcpu *v_target = vgic_get_target_vcpu(d->vcpu[0], virq);
+    struct vgic_irq_rank *rank = vgic_rank_irq(v_target, virq);
+    struct pending_irq *p = irq_to_pending(v_target, virq);
+    int ret = 0;
+
+    /* We are taking to rank lock to prevent parallel connections. */
+    vgic_lock_rank(v_target, rank, flags);
+
+    if ( desc )
+    {
+        /* The VIRQ should not be already enabled by the guest */
+        if ( !p->desc &&
+             !test_bit(GIC_IRQ_GUEST_ENABLED, &p->status) )
+            p->desc = desc;
+        else
+            ret = -EBUSY;
+    }
+    else
+        p->desc = NULL;
+
+    vgic_unlock_rank(v_target, rank, flags);
+
+    return ret;
+}
+
 /*
  * Local variables:
  * mode: C
diff --git a/xen/arch/arm/gic.c b/xen/arch/arm/gic.c
index 4cb74d449e..d46a6d54b3 100644
--- a/xen/arch/arm/gic.c
+++ b/xen/arch/arm/gic.c
@@ -128,27 +128,12 @@  void gic_route_irq_to_xen(struct irq_desc *desc, unsigned int priority)
 int gic_route_irq_to_guest(struct domain *d, unsigned int virq,
                            struct irq_desc *desc, unsigned int priority)
 {
-    unsigned long flags;
-    /* Use vcpu0 to retrieve the pending_irq struct. Given that we only
-     * route SPIs to guests, it doesn't make any difference. */
-    struct vcpu *v_target = vgic_get_target_vcpu(d->vcpu[0], virq);
-    struct vgic_irq_rank *rank = vgic_rank_irq(v_target, virq);
-    struct pending_irq *p = irq_to_pending(v_target, virq);
-    int res = -EBUSY;
-
     ASSERT(spin_is_locked(&desc->lock));
     /* Caller has already checked that the IRQ is an SPI */
     ASSERT(virq >= 32);
     ASSERT(virq < vgic_num_irqs(d));
     ASSERT(!is_lpi(virq));
 
-    vgic_lock_rank(v_target, rank, flags);
-
-    if ( p->desc ||
-         /* The VIRQ should not be already enabled by the guest */
-         test_bit(GIC_IRQ_GUEST_ENABLED, &p->status) )
-        goto out;
-
     desc->handler = gic_hw_ops->gic_guest_irq_type;
     set_bit(_IRQ_GUEST, &desc->status);
 
@@ -156,31 +141,19 @@  int gic_route_irq_to_guest(struct domain *d, unsigned int virq,
         gic_set_irq_type(desc, desc->arch.type);
     gic_set_irq_priority(desc, priority);
 
-    p->desc = desc;
-    res = 0;
-
-out:
-    vgic_unlock_rank(v_target, rank, flags);
-
-    return res;
+    return vgic_connect_hw_irq(d, NULL, virq, desc);
 }
 
 /* This function only works with SPIs for now */
 int gic_remove_irq_from_guest(struct domain *d, unsigned int virq,
                               struct irq_desc *desc)
 {
-    struct vcpu *v_target = vgic_get_target_vcpu(d->vcpu[0], virq);
-    struct vgic_irq_rank *rank = vgic_rank_irq(v_target, virq);
-    struct pending_irq *p = irq_to_pending(v_target, virq);
-    unsigned long flags;
+    int ret;
 
     ASSERT(spin_is_locked(&desc->lock));
     ASSERT(test_bit(_IRQ_GUEST, &desc->status));
-    ASSERT(p->desc == desc);
     ASSERT(!is_lpi(virq));
 
-    vgic_lock_rank(v_target, rank, flags);
-
     if ( d->is_dying )
     {
         desc->handler->shutdown(desc);
@@ -198,19 +171,16 @@  int gic_remove_irq_from_guest(struct domain *d, unsigned int virq,
          */
         if ( test_bit(_IRQ_INPROGRESS, &desc->status) ||
              !test_bit(_IRQ_DISABLED, &desc->status) )
-        {
-            vgic_unlock_rank(v_target, rank, flags);
             return -EBUSY;
-        }
     }
 
+    ret = vgic_connect_hw_irq(d, NULL, virq, NULL);
+    if ( ret )
+        return ret;
+
     clear_bit(_IRQ_GUEST, &desc->status);
     desc->handler = &no_irq_type;
 
-    p->desc = NULL;
-
-    vgic_unlock_rank(v_target, rank, flags);
-
     return 0;
 }
 
diff --git a/xen/include/asm-arm/vgic.h b/xen/include/asm-arm/vgic.h
index 22c8502c95..f4240df371 100644
--- a/xen/include/asm-arm/vgic.h
+++ b/xen/include/asm-arm/vgic.h
@@ -219,6 +219,8 @@  int vgic_v2_init(struct domain *d, int *mmio_count);
 int vgic_v3_init(struct domain *d, int *mmio_count);
 
 bool vgic_evtchn_irq_pending(struct vcpu *v);
+int vgic_connect_hw_irq(struct domain *d, struct vcpu *v, unsigned int virq,
+                        struct irq_desc *desc);
 
 extern int domain_vgic_register(struct domain *d, int *mmio_count);
 extern int vcpu_vgic_free(struct vcpu *v);