diff mbox series

[API-NEXT,2/4] linux-generic: ring.c: use required memory orderings

Message ID 20170328192330.62599-2-brian.brooks@arm.com
State Superseded
Headers show
Series [API-NEXT,1/4] helper: cuckootable: Specify queue ring_size | expand

Commit Message

Brian Brooks March 28, 2017, 7:23 p.m. UTC
From: Ola Liljedahl <ola.liljedahl@arm.com>


Signed-off-by: Ola Liljedahl <ola.liljedahl@arm.com>

Reviewed-by: Brian Brooks <brian.brooks@arm.com>

---
 platform/linux-generic/pktio/ring.c | 30 ++++++++++++++----------------
 1 file changed, 14 insertions(+), 16 deletions(-)
 mode change 100644 => 100755 platform/linux-generic/pktio/ring.c

-- 
2.12.1

Comments

Bill Fischofer March 30, 2017, 8:48 p.m. UTC | #1
Maxim: Per discussions today, this part needs to be separated from the
rest of this series and merged on an expedited basis to resolve
critical issues when running ODP on ARM systems.  V2 of the scheduler
patch will omit this part.


On Tue, Mar 28, 2017 at 2:23 PM, Brian Brooks <brian.brooks@arm.com> wrote:
> From: Ola Liljedahl <ola.liljedahl@arm.com>

>

> Signed-off-by: Ola Liljedahl <ola.liljedahl@arm.com>

> Reviewed-by: Brian Brooks <brian.brooks@arm.com>


Reviewed-and-tested-by: Bill Fischofer <bill.fischofer@linaro.org>

> ---

>  platform/linux-generic/pktio/ring.c | 30 ++++++++++++++----------------

>  1 file changed, 14 insertions(+), 16 deletions(-)

>  mode change 100644 => 100755 platform/linux-generic/pktio/ring.c

>

> diff --git a/platform/linux-generic/pktio/ring.c b/platform/linux-generic/pktio/ring.c

> old mode 100644

> new mode 100755

> index aeda04b2..e3c73d1c

> --- a/platform/linux-generic/pktio/ring.c

> +++ b/platform/linux-generic/pktio/ring.c

> @@ -263,8 +263,8 @@ int ___ring_mp_do_enqueue(_ring_t *r, void * const *obj_table,

>                 /* Reset n to the initial burst count */

>                 n = max;

>

> -               prod_head = r->prod.head;

> -               cons_tail = r->cons.tail;

> +               prod_head = __atomic_load_n(&r->prod.head, __ATOMIC_RELAXED);

> +               cons_tail = __atomic_load_n(&r->cons.tail, __ATOMIC_ACQUIRE);

>                 /* The subtraction is done between two unsigned 32bits value

>                  * (the result is always modulo 32 bits even if we have

>                  * prod_head > cons_tail). So 'free_entries' is always between 0

> @@ -306,12 +306,12 @@ int ___ring_mp_do_enqueue(_ring_t *r, void * const *obj_table,

>          * If there are other enqueues in progress that preceded us,

>          * we need to wait for them to complete

>          */

> -       while (odp_unlikely(r->prod.tail != prod_head))

> +       while (odp_unlikely(__atomic_load_n(&r->prod.tail, __ATOMIC_RELAXED) !=

> +                           prod_head))

>                 odp_cpu_pause();

>

>         /* Release our entries and the memory they refer to */

> -       __atomic_thread_fence(__ATOMIC_RELEASE);

> -       r->prod.tail = prod_next;

> +       __atomic_store_n(&r->prod.tail, prod_next, __ATOMIC_RELEASE);

>         return ret;

>  }

>

> @@ -328,7 +328,7 @@ int ___ring_sp_do_enqueue(_ring_t *r, void * const *obj_table,

>         int ret;

>

>         prod_head = r->prod.head;

> -       cons_tail = r->cons.tail;

> +       cons_tail = __atomic_load_n(&r->cons.tail, __ATOMIC_ACQUIRE);

>         /* The subtraction is done between two unsigned 32bits value

>          * (the result is always modulo 32 bits even if we have

>          * prod_head > cons_tail). So 'free_entries' is always between 0

> @@ -361,8 +361,7 @@ int ___ring_sp_do_enqueue(_ring_t *r, void * const *obj_table,

>         }

>

>         /* Release our entries and the memory they refer to */

> -       __atomic_thread_fence(__ATOMIC_RELEASE);

> -       r->prod.tail = prod_next;

> +       __atomic_store_n(&r->prod.tail, prod_next, __ATOMIC_RELEASE);

>         return ret;

>  }

>

> @@ -385,8 +384,8 @@ int ___ring_mc_do_dequeue(_ring_t *r, void **obj_table,

>                 /* Restore n as it may change every loop */

>                 n = max;

>

> -               cons_head = r->cons.head;

> -               prod_tail = r->prod.tail;

> +               cons_head = __atomic_load_n(&r->cons.head, __ATOMIC_RELAXED);

> +               prod_tail = __atomic_load_n(&r->prod.tail, __ATOMIC_ACQUIRE);

>                 /* The subtraction is done between two unsigned 32bits value

>                  * (the result is always modulo 32 bits even if we have

>                  * cons_head > prod_tail). So 'entries' is always between 0

> @@ -419,12 +418,12 @@ int ___ring_mc_do_dequeue(_ring_t *r, void **obj_table,

>          * If there are other dequeues in progress that preceded us,

>          * we need to wait for them to complete

>          */

> -       while (odp_unlikely(r->cons.tail != cons_head))

> +       while (odp_unlikely(__atomic_load_n(&r->cons.tail, __ATOMIC_RELAXED) !=

> +                                           cons_head))

>                 odp_cpu_pause();

>

>         /* Release our entries and the memory they refer to */

> -       __atomic_thread_fence(__ATOMIC_RELEASE);

> -       r->cons.tail = cons_next;

> +       __atomic_store_n(&r->cons.tail, cons_next, __ATOMIC_RELEASE);

>

>         return behavior == _RING_QUEUE_FIXED ? 0 : n;

>  }

> @@ -441,7 +440,7 @@ int ___ring_sc_do_dequeue(_ring_t *r, void **obj_table,

>         uint32_t mask = r->prod.mask;

>

>         cons_head = r->cons.head;

> -       prod_tail = r->prod.tail;

> +       prod_tail = __atomic_load_n(&r->prod.tail, __ATOMIC_ACQUIRE);

>         /* The subtraction is done between two unsigned 32bits value

>          * (the result is always modulo 32 bits even if we have

>          * cons_head > prod_tail). So 'entries' is always between 0

> @@ -461,11 +460,10 @@ int ___ring_sc_do_dequeue(_ring_t *r, void **obj_table,

>         r->cons.head = cons_next;

>

>         /* Acquire the pointers and the memory they refer to */

> -       __atomic_thread_fence(__ATOMIC_ACQUIRE);

>         /* copy in table */

>         DEQUEUE_PTRS();

>

> -       r->cons.tail = cons_next;

> +       __atomic_store_n(&r->cons.tail, cons_next, __ATOMIC_RELEASE);

>         return behavior == _RING_QUEUE_FIXED ? 0 : n;

>  }

>

> --

> 2.12.1

>
Maxim Uvarov March 31, 2017, 1:21 p.m. UTC | #2
On 03/28/17 22:23, Brian Brooks wrote:
> From: Ola Liljedahl <ola.liljedahl@arm.com>

> 

> Signed-off-by: Ola Liljedahl <ola.liljedahl@arm.com>

> Reviewed-by: Brian Brooks <brian.brooks@arm.com>

> ---

>  platform/linux-generic/pktio/ring.c | 30 ++++++++++++++----------------

>  1 file changed, 14 insertions(+), 16 deletions(-)

>  mode change 100644 => 100755 platform/linux-generic/pktio/ring.c

> 

> diff --git a/platform/linux-generic/pktio/ring.c b/platform/linux-generic/pktio/ring.c

> old mode 100644

> new mode 100755



There is no need to set executable permissions on a C file. And of course you
have to run checkpatch.pl, or push the code to GitHub and it will do all
required checks.

I will fix it, no need to resend.

Maxim.

> index aeda04b2..e3c73d1c

> --- a/platform/linux-generic/pktio/ring.c

> +++ b/platform/linux-generic/pktio/ring.c

> @@ -263,8 +263,8 @@ int ___ring_mp_do_enqueue(_ring_t *r, void * const *obj_table,

>  		/* Reset n to the initial burst count */

>  		n = max;

>  

> -		prod_head = r->prod.head;

> -		cons_tail = r->cons.tail;

> +		prod_head = __atomic_load_n(&r->prod.head, __ATOMIC_RELAXED);

> +		cons_tail = __atomic_load_n(&r->cons.tail, __ATOMIC_ACQUIRE);

>  		/* The subtraction is done between two unsigned 32bits value

>  		 * (the result is always modulo 32 bits even if we have

>  		 * prod_head > cons_tail). So 'free_entries' is always between 0

> @@ -306,12 +306,12 @@ int ___ring_mp_do_enqueue(_ring_t *r, void * const *obj_table,

>  	 * If there are other enqueues in progress that preceded us,

>  	 * we need to wait for them to complete

>  	 */

> -	while (odp_unlikely(r->prod.tail != prod_head))

> +	while (odp_unlikely(__atomic_load_n(&r->prod.tail, __ATOMIC_RELAXED) !=

> +			    prod_head))

>  		odp_cpu_pause();

>  

>  	/* Release our entries and the memory they refer to */

> -	__atomic_thread_fence(__ATOMIC_RELEASE);

> -	r->prod.tail = prod_next;

> +	__atomic_store_n(&r->prod.tail, prod_next, __ATOMIC_RELEASE);

>  	return ret;

>  }

>  

> @@ -328,7 +328,7 @@ int ___ring_sp_do_enqueue(_ring_t *r, void * const *obj_table,

>  	int ret;

>  

>  	prod_head = r->prod.head;

> -	cons_tail = r->cons.tail;

> +	cons_tail = __atomic_load_n(&r->cons.tail, __ATOMIC_ACQUIRE);

>  	/* The subtraction is done between two unsigned 32bits value

>  	 * (the result is always modulo 32 bits even if we have

>  	 * prod_head > cons_tail). So 'free_entries' is always between 0

> @@ -361,8 +361,7 @@ int ___ring_sp_do_enqueue(_ring_t *r, void * const *obj_table,

>  	}

>  

>  	/* Release our entries and the memory they refer to */

> -	__atomic_thread_fence(__ATOMIC_RELEASE);

> -	r->prod.tail = prod_next;

> +	__atomic_store_n(&r->prod.tail, prod_next, __ATOMIC_RELEASE);

>  	return ret;

>  }

>  

> @@ -385,8 +384,8 @@ int ___ring_mc_do_dequeue(_ring_t *r, void **obj_table,

>  		/* Restore n as it may change every loop */

>  		n = max;

>  

> -		cons_head = r->cons.head;

> -		prod_tail = r->prod.tail;

> +		cons_head = __atomic_load_n(&r->cons.head, __ATOMIC_RELAXED);

> +		prod_tail = __atomic_load_n(&r->prod.tail, __ATOMIC_ACQUIRE);

>  		/* The subtraction is done between two unsigned 32bits value

>  		 * (the result is always modulo 32 bits even if we have

>  		 * cons_head > prod_tail). So 'entries' is always between 0

> @@ -419,12 +418,12 @@ int ___ring_mc_do_dequeue(_ring_t *r, void **obj_table,

>  	 * If there are other dequeues in progress that preceded us,

>  	 * we need to wait for them to complete

>  	 */

> -	while (odp_unlikely(r->cons.tail != cons_head))

> +	while (odp_unlikely(__atomic_load_n(&r->cons.tail, __ATOMIC_RELAXED) !=

> +					    cons_head))

>  		odp_cpu_pause();

>  

>  	/* Release our entries and the memory they refer to */

> -	__atomic_thread_fence(__ATOMIC_RELEASE);

> -	r->cons.tail = cons_next;

> +	__atomic_store_n(&r->cons.tail, cons_next, __ATOMIC_RELEASE);

>  

>  	return behavior == _RING_QUEUE_FIXED ? 0 : n;

>  }

> @@ -441,7 +440,7 @@ int ___ring_sc_do_dequeue(_ring_t *r, void **obj_table,

>  	uint32_t mask = r->prod.mask;

>  

>  	cons_head = r->cons.head;

> -	prod_tail = r->prod.tail;

> +	prod_tail = __atomic_load_n(&r->prod.tail, __ATOMIC_ACQUIRE);

>  	/* The subtraction is done between two unsigned 32bits value

>  	 * (the result is always modulo 32 bits even if we have

>  	 * cons_head > prod_tail). So 'entries' is always between 0

> @@ -461,11 +460,10 @@ int ___ring_sc_do_dequeue(_ring_t *r, void **obj_table,

>  	r->cons.head = cons_next;

>  

>  	/* Acquire the pointers and the memory they refer to */

> -	__atomic_thread_fence(__ATOMIC_ACQUIRE);

>  	/* copy in table */

>  	DEQUEUE_PTRS();

>  

> -	r->cons.tail = cons_next;

> +	__atomic_store_n(&r->cons.tail, cons_next, __ATOMIC_RELEASE);

>  	return behavior == _RING_QUEUE_FIXED ? 0 : n;

>  }

>  

>
Ola Liljedahl March 31, 2017, 1:45 p.m. UTC | #3
On 31 March 2017 at 15:21, Maxim Uvarov <maxim.uvarov@linaro.org> wrote:
> On 03/28/17 22:23, Brian Brooks wrote:

>> From: Ola Liljedahl <ola.liljedahl@arm.com>

>>

>> Signed-off-by: Ola Liljedahl <ola.liljedahl@arm.com>

>> Reviewed-by: Brian Brooks <brian.brooks@arm.com>

>> ---

>>  platform/linux-generic/pktio/ring.c | 30 ++++++++++++++----------------

>>  1 file changed, 14 insertions(+), 16 deletions(-)

>>  mode change 100644 => 100755 platform/linux-generic/pktio/ring.c

>>

>> diff --git a/platform/linux-generic/pktio/ring.c b/platform/linux-generic/pktio/ring.c

>> old mode 100644

>> new mode 100755

>

>

> no need of setting executable permissions to c file. And of course you

Very strange. I can assure you that I have not actively changed
permissions on this file (I made the original changes on our local
copy).

> have to run checkpatch.pl or push that code to github and it will do all

> required checks.

>

> I will fix it, no need to resend.

>

> Maxim.

>

>> index aeda04b2..e3c73d1c

>> --- a/platform/linux-generic/pktio/ring.c

>> +++ b/platform/linux-generic/pktio/ring.c

>> @@ -263,8 +263,8 @@ int ___ring_mp_do_enqueue(_ring_t *r, void * const *obj_table,

>>               /* Reset n to the initial burst count */

>>               n = max;

>>

>> -             prod_head = r->prod.head;

>> -             cons_tail = r->cons.tail;

>> +             prod_head = __atomic_load_n(&r->prod.head, __ATOMIC_RELAXED);

>> +             cons_tail = __atomic_load_n(&r->cons.tail, __ATOMIC_ACQUIRE);

>>               /* The subtraction is done between two unsigned 32bits value

>>                * (the result is always modulo 32 bits even if we have

>>                * prod_head > cons_tail). So 'free_entries' is always between 0

>> @@ -306,12 +306,12 @@ int ___ring_mp_do_enqueue(_ring_t *r, void * const *obj_table,

>>        * If there are other enqueues in progress that preceded us,

>>        * we need to wait for them to complete

>>        */

>> -     while (odp_unlikely(r->prod.tail != prod_head))

>> +     while (odp_unlikely(__atomic_load_n(&r->prod.tail, __ATOMIC_RELAXED) !=

>> +                         prod_head))

>>               odp_cpu_pause();

>>

>>       /* Release our entries and the memory they refer to */

>> -     __atomic_thread_fence(__ATOMIC_RELEASE);

>> -     r->prod.tail = prod_next;

>> +     __atomic_store_n(&r->prod.tail, prod_next, __ATOMIC_RELEASE);

>>       return ret;

>>  }

>>

>> @@ -328,7 +328,7 @@ int ___ring_sp_do_enqueue(_ring_t *r, void * const *obj_table,

>>       int ret;

>>

>>       prod_head = r->prod.head;

>> -     cons_tail = r->cons.tail;

>> +     cons_tail = __atomic_load_n(&r->cons.tail, __ATOMIC_ACQUIRE);

>>       /* The subtraction is done between two unsigned 32bits value

>>        * (the result is always modulo 32 bits even if we have

>>        * prod_head > cons_tail). So 'free_entries' is always between 0

>> @@ -361,8 +361,7 @@ int ___ring_sp_do_enqueue(_ring_t *r, void * const *obj_table,

>>       }

>>

>>       /* Release our entries and the memory they refer to */

>> -     __atomic_thread_fence(__ATOMIC_RELEASE);

>> -     r->prod.tail = prod_next;

>> +     __atomic_store_n(&r->prod.tail, prod_next, __ATOMIC_RELEASE);

>>       return ret;

>>  }

>>

>> @@ -385,8 +384,8 @@ int ___ring_mc_do_dequeue(_ring_t *r, void **obj_table,

>>               /* Restore n as it may change every loop */

>>               n = max;

>>

>> -             cons_head = r->cons.head;

>> -             prod_tail = r->prod.tail;

>> +             cons_head = __atomic_load_n(&r->cons.head, __ATOMIC_RELAXED);

>> +             prod_tail = __atomic_load_n(&r->prod.tail, __ATOMIC_ACQUIRE);

>>               /* The subtraction is done between two unsigned 32bits value

>>                * (the result is always modulo 32 bits even if we have

>>                * cons_head > prod_tail). So 'entries' is always between 0

>> @@ -419,12 +418,12 @@ int ___ring_mc_do_dequeue(_ring_t *r, void **obj_table,

>>        * If there are other dequeues in progress that preceded us,

>>        * we need to wait for them to complete

>>        */

>> -     while (odp_unlikely(r->cons.tail != cons_head))

>> +     while (odp_unlikely(__atomic_load_n(&r->cons.tail, __ATOMIC_RELAXED) !=

>> +                                         cons_head))

>>               odp_cpu_pause();

>>

>>       /* Release our entries and the memory they refer to */

>> -     __atomic_thread_fence(__ATOMIC_RELEASE);

>> -     r->cons.tail = cons_next;

>> +     __atomic_store_n(&r->cons.tail, cons_next, __ATOMIC_RELEASE);

>>

>>       return behavior == _RING_QUEUE_FIXED ? 0 : n;

>>  }

>> @@ -441,7 +440,7 @@ int ___ring_sc_do_dequeue(_ring_t *r, void **obj_table,

>>       uint32_t mask = r->prod.mask;

>>

>>       cons_head = r->cons.head;

>> -     prod_tail = r->prod.tail;

>> +     prod_tail = __atomic_load_n(&r->prod.tail, __ATOMIC_ACQUIRE);

>>       /* The subtraction is done between two unsigned 32bits value

>>        * (the result is always modulo 32 bits even if we have

>>        * cons_head > prod_tail). So 'entries' is always between 0

>> @@ -461,11 +460,10 @@ int ___ring_sc_do_dequeue(_ring_t *r, void **obj_table,

>>       r->cons.head = cons_next;

>>

>>       /* Acquire the pointers and the memory they refer to */

>> -     __atomic_thread_fence(__ATOMIC_ACQUIRE);

>>       /* copy in table */

>>       DEQUEUE_PTRS();

>>

>> -     r->cons.tail = cons_next;

>> +     __atomic_store_n(&r->cons.tail, cons_next, __ATOMIC_RELEASE);

>>       return behavior == _RING_QUEUE_FIXED ? 0 : n;

>>  }

>>

>>

>
Maxim Uvarov March 31, 2017, 1:49 p.m. UTC | #4
Merged to master branch.

Maxim.

On 03/31/17 16:45, Ola Liljedahl wrote:
> On 31 March 2017 at 15:21, Maxim Uvarov <maxim.uvarov@linaro.org> wrote:

>> On 03/28/17 22:23, Brian Brooks wrote:

>>> From: Ola Liljedahl <ola.liljedahl@arm.com>

>>>

>>> Signed-off-by: Ola Liljedahl <ola.liljedahl@arm.com>

>>> Reviewed-by: Brian Brooks <brian.brooks@arm.com>

>>> ---

>>>  platform/linux-generic/pktio/ring.c | 30 ++++++++++++++----------------

>>>  1 file changed, 14 insertions(+), 16 deletions(-)

>>>  mode change 100644 => 100755 platform/linux-generic/pktio/ring.c

>>>

>>> diff --git a/platform/linux-generic/pktio/ring.c b/platform/linux-generic/pktio/ring.c

>>> old mode 100644

>>> new mode 100755

>>

>>

>> no need of setting executable permissions to c file. And of course you

> Very strange. I can assure you that I have not actively changed

> permissions on this file (I made the original changes on our local

> copy).

> 

>> have to run checkpatch.pl or push that code to github and it will do all

>> required checks.

>>

>> I will fix it, no need to resend.

>>

>> Maxim.

>>

>>> index aeda04b2..e3c73d1c

>>> --- a/platform/linux-generic/pktio/ring.c

>>> +++ b/platform/linux-generic/pktio/ring.c

>>> @@ -263,8 +263,8 @@ int ___ring_mp_do_enqueue(_ring_t *r, void * const *obj_table,

>>>               /* Reset n to the initial burst count */

>>>               n = max;

>>>

>>> -             prod_head = r->prod.head;

>>> -             cons_tail = r->cons.tail;

>>> +             prod_head = __atomic_load_n(&r->prod.head, __ATOMIC_RELAXED);

>>> +             cons_tail = __atomic_load_n(&r->cons.tail, __ATOMIC_ACQUIRE);

>>>               /* The subtraction is done between two unsigned 32bits value

>>>                * (the result is always modulo 32 bits even if we have

>>>                * prod_head > cons_tail). So 'free_entries' is always between 0

>>> @@ -306,12 +306,12 @@ int ___ring_mp_do_enqueue(_ring_t *r, void * const *obj_table,

>>>        * If there are other enqueues in progress that preceded us,

>>>        * we need to wait for them to complete

>>>        */

>>> -     while (odp_unlikely(r->prod.tail != prod_head))

>>> +     while (odp_unlikely(__atomic_load_n(&r->prod.tail, __ATOMIC_RELAXED) !=

>>> +                         prod_head))

>>>               odp_cpu_pause();

>>>

>>>       /* Release our entries and the memory they refer to */

>>> -     __atomic_thread_fence(__ATOMIC_RELEASE);

>>> -     r->prod.tail = prod_next;

>>> +     __atomic_store_n(&r->prod.tail, prod_next, __ATOMIC_RELEASE);

>>>       return ret;

>>>  }

>>>

>>> @@ -328,7 +328,7 @@ int ___ring_sp_do_enqueue(_ring_t *r, void * const *obj_table,

>>>       int ret;

>>>

>>>       prod_head = r->prod.head;

>>> -     cons_tail = r->cons.tail;

>>> +     cons_tail = __atomic_load_n(&r->cons.tail, __ATOMIC_ACQUIRE);

>>>       /* The subtraction is done between two unsigned 32bits value

>>>        * (the result is always modulo 32 bits even if we have

>>>        * prod_head > cons_tail). So 'free_entries' is always between 0

>>> @@ -361,8 +361,7 @@ int ___ring_sp_do_enqueue(_ring_t *r, void * const *obj_table,

>>>       }

>>>

>>>       /* Release our entries and the memory they refer to */

>>> -     __atomic_thread_fence(__ATOMIC_RELEASE);

>>> -     r->prod.tail = prod_next;

>>> +     __atomic_store_n(&r->prod.tail, prod_next, __ATOMIC_RELEASE);

>>>       return ret;

>>>  }

>>>

>>> @@ -385,8 +384,8 @@ int ___ring_mc_do_dequeue(_ring_t *r, void **obj_table,

>>>               /* Restore n as it may change every loop */

>>>               n = max;

>>>

>>> -             cons_head = r->cons.head;

>>> -             prod_tail = r->prod.tail;

>>> +             cons_head = __atomic_load_n(&r->cons.head, __ATOMIC_RELAXED);

>>> +             prod_tail = __atomic_load_n(&r->prod.tail, __ATOMIC_ACQUIRE);

>>>               /* The subtraction is done between two unsigned 32bits value

>>>                * (the result is always modulo 32 bits even if we have

>>>                * cons_head > prod_tail). So 'entries' is always between 0

>>> @@ -419,12 +418,12 @@ int ___ring_mc_do_dequeue(_ring_t *r, void **obj_table,

>>>        * If there are other dequeues in progress that preceded us,

>>>        * we need to wait for them to complete

>>>        */

>>> -     while (odp_unlikely(r->cons.tail != cons_head))

>>> +     while (odp_unlikely(__atomic_load_n(&r->cons.tail, __ATOMIC_RELAXED) !=

>>> +                                         cons_head))

>>>               odp_cpu_pause();

>>>

>>>       /* Release our entries and the memory they refer to */

>>> -     __atomic_thread_fence(__ATOMIC_RELEASE);

>>> -     r->cons.tail = cons_next;

>>> +     __atomic_store_n(&r->cons.tail, cons_next, __ATOMIC_RELEASE);

>>>

>>>       return behavior == _RING_QUEUE_FIXED ? 0 : n;

>>>  }

>>> @@ -441,7 +440,7 @@ int ___ring_sc_do_dequeue(_ring_t *r, void **obj_table,

>>>       uint32_t mask = r->prod.mask;

>>>

>>>       cons_head = r->cons.head;

>>> -     prod_tail = r->prod.tail;

>>> +     prod_tail = __atomic_load_n(&r->prod.tail, __ATOMIC_ACQUIRE);

>>>       /* The subtraction is done between two unsigned 32bits value

>>>        * (the result is always modulo 32 bits even if we have

>>>        * cons_head > prod_tail). So 'entries' is always between 0

>>> @@ -461,11 +460,10 @@ int ___ring_sc_do_dequeue(_ring_t *r, void **obj_table,

>>>       r->cons.head = cons_next;

>>>

>>>       /* Acquire the pointers and the memory they refer to */

>>> -     __atomic_thread_fence(__ATOMIC_ACQUIRE);

>>>       /* copy in table */

>>>       DEQUEUE_PTRS();

>>>

>>> -     r->cons.tail = cons_next;

>>> +     __atomic_store_n(&r->cons.tail, cons_next, __ATOMIC_RELEASE);

>>>       return behavior == _RING_QUEUE_FIXED ? 0 : n;

>>>  }

>>>

>>>

>>
diff mbox series

Patch

diff --git a/platform/linux-generic/pktio/ring.c b/platform/linux-generic/pktio/ring.c
old mode 100644
new mode 100755
index aeda04b2..e3c73d1c
--- a/platform/linux-generic/pktio/ring.c
+++ b/platform/linux-generic/pktio/ring.c
@@ -263,8 +263,8 @@  int ___ring_mp_do_enqueue(_ring_t *r, void * const *obj_table,
 		/* Reset n to the initial burst count */
 		n = max;
 
-		prod_head = r->prod.head;
-		cons_tail = r->cons.tail;
+		prod_head = __atomic_load_n(&r->prod.head, __ATOMIC_RELAXED);
+		cons_tail = __atomic_load_n(&r->cons.tail, __ATOMIC_ACQUIRE);
 		/* The subtraction is done between two unsigned 32bits value
 		 * (the result is always modulo 32 bits even if we have
 		 * prod_head > cons_tail). So 'free_entries' is always between 0
@@ -306,12 +306,12 @@  int ___ring_mp_do_enqueue(_ring_t *r, void * const *obj_table,
 	 * If there are other enqueues in progress that preceded us,
 	 * we need to wait for them to complete
 	 */
-	while (odp_unlikely(r->prod.tail != prod_head))
+	while (odp_unlikely(__atomic_load_n(&r->prod.tail, __ATOMIC_RELAXED) !=
+			    prod_head))
 		odp_cpu_pause();
 
 	/* Release our entries and the memory they refer to */
-	__atomic_thread_fence(__ATOMIC_RELEASE);
-	r->prod.tail = prod_next;
+	__atomic_store_n(&r->prod.tail, prod_next, __ATOMIC_RELEASE);
 	return ret;
 }
 
@@ -328,7 +328,7 @@  int ___ring_sp_do_enqueue(_ring_t *r, void * const *obj_table,
 	int ret;
 
 	prod_head = r->prod.head;
-	cons_tail = r->cons.tail;
+	cons_tail = __atomic_load_n(&r->cons.tail, __ATOMIC_ACQUIRE);
 	/* The subtraction is done between two unsigned 32bits value
 	 * (the result is always modulo 32 bits even if we have
 	 * prod_head > cons_tail). So 'free_entries' is always between 0
@@ -361,8 +361,7 @@  int ___ring_sp_do_enqueue(_ring_t *r, void * const *obj_table,
 	}
 
 	/* Release our entries and the memory they refer to */
-	__atomic_thread_fence(__ATOMIC_RELEASE);
-	r->prod.tail = prod_next;
+	__atomic_store_n(&r->prod.tail, prod_next, __ATOMIC_RELEASE);
 	return ret;
 }
 
@@ -385,8 +384,8 @@  int ___ring_mc_do_dequeue(_ring_t *r, void **obj_table,
 		/* Restore n as it may change every loop */
 		n = max;
 
-		cons_head = r->cons.head;
-		prod_tail = r->prod.tail;
+		cons_head = __atomic_load_n(&r->cons.head, __ATOMIC_RELAXED);
+		prod_tail = __atomic_load_n(&r->prod.tail, __ATOMIC_ACQUIRE);
 		/* The subtraction is done between two unsigned 32bits value
 		 * (the result is always modulo 32 bits even if we have
 		 * cons_head > prod_tail). So 'entries' is always between 0
@@ -419,12 +418,12 @@  int ___ring_mc_do_dequeue(_ring_t *r, void **obj_table,
 	 * If there are other dequeues in progress that preceded us,
 	 * we need to wait for them to complete
 	 */
-	while (odp_unlikely(r->cons.tail != cons_head))
+	while (odp_unlikely(__atomic_load_n(&r->cons.tail, __ATOMIC_RELAXED) !=
+					    cons_head))
 		odp_cpu_pause();
 
 	/* Release our entries and the memory they refer to */
-	__atomic_thread_fence(__ATOMIC_RELEASE);
-	r->cons.tail = cons_next;
+	__atomic_store_n(&r->cons.tail, cons_next, __ATOMIC_RELEASE);
 
 	return behavior == _RING_QUEUE_FIXED ? 0 : n;
 }
@@ -441,7 +440,7 @@  int ___ring_sc_do_dequeue(_ring_t *r, void **obj_table,
 	uint32_t mask = r->prod.mask;
 
 	cons_head = r->cons.head;
-	prod_tail = r->prod.tail;
+	prod_tail = __atomic_load_n(&r->prod.tail, __ATOMIC_ACQUIRE);
 	/* The subtraction is done between two unsigned 32bits value
 	 * (the result is always modulo 32 bits even if we have
 	 * cons_head > prod_tail). So 'entries' is always between 0
@@ -461,11 +460,10 @@  int ___ring_sc_do_dequeue(_ring_t *r, void **obj_table,
 	r->cons.head = cons_next;
 
 	/* Acquire the pointers and the memory they refer to */
-	__atomic_thread_fence(__ATOMIC_ACQUIRE);
 	/* copy in table */
 	DEQUEUE_PTRS();
 
-	r->cons.tail = cons_next;
+	__atomic_store_n(&r->cons.tail, cons_next, __ATOMIC_RELEASE);
 	return behavior == _RING_QUEUE_FIXED ? 0 : n;
 }