| Message ID | 20170328192330.62599-2-brian.brooks@arm.com |
| --- | --- |
| State | Superseded |
| Headers | show |
| Series | [API-NEXT,1/4] helper: cuckootable: Specify queue ring_size \| expand |
Maxim: Per discussions today, this part needs to be separated from the rest of this series and merged on an expedited basis to resolve critical issues when running ODP on ARM systems. V2 of the scheduler patch will omit this part. On Tue, Mar 28, 2017 at 2:23 PM, Brian Brooks <brian.brooks@arm.com> wrote: > From: Ola Liljedahl <ola.liljedahl@arm.com> > > Signed-off-by: Ola Liljedahl <ola.liljedahl@arm.com> > Reviewed-by: Brian Brooks <brian.brooks@arm.com> Reviewed-and-tested-by: Bill Fischofer <bill.fischofer@linaro.org> > --- > platform/linux-generic/pktio/ring.c | 30 ++++++++++++++---------------- > 1 file changed, 14 insertions(+), 16 deletions(-) > mode change 100644 => 100755 platform/linux-generic/pktio/ring.c > > diff --git a/platform/linux-generic/pktio/ring.c b/platform/linux-generic/pktio/ring.c > old mode 100644 > new mode 100755 > index aeda04b2..e3c73d1c > --- a/platform/linux-generic/pktio/ring.c > +++ b/platform/linux-generic/pktio/ring.c > @@ -263,8 +263,8 @@ int ___ring_mp_do_enqueue(_ring_t *r, void * const *obj_table, > /* Reset n to the initial burst count */ > n = max; > > - prod_head = r->prod.head; > - cons_tail = r->cons.tail; > + prod_head = __atomic_load_n(&r->prod.head, __ATOMIC_RELAXED); > + cons_tail = __atomic_load_n(&r->cons.tail, __ATOMIC_ACQUIRE); > /* The subtraction is done between two unsigned 32bits value > * (the result is always modulo 32 bits even if we have > * prod_head > cons_tail). 
So 'free_entries' is always between 0 > @@ -306,12 +306,12 @@ int ___ring_mp_do_enqueue(_ring_t *r, void * const *obj_table, > * If there are other enqueues in progress that preceded us, > * we need to wait for them to complete > */ > - while (odp_unlikely(r->prod.tail != prod_head)) > + while (odp_unlikely(__atomic_load_n(&r->prod.tail, __ATOMIC_RELAXED) != > + prod_head)) > odp_cpu_pause(); > > /* Release our entries and the memory they refer to */ > - __atomic_thread_fence(__ATOMIC_RELEASE); > - r->prod.tail = prod_next; > + __atomic_store_n(&r->prod.tail, prod_next, __ATOMIC_RELEASE); > return ret; > } > > @@ -328,7 +328,7 @@ int ___ring_sp_do_enqueue(_ring_t *r, void * const *obj_table, > int ret; > > prod_head = r->prod.head; > - cons_tail = r->cons.tail; > + cons_tail = __atomic_load_n(&r->cons.tail, __ATOMIC_ACQUIRE); > /* The subtraction is done between two unsigned 32bits value > * (the result is always modulo 32 bits even if we have > * prod_head > cons_tail). So 'free_entries' is always between 0 > @@ -361,8 +361,7 @@ int ___ring_sp_do_enqueue(_ring_t *r, void * const *obj_table, > } > > /* Release our entries and the memory they refer to */ > - __atomic_thread_fence(__ATOMIC_RELEASE); > - r->prod.tail = prod_next; > + __atomic_store_n(&r->prod.tail, prod_next, __ATOMIC_RELEASE); > return ret; > } > > @@ -385,8 +384,8 @@ int ___ring_mc_do_dequeue(_ring_t *r, void **obj_table, > /* Restore n as it may change every loop */ > n = max; > > - cons_head = r->cons.head; > - prod_tail = r->prod.tail; > + cons_head = __atomic_load_n(&r->cons.head, __ATOMIC_RELAXED); > + prod_tail = __atomic_load_n(&r->prod.tail, __ATOMIC_ACQUIRE); > /* The subtraction is done between two unsigned 32bits value > * (the result is always modulo 32 bits even if we have > * cons_head > prod_tail). 
So 'entries' is always between 0 > @@ -419,12 +418,12 @@ int ___ring_mc_do_dequeue(_ring_t *r, void **obj_table, > * If there are other dequeues in progress that preceded us, > * we need to wait for them to complete > */ > - while (odp_unlikely(r->cons.tail != cons_head)) > + while (odp_unlikely(__atomic_load_n(&r->cons.tail, __ATOMIC_RELAXED) != > + cons_head)) > odp_cpu_pause(); > > /* Release our entries and the memory they refer to */ > - __atomic_thread_fence(__ATOMIC_RELEASE); > - r->cons.tail = cons_next; > + __atomic_store_n(&r->cons.tail, cons_next, __ATOMIC_RELEASE); > > return behavior == _RING_QUEUE_FIXED ? 0 : n; > } > @@ -441,7 +440,7 @@ int ___ring_sc_do_dequeue(_ring_t *r, void **obj_table, > uint32_t mask = r->prod.mask; > > cons_head = r->cons.head; > - prod_tail = r->prod.tail; > + prod_tail = __atomic_load_n(&r->prod.tail, __ATOMIC_ACQUIRE); > /* The subtraction is done between two unsigned 32bits value > * (the result is always modulo 32 bits even if we have > * cons_head > prod_tail). So 'entries' is always between 0 > @@ -461,11 +460,10 @@ int ___ring_sc_do_dequeue(_ring_t *r, void **obj_table, > r->cons.head = cons_next; > > /* Acquire the pointers and the memory they refer to */ > - __atomic_thread_fence(__ATOMIC_ACQUIRE); > /* copy in table */ > DEQUEUE_PTRS(); > > - r->cons.tail = cons_next; > + __atomic_store_n(&r->cons.tail, cons_next, __ATOMIC_RELEASE); > return behavior == _RING_QUEUE_FIXED ? 0 : n; > } > > -- > 2.12.1 >
On 03/28/17 22:23, Brian Brooks wrote: > From: Ola Liljedahl <ola.liljedahl@arm.com> > > Signed-off-by: Ola Liljedahl <ola.liljedahl@arm.com> > Reviewed-by: Brian Brooks <brian.brooks@arm.com> > --- > platform/linux-generic/pktio/ring.c | 30 ++++++++++++++---------------- > 1 file changed, 14 insertions(+), 16 deletions(-) > mode change 100644 => 100755 platform/linux-generic/pktio/ring.c > > diff --git a/platform/linux-generic/pktio/ring.c b/platform/linux-generic/pktio/ring.c > old mode 100644 > new mode 100755 no need of setting executable permissions to c file. And of course you have to run checkpatch.pl or push that code to github and it will do all required checks. I will fix it, no need to resend. Maxim. > index aeda04b2..e3c73d1c > --- a/platform/linux-generic/pktio/ring.c > +++ b/platform/linux-generic/pktio/ring.c > @@ -263,8 +263,8 @@ int ___ring_mp_do_enqueue(_ring_t *r, void * const *obj_table, > /* Reset n to the initial burst count */ > n = max; > > - prod_head = r->prod.head; > - cons_tail = r->cons.tail; > + prod_head = __atomic_load_n(&r->prod.head, __ATOMIC_RELAXED); > + cons_tail = __atomic_load_n(&r->cons.tail, __ATOMIC_ACQUIRE); > /* The subtraction is done between two unsigned 32bits value > * (the result is always modulo 32 bits even if we have > * prod_head > cons_tail). 
So 'free_entries' is always between 0 > @@ -306,12 +306,12 @@ int ___ring_mp_do_enqueue(_ring_t *r, void * const *obj_table, > * If there are other enqueues in progress that preceded us, > * we need to wait for them to complete > */ > - while (odp_unlikely(r->prod.tail != prod_head)) > + while (odp_unlikely(__atomic_load_n(&r->prod.tail, __ATOMIC_RELAXED) != > + prod_head)) > odp_cpu_pause(); > > /* Release our entries and the memory they refer to */ > - __atomic_thread_fence(__ATOMIC_RELEASE); > - r->prod.tail = prod_next; > + __atomic_store_n(&r->prod.tail, prod_next, __ATOMIC_RELEASE); > return ret; > } > > @@ -328,7 +328,7 @@ int ___ring_sp_do_enqueue(_ring_t *r, void * const *obj_table, > int ret; > > prod_head = r->prod.head; > - cons_tail = r->cons.tail; > + cons_tail = __atomic_load_n(&r->cons.tail, __ATOMIC_ACQUIRE); > /* The subtraction is done between two unsigned 32bits value > * (the result is always modulo 32 bits even if we have > * prod_head > cons_tail). So 'free_entries' is always between 0 > @@ -361,8 +361,7 @@ int ___ring_sp_do_enqueue(_ring_t *r, void * const *obj_table, > } > > /* Release our entries and the memory they refer to */ > - __atomic_thread_fence(__ATOMIC_RELEASE); > - r->prod.tail = prod_next; > + __atomic_store_n(&r->prod.tail, prod_next, __ATOMIC_RELEASE); > return ret; > } > > @@ -385,8 +384,8 @@ int ___ring_mc_do_dequeue(_ring_t *r, void **obj_table, > /* Restore n as it may change every loop */ > n = max; > > - cons_head = r->cons.head; > - prod_tail = r->prod.tail; > + cons_head = __atomic_load_n(&r->cons.head, __ATOMIC_RELAXED); > + prod_tail = __atomic_load_n(&r->prod.tail, __ATOMIC_ACQUIRE); > /* The subtraction is done between two unsigned 32bits value > * (the result is always modulo 32 bits even if we have > * cons_head > prod_tail). 
So 'entries' is always between 0 > @@ -419,12 +418,12 @@ int ___ring_mc_do_dequeue(_ring_t *r, void **obj_table, > * If there are other dequeues in progress that preceded us, > * we need to wait for them to complete > */ > - while (odp_unlikely(r->cons.tail != cons_head)) > + while (odp_unlikely(__atomic_load_n(&r->cons.tail, __ATOMIC_RELAXED) != > + cons_head)) > odp_cpu_pause(); > > /* Release our entries and the memory they refer to */ > - __atomic_thread_fence(__ATOMIC_RELEASE); > - r->cons.tail = cons_next; > + __atomic_store_n(&r->cons.tail, cons_next, __ATOMIC_RELEASE); > > return behavior == _RING_QUEUE_FIXED ? 0 : n; > } > @@ -441,7 +440,7 @@ int ___ring_sc_do_dequeue(_ring_t *r, void **obj_table, > uint32_t mask = r->prod.mask; > > cons_head = r->cons.head; > - prod_tail = r->prod.tail; > + prod_tail = __atomic_load_n(&r->prod.tail, __ATOMIC_ACQUIRE); > /* The subtraction is done between two unsigned 32bits value > * (the result is always modulo 32 bits even if we have > * cons_head > prod_tail). So 'entries' is always between 0 > @@ -461,11 +460,10 @@ int ___ring_sc_do_dequeue(_ring_t *r, void **obj_table, > r->cons.head = cons_next; > > /* Acquire the pointers and the memory they refer to */ > - __atomic_thread_fence(__ATOMIC_ACQUIRE); > /* copy in table */ > DEQUEUE_PTRS(); > > - r->cons.tail = cons_next; > + __atomic_store_n(&r->cons.tail, cons_next, __ATOMIC_RELEASE); > return behavior == _RING_QUEUE_FIXED ? 0 : n; > } > >
On 31 March 2017 at 15:21, Maxim Uvarov <maxim.uvarov@linaro.org> wrote: > On 03/28/17 22:23, Brian Brooks wrote: >> From: Ola Liljedahl <ola.liljedahl@arm.com> >> >> Signed-off-by: Ola Liljedahl <ola.liljedahl@arm.com> >> Reviewed-by: Brian Brooks <brian.brooks@arm.com> >> --- >> platform/linux-generic/pktio/ring.c | 30 ++++++++++++++---------------- >> 1 file changed, 14 insertions(+), 16 deletions(-) >> mode change 100644 => 100755 platform/linux-generic/pktio/ring.c >> >> diff --git a/platform/linux-generic/pktio/ring.c b/platform/linux-generic/pktio/ring.c >> old mode 100644 >> new mode 100755 > > > no need of setting executable permissions to c file. And of course you Very strange. I can assure you that I have not actively changed permissions on this file (I made the original changes on our local copy). > have to run checkpatch.pl or push that code to github and it will do all > required checks. > > I will fix it, no need to resend. > > Maxim. > >> index aeda04b2..e3c73d1c >> --- a/platform/linux-generic/pktio/ring.c >> +++ b/platform/linux-generic/pktio/ring.c >> @@ -263,8 +263,8 @@ int ___ring_mp_do_enqueue(_ring_t *r, void * const *obj_table, >> /* Reset n to the initial burst count */ >> n = max; >> >> - prod_head = r->prod.head; >> - cons_tail = r->cons.tail; >> + prod_head = __atomic_load_n(&r->prod.head, __ATOMIC_RELAXED); >> + cons_tail = __atomic_load_n(&r->cons.tail, __ATOMIC_ACQUIRE); >> /* The subtraction is done between two unsigned 32bits value >> * (the result is always modulo 32 bits even if we have >> * prod_head > cons_tail). 
So 'free_entries' is always between 0 >> @@ -306,12 +306,12 @@ int ___ring_mp_do_enqueue(_ring_t *r, void * const *obj_table, >> * If there are other enqueues in progress that preceded us, >> * we need to wait for them to complete >> */ >> - while (odp_unlikely(r->prod.tail != prod_head)) >> + while (odp_unlikely(__atomic_load_n(&r->prod.tail, __ATOMIC_RELAXED) != >> + prod_head)) >> odp_cpu_pause(); >> >> /* Release our entries and the memory they refer to */ >> - __atomic_thread_fence(__ATOMIC_RELEASE); >> - r->prod.tail = prod_next; >> + __atomic_store_n(&r->prod.tail, prod_next, __ATOMIC_RELEASE); >> return ret; >> } >> >> @@ -328,7 +328,7 @@ int ___ring_sp_do_enqueue(_ring_t *r, void * const *obj_table, >> int ret; >> >> prod_head = r->prod.head; >> - cons_tail = r->cons.tail; >> + cons_tail = __atomic_load_n(&r->cons.tail, __ATOMIC_ACQUIRE); >> /* The subtraction is done between two unsigned 32bits value >> * (the result is always modulo 32 bits even if we have >> * prod_head > cons_tail). So 'free_entries' is always between 0 >> @@ -361,8 +361,7 @@ int ___ring_sp_do_enqueue(_ring_t *r, void * const *obj_table, >> } >> >> /* Release our entries and the memory they refer to */ >> - __atomic_thread_fence(__ATOMIC_RELEASE); >> - r->prod.tail = prod_next; >> + __atomic_store_n(&r->prod.tail, prod_next, __ATOMIC_RELEASE); >> return ret; >> } >> >> @@ -385,8 +384,8 @@ int ___ring_mc_do_dequeue(_ring_t *r, void **obj_table, >> /* Restore n as it may change every loop */ >> n = max; >> >> - cons_head = r->cons.head; >> - prod_tail = r->prod.tail; >> + cons_head = __atomic_load_n(&r->cons.head, __ATOMIC_RELAXED); >> + prod_tail = __atomic_load_n(&r->prod.tail, __ATOMIC_ACQUIRE); >> /* The subtraction is done between two unsigned 32bits value >> * (the result is always modulo 32 bits even if we have >> * cons_head > prod_tail). 
So 'entries' is always between 0 >> @@ -419,12 +418,12 @@ int ___ring_mc_do_dequeue(_ring_t *r, void **obj_table, >> * If there are other dequeues in progress that preceded us, >> * we need to wait for them to complete >> */ >> - while (odp_unlikely(r->cons.tail != cons_head)) >> + while (odp_unlikely(__atomic_load_n(&r->cons.tail, __ATOMIC_RELAXED) != >> + cons_head)) >> odp_cpu_pause(); >> >> /* Release our entries and the memory they refer to */ >> - __atomic_thread_fence(__ATOMIC_RELEASE); >> - r->cons.tail = cons_next; >> + __atomic_store_n(&r->cons.tail, cons_next, __ATOMIC_RELEASE); >> >> return behavior == _RING_QUEUE_FIXED ? 0 : n; >> } >> @@ -441,7 +440,7 @@ int ___ring_sc_do_dequeue(_ring_t *r, void **obj_table, >> uint32_t mask = r->prod.mask; >> >> cons_head = r->cons.head; >> - prod_tail = r->prod.tail; >> + prod_tail = __atomic_load_n(&r->prod.tail, __ATOMIC_ACQUIRE); >> /* The subtraction is done between two unsigned 32bits value >> * (the result is always modulo 32 bits even if we have >> * cons_head > prod_tail). So 'entries' is always between 0 >> @@ -461,11 +460,10 @@ int ___ring_sc_do_dequeue(_ring_t *r, void **obj_table, >> r->cons.head = cons_next; >> >> /* Acquire the pointers and the memory they refer to */ >> - __atomic_thread_fence(__ATOMIC_ACQUIRE); >> /* copy in table */ >> DEQUEUE_PTRS(); >> >> - r->cons.tail = cons_next; >> + __atomic_store_n(&r->cons.tail, cons_next, __ATOMIC_RELEASE); >> return behavior == _RING_QUEUE_FIXED ? 0 : n; >> } >> >> >
Merged to master branch. Maxim. On 03/31/17 16:45, Ola Liljedahl wrote: > On 31 March 2017 at 15:21, Maxim Uvarov <maxim.uvarov@linaro.org> wrote: >> On 03/28/17 22:23, Brian Brooks wrote: >>> From: Ola Liljedahl <ola.liljedahl@arm.com> >>> >>> Signed-off-by: Ola Liljedahl <ola.liljedahl@arm.com> >>> Reviewed-by: Brian Brooks <brian.brooks@arm.com> >>> --- >>> platform/linux-generic/pktio/ring.c | 30 ++++++++++++++---------------- >>> 1 file changed, 14 insertions(+), 16 deletions(-) >>> mode change 100644 => 100755 platform/linux-generic/pktio/ring.c >>> >>> diff --git a/platform/linux-generic/pktio/ring.c b/platform/linux-generic/pktio/ring.c >>> old mode 100644 >>> new mode 100755 >> >> >> no need of setting executable permissions to c file. And of course you > Very strange. I can assure you that I have not actively changed > permissions on this file (I made the original changes on our local > copy). > >> have to run checkpatch.pl or push that code to github and it will do all >> required checks. >> >> I will fix it, no need to resend. >> >> Maxim. >> >>> index aeda04b2..e3c73d1c >>> --- a/platform/linux-generic/pktio/ring.c >>> +++ b/platform/linux-generic/pktio/ring.c >>> @@ -263,8 +263,8 @@ int ___ring_mp_do_enqueue(_ring_t *r, void * const *obj_table, >>> /* Reset n to the initial burst count */ >>> n = max; >>> >>> - prod_head = r->prod.head; >>> - cons_tail = r->cons.tail; >>> + prod_head = __atomic_load_n(&r->prod.head, __ATOMIC_RELAXED); >>> + cons_tail = __atomic_load_n(&r->cons.tail, __ATOMIC_ACQUIRE); >>> /* The subtraction is done between two unsigned 32bits value >>> * (the result is always modulo 32 bits even if we have >>> * prod_head > cons_tail). 
So 'free_entries' is always between 0 >>> @@ -306,12 +306,12 @@ int ___ring_mp_do_enqueue(_ring_t *r, void * const *obj_table, >>> * If there are other enqueues in progress that preceded us, >>> * we need to wait for them to complete >>> */ >>> - while (odp_unlikely(r->prod.tail != prod_head)) >>> + while (odp_unlikely(__atomic_load_n(&r->prod.tail, __ATOMIC_RELAXED) != >>> + prod_head)) >>> odp_cpu_pause(); >>> >>> /* Release our entries and the memory they refer to */ >>> - __atomic_thread_fence(__ATOMIC_RELEASE); >>> - r->prod.tail = prod_next; >>> + __atomic_store_n(&r->prod.tail, prod_next, __ATOMIC_RELEASE); >>> return ret; >>> } >>> >>> @@ -328,7 +328,7 @@ int ___ring_sp_do_enqueue(_ring_t *r, void * const *obj_table, >>> int ret; >>> >>> prod_head = r->prod.head; >>> - cons_tail = r->cons.tail; >>> + cons_tail = __atomic_load_n(&r->cons.tail, __ATOMIC_ACQUIRE); >>> /* The subtraction is done between two unsigned 32bits value >>> * (the result is always modulo 32 bits even if we have >>> * prod_head > cons_tail). So 'free_entries' is always between 0 >>> @@ -361,8 +361,7 @@ int ___ring_sp_do_enqueue(_ring_t *r, void * const *obj_table, >>> } >>> >>> /* Release our entries and the memory they refer to */ >>> - __atomic_thread_fence(__ATOMIC_RELEASE); >>> - r->prod.tail = prod_next; >>> + __atomic_store_n(&r->prod.tail, prod_next, __ATOMIC_RELEASE); >>> return ret; >>> } >>> >>> @@ -385,8 +384,8 @@ int ___ring_mc_do_dequeue(_ring_t *r, void **obj_table, >>> /* Restore n as it may change every loop */ >>> n = max; >>> >>> - cons_head = r->cons.head; >>> - prod_tail = r->prod.tail; >>> + cons_head = __atomic_load_n(&r->cons.head, __ATOMIC_RELAXED); >>> + prod_tail = __atomic_load_n(&r->prod.tail, __ATOMIC_ACQUIRE); >>> /* The subtraction is done between two unsigned 32bits value >>> * (the result is always modulo 32 bits even if we have >>> * cons_head > prod_tail). 
So 'entries' is always between 0 >>> @@ -419,12 +418,12 @@ int ___ring_mc_do_dequeue(_ring_t *r, void **obj_table, >>> * If there are other dequeues in progress that preceded us, >>> * we need to wait for them to complete >>> */ >>> - while (odp_unlikely(r->cons.tail != cons_head)) >>> + while (odp_unlikely(__atomic_load_n(&r->cons.tail, __ATOMIC_RELAXED) != >>> + cons_head)) >>> odp_cpu_pause(); >>> >>> /* Release our entries and the memory they refer to */ >>> - __atomic_thread_fence(__ATOMIC_RELEASE); >>> - r->cons.tail = cons_next; >>> + __atomic_store_n(&r->cons.tail, cons_next, __ATOMIC_RELEASE); >>> >>> return behavior == _RING_QUEUE_FIXED ? 0 : n; >>> } >>> @@ -441,7 +440,7 @@ int ___ring_sc_do_dequeue(_ring_t *r, void **obj_table, >>> uint32_t mask = r->prod.mask; >>> >>> cons_head = r->cons.head; >>> - prod_tail = r->prod.tail; >>> + prod_tail = __atomic_load_n(&r->prod.tail, __ATOMIC_ACQUIRE); >>> /* The subtraction is done between two unsigned 32bits value >>> * (the result is always modulo 32 bits even if we have >>> * cons_head > prod_tail). So 'entries' is always between 0 >>> @@ -461,11 +460,10 @@ int ___ring_sc_do_dequeue(_ring_t *r, void **obj_table, >>> r->cons.head = cons_next; >>> >>> /* Acquire the pointers and the memory they refer to */ >>> - __atomic_thread_fence(__ATOMIC_ACQUIRE); >>> /* copy in table */ >>> DEQUEUE_PTRS(); >>> >>> - r->cons.tail = cons_next; >>> + __atomic_store_n(&r->cons.tail, cons_next, __ATOMIC_RELEASE); >>> return behavior == _RING_QUEUE_FIXED ? 0 : n; >>> } >>> >>> >>
diff --git a/platform/linux-generic/pktio/ring.c b/platform/linux-generic/pktio/ring.c old mode 100644 new mode 100755 index aeda04b2..e3c73d1c --- a/platform/linux-generic/pktio/ring.c +++ b/platform/linux-generic/pktio/ring.c @@ -263,8 +263,8 @@ int ___ring_mp_do_enqueue(_ring_t *r, void * const *obj_table, /* Reset n to the initial burst count */ n = max; - prod_head = r->prod.head; - cons_tail = r->cons.tail; + prod_head = __atomic_load_n(&r->prod.head, __ATOMIC_RELAXED); + cons_tail = __atomic_load_n(&r->cons.tail, __ATOMIC_ACQUIRE); /* The subtraction is done between two unsigned 32bits value * (the result is always modulo 32 bits even if we have * prod_head > cons_tail). So 'free_entries' is always between 0 @@ -306,12 +306,12 @@ int ___ring_mp_do_enqueue(_ring_t *r, void * const *obj_table, * If there are other enqueues in progress that preceded us, * we need to wait for them to complete */ - while (odp_unlikely(r->prod.tail != prod_head)) + while (odp_unlikely(__atomic_load_n(&r->prod.tail, __ATOMIC_RELAXED) != + prod_head)) odp_cpu_pause(); /* Release our entries and the memory they refer to */ - __atomic_thread_fence(__ATOMIC_RELEASE); - r->prod.tail = prod_next; + __atomic_store_n(&r->prod.tail, prod_next, __ATOMIC_RELEASE); return ret; } @@ -328,7 +328,7 @@ int ___ring_sp_do_enqueue(_ring_t *r, void * const *obj_table, int ret; prod_head = r->prod.head; - cons_tail = r->cons.tail; + cons_tail = __atomic_load_n(&r->cons.tail, __ATOMIC_ACQUIRE); /* The subtraction is done between two unsigned 32bits value * (the result is always modulo 32 bits even if we have * prod_head > cons_tail). 
So 'free_entries' is always between 0 @@ -361,8 +361,7 @@ int ___ring_sp_do_enqueue(_ring_t *r, void * const *obj_table, } /* Release our entries and the memory they refer to */ - __atomic_thread_fence(__ATOMIC_RELEASE); - r->prod.tail = prod_next; + __atomic_store_n(&r->prod.tail, prod_next, __ATOMIC_RELEASE); return ret; } @@ -385,8 +384,8 @@ int ___ring_mc_do_dequeue(_ring_t *r, void **obj_table, /* Restore n as it may change every loop */ n = max; - cons_head = r->cons.head; - prod_tail = r->prod.tail; + cons_head = __atomic_load_n(&r->cons.head, __ATOMIC_RELAXED); + prod_tail = __atomic_load_n(&r->prod.tail, __ATOMIC_ACQUIRE); /* The subtraction is done between two unsigned 32bits value * (the result is always modulo 32 bits even if we have * cons_head > prod_tail). So 'entries' is always between 0 @@ -419,12 +418,12 @@ int ___ring_mc_do_dequeue(_ring_t *r, void **obj_table, * If there are other dequeues in progress that preceded us, * we need to wait for them to complete */ - while (odp_unlikely(r->cons.tail != cons_head)) + while (odp_unlikely(__atomic_load_n(&r->cons.tail, __ATOMIC_RELAXED) != + cons_head)) odp_cpu_pause(); /* Release our entries and the memory they refer to */ - __atomic_thread_fence(__ATOMIC_RELEASE); - r->cons.tail = cons_next; + __atomic_store_n(&r->cons.tail, cons_next, __ATOMIC_RELEASE); return behavior == _RING_QUEUE_FIXED ? 0 : n; } @@ -441,7 +440,7 @@ int ___ring_sc_do_dequeue(_ring_t *r, void **obj_table, uint32_t mask = r->prod.mask; cons_head = r->cons.head; - prod_tail = r->prod.tail; + prod_tail = __atomic_load_n(&r->prod.tail, __ATOMIC_ACQUIRE); /* The subtraction is done between two unsigned 32bits value * (the result is always modulo 32 bits even if we have * cons_head > prod_tail). 
So 'entries' is always between 0 @@ -461,11 +460,10 @@ int ___ring_sc_do_dequeue(_ring_t *r, void **obj_table, r->cons.head = cons_next; /* Acquire the pointers and the memory they refer to */ - __atomic_thread_fence(__ATOMIC_ACQUIRE); /* copy in table */ DEQUEUE_PTRS(); - r->cons.tail = cons_next; + __atomic_store_n(&r->cons.tail, cons_next, __ATOMIC_RELEASE); return behavior == _RING_QUEUE_FIXED ? 0 : n; }