@@ -109,85 +109,88 @@ __rte_experimental
struct rte_ring *rte_ring_create_elem(const char *name, unsigned int count,
unsigned int esize, int socket_id, unsigned int flags);
-#define ENQUEUE_PTRS_GEN(r, ring_start, prod_head, obj_table, esize, n) do { \
- unsigned int i, j; \
- const uint32_t size = (r)->size; \
- uint32_t idx = prod_head & (r)->mask; \
- uint32_t *ring = (uint32_t *)ring_start; \
- uint32_t *obj = (uint32_t *)obj_table; \
- uint32_t nr_n = n * (esize / sizeof(uint32_t)); \
- uint32_t nr_idx = idx * (esize / sizeof(uint32_t)); \
- uint32_t seg0 = size - idx; \
- if (likely(n < seg0)) { \
- for (i = 0; i < (nr_n & ((~(unsigned)0x7))); \
- i += 8, nr_idx += 8) { \
- memcpy(ring + nr_idx, obj + i, 8 * sizeof (uint32_t)); \
- } \
- switch (nr_n & 0x7) { \
- case 7: \
- ring[nr_idx++] = obj[i++]; /* fallthrough */ \
- case 6: \
- ring[nr_idx++] = obj[i++]; /* fallthrough */ \
- case 5: \
- ring[nr_idx++] = obj[i++]; /* fallthrough */ \
- case 4: \
- ring[nr_idx++] = obj[i++]; /* fallthrough */ \
- case 3: \
- ring[nr_idx++] = obj[i++]; /* fallthrough */ \
- case 2: \
- ring[nr_idx++] = obj[i++]; /* fallthrough */ \
- case 1: \
- ring[nr_idx++] = obj[i++]; /* fallthrough */ \
- } \
- } else { \
- uint32_t nr_seg0 = seg0 * (esize / sizeof(uint32_t)); \
- uint32_t nr_seg1 = nr_n - nr_seg0; \
- for (i = 0; i < nr_seg0; i++, nr_idx++)\
- ring[nr_idx] = obj[i]; \
- for (j = 0; j < nr_seg1; i++, j++) \
- ring[j] = obj[i]; \
- } \
-} while (0)
-
-#define DEQUEUE_PTRS_GEN(r, ring_start, cons_head, obj_table, esize, n) do { \
- unsigned int i, j; \
- uint32_t idx = cons_head & (r)->mask; \
- const uint32_t size = (r)->size; \
- uint32_t *ring = (uint32_t *)ring_start; \
- uint32_t *obj = (uint32_t *)obj_table; \
- uint32_t nr_n = n * (esize / sizeof(uint32_t)); \
- uint32_t nr_idx = idx * (esize / sizeof(uint32_t)); \
- uint32_t seg0 = size - idx; \
- if (likely(n < seg0)) { \
- for (i = 0; i < (nr_n & ((~(unsigned)0x7))); \
- i += 8, nr_idx += 8) { \
- memcpy(obj + i, ring + nr_idx, 8 * sizeof (uint32_t)); \
- } \
- switch (nr_n & 0x7) { \
- case 7: \
- obj[i++] = ring[nr_idx++]; /* fallthrough */ \
- case 6: \
- obj[i++] = ring[nr_idx++]; /* fallthrough */ \
- case 5: \
- obj[i++] = ring[nr_idx++]; /* fallthrough */ \
- case 4: \
- obj[i++] = ring[nr_idx++]; /* fallthrough */ \
- case 3: \
- obj[i++] = ring[nr_idx++]; /* fallthrough */ \
- case 2: \
- obj[i++] = ring[nr_idx++]; /* fallthrough */ \
- case 1: \
- obj[i++] = ring[nr_idx++]; /* fallthrough */ \
- } \
- } else { \
- uint32_t nr_seg0 = seg0 * (esize / sizeof(uint32_t)); \
- uint32_t nr_seg1 = nr_n - nr_seg0; \
- for (i = 0; i < nr_seg0; i++, nr_idx++)\
- obj[i] = ring[nr_idx];\
- for (j = 0; j < nr_seg1; i++, j++) \
- obj[i] = ring[j]; \
- } \
-} while (0)
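+/* Copy nr_num uint32_t words from su32[] to du32[]. The bulk is copied
+ * eight words at a time with memcpy(); the remaining (nr_num & 7) words
+ * are handled by the fall-through switch below.
+ */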
+static __rte_always_inline void
+copy_elems(uint32_t du32[], const uint32_t su32[], uint32_t nr_num)
+{
+ uint32_t i;
+
+	for (i = 0; i < (nr_num & ~(uint32_t)7); i += 8)
+ memcpy(du32 + i, su32 + i, 8 * sizeof(uint32_t));
+
+ switch (nr_num & 7) {
+ case 7: du32[nr_num - 7] = su32[nr_num - 7]; /* fallthrough */
+ case 6: du32[nr_num - 6] = su32[nr_num - 6]; /* fallthrough */
+ case 5: du32[nr_num - 5] = su32[nr_num - 5]; /* fallthrough */
+ case 4: du32[nr_num - 4] = su32[nr_num - 4]; /* fallthrough */
+ case 3: du32[nr_num - 3] = su32[nr_num - 3]; /* fallthrough */
+ case 2: du32[nr_num - 2] = su32[nr_num - 2]; /* fallthrough */
+ case 1: du32[nr_num - 1] = su32[nr_num - 1]; /* fallthrough */
+ }
+}
+
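+/* Copy num objects of esize bytes each from obj_table into the ring,
+ * starting at the slot given by prod_head. Both the ring space and the
+ * source are handled as arrays of uint32_t, so esize is expected to be
+ * a multiple of 4 bytes.
+ */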
+static __rte_always_inline void
+enqueue_elems(struct rte_ring *r, void *ring_start, uint32_t prod_head,
+ void *obj_table, uint32_t num, uint32_t esize)
+{
+ uint32_t idx, nr_idx, nr_num;
+ uint32_t *du32;
+ const uint32_t *su32;
+
+ const uint32_t size = r->size;
+ uint32_t s0, nr_s0, nr_s1;
+
+	idx = prod_head & r->mask;
+ /* Normalize the idx to uint32_t */
+ nr_idx = (idx * esize) / sizeof(uint32_t);
+
+ du32 = (uint32_t *)ring_start + nr_idx;
+ su32 = obj_table;
+
+ /* Normalize the number of elements to uint32_t */
+ nr_num = (num * esize) / sizeof(uint32_t);
+
+ s0 = size - idx;
+	if (likely(num < s0)) {
+		/* No wrap-around, the whole copy is contiguous */
+		copy_elems(du32, su32, nr_num);
+	} else {
+		/* Wrap-around: copy up to the end of the ring space,
+		 * then continue from the beginning of the ring.
+		 */
+		nr_s0 = (s0 * esize) / sizeof(uint32_t);
+		nr_s1 = nr_num - nr_s0;
+		copy_elems(du32, su32, nr_s0);
+		copy_elems(ring_start, su32 + nr_s0, nr_s1);
+	}
+}
+
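+/* Copy num objects of esize bytes each from the ring, starting at the
+ * slot given by cons_head, into obj_table. The mirror image of
+ * enqueue_elems().
+ */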
+static __rte_always_inline void
+dequeue_elems(struct rte_ring *r, void *ring_start, uint32_t cons_head,
+ void *obj_table, uint32_t num, uint32_t esize)
+{
+ uint32_t idx, nr_idx, nr_num;
+ uint32_t *du32;
+ const uint32_t *su32;
+
+ const uint32_t size = r->size;
+ uint32_t s0, nr_s0, nr_s1;
+
+	idx = cons_head & r->mask;
+ /* Normalize the idx to uint32_t */
+ nr_idx = (idx * esize) / sizeof(uint32_t);
+
+ su32 = (uint32_t *)ring_start + nr_idx;
+ du32 = obj_table;
+
+ /* Normalize the number of elements to uint32_t */
+ nr_num = (num * esize) / sizeof(uint32_t);
+
+ s0 = size - idx;
+	if (likely(num < s0)) {
+		/* No wrap-around, the whole copy is contiguous */
+		copy_elems(du32, su32, nr_num);
+	} else {
+		/* Wrap-around: copy up to the end of the ring space,
+		 * then continue from the beginning of the ring.
+		 */
+		nr_s0 = (s0 * esize) / sizeof(uint32_t);
+		nr_s1 = nr_num - nr_s0;
+		copy_elems(du32, su32, nr_s0);
+		copy_elems(du32 + nr_s0, ring_start, nr_s1);
+	}
+}
/* Between load and load. there might be cpu reorder in weak model
* (powerpc/arm).
@@ -242,7 +245,7 @@ __rte_ring_do_enqueue_elem(struct rte_ring *r, void * const obj_table,
if (n == 0)
goto end;
- ENQUEUE_PTRS_GEN(r, &r[1], prod_head, obj_table, esize, n);
+ enqueue_elems(r, &r[1], prod_head, obj_table, n, esize);
update_tail(&r->prod, prod_head, prod_next, is_sp, 1);
end:
@@ -289,7 +292,7 @@ __rte_ring_do_dequeue_elem(struct rte_ring *r, void *obj_table,
if (n == 0)
goto end;
- DEQUEUE_PTRS_GEN(r, &r[1], cons_head, obj_table, esize, n);
+ dequeue_elems(r, &r[1], cons_head, obj_table, n, esize);
update_tail(&r->cons, cons_head, cons_next, is_sc, 0);