Message ID | 1469181897-19168-1-git-send-email-matias.elo@nokia.com |
---|---|
State | New |
For this series:

Reviewed-and-tested-by: Bill Fischofer <bill.fischofer@linaro.org>

On Fri, Jul 22, 2016 at 5:04 AM, Matias Elo <matias.elo@nokia.com> wrote:

> Optimize local buffer cache performance which is critical to
> many use cases - including packet IO.
>
> Main parts of the optimization are:
> * Local cache implemented as an array of buf_hdr pointers,
> instead of a linked list (which causes a lot of cache misses)
> * Alloc and free N buffers per operation
>
> All of the above steps are needed to achieve the performance improvement.
> Some related pool functions (get_buf(), ret_buf(), etc) were moved
> from the pool header to the c source file, since those were actually local
> to the c source file. Also some unused pool variables are removed.
>
> Signed-off-by: Petri Savolainen <petri.savolainen@nokia.com>
> Signed-off-by: Matias Elo <matias.elo@nokia.com>
> ---
>
> V2:
> - Split pktio modifications into a separate patch (Bill)
> - Improve performance by adding separate functions for single buffer
> alloc/free operations
>
> .../linux-generic/include/odp_buffer_inlines.h | 26 +-
> .../linux-generic/include/odp_buffer_internal.h | 5 +-
> platform/linux-generic/include/odp_internal.h | 2 -
> platform/linux-generic/include/odp_pool_internal.h | 143 +------
> platform/linux-generic/odp_buffer.c | 3 -
> platform/linux-generic/odp_packet.c | 5 +-
> platform/linux-generic/odp_pool.c | 473 +++++++++++++++++----
> 7 files changed, 426 insertions(+), 231 deletions(-)
>
> diff --git a/platform/linux-generic/include/odp_buffer_inlines.h b/platform/linux-generic/include/odp_buffer_inlines.h
> index 3f4d9fd..2b1ab42 100644
> --- a/platform/linux-generic/include/odp_buffer_inlines.h
> +++ b/platform/linux-generic/include/odp_buffer_inlines.h
> @@ -56,30 +56,12 @@ static inline odp_buffer_hdr_t *odp_buf_to_hdr(odp_buffer_t buf)
> (pool->pool_mdata_addr + (index * ODP_CACHE_LINE_SIZE));
> }
>
> -static inline uint32_t odp_buffer_refcount(odp_buffer_hdr_t *buf)
> +static inline uint32_t pool_id_from_buf(odp_buffer_t buf)
> {
> - return odp_atomic_load_u32(&buf->ref_count);
> -}
> + odp_buffer_bits_t handle;
>
> -static inline uint32_t odp_buffer_incr_refcount(odp_buffer_hdr_t *buf,
> - uint32_t val)
> -{
> - return odp_atomic_fetch_add_u32(&buf->ref_count, val) + val;
> -}
> -
> -static inline uint32_t odp_buffer_decr_refcount(odp_buffer_hdr_t *buf,
> - uint32_t val)
> -{
> - uint32_t tmp;
> -
> - tmp = odp_atomic_fetch_sub_u32(&buf->ref_count, val);
> -
> - if (tmp < val) {
> - odp_atomic_fetch_add_u32(&buf->ref_count, val - tmp);
> - return 0;
> - } else {
> - return tmp - val;
> - }
> + handle.handle = buf;
> + return handle.pool_id;
> }
>
> static inline odp_buffer_hdr_t *validate_buf(odp_buffer_t buf)
> diff --git a/platform/linux-generic/include/odp_buffer_internal.h b/platform/linux-generic/include/odp_buffer_internal.h
> index f21364c..7b0ef8b 100644
> --- a/platform/linux-generic/include/odp_buffer_internal.h
> +++ b/platform/linux-generic/include/odp_buffer_internal.h
> @@ -114,7 +114,6 @@ struct odp_buffer_hdr_t {
> union {
> uint32_t all;
> struct {
> - uint32_t zeroized:1; /* Zeroize buf data on free */
> uint32_t hdrdata:1; /* Data is in buffer hdr */
> uint32_t sustain:1; /* Sustain order */
> };
> @@ -123,7 +122,6 @@ struct odp_buffer_hdr_t {
> int8_t type; /* buffer type */
> odp_event_type_t event_type; /* for reuse as event */
> uint32_t size; /* max data size */
> - odp_atomic_u32_t ref_count; /* reference count */
> odp_pool_t pool_hdl; /* buffer pool handle */
> union {
> uint64_t buf_u64; /* user u64 */
> @@ -174,6 +172,9 @@ typedef struct {
> odp_buffer_t buffer_alloc(odp_pool_t pool, size_t size);
> int buffer_alloc_multi(odp_pool_t pool_hdl, size_t size,
> odp_buffer_t buf[], int num);
> +void buffer_free(uint32_t pool_id, const odp_buffer_t buf);
> +void buffer_free_multi(uint32_t pool_id,
> + const odp_buffer_t buf[], int num_free);
> int seg_alloc_head(odp_buffer_hdr_t *buf_hdr, int segcount);
> void seg_free_head(odp_buffer_hdr_t *buf_hdr, int segcount);
> int seg_alloc_tail(odp_buffer_hdr_t *buf_hdr, int segcount);
> diff --git a/platform/linux-generic/include/odp_internal.h b/platform/linux-generic/include/odp_internal.h
> index d12f850..8bad450 100644
> --- a/platform/linux-generic/include/odp_internal.h
> +++ b/platform/linux-generic/include/odp_internal.h
> @@ -119,8 +119,6 @@ int odp_tm_term_global(void);
> int _odp_int_name_tbl_init_global(void);
> int _odp_int_name_tbl_term_global(void);
>
> -void _odp_flush_caches(void);
> -
> int cpuinfo_parser(FILE *file, system_info_t *sysinfo);
> uint64_t odp_cpu_hz_current(int id);
>
> diff --git a/platform/linux-generic/include/odp_pool_internal.h b/platform/linux-generic/include/odp_pool_internal.h
> index 3317bd0..d6717ff 100644
> --- a/platform/linux-generic/include/odp_pool_internal.h
> +++ b/platform/linux-generic/include/odp_pool_internal.h
> @@ -51,15 +51,25 @@ typedef struct _odp_buffer_pool_init_t {
> void *buf_init_arg; /**< Argument to be passed to buf_init() */
> } _odp_buffer_pool_init_t; /**< Type of buffer initialization struct */
>
> +#define POOL_MAX_LOCAL_CHUNKS 4
> +#define POOL_CHUNK_SIZE 32
> +#define POOL_MAX_LOCAL_BUFS (POOL_MAX_LOCAL_CHUNKS * POOL_CHUNK_SIZE)
> +
> +struct local_cache_s {
> + uint64_t bufallocs; /* Local buffer alloc count */
> + uint64_t buffrees; /* Local buffer free count */
> +
> + uint32_t num_buf;
> + odp_buffer_hdr_t *buf[POOL_MAX_LOCAL_BUFS];
> +};
> +
> /* Local cache for buffer alloc/free acceleration */
> typedef struct local_cache_t {
> union {
> - struct {
> - odp_buffer_hdr_t *buf_freelist; /* The local cache */
> - uint64_t bufallocs; /* Local buffer alloc count */
> - uint64_t buffrees; /* Local buffer free count */
> - };
> - uint8_t pad[ODP_CACHE_LINE_SIZE_ROUNDUP(sizeof(uint64_t))];
> + struct local_cache_s s;
> +
> + uint8_t pad[ODP_CACHE_LINE_SIZE_ROUNDUP(
> + sizeof(struct local_cache_s))];
> };
> } local_cache_t;
>
> @@ -214,127 +224,6 @@ static inline void ret_blk(struct pool_entry_s *pool, void *block)
> odp_atomic_inc_u64(&pool->poolstats.blkfrees);
> }
>
> -static inline odp_buffer_hdr_t *get_buf(struct pool_entry_s *pool)
> -{
> - odp_buffer_hdr_t *myhead;
> - POOL_LOCK(&pool->buf_lock);
> -
> - myhead = pool->buf_freelist;
> -
> - if (odp_unlikely(myhead == NULL)) {
> - POOL_UNLOCK(&pool->buf_lock);
> - odp_atomic_inc_u64(&pool->poolstats.bufempty);
> - } else {
> - pool->buf_freelist = myhead->next;
> - POOL_UNLOCK(&pool->buf_lock);
> - uint64_t bufcount =
> - odp_atomic_fetch_sub_u32(&pool->bufcount, 1) - 1;
> -
> - /* Check for low watermark condition */
> - if (bufcount == pool->buf_low_wm && !pool->buf_low_wm_assert) {
> - pool->buf_low_wm_assert = 1;
> - odp_atomic_inc_u64(&pool->poolstats.buf_low_wm_count);
> - }
> -
> - odp_atomic_inc_u64(&pool->poolstats.bufallocs);
> - }
> -
> - return (void *)myhead;
> -}
> -
> -static inline void ret_buf(struct pool_entry_s *pool, odp_buffer_hdr_t *buf)
> -{
> - if (!buf->flags.hdrdata && buf->type != ODP_EVENT_BUFFER) {
> - while (buf->segcount > 0) {
> - if (buffer_is_secure(buf) || pool_is_secure(pool))
> - memset(buf->addr[buf->segcount - 1],
> - 0, buf->segsize);
> - ret_blk(pool, buf->addr[--buf->segcount]);
> - }
> - buf->size = 0;
> - }
> -
> - buf->allocator = ODP_FREEBUF; /* Mark buffer free */
> - POOL_LOCK(&pool->buf_lock);
> - buf->next = pool->buf_freelist;
> - pool->buf_freelist = buf;
> - POOL_UNLOCK(&pool->buf_lock);
> -
> - uint64_t bufcount = odp_atomic_fetch_add_u32(&pool->bufcount, 1) + 1;
> -
> - /* Check if low watermark condition should be deasserted */
> - if (bufcount == pool->buf_high_wm && pool->buf_low_wm_assert) {
> - pool->buf_low_wm_assert = 0;
> - odp_atomic_inc_u64(&pool->poolstats.buf_high_wm_count);
> - }
> -
> - odp_atomic_inc_u64(&pool->poolstats.buffrees);
> -}
> -
> -static inline void *get_local_buf(local_cache_t *buf_cache,
> - struct pool_entry_s *pool,
> - size_t totsize)
> -{
> - odp_buffer_hdr_t *buf = buf_cache->buf_freelist;
> -
> - if (odp_likely(buf != NULL)) {
> - buf_cache->buf_freelist = buf->next;
> -
> - if (odp_unlikely(buf->size < totsize)) {
> - intmax_t needed = totsize - buf->size;
> -
> - do {
> - void *blk = get_blk(pool);
> - if (odp_unlikely(blk == NULL)) {
> - ret_buf(pool, buf);
> - buf_cache->buffrees--;
> - return NULL;
> - }
> - buf->addr[buf->segcount++] = blk;
> - needed -= pool->seg_size;
> - } while (needed > 0);
> -
> - buf->size = buf->segcount * pool->seg_size;
> - }
> -
> - buf_cache->bufallocs++;
> - }
> -
> - return buf;
> -}
> -
> -static inline void ret_local_buf(local_cache_t *buf_cache,
> - odp_buffer_hdr_t *buf)
> -{
> - buf->allocator = ODP_FREEBUF;
> - buf->next = buf_cache->buf_freelist;
> - buf_cache->buf_freelist = buf;
> -
> - buf_cache->buffrees++;
> -}
> -
> -static inline void flush_cache(local_cache_t *buf_cache,
> - struct pool_entry_s *pool)
> -{
> - odp_buffer_hdr_t *buf = buf_cache->buf_freelist;
> - uint32_t flush_count = 0;
> -
> - while (buf != NULL) {
> - odp_buffer_hdr_t *next = buf->next;
> - ret_buf(pool, buf);
> - buf = next;
> - flush_count++;
> - }
> -
> - odp_atomic_add_u64(&pool->poolstats.bufallocs, buf_cache->bufallocs);
> - odp_atomic_add_u64(&pool->poolstats.buffrees,
> - buf_cache->buffrees - flush_count);
> -
> - buf_cache->buf_freelist = NULL;
> - buf_cache->bufallocs = 0;
> - buf_cache->buffrees = 0;
> -}
> -
> static inline odp_pool_t pool_index_to_handle(uint32_t pool_id)
> {
> return _odp_cast_scalar(odp_pool_t, pool_id);
> diff --git a/platform/linux-generic/odp_buffer.c b/platform/linux-generic/odp_buffer.c
> index e7e4d58..ce2fdba 100644
> --- a/platform/linux-generic/odp_buffer.c
> +++ b/platform/linux-generic/odp_buffer.c
> @@ -67,9 +67,6 @@ int odp_buffer_snprint(char *str, uint32_t n, odp_buffer_t buf)
> len += snprintf(&str[len], n-len,
> " size %" PRIu32 "\n", hdr->size);
> len += snprintf(&str[len], n-len,
> - " ref_count %" PRIu32 "\n",
> - odp_atomic_load_u32(&hdr->ref_count));
> - len += snprintf(&str[len], n-len,
> " type %i\n", hdr->type);
>
> return len;
> diff --git a/platform/linux-generic/odp_packet.c b/platform/linux-generic/odp_packet.c
> index 0e319d2..474fa81 100644
> --- a/platform/linux-generic/odp_packet.c
> +++ b/platform/linux-generic/odp_packet.c
> @@ -972,10 +972,7 @@ int _odp_packet_copy_md_to_packet(odp_packet_t srcpkt, odp_packet_t dstpkt)
> srchdr->buf_hdr.uarea_size ?
> dsthdr->buf_hdr.uarea_size :
> srchdr->buf_hdr.uarea_size);
> - odp_atomic_store_u32(
> - &dsthdr->buf_hdr.ref_count,
> - odp_atomic_load_u32(
> - &srchdr->buf_hdr.ref_count));
> +
> copy_packet_parser_metadata(srchdr, dsthdr);
>
> /* Metadata copied, but return indication of whether the packet
> diff --git a/platform/linux-generic/odp_pool.c b/platform/linux-generic/odp_pool.c
> index 419f03f..0a427ed 100644
> --- a/platform/linux-generic/odp_pool.c
> +++ b/platform/linux-generic/odp_pool.c
> @@ -57,8 +57,15 @@ static const char SHM_DEFAULT_NAME[] = "odp_buffer_pools";
> /* Pool entry pointers (for inlining) */
> void *pool_entry_ptr[ODP_CONFIG_POOLS];
>
> -/* Cache thread id locally for local cache performance */
> -static __thread int local_id;
> +/* Thread local variables */
> +typedef struct pool_local_t {
> + local_cache_t *cache[ODP_CONFIG_POOLS];
> + int thr_id;
> +} pool_local_t;
> +
> +static __thread pool_local_t local;
> +
> +static void flush_cache(local_cache_t *buf_cache, struct pool_entry_s *pool);
>
> int odp_pool_init_global(void)
> {
> @@ -111,7 +118,19 @@ int odp_pool_init_global(void)
>
> int odp_pool_init_local(void)
> {
> - local_id = odp_thread_id();
> + pool_entry_t *pool;
> + int i;
> + int thr_id = odp_thread_id();
> +
> + memset(&local, 0, sizeof(pool_local_t));
> +
> + for (i = 0; i < ODP_CONFIG_POOLS; i++) {
> + pool = get_pool_entry(i);
> + local.cache[i] = &pool->s.local_cache[thr_id];
> + local.cache[i]->s.num_buf = 0;
> + }
> +
> + local.thr_id = thr_id;
> return 0;
> }
>
> @@ -144,7 +163,14 @@ int odp_pool_term_global(void)
>
> int odp_pool_term_local(void)
> {
> - _odp_flush_caches();
> + int i;
> +
> + for (i = 0; i < ODP_CONFIG_POOLS; i++) {
> + pool_entry_t *pool = get_pool_entry(i);
> +
> + flush_cache(local.cache[i], &pool->s);
> + }
> +
> return 0;
> }
>
> @@ -179,10 +205,53 @@ int odp_pool_capability(odp_pool_capability_t *capa)
> return 0;
> }
>
> -/**
> +static inline odp_buffer_hdr_t *get_buf(struct pool_entry_s *pool)
> +{
> + odp_buffer_hdr_t *myhead;
> +
> + POOL_LOCK(&pool->buf_lock);
> +
> + myhead = pool->buf_freelist;
> +
> + if (odp_unlikely(myhead == NULL)) {
> + POOL_UNLOCK(&pool->buf_lock);
> + odp_atomic_inc_u64(&pool->poolstats.bufempty);
> + } else {
> + pool->buf_freelist = myhead->next;
> + POOL_UNLOCK(&pool->buf_lock);
> +
> + odp_atomic_fetch_sub_u32(&pool->bufcount, 1);
> + odp_atomic_inc_u64(&pool->poolstats.bufallocs);
> + }
> +
> + return (void *)myhead;
> +}
> +
> +static inline void ret_buf(struct pool_entry_s *pool, odp_buffer_hdr_t *buf)
> +{
> + if (!buf->flags.hdrdata && buf->type != ODP_EVENT_BUFFER) {
> + while (buf->segcount > 0) {
> + if (buffer_is_secure(buf) || pool_is_secure(pool))
> + memset(buf->addr[buf->segcount - 1],
> + 0, buf->segsize);
> + ret_blk(pool, buf->addr[--buf->segcount]);
> + }
> + buf->size = 0;
> + }
> +
> + buf->allocator = ODP_FREEBUF; /* Mark buffer free */
> + POOL_LOCK(&pool->buf_lock);
> + buf->next = pool->buf_freelist;
> + pool->buf_freelist = buf;
> + POOL_UNLOCK(&pool->buf_lock);
> +
> + odp_atomic_fetch_add_u32(&pool->bufcount, 1);
> + odp_atomic_inc_u64(&pool->poolstats.buffrees);
> +}
> +
> +/*
> * Pool creation
> */
> -
> odp_pool_t _pool_create(const char *name,
> odp_pool_param_t *params,
> uint32_t shmflags)
> @@ -208,9 +277,6 @@ odp_pool_t _pool_create(const char *name,
> /* Restriction for v1.0: All non-packet buffers are unsegmented */
> int unseg = 1;
>
> - /* Restriction for v1.0: No zeroization support */
> - const int zeroized = 0;
> -
> uint32_t blk_size, buf_stride, buf_num, blk_num, seg_len = 0;
> uint32_t buf_align =
> params->type == ODP_POOL_BUFFER ? params->buf.align : 0;
> @@ -350,7 +416,6 @@ odp_pool_t _pool_create(const char *name,
> POOL_UNLOCK(&pool->s.lock);
>
> pool->s.flags.unsegmented = unseg;
> - pool->s.flags.zeroized = zeroized;
> pool->s.seg_size = unseg ? blk_size : seg_len;
> pool->s.blk_size = blk_size;
>
> @@ -383,9 +448,7 @@ odp_pool_t _pool_create(const char *name,
> /* Iniitalize buffer metadata */
> tmp->allocator = ODP_FREEBUF;
> tmp->flags.all = 0;
> - tmp->flags.zeroized = zeroized;
> tmp->size = 0;
> - odp_atomic_init_u32(&tmp->ref_count, 0);
> tmp->type = params->type;
> tmp->event_type = params->type;
> tmp->pool_hdl = pool->s.pool_hdl;
> @@ -503,6 +566,41 @@ int odp_pool_info(odp_pool_t pool_hdl, odp_pool_info_t *info)
> return 0;
> }
>
> +static inline void get_local_cache_bufs(local_cache_t *buf_cache, uint32_t idx,
> + odp_buffer_hdr_t *buf_hdr[],
> + uint32_t num)
> +{
> + uint32_t i;
> +
> + for (i = 0; i < num; i++) {
> + buf_hdr[i] = buf_cache->s.buf[idx + i];
> + odp_prefetch(buf_hdr[i]);
> + odp_prefetch_store(buf_hdr[i]);
> + }
> +}
> +
> +static void flush_cache(local_cache_t *buf_cache, struct pool_entry_s *pool)
> +{
> + uint32_t flush_count = 0;
> + uint32_t num;
> +
> + while ((num = buf_cache->s.num_buf)) {
> + odp_buffer_hdr_t *buf;
> +
> + buf = buf_cache->s.buf[num - 1];
> + ret_buf(pool, buf);
> + flush_count++;
> + buf_cache->s.num_buf--;
> + }
> +
> + odp_atomic_add_u64(&pool->poolstats.bufallocs, buf_cache->s.bufallocs);
> + odp_atomic_add_u64(&pool->poolstats.buffrees,
> + buf_cache->s.buffrees - flush_count);
> +
> + buf_cache->s.bufallocs = 0;
> + buf_cache->s.buffrees = 0;
> +}
> +
> int odp_pool_destroy(odp_pool_t pool_hdl)
> {
> uint32_t pool_id = pool_handle_to_index(pool_hdl);
> @@ -621,71 +719,207 @@ void seg_free_tail(odp_buffer_hdr_t *buf_hdr, int segcount)
> buf_hdr->size = buf_hdr->segcount * pool->s.seg_size;
> }
>
> -odp_buffer_t buffer_alloc(odp_pool_t pool_hdl, size_t size)
> +static inline int get_local_bufs(local_cache_t *buf_cache,
> + odp_buffer_hdr_t *buf_hdr[], uint32_t max_num)
> +{
> + uint32_t num_buf = buf_cache->s.num_buf;
> + uint32_t num = num_buf;
> +
> + if (odp_unlikely(num_buf == 0))
> + return 0;
> +
> + if (odp_likely(max_num < num))
> + num = max_num;
> +
> + get_local_cache_bufs(buf_cache, num_buf - num, buf_hdr, num);
> + buf_cache->s.num_buf -= num;
> + buf_cache->s.bufallocs += num;
> +
> + return num;
> +}
> +
> +static inline void ret_local_buf(local_cache_t *buf_cache, uint32_t idx,
> + odp_buffer_hdr_t *buf)
> +{
> + buf_cache->s.buf[idx] = buf;
> + buf_cache->s.num_buf++;
> + buf_cache->s.buffrees++;
> +}
> +
> +static inline void ret_local_bufs(local_cache_t *buf_cache, uint32_t idx,
> + odp_buffer_hdr_t *buf[], int num_buf)
> +{
> + int i;
> +
> + for (i = 0; i < num_buf; i++)
> + buf_cache->s.buf[idx + i] = buf[i];
> +
> + buf_cache->s.num_buf += num_buf;
> + buf_cache->s.buffrees += num_buf;
> +}
> +
> +int buffer_alloc_multi(odp_pool_t pool_hdl, size_t size,
> + odp_buffer_t buf[], int max_num)
> {
> uint32_t pool_id = pool_handle_to_index(pool_hdl);
> pool_entry_t *pool = get_pool_entry(pool_id);
> uintmax_t totsize = pool->s.headroom + size + pool->s.tailroom;
> - odp_anybuf_t *buf;
> + odp_buffer_hdr_t *buf_tbl[max_num];
> + odp_buffer_hdr_t *buf_hdr;
> + int num, i;
> + intmax_t needed;
> + void *blk;
>
> /* Reject oversized allocation requests */
> if ((pool->s.flags.unsegmented && totsize > pool->s.seg_size) ||
> (!pool->s.flags.unsegmented &&
> totsize > pool->s.seg_size * ODP_BUFFER_MAX_SEG))
> - return ODP_BUFFER_INVALID;
> + return 0;
>
> /* Try to satisfy request from the local cache */
> - buf = (odp_anybuf_t *)
> - (void *)get_local_buf(&pool->s.local_cache[local_id],
> - &pool->s, totsize);
> + num = get_local_bufs(local.cache[pool_id], buf_tbl, max_num);
>
> /* If cache is empty, satisfy request from the pool */
> - if (odp_unlikely(buf == NULL)) {
> - buf = (odp_anybuf_t *)(void *)get_buf(&pool->s);
> + if (odp_unlikely(num < max_num)) {
> + for (; num < max_num; num++) {
> + buf_hdr = get_buf(&pool->s);
>
> - if (odp_unlikely(buf == NULL))
> + if (odp_unlikely(buf_hdr == NULL))
> + goto pool_empty;
> +
> + /* Get blocks for this buffer, if pool uses
> + * application data */
> + if (buf_hdr->size < totsize) {
> + uint32_t segcount;
> +
> + needed = totsize - buf_hdr->size;
> + do {
> + blk = get_blk(&pool->s);
> + if (odp_unlikely(blk == NULL)) {
> + ret_buf(&pool->s, buf_hdr);
> + goto pool_empty;
> + }
> +
> + segcount = buf_hdr->segcount++;
> + buf_hdr->addr[segcount] = blk;
> + needed -= pool->s.seg_size;
> + } while (needed > 0);
> + buf_hdr->size = buf_hdr->segcount *
> + pool->s.seg_size;
> + }
> +
> + buf_tbl[num] = buf_hdr;
> + }
> + }
> +
> +pool_empty:
> + for (i = 0; i < num; i++) {
> + buf_hdr = buf_tbl[i];
> +
> + /* Mark buffer as allocated */
> + buf_hdr->allocator = local.thr_id;
> +
> + /* By default, buffers are not associated with
> + * an ordered queue */
> + buf_hdr->origin_qe = NULL;
> +
> + buf[i] = odp_hdr_to_buf(buf_hdr);
> +
> + /* Add more segments if buffer from local cache is too small */
> + if (odp_unlikely(buf_hdr->size < totsize)) {
> + needed = totsize - buf_hdr->size;
> + do {
> + blk = get_blk(&pool->s);
> + if (odp_unlikely(blk == NULL)) {
> + int j;
> +
> + ret_buf(&pool->s, buf_hdr);
> + buf_hdr = NULL;
> + local.cache[pool_id]->s.buffrees--;
> +
> + /* move remaining bufs up one step
> + * and update loop counters */
> + num--;
> + for (j = i; j < num; j++)
> + buf_tbl[j] = buf_tbl[j + 1];
> +
> + i--;
> + break;
> + }
> + needed -= pool->s.seg_size;
> + buf_hdr->addr[buf_hdr->segcount++] = blk;
> + buf_hdr->size = buf_hdr->segcount *
> + pool->s.seg_size;
> + } while (needed > 0);
> + }
> + }
> +
> + return num;
> +}
> +
> +odp_buffer_t buffer_alloc(odp_pool_t pool_hdl, size_t size)
> +{
> + uint32_t pool_id = pool_handle_to_index(pool_hdl);
> + pool_entry_t *pool = get_pool_entry(pool_id);
> + uintmax_t totsize = pool->s.headroom + size + pool->s.tailroom;
> + odp_buffer_hdr_t *buf_hdr;
> + intmax_t needed;
> + void *blk;
> +
> + /* Reject oversized allocation requests */
> + if ((pool->s.flags.unsegmented && totsize > pool->s.seg_size) ||
> + (!pool->s.flags.unsegmented &&
> + totsize > pool->s.seg_size * ODP_BUFFER_MAX_SEG))
> + return 0;
> +
> + /* Try to satisfy request from the local cache. If cache is empty,
> + * satisfy request from the pool */
> + if (odp_unlikely(!get_local_bufs(local.cache[pool_id], &buf_hdr, 1))) {
> + buf_hdr = get_buf(&pool->s);
> +
> + if (odp_unlikely(buf_hdr == NULL))
> return ODP_BUFFER_INVALID;
>
> /* Get blocks for this buffer, if pool uses application data */
> - if (buf->buf.size < totsize) {
> - intmax_t needed = totsize - buf->buf.size;
> + if (buf_hdr->size < totsize) {
> + needed = totsize - buf_hdr->size;
> do {
> - uint8_t *blk = get_blk(&pool->s);
> - if (blk == NULL) {
> - ret_buf(&pool->s, &buf->buf);
> + blk = get_blk(&pool->s);
> + if (odp_unlikely(blk == NULL)) {
> + ret_buf(&pool->s, buf_hdr);
> return ODP_BUFFER_INVALID;
> }
> - buf->buf.addr[buf->buf.segcount++] = blk;
> + buf_hdr->addr[buf_hdr->segcount++] = blk;
> needed -= pool->s.seg_size;
> } while (needed > 0);
> - buf->buf.size = buf->buf.segcount * pool->s.seg_size;
> + buf_hdr->size = buf_hdr->segcount * pool->s.seg_size;
> }
> }
> -
> /* Mark buffer as allocated */
> - buf->buf.allocator = local_id;
> + buf_hdr->allocator = local.thr_id;
>
> - /* By default, buffers inherit their pool's zeroization setting */
> - buf->buf.flags.zeroized = pool->s.flags.zeroized;
> + /* By default, buffers are not associated with
> + * an ordered queue */
> + buf_hdr->origin_qe = NULL;
>
> - /* By default, buffers are not associated with an ordered queue */
> - buf->buf.origin_qe = NULL;
> -
> - return odp_hdr_to_buf(&buf->buf);
> -}
> -
> -int buffer_alloc_multi(odp_pool_t pool_hdl, size_t size,
> - odp_buffer_t buf[], int num)
> -{
> - int count;
> -
> - for (count = 0; count < num; ++count) {
> - buf[count] = buffer_alloc(pool_hdl, size);
> - if (buf[count] == ODP_BUFFER_INVALID)
> - break;
> + /* Add more segments if buffer from local cache is too small */
> + if (odp_unlikely(buf_hdr->size < totsize)) {
> + needed = totsize - buf_hdr->size;
> + do {
> + blk = get_blk(&pool->s);
> + if (odp_unlikely(blk == NULL)) {
> + ret_buf(&pool->s, buf_hdr);
> + buf_hdr = NULL;
> + local.cache[pool_id]->s.buffrees--;
> + return ODP_BUFFER_INVALID;
> + }
> + buf_hdr->addr[buf_hdr->segcount++] = blk;
> + needed -= pool->s.seg_size;
> + } while (needed > 0);
> + buf_hdr->size = buf_hdr->segcount * pool->s.seg_size;
> }
>
> - return count;
> + return odp_hdr_to_buf(buf_hdr);
> }
>
> odp_buffer_t odp_buffer_alloc(odp_pool_t pool_hdl)
> @@ -701,35 +935,132 @@ int odp_buffer_alloc_multi(odp_pool_t pool_hdl, odp_buffer_t buf[], int num)
> return buffer_alloc_multi(pool_hdl, buf_size, buf, num);
> }
>
> -void odp_buffer_free(odp_buffer_t buf)
> +static void multi_pool_free(odp_buffer_hdr_t *buf_hdr[], int num_buf)
> {
> - odp_buffer_hdr_t *buf_hdr = odp_buf_to_hdr(buf);
> - pool_entry_t *pool = odp_buf_to_pool(buf_hdr);
> + uint32_t pool_id, num;
> + local_cache_t *buf_cache;
> + pool_entry_t *pool;
> + int i, j, idx;
>
> + for (i = 0; i < num_buf; i++) {
> + pool_id = pool_handle_to_index(buf_hdr[i]->pool_hdl);
> + buf_cache = local.cache[pool_id];
> + num = buf_cache->s.num_buf;
> +
> + if (num < POOL_MAX_LOCAL_BUFS) {
> + ret_local_buf(buf_cache, num, buf_hdr[i]);
> + continue;
> + }
> +
> + idx = POOL_MAX_LOCAL_BUFS - POOL_CHUNK_SIZE;
> + pool = get_pool_entry(pool_id);
> +
> + /* local cache full, return a chunk */
> + for (j = 0; j < POOL_CHUNK_SIZE; j++) {
> + odp_buffer_hdr_t *tmp;
> +
> + tmp = buf_cache->s.buf[idx + i];
> + ret_buf(&pool->s, tmp);
> + }
> +
> + num = POOL_MAX_LOCAL_BUFS - POOL_CHUNK_SIZE;
> + buf_cache->s.num_buf = num;
> + ret_local_buf(buf_cache, num, buf_hdr[i]);
> + }
> +}
> +
> +void buffer_free_multi(uint32_t pool_id,
> + const odp_buffer_t buf[], int num_free)
> +{
> + local_cache_t *buf_cache = local.cache[pool_id];
> + uint32_t num;
> + int i, idx;
> + pool_entry_t *pool;
> + odp_buffer_hdr_t *buf_hdr[num_free];
> + int multi_pool = 0;
> +
> + for (i = 0; i < num_free; i++) {
> + uint32_t id;
> +
> + buf_hdr[i] = odp_buf_to_hdr(buf[i]);
> + ODP_ASSERT(buf_hdr[i]->allocator != ODP_FREEBUF);
> + buf_hdr[i]->allocator = ODP_FREEBUF;
> + id = pool_handle_to_index(buf_hdr[i]->pool_hdl);
> + multi_pool |= (pool_id != id);
> + }
> +
> + if (odp_unlikely(multi_pool)) {
> + multi_pool_free(buf_hdr, num_free);
> + return;
> + }
> +
> + num = buf_cache->s.num_buf;
> +
> + if (odp_likely((num + num_free) < POOL_MAX_LOCAL_BUFS)) {
> + ret_local_bufs(buf_cache, num, buf_hdr, num_free);
> + return;
> + }
> +
> + pool = get_pool_entry(pool_id);
> +
> + /* Return at least one chunk into the global pool */
> + if (odp_unlikely(num_free > POOL_CHUNK_SIZE)) {
> + for (i = 0; i < num_free; i++)
> + ret_buf(&pool->s, buf_hdr[i]);
> +
> + return;
> + }
> +
> + idx = num - POOL_CHUNK_SIZE;
> + for (i = 0; i < POOL_CHUNK_SIZE; i++)
> + ret_buf(&pool->s, buf_cache->s.buf[idx + i]);
> +
> + num -= POOL_CHUNK_SIZE;
> + buf_cache->s.num_buf = num;
> + ret_local_bufs(buf_cache, num, buf_hdr, num_free);
> +}
> +
> +void buffer_free(uint32_t pool_id, const odp_buffer_t buf)
> +{
> + local_cache_t *buf_cache = local.cache[pool_id];
> + uint32_t num;
> + int i;
> + pool_entry_t *pool;
> + odp_buffer_hdr_t *buf_hdr;
> +
> + buf_hdr = odp_buf_to_hdr(buf);
> ODP_ASSERT(buf_hdr->allocator != ODP_FREEBUF);
> + buf_hdr->allocator = ODP_FREEBUF;
>
> - if (odp_unlikely(pool->s.buf_low_wm_assert || pool->s.blk_low_wm_assert))
> - ret_buf(&pool->s, buf_hdr);
> - else
> - ret_local_buf(&pool->s.local_cache[local_id], buf_hdr);
> + num = buf_cache->s.num_buf;
> +
> + if (odp_likely((num + 1) < POOL_MAX_LOCAL_BUFS)) {
> + ret_local_bufs(buf_cache, num, &buf_hdr, 1);
> + return;
> + }
> +
> + pool = get_pool_entry(pool_id);
> +
> + num -= POOL_CHUNK_SIZE;
> + for (i = 0; i < POOL_CHUNK_SIZE; i++)
> + ret_buf(&pool->s, buf_cache->s.buf[num + i]);
> +
> + buf_cache->s.num_buf = num;
> + ret_local_bufs(buf_cache, num, &buf_hdr, 1);
> +}
> +
> +void odp_buffer_free(odp_buffer_t buf)
> +{
> + uint32_t pool_id = pool_id_from_buf(buf);
> +
> + buffer_free(pool_id, buf);
> }
>
> void odp_buffer_free_multi(const odp_buffer_t buf[], int num)
> {
> - int i;
> + uint32_t pool_id = pool_id_from_buf(buf[0]);
>
> - for (i = 0; i < num; ++i)
> - odp_buffer_free(buf[i]);
> -}
> -
> -void _odp_flush_caches(void)
> -{
> - int i;
> -
> - for (i = 0; i < ODP_CONFIG_POOLS; i++) {
> - pool_entry_t *pool = get_pool_entry(i);
> - flush_cache(&pool->s.local_cache[local_id], &pool->s);
> - }
> + buffer_free_multi(pool_id, buf, num);
> }
>
> void odp_pool_print(odp_pool_t pool_hdl)
> @@ -774,7 +1105,6 @@ void odp_pool_print(odp_pool_t pool_hdl)
> pool->s.quiesced ? "quiesced" : "active");
> ODP_DBG(" pool opts %s, %s, %s\n",
> pool->s.flags.unsegmented ? "unsegmented" : "segmented",
> - pool->s.flags.zeroized ? "zeroized" : "non-zeroized",
> pool->s.flags.predefined ? "predefined" : "created");
> ODP_DBG(" pool base %p\n", pool->s.pool_base_addr);
> ODP_DBG(" pool size %zu (%zu pages)\n",
> @@ -817,10 +1147,11 @@ void odp_pool_print(odp_pool_t pool_hdl)
> ODP_DBG(" blk low wm count %lu\n", blklowmct);
> }
>
> -
> odp_pool_t odp_buffer_pool(odp_buffer_t buf)
> {
> - return odp_buf_to_hdr(buf)->pool_hdl;
> + uint32_t pool_id = pool_id_from_buf(buf);
> +
> + return pool_index_to_handle(pool_id);
> }
>
> void odp_pool_param_init(odp_pool_param_t *params)
> --
> 2.7.4
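The core idea of the patch is replacing the per-thread linked freelist with an array-backed cache that pops and pushes buffers in batches. Below is a minimal standalone sketch of that idea, not the actual ODP code: buf_hdr_t, thread_cache_t, cache_alloc_multi and cache_free_multi are illustrative names that do not appear in the sources, and CACHE_SIZE mirrors POOL_MAX_LOCAL_BUFS (4 * 32).

#include <stdint.h>

#define CACHE_SIZE 128 /* mirrors POOL_MAX_LOCAL_BUFS = 4 * 32 */

typedef struct buf_hdr buf_hdr_t; /* opaque; stands in for odp_buffer_hdr_t */

typedef struct {
	uint32_t num;               /* buffers currently cached */
	buf_hdr_t *buf[CACHE_SIZE]; /* array instead of a linked freelist */
} thread_cache_t;

/* Pop up to max_num buffers from the top of the cache; returns how many
 * were provided. The caller falls back to the shared pool for the rest. */
static inline int cache_alloc_multi(thread_cache_t *c, buf_hdr_t *out[],
				    int max_num)
{
	int num = (int)c->num < max_num ? (int)c->num : max_num;
	int i;

	for (i = 0; i < num; i++)
		out[i] = c->buf[c->num - num + i];

	c->num -= num;
	return num;
}

/* Push num buffers into the cache; returns how many fit. The caller
 * spills the remainder to the shared pool under its lock. */
static inline int cache_free_multi(thread_cache_t *c, buf_hdr_t *in[],
				   int num)
{
	int room = (int)(CACHE_SIZE - c->num);
	int n = num < room ? num : room;
	int i;

	for (i = 0; i < n; i++)
		c->buf[c->num + i] = in[i];

	c->num += n;
	return n;
}

Popping from an array touches contiguous memory, while each step of the old freelist walk chased a next pointer into a potentially cold cache line; handling N buffers per call further amortizes the remaining per-operation overhead, which is why the series also adds multi-buffer alloc/free entry points.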
Merged, Maxim.

On 07/26/16 03:15, Bill Fischofer wrote:
> For this series:
>
> Reviewed-and-tested-by: Bill Fischofer <bill.fischofer@linaro.org>
>
> On Fri, Jul 22, 2016 at 5:04 AM, Matias Elo <matias.elo@nokia.com> wrote:
>
> [...]
diff --git a/platform/linux-generic/include/odp_buffer_inlines.h b/platform/linux-generic/include/odp_buffer_inlines.h index 3f4d9fd..2b1ab42 100644 --- a/platform/linux-generic/include/odp_buffer_inlines.h +++ b/platform/linux-generic/include/odp_buffer_inlines.h @@ -56,30 +56,12 @@ static inline odp_buffer_hdr_t *odp_buf_to_hdr(odp_buffer_t buf) (pool->pool_mdata_addr + (index * ODP_CACHE_LINE_SIZE)); } -static inline uint32_t odp_buffer_refcount(odp_buffer_hdr_t *buf) +static inline uint32_t pool_id_from_buf(odp_buffer_t buf) { - return odp_atomic_load_u32(&buf->ref_count); -} + odp_buffer_bits_t handle; -static inline uint32_t odp_buffer_incr_refcount(odp_buffer_hdr_t *buf, - uint32_t val) -{ - return odp_atomic_fetch_add_u32(&buf->ref_count, val) + val; -} - -static inline uint32_t odp_buffer_decr_refcount(odp_buffer_hdr_t *buf, - uint32_t val) -{ - uint32_t tmp; - - tmp = odp_atomic_fetch_sub_u32(&buf->ref_count, val); - - if (tmp < val) { - odp_atomic_fetch_add_u32(&buf->ref_count, val - tmp); - return 0; - } else { - return tmp - val; - } + handle.handle = buf; + return handle.pool_id; } static inline odp_buffer_hdr_t *validate_buf(odp_buffer_t buf) diff --git a/platform/linux-generic/include/odp_buffer_internal.h b/platform/linux-generic/include/odp_buffer_internal.h index f21364c..7b0ef8b 100644 --- a/platform/linux-generic/include/odp_buffer_internal.h +++ b/platform/linux-generic/include/odp_buffer_internal.h @@ -114,7 +114,6 @@ struct odp_buffer_hdr_t { union { uint32_t all; struct { - uint32_t zeroized:1; /* Zeroize buf data on free */ uint32_t hdrdata:1; /* Data is in buffer hdr */ uint32_t sustain:1; /* Sustain order */ }; @@ -123,7 +122,6 @@ struct odp_buffer_hdr_t { int8_t type; /* buffer type */ odp_event_type_t event_type; /* for reuse as event */ uint32_t size; /* max data size */ - odp_atomic_u32_t ref_count; /* reference count */ odp_pool_t pool_hdl; /* buffer pool handle */ union { uint64_t buf_u64; /* user u64 */ @@ -174,6 +172,9 @@ typedef struct { odp_buffer_t buffer_alloc(odp_pool_t pool, size_t size); int buffer_alloc_multi(odp_pool_t pool_hdl, size_t size, odp_buffer_t buf[], int num); +void buffer_free(uint32_t pool_id, const odp_buffer_t buf); +void buffer_free_multi(uint32_t pool_id, + const odp_buffer_t buf[], int num_free); int seg_alloc_head(odp_buffer_hdr_t *buf_hdr, int segcount); void seg_free_head(odp_buffer_hdr_t *buf_hdr, int segcount); int seg_alloc_tail(odp_buffer_hdr_t *buf_hdr, int segcount); diff --git a/platform/linux-generic/include/odp_internal.h b/platform/linux-generic/include/odp_internal.h index d12f850..8bad450 100644 --- a/platform/linux-generic/include/odp_internal.h +++ b/platform/linux-generic/include/odp_internal.h @@ -119,8 +119,6 @@ int odp_tm_term_global(void); int _odp_int_name_tbl_init_global(void); int _odp_int_name_tbl_term_global(void); -void _odp_flush_caches(void); - int cpuinfo_parser(FILE *file, system_info_t *sysinfo); uint64_t odp_cpu_hz_current(int id); diff --git a/platform/linux-generic/include/odp_pool_internal.h b/platform/linux-generic/include/odp_pool_internal.h index 3317bd0..d6717ff 100644 --- a/platform/linux-generic/include/odp_pool_internal.h +++ b/platform/linux-generic/include/odp_pool_internal.h @@ -51,15 +51,25 @@ typedef struct _odp_buffer_pool_init_t { void *buf_init_arg; /**< Argument to be passed to buf_init() */ } _odp_buffer_pool_init_t; /**< Type of buffer initialization struct */ +#define POOL_MAX_LOCAL_CHUNKS 4 +#define POOL_CHUNK_SIZE 32 +#define POOL_MAX_LOCAL_BUFS (POOL_MAX_LOCAL_CHUNKS * 
> @@ -214,127 +224,6 @@ static inline void ret_blk(struct pool_entry_s *pool, void *block)
>  	odp_atomic_inc_u64(&pool->poolstats.blkfrees);
>  }
>
> -static inline odp_buffer_hdr_t *get_buf(struct pool_entry_s *pool)
> -{
> -	odp_buffer_hdr_t *myhead;
> -	POOL_LOCK(&pool->buf_lock);
> -
> -	myhead = pool->buf_freelist;
> -
> -	if (odp_unlikely(myhead == NULL)) {
> -		POOL_UNLOCK(&pool->buf_lock);
> -		odp_atomic_inc_u64(&pool->poolstats.bufempty);
> -	} else {
> -		pool->buf_freelist = myhead->next;
> -		POOL_UNLOCK(&pool->buf_lock);
> -		uint64_t bufcount =
> -			odp_atomic_fetch_sub_u32(&pool->bufcount, 1) - 1;
> -
> -		/* Check for low watermark condition */
> -		if (bufcount == pool->buf_low_wm && !pool->buf_low_wm_assert) {
> -			pool->buf_low_wm_assert = 1;
> -			odp_atomic_inc_u64(&pool->poolstats.buf_low_wm_count);
> -		}
> -
> -		odp_atomic_inc_u64(&pool->poolstats.bufallocs);
> -	}
> -
> -	return (void *)myhead;
> -}
> -
> -static inline void ret_buf(struct pool_entry_s *pool, odp_buffer_hdr_t *buf)
> -{
> -	if (!buf->flags.hdrdata && buf->type != ODP_EVENT_BUFFER) {
> -		while (buf->segcount > 0) {
> -			if (buffer_is_secure(buf) || pool_is_secure(pool))
> -				memset(buf->addr[buf->segcount - 1],
> -				       0, buf->segsize);
> -			ret_blk(pool, buf->addr[--buf->segcount]);
> -		}
> -		buf->size = 0;
> -	}
> -
> -	buf->allocator = ODP_FREEBUF; /* Mark buffer free */
> -	POOL_LOCK(&pool->buf_lock);
> -	buf->next = pool->buf_freelist;
> -	pool->buf_freelist = buf;
> -	POOL_UNLOCK(&pool->buf_lock);
> -
> -	uint64_t bufcount = odp_atomic_fetch_add_u32(&pool->bufcount, 1) + 1;
> -
> -	/* Check if low watermark condition should be deasserted */
> -	if (bufcount == pool->buf_high_wm && pool->buf_low_wm_assert) {
> -		pool->buf_low_wm_assert = 0;
> -		odp_atomic_inc_u64(&pool->poolstats.buf_high_wm_count);
> -	}
> -
> -	odp_atomic_inc_u64(&pool->poolstats.buffrees);
> -}
> -
> -static inline void *get_local_buf(local_cache_t *buf_cache,
> -				  struct pool_entry_s *pool,
> -				  size_t totsize)
> -{
> -	odp_buffer_hdr_t *buf = buf_cache->buf_freelist;
> -
> -	if (odp_likely(buf != NULL)) {
> -		buf_cache->buf_freelist = buf->next;
> -
> -		if (odp_unlikely(buf->size < totsize)) {
> -			intmax_t needed = totsize - buf->size;
> -
> -			do {
> -				void *blk = get_blk(pool);
> -				if (odp_unlikely(blk == NULL)) {
> -					ret_buf(pool, buf);
> -					buf_cache->buffrees--;
> -					return NULL;
> -				}
> -				buf->addr[buf->segcount++] = blk;
> -				needed -= pool->seg_size;
> -			} while (needed > 0);
> -
> -			buf->size = buf->segcount * pool->seg_size;
> -		}
> -
> -		buf_cache->bufallocs++;
> -	}
> -
> -	return buf;
> -}
> -
> -static inline void ret_local_buf(local_cache_t *buf_cache,
> -				 odp_buffer_hdr_t *buf)
> -{
> -	buf->allocator = ODP_FREEBUF;
> -	buf->next = buf_cache->buf_freelist;
> -	buf_cache->buf_freelist = buf;
> -
> -	buf_cache->buffrees++;
> -}
> -
> -static inline void flush_cache(local_cache_t *buf_cache,
> -			       struct pool_entry_s *pool)
> -{
> -	odp_buffer_hdr_t *buf = buf_cache->buf_freelist;
> -	uint32_t flush_count = 0;
> -
> -	while (buf != NULL) {
> -		odp_buffer_hdr_t *next = buf->next;
> -
> -		ret_buf(pool, buf);
> -		buf = next;
> -		flush_count++;
> -	}
> -
> -	odp_atomic_add_u64(&pool->poolstats.bufallocs, buf_cache->bufallocs);
> -	odp_atomic_add_u64(&pool->poolstats.buffrees,
> -			   buf_cache->buffrees - flush_count);
> -
> -	buf_cache->buf_freelist = NULL;
> -	buf_cache->bufallocs = 0;
> -	buf_cache->buffrees = 0;
> -}
> -
>  static inline odp_pool_t pool_index_to_handle(uint32_t pool_id)
>  {
>  	return _odp_cast_scalar(odp_pool_t, pool_id);
> diff --git a/platform/linux-generic/odp_buffer.c b/platform/linux-generic/odp_buffer.c
> index e7e4d58..ce2fdba 100644
> --- a/platform/linux-generic/odp_buffer.c
> +++ b/platform/linux-generic/odp_buffer.c
> @@ -67,9 +67,6 @@ int odp_buffer_snprint(char *str, uint32_t n, odp_buffer_t buf)
>  	len += snprintf(&str[len], n-len,
>  			" size %" PRIu32 "\n", hdr->size);
>  	len += snprintf(&str[len], n-len,
> -			" ref_count %" PRIu32 "\n",
> -			odp_atomic_load_u32(&hdr->ref_count));
> -	len += snprintf(&str[len], n-len,
>  			" type %i\n", hdr->type);
>
>  	return len;
> diff --git a/platform/linux-generic/odp_packet.c b/platform/linux-generic/odp_packet.c
> index 0e319d2..474fa81 100644
> --- a/platform/linux-generic/odp_packet.c
> +++ b/platform/linux-generic/odp_packet.c
> @@ -972,10 +972,7 @@ int _odp_packet_copy_md_to_packet(odp_packet_t srcpkt, odp_packet_t dstpkt)
>  	       srchdr->buf_hdr.uarea_size ?
>  	       dsthdr->buf_hdr.uarea_size :
>  	       srchdr->buf_hdr.uarea_size);
> -	odp_atomic_store_u32(
> -		&dsthdr->buf_hdr.ref_count,
> -		odp_atomic_load_u32(
> -			&srchdr->buf_hdr.ref_count));
> +
>  	copy_packet_parser_metadata(srchdr, dsthdr);
>
>  	/* Metadata copied, but return indication of whether the packet
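Dropping ref_count also means the allocator field is now the only state
that distinguishes a free buffer from a live one; the ODP_ASSERT in
buffer_free_multi() later in the patch relies on exactly that. A toy
version of that double-free guard, names hypothetical:

    #include <assert.h>

    #define TOY_FREEBUF (-1) /* stand-in for the real ODP_FREEBUF marker */

    struct toy_hdr {
        int allocator; /* owning thread id, or TOY_FREEBUF */
    };

    static void toy_mark_free(struct toy_hdr *hdr)
    {
        /* a second free of the same buffer trips this in debug builds */
        assert(hdr->allocator != TOY_FREEBUF);
        hdr->allocator = TOY_FREEBUF;
    }

Since the patch can remove the refcount wholesale, presumably nothing in
linux-generic actually depended on it.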
> diff --git a/platform/linux-generic/odp_pool.c b/platform/linux-generic/odp_pool.c
> index 419f03f..0a427ed 100644
> --- a/platform/linux-generic/odp_pool.c
> +++ b/platform/linux-generic/odp_pool.c
> @@ -57,8 +57,15 @@ static const char SHM_DEFAULT_NAME[] = "odp_buffer_pools";
>  /* Pool entry pointers (for inlining) */
>  void *pool_entry_ptr[ODP_CONFIG_POOLS];
>
> -/* Cache thread id locally for local cache performance */
> -static __thread int local_id;
> +/* Thread local variables */
> +typedef struct pool_local_t {
> +	local_cache_t *cache[ODP_CONFIG_POOLS];
> +	int thr_id;
> +} pool_local_t;
> +
> +static __thread pool_local_t local;
> +
> +static void flush_cache(local_cache_t *buf_cache, struct pool_entry_s *pool);
>
>  int odp_pool_init_global(void)
>  {
> @@ -111,7 +118,19 @@ int odp_pool_init_global(void)
>
>  int odp_pool_init_local(void)
>  {
> -	local_id = odp_thread_id();
> +	pool_entry_t *pool;
> +	int i;
> +	int thr_id = odp_thread_id();
> +
> +	memset(&local, 0, sizeof(pool_local_t));
> +
> +	for (i = 0; i < ODP_CONFIG_POOLS; i++) {
> +		pool = get_pool_entry(i);
> +		local.cache[i] = &pool->s.local_cache[thr_id];
> +		local.cache[i]->s.num_buf = 0;
> +	}
> +
> +	local.thr_id = thr_id;
>
>  	return 0;
>  }
> @@ -144,7 +163,14 @@ int odp_pool_term_global(void)
>
>  int odp_pool_term_local(void)
>  {
> -	_odp_flush_caches();
> +	int i;
> +
> +	for (i = 0; i < ODP_CONFIG_POOLS; i++) {
> +		pool_entry_t *pool = get_pool_entry(i);
> +
> +		flush_cache(local.cache[i], &pool->s);
> +	}
> +
>  	return 0;
>  }
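Resolving the cache pointers once per thread in odp_pool_init_local() is
a nice touch: the hot path shrinks to a single thread-local array load
instead of re-deriving &pool->s.local_cache[thr_id] on every call.
Roughly this pattern (self-contained sketch, my names):

    #define NUM_POOLS   16
    #define NUM_THREADS 128

    struct toy_cache { int num; };

    /* shared pool table, one cache slot per (pool, thread) */
    static struct toy_cache caches[NUM_POOLS][NUM_THREADS];

    /* resolved once per thread, as odp_pool_init_local() now does */
    static __thread struct toy_cache *my_cache[NUM_POOLS];

    static void toy_init_local(int thr_id)
    {
        int i;

        for (i = 0; i < NUM_POOLS; i++)
            my_cache[i] = &caches[i][thr_id];
    }

    /* fast path: one TLS load plus an index, no thread id lookup */
    static struct toy_cache *toy_cache_of(int pool_id)
    {
        return my_cache[pool_id];
    }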
> @@ -179,10 +205,53 @@ int odp_pool_capability(odp_pool_capability_t *capa)
>  	return 0;
>  }
>
> -/**
> +static inline odp_buffer_hdr_t *get_buf(struct pool_entry_s *pool)
> +{
> +	odp_buffer_hdr_t *myhead;
> +
> +	POOL_LOCK(&pool->buf_lock);
> +
> +	myhead = pool->buf_freelist;
> +
> +	if (odp_unlikely(myhead == NULL)) {
> +		POOL_UNLOCK(&pool->buf_lock);
> +		odp_atomic_inc_u64(&pool->poolstats.bufempty);
> +	} else {
> +		pool->buf_freelist = myhead->next;
> +		POOL_UNLOCK(&pool->buf_lock);
> +
> +		odp_atomic_fetch_sub_u32(&pool->bufcount, 1);
> +		odp_atomic_inc_u64(&pool->poolstats.bufallocs);
> +	}
> +
> +	return (void *)myhead;
> +}
> +
> +static inline void ret_buf(struct pool_entry_s *pool, odp_buffer_hdr_t *buf)
> +{
> +	if (!buf->flags.hdrdata && buf->type != ODP_EVENT_BUFFER) {
> +		while (buf->segcount > 0) {
> +			if (buffer_is_secure(buf) || pool_is_secure(pool))
> +				memset(buf->addr[buf->segcount - 1],
> +				       0, buf->segsize);
> +			ret_blk(pool, buf->addr[--buf->segcount]);
> +		}
> +		buf->size = 0;
> +	}
> +
> +	buf->allocator = ODP_FREEBUF; /* Mark buffer free */
> +	POOL_LOCK(&pool->buf_lock);
> +	buf->next = pool->buf_freelist;
> +	pool->buf_freelist = buf;
> +	POOL_UNLOCK(&pool->buf_lock);
> +
> +	odp_atomic_fetch_add_u32(&pool->bufcount, 1);
> +	odp_atomic_inc_u64(&pool->poolstats.buffrees);
> +}
> +
> +/*
>   * Pool creation
>   */
> -
>  odp_pool_t _pool_create(const char *name,
>  			odp_pool_param_t *params,
>  			uint32_t shmflags)
> @@ -208,9 +277,6 @@ odp_pool_t _pool_create(const char *name,
>  	/* Restriction for v1.0: All non-packet buffers are unsegmented */
>  	int unseg = 1;
>
> -	/* Restriction for v1.0: No zeroization support */
> -	const int zeroized = 0;
> -
>  	uint32_t blk_size, buf_stride, buf_num, blk_num, seg_len = 0;
>  	uint32_t buf_align =
>  		params->type == ODP_POOL_BUFFER ? params->buf.align : 0;
> @@ -350,7 +416,6 @@ odp_pool_t _pool_create(const char *name,
>  	POOL_UNLOCK(&pool->s.lock);
>
>  	pool->s.flags.unsegmented = unseg;
> -	pool->s.flags.zeroized = zeroized;
>  	pool->s.seg_size = unseg ? blk_size : seg_len;
>  	pool->s.blk_size = blk_size;
>
> @@ -383,9 +448,7 @@ odp_pool_t _pool_create(const char *name,
>  		/* Initialize buffer metadata */
>  		tmp->allocator = ODP_FREEBUF;
>  		tmp->flags.all = 0;
> -		tmp->flags.zeroized = zeroized;
>  		tmp->size = 0;
> -		odp_atomic_init_u32(&tmp->ref_count, 0);
>  		tmp->type = params->type;
>  		tmp->event_type = params->type;
>  		tmp->pool_hdl = pool->s.pool_hdl;
> @@ -503,6 +566,41 @@ int odp_pool_info(odp_pool_t pool_hdl, odp_pool_info_t *info)
>  	return 0;
>  }
>
> +static inline void get_local_cache_bufs(local_cache_t *buf_cache, uint32_t idx,
> +					odp_buffer_hdr_t *buf_hdr[],
> +					uint32_t num)
> +{
> +	uint32_t i;
> +
> +	for (i = 0; i < num; i++) {
> +		buf_hdr[i] = buf_cache->s.buf[idx + i];
> +		odp_prefetch(buf_hdr[i]);
> +		odp_prefetch_store(buf_hdr[i]);
> +	}
> +}
> +
> +static void flush_cache(local_cache_t *buf_cache, struct pool_entry_s *pool)
> +{
> +	uint32_t flush_count = 0;
> +	uint32_t num;
> +
> +	while ((num = buf_cache->s.num_buf)) {
> +		odp_buffer_hdr_t *buf;
> +
> +		buf = buf_cache->s.buf[num - 1];
> +		ret_buf(pool, buf);
> +		flush_count++;
> +		buf_cache->s.num_buf--;
> +	}
> +
> +	odp_atomic_add_u64(&pool->poolstats.bufallocs, buf_cache->s.bufallocs);
> +	odp_atomic_add_u64(&pool->poolstats.buffrees,
> +			   buf_cache->s.buffrees - flush_count);
> +
> +	buf_cache->s.bufallocs = 0;
> +	buf_cache->s.buffrees = 0;
> +}
> +
>  int odp_pool_destroy(odp_pool_t pool_hdl)
>  {
>  	uint32_t pool_id = pool_handle_to_index(pool_hdl);
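The stats merge in flush_cache() deserves a comment in the code, since
the "- flush_count" is subtle: every ret_buf() in the flush loop already
bumps the global buffrees, so the local counter has to be merged minus
the flushed buffers or they would be counted twice. Worked through with
one thread and one pool: the thread allocates 10 buffers, frees 7 into
its local cache and then terminates. At flush time bufallocs = 10,
buffrees = 7, num_buf = 7; the loop calls ret_buf() seven times (global
buffrees += 7), then the merge adds bufallocs = 10 and
buffrees += 7 - 7 = 0. The global view ends at 10 allocs and 7 frees
with 3 buffers still owned by the application, which matches reality;
without the correction it would read 14 frees.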
> @@ -621,71 +719,207 @@ void seg_free_tail(odp_buffer_hdr_t *buf_hdr, int segcount)
>  	buf_hdr->size = buf_hdr->segcount * pool->s.seg_size;
>  }
>
> -odp_buffer_t buffer_alloc(odp_pool_t pool_hdl, size_t size)
> +static inline int get_local_bufs(local_cache_t *buf_cache,
> +				 odp_buffer_hdr_t *buf_hdr[], uint32_t max_num)
> +{
> +	uint32_t num_buf = buf_cache->s.num_buf;
> +	uint32_t num = num_buf;
> +
> +	if (odp_unlikely(num_buf == 0))
> +		return 0;
> +
> +	if (odp_likely(max_num < num))
> +		num = max_num;
> +
> +	get_local_cache_bufs(buf_cache, num_buf - num, buf_hdr, num);
> +	buf_cache->s.num_buf -= num;
> +	buf_cache->s.bufallocs += num;
> +
> +	return num;
> +}
> +
> +static inline void ret_local_buf(local_cache_t *buf_cache, uint32_t idx,
> +				 odp_buffer_hdr_t *buf)
> +{
> +	buf_cache->s.buf[idx] = buf;
> +	buf_cache->s.num_buf++;
> +	buf_cache->s.buffrees++;
> +}
> +
> +static inline void ret_local_bufs(local_cache_t *buf_cache, uint32_t idx,
> +				  odp_buffer_hdr_t *buf[], int num_buf)
> +{
> +	int i;
> +
> +	for (i = 0; i < num_buf; i++)
> +		buf_cache->s.buf[idx + i] = buf[i];
> +
> +	buf_cache->s.num_buf += num_buf;
> +	buf_cache->s.buffrees += num_buf;
> +}
> +
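One property worth stating: since get_local_cache_bufs() copies from
index num_buf - num upwards, get_local_bufs() always hands out the top
of the stack, i.e. the most recently freed and therefore cache-hottest
headers, while the paired prefetch/prefetch_store hides the header miss
for the caller that is about to write metadata. A tiny trace of my
reading:

    /* cache: buf[] = { A, B, C, D }, num_buf = 4
     * get_local_bufs(cache, tbl, 2)
     *   -> tbl = { C, D }  (top of the stack, hottest)
     *   -> num_buf = 2, bufallocs += 2
     */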
> +int buffer_alloc_multi(odp_pool_t pool_hdl, size_t size,
> +		       odp_buffer_t buf[], int max_num)
>  {
>  	uint32_t pool_id = pool_handle_to_index(pool_hdl);
>  	pool_entry_t *pool = get_pool_entry(pool_id);
>  	uintmax_t totsize = pool->s.headroom + size + pool->s.tailroom;
> -	odp_anybuf_t *buf;
> +	odp_buffer_hdr_t *buf_tbl[max_num];
> +	odp_buffer_hdr_t *buf_hdr;
> +	int num, i;
> +	intmax_t needed;
> +	void *blk;
>
>  	/* Reject oversized allocation requests */
>  	if ((pool->s.flags.unsegmented && totsize > pool->s.seg_size) ||
>  	    (!pool->s.flags.unsegmented &&
>  	     totsize > pool->s.seg_size * ODP_BUFFER_MAX_SEG))
> -		return ODP_BUFFER_INVALID;
> +		return 0;
>
>  	/* Try to satisfy request from the local cache */
> -	buf = (odp_anybuf_t *)
> -		(void *)get_local_buf(&pool->s.local_cache[local_id],
> -				      &pool->s, totsize);
> +	num = get_local_bufs(local.cache[pool_id], buf_tbl, max_num);
>
>  	/* If cache is empty, satisfy request from the pool */
> -	if (odp_unlikely(buf == NULL)) {
> -		buf = (odp_anybuf_t *)(void *)get_buf(&pool->s);
> +	if (odp_unlikely(num < max_num)) {
> +		for (; num < max_num; num++) {
> +			buf_hdr = get_buf(&pool->s);
>
> -		if (odp_unlikely(buf == NULL))
> +			if (odp_unlikely(buf_hdr == NULL))
> +				goto pool_empty;
> +
> +			/* Get blocks for this buffer, if pool uses
> +			 * application data */
> +			if (buf_hdr->size < totsize) {
> +				uint32_t segcount;
> +
> +				needed = totsize - buf_hdr->size;
> +				do {
> +					blk = get_blk(&pool->s);
> +					if (odp_unlikely(blk == NULL)) {
> +						ret_buf(&pool->s, buf_hdr);
> +						goto pool_empty;
> +					}
> +
> +					segcount = buf_hdr->segcount++;
> +					buf_hdr->addr[segcount] = blk;
> +					needed -= pool->s.seg_size;
> +				} while (needed > 0);
> +				buf_hdr->size = buf_hdr->segcount *
> +						pool->s.seg_size;
> +			}
> +
> +			buf_tbl[num] = buf_hdr;
> +		}
> +	}
> +
> +pool_empty:
> +	for (i = 0; i < num; i++) {
> +		buf_hdr = buf_tbl[i];
> +
> +		/* Mark buffer as allocated */
> +		buf_hdr->allocator = local.thr_id;
> +
> +		/* By default, buffers are not associated with
> +		 * an ordered queue */
> +		buf_hdr->origin_qe = NULL;
> +
> +		buf[i] = odp_hdr_to_buf(buf_hdr);
> +
> +		/* Add more segments if buffer from local cache is too small */
> +		if (odp_unlikely(buf_hdr->size < totsize)) {
> +			needed = totsize - buf_hdr->size;
> +			do {
> +				blk = get_blk(&pool->s);
> +				if (odp_unlikely(blk == NULL)) {
> +					int j;
> +
> +					ret_buf(&pool->s, buf_hdr);
> +					buf_hdr = NULL;
> +					local.cache[pool_id]->s.buffrees--;
> +
> +					/* move remaining bufs up one step
> +					 * and update loop counters */
> +					num--;
> +					for (j = i; j < num; j++)
> +						buf_tbl[j] = buf_tbl[j + 1];
> +
> +					i--;
> +					break;
> +				}
> +				needed -= pool->s.seg_size;
> +				buf_hdr->addr[buf_hdr->segcount++] = blk;
> +				buf_hdr->size = buf_hdr->segcount *
> +						pool->s.seg_size;
> +			} while (needed > 0);
> +		}
> +	}
> +
> +	return num;
> +}
> +
> +odp_buffer_t buffer_alloc(odp_pool_t pool_hdl, size_t size)
> +{
> +	uint32_t pool_id = pool_handle_to_index(pool_hdl);
> +	pool_entry_t *pool = get_pool_entry(pool_id);
> +	uintmax_t totsize = pool->s.headroom + size + pool->s.tailroom;
> +	odp_buffer_hdr_t *buf_hdr;
> +	intmax_t needed;
> +	void *blk;
> +
> +	/* Reject oversized allocation requests */
> +	if ((pool->s.flags.unsegmented && totsize > pool->s.seg_size) ||
> +	    (!pool->s.flags.unsegmented &&
> +	     totsize > pool->s.seg_size * ODP_BUFFER_MAX_SEG))
> +		return ODP_BUFFER_INVALID;
> +
> +	/* Try to satisfy request from the local cache. If cache is empty,
> +	 * satisfy request from the pool */
> +	if (odp_unlikely(!get_local_bufs(local.cache[pool_id], &buf_hdr, 1))) {
> +		buf_hdr = get_buf(&pool->s);
> +
> +		if (odp_unlikely(buf_hdr == NULL))
>  			return ODP_BUFFER_INVALID;
>
>  		/* Get blocks for this buffer, if pool uses application data */
> -		if (buf->buf.size < totsize) {
> -			intmax_t needed = totsize - buf->buf.size;
> +		if (buf_hdr->size < totsize) {
> +			needed = totsize - buf_hdr->size;
>  			do {
> -				uint8_t *blk = get_blk(&pool->s);
> -				if (blk == NULL) {
> -					ret_buf(&pool->s, &buf->buf);
> +				blk = get_blk(&pool->s);
> +				if (odp_unlikely(blk == NULL)) {
> +					ret_buf(&pool->s, buf_hdr);
>  					return ODP_BUFFER_INVALID;
>  				}
> -				buf->buf.addr[buf->buf.segcount++] = blk;
> +				buf_hdr->addr[buf_hdr->segcount++] = blk;
>  				needed -= pool->s.seg_size;
>  			} while (needed > 0);
> -			buf->buf.size = buf->buf.segcount * pool->s.seg_size;
> +			buf_hdr->size = buf_hdr->segcount * pool->s.seg_size;
>  		}
>  	}
>
> -	/* Mark buffer as allocated */
> -	buf->buf.allocator = local_id;
> +	buf_hdr->allocator = local.thr_id;
>
> -	/* By default, buffers inherit their pool's zeroization setting */
> -	buf->buf.flags.zeroized = pool->s.flags.zeroized;
> +	/* By default, buffers are not associated with
> +	 * an ordered queue */
> +	buf_hdr->origin_qe = NULL;
>
> -	/* By default, buffers are not associated with an ordered queue */
> -	buf->buf.origin_qe = NULL;
> -
> -	return odp_hdr_to_buf(&buf->buf);
> -}
> -
> -int buffer_alloc_multi(odp_pool_t pool_hdl, size_t size,
> -		       odp_buffer_t buf[], int num)
> -{
> -	int count;
> -
> -	for (count = 0; count < num; ++count) {
> -		buf[count] = buffer_alloc(pool_hdl, size);
> -		if (buf[count] == ODP_BUFFER_INVALID)
> -			break;
> +	/* Add more segments if buffer from local cache is too small */
> +	if (odp_unlikely(buf_hdr->size < totsize)) {
> +		needed = totsize - buf_hdr->size;
> +		do {
> +			blk = get_blk(&pool->s);
> +			if (odp_unlikely(blk == NULL)) {
> +				ret_buf(&pool->s, buf_hdr);
> +				buf_hdr = NULL;
> +				local.cache[pool_id]->s.buffrees--;
> +				return ODP_BUFFER_INVALID;
> +			}
> +			buf_hdr->addr[buf_hdr->segcount++] = blk;
> +			needed -= pool->s.seg_size;
> +		} while (needed > 0);
> +		buf_hdr->size = buf_hdr->segcount * pool->s.seg_size;
>  	}
>
> -	return count;
> +	return odp_hdr_to_buf(buf_hdr);
>  }
>
>  odp_buffer_t odp_buffer_alloc(odp_pool_t pool_hdl)
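Since the whole point of the series is the burst path, here is roughly
how I exercised it while testing (sketch only; BURST and stress_bursts
are my names, error handling trimmed, "pool" created beforehand with
ODP_POOL_BUFFER params):

    #include <odp.h>

    #define BURST 32

    static void stress_bursts(odp_pool_t pool, int rounds)
    {
        odp_buffer_t buf[BURST];
        int i, num;

        for (i = 0; i < rounds; i++) {
            /* may return fewer than BURST if the pool runs dry */
            num = odp_buffer_alloc_multi(pool, buf, BURST);

            if (num > 0)
                odp_buffer_free_multi(buf, num);
        }
    }

After the first round trip the 32-buffer bursts should be served and
absorbed entirely by the local cache, so the pool lock stays out of the
picture. Returning a partial burst instead of failing the whole request
is also the behavior the pktio path wants.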
> @@ -701,35 +935,132 @@ int odp_buffer_alloc_multi(odp_pool_t pool_hdl, odp_buffer_t buf[], int num)
>  	return buffer_alloc_multi(pool_hdl, buf_size, buf, num);
>  }
>
> -void odp_buffer_free(odp_buffer_t buf)
> +static void multi_pool_free(odp_buffer_hdr_t *buf_hdr[], int num_buf)
>  {
> -	odp_buffer_hdr_t *buf_hdr = odp_buf_to_hdr(buf);
> -	pool_entry_t *pool = odp_buf_to_pool(buf_hdr);
> +	uint32_t pool_id, num;
> +	local_cache_t *buf_cache;
> +	pool_entry_t *pool;
> +	int i, j, idx;
>
> +	for (i = 0; i < num_buf; i++) {
> +		pool_id = pool_handle_to_index(buf_hdr[i]->pool_hdl);
> +		buf_cache = local.cache[pool_id];
> +		num = buf_cache->s.num_buf;
> +
> +		if (num < POOL_MAX_LOCAL_BUFS) {
> +			ret_local_buf(buf_cache, num, buf_hdr[i]);
> +			continue;
> +		}
> +
> +		idx = POOL_MAX_LOCAL_BUFS - POOL_CHUNK_SIZE;
> +		pool = get_pool_entry(pool_id);
> +
> +		/* local cache full, return a chunk */
> +		for (j = 0; j < POOL_CHUNK_SIZE; j++) {
> +			odp_buffer_hdr_t *tmp;
> +
> +			tmp = buf_cache->s.buf[idx + j];
> +			ret_buf(&pool->s, tmp);
> +		}
> +
> +		num = POOL_MAX_LOCAL_BUFS - POOL_CHUNK_SIZE;
> +		buf_cache->s.num_buf = num;
> +		ret_local_buf(buf_cache, num, buf_hdr[i]);
> +	}
> +}
> +
> +void buffer_free_multi(uint32_t pool_id,
> +		       const odp_buffer_t buf[], int num_free)
> +{
> +	local_cache_t *buf_cache = local.cache[pool_id];
> +	uint32_t num;
> +	int i, idx;
> +	pool_entry_t *pool;
> +	odp_buffer_hdr_t *buf_hdr[num_free];
> +	int multi_pool = 0;
> +
> +	for (i = 0; i < num_free; i++) {
> +		uint32_t id;
> +
> +		buf_hdr[i] = odp_buf_to_hdr(buf[i]);
> +
> +		ODP_ASSERT(buf_hdr[i]->allocator != ODP_FREEBUF);
> +		buf_hdr[i]->allocator = ODP_FREEBUF;
> +
> +		id = pool_handle_to_index(buf_hdr[i]->pool_hdl);
> +		multi_pool |= (pool_id != id);
> +	}
> +
> +	if (odp_unlikely(multi_pool)) {
> +		multi_pool_free(buf_hdr, num_free);
> +		return;
> +	}
> +
> +	num = buf_cache->s.num_buf;
> +
> +	if (odp_likely((num + num_free) < POOL_MAX_LOCAL_BUFS)) {
> +		ret_local_bufs(buf_cache, num, buf_hdr, num_free);
> +		return;
> +	}
> +
> +	pool = get_pool_entry(pool_id);
> +
> +	/* Return at least one chunk into the global pool */
> +	if (odp_unlikely(num_free > POOL_CHUNK_SIZE)) {
> +		for (i = 0; i < num_free; i++)
> +			ret_buf(&pool->s, buf_hdr[i]);
> +
> +		return;
> +	}
> +
> +	idx = num - POOL_CHUNK_SIZE;
> +	for (i = 0; i < POOL_CHUNK_SIZE; i++)
> +		ret_buf(&pool->s, buf_cache->s.buf[idx + i]);
> +
> +	num -= POOL_CHUNK_SIZE;
> +	buf_cache->s.num_buf = num;
> +	ret_local_bufs(buf_cache, num, buf_hdr, num_free);
> +}
> +
> +void buffer_free(uint32_t pool_id, const odp_buffer_t buf)
> +{
> +	local_cache_t *buf_cache = local.cache[pool_id];
> +	uint32_t num;
> +	int i;
> +	pool_entry_t *pool;
> +	odp_buffer_hdr_t *buf_hdr;
> +
> +	buf_hdr = odp_buf_to_hdr(buf);
> +
>  	ODP_ASSERT(buf_hdr->allocator != ODP_FREEBUF);
> +	buf_hdr->allocator = ODP_FREEBUF;
>
> -	if (odp_unlikely(pool->s.buf_low_wm_assert || pool->s.blk_low_wm_assert))
> -		ret_buf(&pool->s, buf_hdr);
> -	else
> -		ret_local_buf(&pool->s.local_cache[local_id], buf_hdr);
> +	num = buf_cache->s.num_buf;
> +
> +	if (odp_likely((num + 1) < POOL_MAX_LOCAL_BUFS)) {
> +		ret_local_bufs(buf_cache, num, &buf_hdr, 1);
> +		return;
> +	}
> +
> +	pool = get_pool_entry(pool_id);
> +
> +	num -= POOL_CHUNK_SIZE;
> +	for (i = 0; i < POOL_CHUNK_SIZE; i++)
> +		ret_buf(&pool->s, buf_cache->s.buf[num + i]);
> +
> +	buf_cache->s.num_buf = num;
> +	ret_local_bufs(buf_cache, num, &buf_hdr, 1);
> +}
> +
> +void odp_buffer_free(odp_buffer_t buf)
> +{
> +	uint32_t pool_id = pool_id_from_buf(buf);
> +
> +	buffer_free(pool_id, buf);
>  }
>
>  void odp_buffer_free_multi(const odp_buffer_t buf[], int num)
>  {
> -	int i;
> +	uint32_t pool_id = pool_id_from_buf(buf[0]);
>
> -	for (i = 0; i < num; ++i)
> -		odp_buffer_free(buf[i]);
> -}
> -
> -void _odp_flush_caches(void)
> -{
> -	int i;
> -
> -	for (i = 0; i < ODP_CONFIG_POOLS; i++) {
> -		pool_entry_t *pool = get_pool_entry(i);
> -		flush_cache(&pool->s.local_cache[local_id], &pool->s);
> -	}
> +	buffer_free_multi(pool_id, buf, num);
>  }
>
>  void odp_pool_print(odp_pool_t pool_hdl)
> @@ -774,7 +1105,6 @@ void odp_pool_print(odp_pool_t pool_hdl)
>  		pool->s.quiesced ? "quiesced" : "active");
> -	ODP_DBG(" pool opts %s, %s, %s\n",
> +	ODP_DBG(" pool opts %s, %s\n",
>  		pool->s.flags.unsegmented ? "unsegmented" : "segmented",
> -		pool->s.flags.zeroized ? "zeroized" : "non-zeroized",
>  		pool->s.flags.predefined ? "predefined" : "created");
>  	ODP_DBG(" pool base %p\n", pool->s.pool_base_addr);
>  	ODP_DBG(" pool size %zu (%zu pages)\n",
> @@ -817,10 +1147,11 @@ void odp_pool_print(odp_pool_t pool_hdl)
>  		ODP_DBG(" blk low wm count %lu\n", blklowmct);
>  }
>
> -
>  odp_pool_t odp_buffer_pool(odp_buffer_t buf)
>  {
> -	return odp_buf_to_hdr(buf)->pool_hdl;
> +	uint32_t pool_id = pool_id_from_buf(buf);
> +
> +	return pool_index_to_handle(pool_id);
>  }
>
>  void odp_pool_param_init(odp_pool_param_t *params)
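Last, the overflow policy on the free path gives a useful hysteresis.
With POOL_CHUNK_SIZE = 32 and POOL_MAX_LOCAL_BUFS = 128, tracing a full
cache through buffer_free_multi():

    /* num_buf = 127, application frees a burst of 4
     * 127 + 4 >= 128        -> overflow path
     * 4 <= 32               -> spill exactly one chunk
     * ret_buf() x32 on buf[95..126], num_buf = 95
     * append the 4 freed bufs -> num_buf = 99
     */

So a full cache drops back to roughly three chunks instead of bouncing
on the 128 limit, while bursts larger than a chunk (num_free > 32)
bypass the cache and go straight to the global freelist. Steady-state
traffic should therefore stay on the local fast path the whole time.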