Message ID | 20210312162127.239795-1-alobakin@pm.me |
---|---|
Headers | show |
Series | gro: micro-optimize dev_gro_receive() | expand |
On Fri, Mar 12, 2021 at 5:22 PM Alexander Lobakin <alobakin@pm.me> wrote: > > GRO bucket index doesn't change through the entire function. > Store a pointer to the corresponding bucket on stack once and use > it later instead of dereferencing again and again. > > Signed-off-by: Alexander Lobakin <alobakin@pm.me> > --- > net/core/dev.c | 9 +++++---- > 1 file changed, 5 insertions(+), 4 deletions(-) > > diff --git a/net/core/dev.c b/net/core/dev.c > index adc42ba7ffd8..ee124aecb8a2 100644 > --- a/net/core/dev.c > +++ b/net/core/dev.c > @@ -5957,6 +5957,7 @@ static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head) > static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) > { > u32 bucket = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1); > + struct gro_list *gro_list = &napi->gro_hash[bucket]; > struct list_head *head = &offload_base; > struct packet_offload *ptype; > __be16 type = skb->protocol; > @@ -6024,7 +6025,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff > if (pp) { > skb_list_del_init(pp); > napi_gro_complete(napi, pp); > - napi->gro_hash[bucket].count--; > + gro_list->count--; > } > > if (same_flow) > @@ -6033,10 +6034,10 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff > if (NAPI_GRO_CB(skb)->flush) > goto normal; > > - if (unlikely(napi->gro_hash[bucket].count >= MAX_GRO_SKBS)) { > + if (unlikely(gro_list->count >= MAX_GRO_SKBS)) { > gro_flush_oldest(napi, gro_head); > } else { > - napi->gro_hash[bucket].count++; > + gro_list->count++; > } > NAPI_GRO_CB(skb)->count = 1; > NAPI_GRO_CB(skb)->age = jiffies; > @@ -6050,7 +6051,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff > if (grow > 0) > gro_pull_from_frag0(skb, grow); > ok: > - if (napi->gro_hash[bucket].count) { > + if (gro_list->count) { > if (!test_bit(bucket, &napi->gro_bitmask)) > __set_bit(bucket, &napi->gro_bitmask); > } else if 
(test_bit(bucket, &napi->gro_bitmask)) { > -- > 2.30.2 > > This adds more register pressure; do you have precise measurements to confirm this change is a win? Presumably the compiler should be able to optimize the code just fine, since it can see @bucket does not change.
From: Eric Dumazet <edumazet@google.com> Date: Fri, 12 Mar 2021 17:47:04 +0100 > On Fri, Mar 12, 2021 at 5:22 PM Alexander Lobakin <alobakin@pm.me> wrote: > > > > GRO bucket index doesn't change through the entire function. > > Store a pointer to the corresponding bucket on stack once and use > > it later instead of dereferencing again and again. > > > > Signed-off-by: Alexander Lobakin <alobakin@pm.me> > > --- > > net/core/dev.c | 9 +++++---- > > 1 file changed, 5 insertions(+), 4 deletions(-) > > > > diff --git a/net/core/dev.c b/net/core/dev.c > > index adc42ba7ffd8..ee124aecb8a2 100644 > > --- a/net/core/dev.c > > +++ b/net/core/dev.c > > @@ -5957,6 +5957,7 @@ static void gro_flush_oldest(struct napi_struct *napi, struct list_head *head) > > static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) > > { > > u32 bucket = skb_get_hash_raw(skb) & (GRO_HASH_BUCKETS - 1); > > + struct gro_list *gro_list = &napi->gro_hash[bucket]; > > struct list_head *head = &offload_base; > > struct packet_offload *ptype; > > __be16 type = skb->protocol; > > @@ -6024,7 +6025,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff > > if (pp) { > > skb_list_del_init(pp); > > napi_gro_complete(napi, pp); > > - napi->gro_hash[bucket].count--; > > + gro_list->count--; > > } > > > > if (same_flow) > > @@ -6033,10 +6034,10 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff > > if (NAPI_GRO_CB(skb)->flush) > > goto normal; > > > > - if (unlikely(napi->gro_hash[bucket].count >= MAX_GRO_SKBS)) { > > + if (unlikely(gro_list->count >= MAX_GRO_SKBS)) { > > gro_flush_oldest(napi, gro_head); > > } else { > > - napi->gro_hash[bucket].count++; > > + gro_list->count++; > > } > > NAPI_GRO_CB(skb)->count = 1; > > NAPI_GRO_CB(skb)->age = jiffies; > > @@ -6050,7 +6051,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff > > if (grow > 0) > > gro_pull_from_frag0(skb, grow); > > ok: > 
> - if (napi->gro_hash[bucket].count) { > > + if (gro_list->count) { > > if (!test_bit(bucket, &napi->gro_bitmask)) > > __set_bit(bucket, &napi->gro_bitmask); > > } else if (test_bit(bucket, &napi->gro_bitmask)) { > > -- > > 2.30.2 > > > > > > This adds more register pressure, do you have precise measures to > confirm this change is a win ? > > Presumably the compiler should be able to optimize the code just fine, > it can see @bucket does not change. This is mostly (if not purely) cosmetic, I don't think it changes anything at all for the most of sane compilers. Regarding registers, since @gro_list and @gro_head are pretty the same, we could drop @gro_head in favour of @gro_list and just use @gro_list->list instead. Al