Message ID | 20210609103326.278782-3-toke@redhat.com |
---|---|
State | Superseded |
Series | Clean up and document RCU-based object protection for XDP_REDIRECT |
On Wed, Jun 9, 2021 at 7:24 AM Toke Høiland-Jørgensen <toke@redhat.com> wrote:
>
> XDP programs are called from a NAPI poll context, which means the RCU
> reference liveness is ensured by local_bh_disable(). Add
> rcu_read_lock_bh_held() as a condition to the RCU checks for map lookups so
> lockdep understands that the dereferences are safe from inside *either* an
> rcu_read_lock() section *or* a local_bh_disable() section. This is done in
> preparation for removing the redundant rcu_read_lock()s from the drivers.
>
> Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
> ---
>  kernel/bpf/hashtab.c  | 21 ++++++++++++++-------
>  kernel/bpf/helpers.c  |  6 +++---
>  kernel/bpf/lpm_trie.c |  6 ++++--
>  3 files changed, 21 insertions(+), 12 deletions(-)
>
> diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
> index 6f6681b07364..72c58cc516a3 100644
> --- a/kernel/bpf/hashtab.c
> +++ b/kernel/bpf/hashtab.c
> @@ -596,7 +596,8 @@ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
>          struct htab_elem *l;
>          u32 hash, key_size;
>
> -        WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
> +        WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
> +                     !rcu_read_lock_bh_held());

It's not clear to me whether rcu_read_lock_held() is still needed.
All comments sound like rcu_read_lock_bh_held() is a superset of rcu
that includes bh. But reading rcu source code it looks like RCU_BH is
its own rcu flavor... which is confusing.
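For context: since the RCU flavor consolidation (around v4.20), a local_bh_disable() section delays the same grace period as rcu_read_lock(), but rcu_read_lock_held() and rcu_read_lock_bh_held() remain separate lockdep predicates, which is why the patch ORs them in the check. Below is a minimal sketch of the two calling contexts the combined check is meant to accept; it is not code from the patch and the helper names are invented for illustration.

#include <linux/bpf.h>
#include <linux/bottom_half.h>
#include <linux/bug.h>
#include <linux/rcupdate.h>

/* Hypothetical helper using the same lockdep condition the patch adds. */
static void *lookup_from_either_context(struct bpf_map *map, void *key)
{
        /* True under rcu_read_lock() OR with bottom halves disabled. */
        WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
        return map->ops->map_lookup_elem(map, key);
}

static void caller_examples(struct bpf_map *map, void *key)
{
        /* Context 1: explicit RCU read-side critical section. */
        rcu_read_lock();
        lookup_from_either_context(map, key);
        rcu_read_unlock();

        /* Context 2: BH disabled, as in NAPI poll / XDP processing. */
        local_bh_disable();
        lookup_from_either_context(map, key);
        local_bh_enable();
}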
On Wed, Jun 09, 2021 at 12:33:11PM +0200, Toke Høiland-Jørgensen wrote:
> XDP programs are called from a NAPI poll context, which means the RCU
> reference liveness is ensured by local_bh_disable(). Add
> rcu_read_lock_bh_held() as a condition to the RCU checks for map lookups so
> lockdep understands that the dereferences are safe from inside *either* an
> rcu_read_lock() section *or* a local_bh_disable() section. This is done in
> preparation for removing the redundant rcu_read_lock()s from the drivers.
>
> Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
> ---
>  kernel/bpf/hashtab.c  | 21 ++++++++++++++-------
>  kernel/bpf/helpers.c  |  6 +++---
>  kernel/bpf/lpm_trie.c |  6 ++++--
>  3 files changed, 21 insertions(+), 12 deletions(-)

[ ... quoted kernel/bpf/hashtab.c hunks trimmed; they are identical to the patch shown below ... ]

> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index 544773970dbc..e880f6bb6f28 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -28,7 +28,7 @@
>   */
>  BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
>  {
> -        WARN_ON_ONCE(!rcu_read_lock_held());
> +        WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());

There is a discrepancy with rcu_read_lock_trace_held() here, but I think
the patch_map_ops_generic step in the verifier has skipped these helper
calls. It is unrelated and can be addressed later when it is needed.

Acked-by: Martin KaFai Lau <kafai@fb.com>

>          return (unsigned long) map->ops->map_lookup_elem(map, key);
>  }

[ ... rest of the quoted kernel/bpf/helpers.c hunks trimmed ... ]
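A hypothetical sketch of the point above, not part of this patch: if the generic helpers ever needed to accept callers protected only by RCU-tasks-trace (instead of having such calls patched to the map-specific ops by the verifier), their check would presumably grow the same predicate hashtab.c already uses. The function name below is made up for illustration.

#include <linux/bpf.h>
#include <linux/bug.h>
#include <linux/rcupdate.h>
#include <linux/rcupdate_trace.h>

/* Sketch only: generic lookup accepting plain RCU, RCU-tasks-trace, or BH. */
static void *generic_lookup_with_trace_check(struct bpf_map *map, void *key)
{
        WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
                     !rcu_read_lock_bh_held());
        return map->ops->map_lookup_elem(map, key);
}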
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index 6f6681b07364..72c58cc516a3 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -596,7 +596,8 @@ static void *__htab_map_lookup_elem(struct bpf_map *map, void *key)
         struct htab_elem *l;
         u32 hash, key_size;
 
-        WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
+        WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
+                     !rcu_read_lock_bh_held());
 
         key_size = map->key_size;
 
@@ -989,7 +990,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
                 /* unknown flags */
                 return -EINVAL;
 
-        WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
+        WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
+                     !rcu_read_lock_bh_held());
 
         key_size = map->key_size;
 
@@ -1082,7 +1084,8 @@ static int htab_lru_map_update_elem(struct bpf_map *map, void *key, void *value,
                 /* unknown flags */
                 return -EINVAL;
 
-        WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
+        WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
+                     !rcu_read_lock_bh_held());
 
         key_size = map->key_size;
 
@@ -1148,7 +1151,8 @@ static int __htab_percpu_map_update_elem(struct bpf_map *map, void *key,
                 /* unknown flags */
                 return -EINVAL;
 
-        WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
+        WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
+                     !rcu_read_lock_bh_held());
 
         key_size = map->key_size;
 
@@ -1202,7 +1206,8 @@ static int __htab_lru_percpu_map_update_elem(struct bpf_map *map, void *key,
                 /* unknown flags */
                 return -EINVAL;
 
-        WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
+        WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
+                     !rcu_read_lock_bh_held());
 
         key_size = map->key_size;
 
@@ -1276,7 +1281,8 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
         u32 hash, key_size;
         int ret;
 
-        WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
+        WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
+                     !rcu_read_lock_bh_held());
 
         key_size = map->key_size;
 
@@ -1311,7 +1317,8 @@ static int htab_lru_map_delete_elem(struct bpf_map *map, void *key)
         u32 hash, key_size;
         int ret;
 
-        WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held());
+        WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_trace_held() &&
+                     !rcu_read_lock_bh_held());
 
         key_size = map->key_size;
 
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 544773970dbc..e880f6bb6f28 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -28,7 +28,7 @@
  */
 BPF_CALL_2(bpf_map_lookup_elem, struct bpf_map *, map, void *, key)
 {
-        WARN_ON_ONCE(!rcu_read_lock_held());
+        WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
         return (unsigned long) map->ops->map_lookup_elem(map, key);
 }
 
@@ -44,7 +44,7 @@ const struct bpf_func_proto bpf_map_lookup_elem_proto = {
 BPF_CALL_4(bpf_map_update_elem, struct bpf_map *, map, void *, key,
            void *, value, u64, flags)
 {
-        WARN_ON_ONCE(!rcu_read_lock_held());
+        WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
         return map->ops->map_update_elem(map, key, value, flags);
 }
 
@@ -61,7 +61,7 @@ const struct bpf_func_proto bpf_map_update_elem_proto = {
 
 BPF_CALL_2(bpf_map_delete_elem, struct bpf_map *, map, void *, key)
 {
-        WARN_ON_ONCE(!rcu_read_lock_held());
+        WARN_ON_ONCE(!rcu_read_lock_held() && !rcu_read_lock_bh_held());
         return map->ops->map_delete_elem(map, key);
 }
 
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 1b7b8a6f34ee..423549d2c52e 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -232,7 +232,8 @@ static void *trie_lookup_elem(struct bpf_map *map, void *_key)
 
         /* Start walking the trie from the root node ... */
 
-        for (node = rcu_dereference(trie->root); node;) {
+        for (node = rcu_dereference_check(trie->root, rcu_read_lock_bh_held());
+             node;) {
                 unsigned int next_bit;
                 size_t matchlen;
 
@@ -264,7 +265,8 @@ static void *trie_lookup_elem(struct bpf_map *map, void *_key)
                  * traverse down.
                  */
                 next_bit = extract_bit(key->data, node->prefixlen);
-                node = rcu_dereference(node->child[next_bit]);
+                node = rcu_dereference_check(node->child[next_bit],
+                                             rcu_read_lock_bh_held());
         }
 
         if (!found)
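The lpm_trie hunks above rely on the second argument of rcu_dereference_check(), which lockdep ORs with the usual rcu_read_lock_held() test. A standalone sketch of that pattern, with made-up structure and field names and independent of the trie code:

#include <linux/rcupdate.h>

struct example_node;

struct example_head {
        struct example_node __rcu *root;
};

static struct example_node *example_lookup(struct example_head *h)
{
        /* Satisfies lockdep when rcu_read_lock() is held OR BHs are disabled. */
        return rcu_dereference_check(h->root, rcu_read_lock_bh_held());
}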
XDP programs are called from a NAPI poll context, which means the RCU
reference liveness is ensured by local_bh_disable(). Add
rcu_read_lock_bh_held() as a condition to the RCU checks for map lookups so
lockdep understands that the dereferences are safe from inside *either* an
rcu_read_lock() section *or* a local_bh_disable() section. This is done in
preparation for removing the redundant rcu_read_lock()s from the drivers.

Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
---
 kernel/bpf/hashtab.c  | 21 ++++++++++++++-------
 kernel/bpf/helpers.c  |  6 +++---
 kernel/bpf/lpm_trie.c |  6 ++++--
 3 files changed, 21 insertions(+), 12 deletions(-)
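To illustrate the calling context the commit message describes, here is a simplified, hypothetical NAPI poll function (not taken from any real driver, with program and buffer setup elided): the whole poll runs in softirq context with bottom halves disabled, so map lookups done by the XDP program already satisfy rcu_read_lock_bh_held() without an explicit rcu_read_lock()/rcu_read_unlock() pair.

#include <linux/filter.h>
#include <linux/netdevice.h>
#include <net/xdp.h>

static int example_napi_poll(struct napi_struct *napi, int budget)
{
        struct bpf_prog *prog = NULL; /* would be the attached XDP program */
        struct xdp_buff xdp = {};     /* would be built from the RX descriptor */
        u32 act;

        /* NAPI poll is invoked with BHs disabled, so rcu_read_lock_bh_held()
         * is already true here; no extra rcu_read_lock() is needed around the
         * program run (which may call bpf_map_lookup_elem() internally).
         */
        if (prog) {
                act = bpf_prog_run_xdp(prog, &xdp);
                if (act != XDP_PASS)
                        return 0; /* drop/redirect handling elided */
        }

        return 0;
}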