Message ID | 20200917135700.649909-1-luka.oreskovic@sartura.hr |
---|---|
State | New |
Headers | show |
Series | [bpf-next] bpf: add support for other map types to bpf_map_lookup_and_delete_elem | expand |
On Thu, Sep 17, 2020 at 7:16 AM Luka Oreskovic <luka.oreskovic@sartura.hr> wrote: > [...] > +++ b/kernel/bpf/syscall.c > @@ -1475,6 +1475,9 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr) > if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM)) > return -EINVAL; > > + if (attr->flags & ~BPF_F_LOCK) > + return -EINVAL; > + Please explain (in comments for commit log) the use of BPF_F_LOCK in the commit log, as it is new for BPF_MAP_LOOKUP_AND_DELETE_ELEM. > f = fdget(ufd); > map = __bpf_map_get(f); > if (IS_ERR(map)) > @@ -1485,13 +1488,19 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr) > goto err_put; > } > > + if ((attr->flags & BPF_F_LOCK) && > + !map_value_has_spin_lock(map)) { > + err = -EINVAL; > + goto err_put; > + } > + > key = __bpf_copy_key(ukey, map->key_size); > if (IS_ERR(key)) { > err = PTR_ERR(key); > goto err_put; > } > > - value_size = map->value_size; > + value_size = bpf_map_value_size(map); > > err = -ENOMEM; > value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); > @@ -1502,7 +1511,24 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr) > map->map_type == BPF_MAP_TYPE_STACK) { > err = map->ops->map_pop_elem(map, value); > } else { > - err = -ENOTSUPP; > + err = bpf_map_copy_value(map, key, value, attr->flags); > + if (err) > + goto free_value; IIUC, we cannot guarantee the value returned is the same as the value we deleted. If this is true, I guess this may confuse the user with some concurrency bug. Thanks, Song [...]
On Fri, Sep 18, 2020 at 1:21 AM Song Liu <song@kernel.org> wrote: > > On Thu, Sep 17, 2020 at 7:16 AM Luka Oreskovic > <luka.oreskovic@sartura.hr> wrote: > > > [...] > > > +++ b/kernel/bpf/syscall.c > > @@ -1475,6 +1475,9 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr) > > if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM)) > > return -EINVAL; > > > > + if (attr->flags & ~BPF_F_LOCK) > > + return -EINVAL; > > + > > Please explain (in comments for commit log) the use of BPF_F_LOCK in > the commit log, > as it is new for BPF_MAP_LOOKUP_AND_DELETE_ELEM. > > > f = fdget(ufd); > > map = __bpf_map_get(f); > > if (IS_ERR(map)) > > @@ -1485,13 +1488,19 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr) > > goto err_put; > > } > > > > + if ((attr->flags & BPF_F_LOCK) && > > + !map_value_has_spin_lock(map)) { > > + err = -EINVAL; > > + goto err_put; > > + } > > + > > key = __bpf_copy_key(ukey, map->key_size); > > if (IS_ERR(key)) { > > err = PTR_ERR(key); > > goto err_put; > > } > > > > - value_size = map->value_size; > > + value_size = bpf_map_value_size(map); > > > > err = -ENOMEM; > > value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); > > @@ -1502,7 +1511,24 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr) > > map->map_type == BPF_MAP_TYPE_STACK) { > > err = map->ops->map_pop_elem(map, value); > > } else { > > - err = -ENOTSUPP; > > + err = bpf_map_copy_value(map, key, value, attr->flags); > > + if (err) > > + goto free_value; > > IIUC, we cannot guarantee the value returned is the same as the value we > deleted. If this is true, I guess this may confuse the user with some > concurrency > bug. > > Thanks, > Song > > [...] Thank you very much for your review. This is my first time contributing to the linux community, so I am very grateful for any input. For the first point, you are correct, the commit message should have been more detailed. As for the second point, I see the problem, but I'm not sure how to resolve it. Maybe moving the bpf_disable_instrumentation call could work, but I'm not sure if that could create different problems. I'll try to find and acceptable solution and resubmit the patch. Best wishes, Luka Oreskovic
On Thu, Sep 17, 2020 at 7:16 AM Luka Oreskovic <luka.oreskovic@sartura.hr> wrote: > > Since this function already exists, it made sense to implement it for > map types other than stack and queue. This patch adds the necessary parts > from bpf_map_lookup_elem and bpf_map_delete_elem so it works as expected > for all map types. > > Signed-off-by: Luka Oreskovic <luka.oreskovic@sartura.hr> > CC: Juraj Vijtiuk <juraj.vijtiuk@sartura.hr> > CC: Luka Perkov <luka.perkov@sartura.hr> > --- > kernel/bpf/syscall.c | 30 ++++++++++++++++++++++++++++-- > 1 file changed, 28 insertions(+), 2 deletions(-) > > diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c > index 2ce32cad5c8e..955de6ca8c45 100644 > --- a/kernel/bpf/syscall.c > +++ b/kernel/bpf/syscall.c > @@ -1475,6 +1475,9 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr) > if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM)) > return -EINVAL; > > + if (attr->flags & ~BPF_F_LOCK) If you want to use attr->flags, you need to update BPF_MAP_LOOKUP_AND_DELETE_ELEM_LAST_FIELD few lines above. And every new feature needs to come with selftests, so please check tools/testing/selftests/bpf and latest patch sets adding new selftests to see how it's done. > + return -EINVAL; > + > f = fdget(ufd); > map = __bpf_map_get(f); > if (IS_ERR(map)) > @@ -1485,13 +1488,19 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr) > goto err_put; > } > > + if ((attr->flags & BPF_F_LOCK) && > + !map_value_has_spin_lock(map)) { > + err = -EINVAL; > + goto err_put; > + } > + > key = __bpf_copy_key(ukey, map->key_size); > if (IS_ERR(key)) { > err = PTR_ERR(key); > goto err_put; > } > > - value_size = map->value_size; > + value_size = bpf_map_value_size(map); > > err = -ENOMEM; > value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); > @@ -1502,7 +1511,24 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr) > map->map_type == BPF_MAP_TYPE_STACK) { > err = map->ops->map_pop_elem(map, value); > } else { > - err = -ENOTSUPP; > + err = bpf_map_copy_value(map, key, value, attr->flags); > + if (err) > + goto free_value; > + > + if (bpf_map_is_dev_bound(map)) { > + err = bpf_map_offload_delete_elem(map, key); > + } else if (IS_FD_PROG_ARRAY(map) || > + map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { > + /* These maps require sleepable context */ > + err = map->ops->map_delete_elem(map, key); > + } else { > + bpf_disable_instrumentation(); > + rcu_read_lock(); > + err = map->ops->map_delete_elem(map, key); > + rcu_read_unlock(); > + bpf_enable_instrumentation(); > + maybe_wait_bpf_programs(map); > + } The whole point of this operation is to do lookup and deletion of elements atomically. You can't do it with a separate lookup, followed by a separate delete operation. Those two have to be implemented by each type of map specifically. E.g., for hashmap, you'd have a separate function implementation that takes a bucket lock, copies data, and deletes entry, while still holding the lock. Of course internally you'd want to reuse as much code as possible, but it will be a separate bpf_map_ops operation. > } > > if (err) > -- > 2.26.2 >
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 2ce32cad5c8e..955de6ca8c45 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -1475,6 +1475,9 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr) if (CHECK_ATTR(BPF_MAP_LOOKUP_AND_DELETE_ELEM)) return -EINVAL; + if (attr->flags & ~BPF_F_LOCK) + return -EINVAL; + f = fdget(ufd); map = __bpf_map_get(f); if (IS_ERR(map)) @@ -1485,13 +1488,19 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr) goto err_put; } + if ((attr->flags & BPF_F_LOCK) && + !map_value_has_spin_lock(map)) { + err = -EINVAL; + goto err_put; + } + key = __bpf_copy_key(ukey, map->key_size); if (IS_ERR(key)) { err = PTR_ERR(key); goto err_put; } - value_size = map->value_size; + value_size = bpf_map_value_size(map); err = -ENOMEM; value = kmalloc(value_size, GFP_USER | __GFP_NOWARN); @@ -1502,7 +1511,24 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr) map->map_type == BPF_MAP_TYPE_STACK) { err = map->ops->map_pop_elem(map, value); } else { - err = -ENOTSUPP; + err = bpf_map_copy_value(map, key, value, attr->flags); + if (err) + goto free_value; + + if (bpf_map_is_dev_bound(map)) { + err = bpf_map_offload_delete_elem(map, key); + } else if (IS_FD_PROG_ARRAY(map) || + map->map_type == BPF_MAP_TYPE_STRUCT_OPS) { + /* These maps require sleepable context */ + err = map->ops->map_delete_elem(map, key); + } else { + bpf_disable_instrumentation(); + rcu_read_lock(); + err = map->ops->map_delete_elem(map, key); + rcu_read_unlock(); + bpf_enable_instrumentation(); + maybe_wait_bpf_programs(map); + } } if (err)
Since this function already exists, it made sense to implement it for map types other than stack and queue. This patch adds the necessary parts from bpf_map_lookup_elem and bpf_map_delete_elem so it works as expected for all map types. Signed-off-by: Luka Oreskovic <luka.oreskovic@sartura.hr> CC: Juraj Vijtiuk <juraj.vijtiuk@sartura.hr> CC: Luka Perkov <luka.perkov@sartura.hr> --- kernel/bpf/syscall.c | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-)