@@ -423,6 +423,9 @@ struct bpf_verifier_env {
u32 peak_states;
/* longest register parentage chain walked for liveness marking */
u32 longest_mark_read_walk;
+ /* Are all bpf_redirect{,_map}() calls leaf calls (program exits right after)? */
+ bool all_leaves_redirect;
+ u32 redirect_call_cnt; /* bpf_redirect{,_map}() calls seen during verification */
};
__printf(2, 0) void bpf_verifier_vlog(struct bpf_verifier_log *log,
@@ -534,7 +534,8 @@ struct bpf_prog {
kprobe_override:1, /* Do we override a kprobe? */
has_callchain_buf:1, /* callchain buffer allocated? */
enforce_expected_attach_type:1, /* Enforce expected_attach_type checking at attach time */
- call_get_stack:1; /* Do we call bpf_get_stack() or bpf_get_stackid() */
+ call_get_stack:1, /* Do we call bpf_get_stack() or bpf_get_stackid() */
+ redirect_opt:1; /* All bpf_redirect{,_map}() calls are leaf calls */
enum bpf_prog_type type; /* Type of BPF program */
enum bpf_attach_type expected_attach_type; /* For some prog types */
u32 len; /* Number of filter blocks */
@@ -622,6 +623,8 @@ struct bpf_redirect_info {
struct bpf_map *map;
u32 kern_flags;
struct bpf_nh_params nh;
+ const struct bpf_prog *xdp_prog_redirect_opt; /* set by bpf_prog_run_xdp() */
+ struct xdp_buff *xdp; /* xdp_buff of the packet currently being processed */
};
DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info);
@@ -734,6 +737,18 @@ DECLARE_BPF_DISPATCHER(xdp)
static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
struct xdp_buff *xdp)
{
+ if (prog->redirect_opt) {
+ struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
+
+ ri->xdp_prog_redirect_opt = prog;
+ ri->xdp = xdp;
+ } else {
+ /* Clear stale fast-path state left on this CPU by an earlier
+ * program, so the helpers cannot pick up a dangling xdp_buff.
+ */
+ this_cpu_write(bpf_redirect_info.xdp_prog_redirect_opt, NULL);
+ }
+
/* Caller needs to hold rcu_read_lock() (!), otherwise program
* can be released while still running, or map elements could be
* freed early while still having concurrent users. XDP fastpath
@@ -743,6 +758,16 @@ static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
return __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp));
}
+/* Variant used by the generic, skb-based XDP path: it must not arm the
+ * redirect_opt fast path, since generic XDP redirects are handled via
+ * xdp_do_generic_redirect() on the skb, not the native enqueue paths.
+ */
+static __always_inline u32 bpf_prog_run_xdp_skb(const struct bpf_prog *prog,
+ struct xdp_buff *xdp)
+{
+ return __BPF_PROG_RUN(prog, xdp, BPF_DISPATCHER_FUNC(xdp));
+}
+
void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog);
static inline u32 bpf_prog_insn_size(const struct bpf_prog *prog)
@@ -951,9 +976,16 @@ static inline int xdp_ok_fwd_dev(const struct net_device *fwd,
*/
int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
struct xdp_buff *xdp, struct bpf_prog *prog);
-int xdp_do_redirect(struct net_device *dev,
- struct xdp_buff *xdp,
- struct bpf_prog *prog);
+int __xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, struct bpf_prog *prog);
+static inline int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
+ struct bpf_prog *prog)
+{
+ if (prog->redirect_opt)
+ return 0;
+
+ return __xdp_do_redirect(dev, xdp, prog);
+}
+
void xdp_do_flush(void);
/* The xdp_do_flush_map() helper has been renamed to drop the _map suffix, as
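For context, here is a simplified, hypothetical driver receive handler (not taken from any driver touched by this patch; all names below are illustrative) showing where the inlined xdp_do_redirect() wrapper above sits. For a redirect_opt program the frame has already been enqueued from inside bpf_redirect{,_map}(), so the wrapper returns 0 immediately and driver code needs no changes; the driver's NAPI poll is still expected to call xdp_do_flush() afterwards to flush the per-CPU bulk queues.

  /* Hypothetical driver RX fast path; error handling trimmed. */
  static u32 example_rx_xdp(struct net_device *dev, struct bpf_prog *prog,
                            struct xdp_buff *xdp)
  {
          u32 act = bpf_prog_run_xdp(prog, xdp);

          switch (act) {
          case XDP_PASS:
                  break;
          case XDP_TX:
                  /* driver-specific transmit handling elided */
                  break;
          case XDP_REDIRECT:
                  /* A no-op (returns 0) when prog->redirect_opt is set */
                  if (xdp_do_redirect(dev, xdp, prog))
                          act = XDP_DROP;
                  break;
          default:
                  bpf_warn_invalid_xdp_action(act);
                  fallthrough;
          case XDP_ABORTED:
          case XDP_DROP:
                  break;
          }
          return act;
  }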
@@ -5032,6 +5032,58 @@ static int check_reference_leak(struct bpf_verifier_env *env)
return state->acquired_refs ? -EINVAL : 0;
}
+static void check_redirect_opt(struct bpf_verifier_env *env, int func_id, int insn_idx)
+{
+ struct bpf_insn *insns = env->prog->insnsi;
+ int insn_cnt = env->prog->len;
+ struct bpf_insn *insn;
+ bool is_leaf = false;
+
+ if (!(func_id == BPF_FUNC_redirect || func_id == BPF_FUNC_redirect_map))
+ return;
+
+ /* Naive peephole check: does the program exit right after the call? */
+ insn_idx++;
+ if (insn_idx >= insn_cnt)
+ return;
+
+ insn = &insns[insn_idx];
+ switch (insn->code) {
+ /* Is the instruction following the call an exit? */
+ case BPF_JMP | BPF_EXIT:
+ is_leaf = true;
+ break;
+ /* Follow the taken branch of "if the return value (r0/w0) is
+ * non-zero/positive", and look for an exit.
+ */
+ case BPF_JMP | BPF_JSGT | BPF_K:
+ case BPF_JMP32 | BPF_JSGT | BPF_K:
+ case BPF_JMP | BPF_JGT | BPF_K:
+ case BPF_JMP32 | BPF_JGT | BPF_K:
+ case BPF_JMP | BPF_JNE | BPF_K:
+ case BPF_JMP32 | BPF_JNE | BPF_K:
+ if (insn->dst_reg == BPF_REG_0 && insn->imm == 0) {
+ insn_idx += insn->off + 1;
+ if (insn_idx >= insn_cnt)
+ break;
+
+ insn = &insns[insn_idx];
+ is_leaf = insn->code == (BPF_JMP | BPF_EXIT);
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (!env->redirect_call_cnt++) {
+ env->all_leaves_redirect = is_leaf;
+ return;
+ }
+
+ if (!is_leaf)
+ env->all_leaves_redirect = false;
+}
+
static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
{
const struct bpf_func_proto *fn = NULL;
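To make the peephole above concrete, the following is a minimal sketch of an XDP program whose redirect call sits in tail position; it is not part of the patch, and the map, section, and function names are made up. With clang/LLVM such a return statement typically compiles to a helper call immediately followed by BPF_EXIT, which is the first pattern check_redirect_opt() accepts.

  #include <linux/bpf.h>
  #include <bpf/bpf_helpers.h>

  struct {
          __uint(type, BPF_MAP_TYPE_DEVMAP);
          __uint(max_entries, 64);
          __type(key, __u32);
          __type(value, __u32);
  } tx_port SEC(".maps");

  SEC("xdp")
  int xdp_redirect_leaf(struct xdp_md *ctx)
  {
          /* Helper call in tail position: the next instruction after the
           * call is exit, so the call is a "leaf" in the sense above.
           */
          return bpf_redirect_map(&tx_port, ctx->rx_queue_index, 0);
  }

  char _license[] SEC("license") = "GPL";

A form such as "if (bpf_redirect_map(...)) return XDP_REDIRECT;" may instead produce the conditional-jump-to-exit shape covered by the JNE/JGT/JSGT cases, depending on what the compiler emits.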
@@ -5125,6 +5177,8 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
}
}
+ check_redirect_opt(env, func_id, insn_idx);
+
regs = cur_regs(env);
/* check that flags argument in get_local_storage(map, flags) is 0,
@@ -11894,6 +11948,17 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
return 0;
}
+static void validate_redirect_opt(struct bpf_verifier_env *env)
+{
+ if (env->subprog_cnt != 1)
+ return;
+
+ if (env->subprog_info[0].has_tail_call)
+ return;
+
+ env->prog->redirect_opt = env->all_leaves_redirect;
+}
+
struct btf *bpf_get_btf_vmlinux(void)
{
if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
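For contrast, a sketch of a program shape that never gets redirect_opt (hypothetical as well, reusing the includes and the tx_port map from the previous sketch): the redirect lives in a BPF-to-BPF called function, so the exit after the call only returns to the caller rather than leaving the program, and validate_redirect_opt() bails out because subprog_cnt != 1. Programs with tail calls are skipped for the same kind of reason.

  static __noinline int forward(struct xdp_md *ctx)
  {
          /* "call bpf_redirect_map; exit" inside a subprogram is not a
           * program-level leaf: this exit returns to xdp_prog() below.
           */
          return bpf_redirect_map(&tx_port, ctx->rx_queue_index, 0);
  }

  SEC("xdp")
  int xdp_prog(struct xdp_md *ctx)
  {
          int act = forward(ctx);

          return act == XDP_REDIRECT ? act : XDP_PASS;
  }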
@@ -12092,6 +12157,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
if (ret == 0)
adjust_btf_func(env);
+ if (ret == 0)
+ validate_redirect_opt(env);
+
err_release_maps:
if (!env->prog->aux->used_maps)
/* if we didn't copy map pointers into bpf_prog_info, release
@@ -4654,7 +4654,7 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
rxqueue = netif_get_rxqueue(skb);
xdp->rxq = &rxqueue->xdp_rxq;
- act = bpf_prog_run_xdp(xdp_prog, xdp);
+ act = bpf_prog_run_xdp_skb(xdp_prog, xdp);
/* check if bpf_xdp_adjust_head was used */
off = xdp->data - orig_data;
@@ -3981,8 +3981,8 @@ void bpf_clear_redirect_map(struct bpf_map *map)
}
}
-int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
- struct bpf_prog *xdp_prog)
+int __xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
+ struct bpf_prog *xdp_prog)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
struct bpf_map *map = READ_ONCE(ri->map);
@@ -4015,7 +4015,7 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
_trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err);
return err;
}
-EXPORT_SYMBOL_GPL(xdp_do_redirect);
+EXPORT_SYMBOL_GPL(__xdp_do_redirect);
static int xdp_do_generic_redirect_map(struct net_device *dev,
struct sk_buff *skb,
@@ -4091,6 +4091,36 @@ int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb,
return err;
}
+static u64 __bpf_xdp_redirect_opt(u32 index, struct bpf_redirect_info *ri)
+{
+ const struct bpf_prog *xdp_prog;
+ struct net_device *fwd, *dev;
+ struct xdp_buff *xdp;
+ int err;
+
+ xdp_prog = ri->xdp_prog_redirect_opt;
+ xdp = ri->xdp;
+ dev = xdp->rxq->dev;
+
+ ri->xdp_prog_redirect_opt = NULL;
+
+ fwd = dev_get_by_index_rcu(dev_net(dev), index);
+ if (unlikely(!fwd)) {
+ err = -EINVAL;
+ goto err;
+ }
+
+ err = dev_xdp_enqueue(fwd, xdp, dev);
+ if (unlikely(err))
+ goto err;
+
+ _trace_xdp_redirect_map(dev, xdp_prog, fwd, NULL, index);
+ return XDP_REDIRECT;
+err:
+ _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, NULL, index, err);
+ return XDP_ABORTED;
+}
+
BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
{
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
@@ -4098,6 +4128,9 @@ BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags)
if (unlikely(flags))
return XDP_ABORTED;
+ if (ri->xdp_prog_redirect_opt)
+ return __bpf_xdp_redirect_opt(ifindex, ri);
+
ri->flags = flags;
ri->tgt_index = ifindex;
ri->tgt_value = NULL;
@@ -4114,6 +4147,64 @@ static const struct bpf_func_proto bpf_xdp_redirect_proto = {
.arg2_type = ARG_ANYTHING,
};
+static u64 __bpf_xdp_redirect_map_opt(struct bpf_map *map, u32 index, u64 flags,
+ struct bpf_redirect_info *ri)
+{
+ const struct bpf_prog *xdp_prog;
+ struct net_device *dev;
+ struct xdp_buff *xdp;
+ void *val;
+ int err;
+
+ xdp_prog = ri->xdp_prog_redirect_opt;
+ xdp = ri->xdp;
+ dev = xdp->rxq->dev;
+
+ ri->xdp_prog_redirect_opt = NULL;
+
+ switch (map->map_type) {
+ case BPF_MAP_TYPE_DEVMAP: {
+ val = __dev_map_lookup_elem(map, index);
+ if (unlikely(!val))
+ return flags;
+ err = dev_map_enqueue(val, xdp, dev);
+ break;
+ }
+ case BPF_MAP_TYPE_DEVMAP_HASH: {
+ val = __dev_map_hash_lookup_elem(map, index);
+ if (unlikely(!val))
+ return flags;
+ err = dev_map_enqueue(val, xdp, dev);
+ break;
+ }
+ case BPF_MAP_TYPE_CPUMAP: {
+ val = __cpu_map_lookup_elem(map, index);
+ if (unlikely(!val))
+ return flags;
+ err = cpu_map_enqueue(val, xdp, dev);
+ break;
+ }
+ case BPF_MAP_TYPE_XSKMAP: {
+ val = __xsk_map_lookup_elem(map, index);
+ if (unlikely(!val))
+ return flags;
+ err = __xsk_map_redirect(val, xdp);
+ break;
+ }
+ default:
+ return flags;
+ }
+
+ if (unlikely(err))
+ goto err;
+
+ _trace_xdp_redirect_map(dev, xdp_prog, val, map, index);
+ return XDP_REDIRECT;
+err:
+ _trace_xdp_redirect_map_err(dev, xdp_prog, val, map, index, err);
+ return XDP_ABORTED;
+}
+
BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
u64, flags)
{
@@ -4123,6 +4214,9 @@ BPF_CALL_3(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex,
if (unlikely(flags > XDP_TX))
return XDP_ABORTED;
+ if (ri->xdp_prog_redirect_opt)
+ return __bpf_xdp_redirect_map_opt(map, ifindex, flags, ri);
+
ri->tgt_value = __xdp_map_lookup_elem(map, ifindex);
if (unlikely(!ri->tgt_value)) {
/* If the lookup fails we want to clear out the state in the
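One behavioral detail carried over into __bpf_xdp_redirect_map_opt(): on a failed map lookup it returns flags, preserving the existing convention that the low bits of the flags argument select a fallback action. A hypothetical leaf-call example, again reusing tx_port from the earlier sketch:

  SEC("xdp")
  int xdp_redirect_or_pass(struct xdp_md *ctx)
  {
          /* If tx_port has no entry for this queue index, the helper
           * returns XDP_PASS (taken from flags) instead of XDP_REDIRECT,
           * so the packet continues up the stack.
           */
          return bpf_redirect_map(&tx_port, ctx->rx_queue_index, XDP_PASS);
  }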