diff mbox series

[net-next,v12,03/10] tun: Allow steering eBPF program to fall back

Message ID 20250530-rss-v12-3-95d8b348de91@daynix.com
State New
Headers show
Series tun: Introduce virtio-net hashing feature | expand

Commit Message

Akihiko Odaki May 30, 2025, 4:50 a.m. UTC
This clarifies a steering eBPF program takes precedence over the other
steering algorithms.

Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
---
 Documentation/networking/tuntap.rst |  7 +++++++
 drivers/net/tun.c                   | 28 +++++++++++++++++-----------
 include/uapi/linux/if_tun.h         |  9 +++++++++
 3 files changed, 33 insertions(+), 11 deletions(-)

Comments

Akihiko Odaki June 4, 2025, 7:24 a.m. UTC | #1
On 2025/06/04 10:27, Jason Wang wrote:
> On Fri, May 30, 2025 at 12:50 PM Akihiko Odaki <akihiko.odaki@daynix.com> wrote:
>>
>> This clarifies a steering eBPF program takes precedence over the other
>> steering algorithms.
> 
> Let's give an example on the use case for this.
> 
>>
>> Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
>> ---
>>   Documentation/networking/tuntap.rst |  7 +++++++
>>   drivers/net/tun.c                   | 28 +++++++++++++++++-----------
>>   include/uapi/linux/if_tun.h         |  9 +++++++++
>>   3 files changed, 33 insertions(+), 11 deletions(-)
>>
>> diff --git a/Documentation/networking/tuntap.rst b/Documentation/networking/tuntap.rst
>> index 4d7087f727be..86b4ae8caa8a 100644
>> --- a/Documentation/networking/tuntap.rst
>> +++ b/Documentation/networking/tuntap.rst
>> @@ -206,6 +206,13 @@ enable is true we enable it, otherwise we disable it::
>>         return ioctl(fd, TUNSETQUEUE, (void *)&ifr);
>>     }
>>
>> +3.4 Reference
>> +-------------
>> +
>> +``linux/if_tun.h`` defines the interface described below:
>> +
>> +.. kernel-doc:: include/uapi/linux/if_tun.h
>> +
>>   Universal TUN/TAP device driver Frequently Asked Question
>>   =========================================================
>>
>> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
>> index d8f4d3e996a7..9133ab9ed3f5 100644
>> --- a/drivers/net/tun.c
>> +++ b/drivers/net/tun.c
>> @@ -476,21 +476,29 @@ static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
>>          return txq;
>>   }
>>
>> -static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb)
>> +static bool tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb,
>> +                                 u16 *ret)
>>   {
>>          struct tun_prog *prog;
>>          u32 numqueues;
>> -       u16 ret = 0;
>> +       u32 prog_ret;
>> +
>> +       prog = rcu_dereference(tun->steering_prog);
>> +       if (!prog)
>> +               return false;
>>
>>          numqueues = READ_ONCE(tun->numqueues);
>> -       if (!numqueues)
>> -               return 0;
>> +       if (!numqueues) {
>> +               *ret = 0;
>> +               return true;
>> +       }
>>
>> -       prog = rcu_dereference(tun->steering_prog);
>> -       if (prog)
>> -               ret = bpf_prog_run_clear_cb(prog->prog, skb);
>> +       prog_ret = bpf_prog_run_clear_cb(prog->prog, skb);
>> +       if (prog_ret == TUN_STEERINGEBPF_FALLBACK)
>> +               return false;
> 
> This seems to break the uAPI. So I think we need a new ioctl to enable
> the behaviour

I assumed it is fine to repurpose one of the 32-bit integer values since 
32-bit integer is too big to specify the queue number, but it may not be 
fine. I don't have a concrete use case either.

Perhaps it is safer to note that TUNSETSTEERINGEBPF takes precedence 
over TUNSETVNETRSS to allow such an extension in the future (but without 
implementing one now).

> 
>>
>> -       return ret % numqueues;
>> +       *ret = (u16)prog_ret % numqueues;
>> +       return true;
>>   }
>>
>>   static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
>> @@ -500,9 +508,7 @@ static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
>>          u16 ret;
>>
>>          rcu_read_lock();
>> -       if (rcu_dereference(tun->steering_prog))
>> -               ret = tun_ebpf_select_queue(tun, skb);
>> -       else
>> +       if (!tun_ebpf_select_queue(tun, skb, &ret))
>>                  ret = tun_automq_select_queue(tun, skb);
>>          rcu_read_unlock();
>>
>> diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
>> index 287cdc81c939..980de74724fc 100644
>> --- a/include/uapi/linux/if_tun.h
>> +++ b/include/uapi/linux/if_tun.h
>> @@ -115,4 +115,13 @@ struct tun_filter {
>>          __u8   addr[][ETH_ALEN];
>>   };
>>
>> +/**
>> + * define TUN_STEERINGEBPF_FALLBACK - A steering eBPF return value to fall back
>> + *
>> + * A steering eBPF program may return this value to fall back to the steering
>> + * algorithm that should have been used if the program was not set. This allows
>> + * selectively overriding the steering decision.
>> + */
>> +#define TUN_STEERINGEBPF_FALLBACK -1
> 
> Not a native speaker, consider it works more like XDP_PASS, would it
> be better to use "TUN_STERRING_PASS"?

That sounds indeed better to me.

Regards,
Akihiko Odaki
Jason Wang June 5, 2025, 1:55 a.m. UTC | #2
On Wed, Jun 4, 2025 at 3:24 PM Akihiko Odaki <akihiko.odaki@daynix.com> wrote:
>
> On 2025/06/04 10:27, Jason Wang wrote:
> > On Fri, May 30, 2025 at 12:50 PM Akihiko Odaki <akihiko.odaki@daynix.com> wrote:
> >>
> >> This clarifies a steering eBPF program takes precedence over the other
> >> steering algorithms.
> >
> > Let's give an example on the use case for this.
> >
> >>
> >> Signed-off-by: Akihiko Odaki <akihiko.odaki@daynix.com>
> >> ---
> >>   Documentation/networking/tuntap.rst |  7 +++++++
> >>   drivers/net/tun.c                   | 28 +++++++++++++++++-----------
> >>   include/uapi/linux/if_tun.h         |  9 +++++++++
> >>   3 files changed, 33 insertions(+), 11 deletions(-)
> >>
> >> diff --git a/Documentation/networking/tuntap.rst b/Documentation/networking/tuntap.rst
> >> index 4d7087f727be..86b4ae8caa8a 100644
> >> --- a/Documentation/networking/tuntap.rst
> >> +++ b/Documentation/networking/tuntap.rst
> >> @@ -206,6 +206,13 @@ enable is true we enable it, otherwise we disable it::
> >>         return ioctl(fd, TUNSETQUEUE, (void *)&ifr);
> >>     }
> >>
> >> +3.4 Reference
> >> +-------------
> >> +
> >> +``linux/if_tun.h`` defines the interface described below:
> >> +
> >> +.. kernel-doc:: include/uapi/linux/if_tun.h
> >> +
> >>   Universal TUN/TAP device driver Frequently Asked Question
> >>   =========================================================
> >>
> >> diff --git a/drivers/net/tun.c b/drivers/net/tun.c
> >> index d8f4d3e996a7..9133ab9ed3f5 100644
> >> --- a/drivers/net/tun.c
> >> +++ b/drivers/net/tun.c
> >> @@ -476,21 +476,29 @@ static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
> >>          return txq;
> >>   }
> >>
> >> -static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb)
> >> +static bool tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb,
> >> +                                 u16 *ret)
> >>   {
> >>          struct tun_prog *prog;
> >>          u32 numqueues;
> >> -       u16 ret = 0;
> >> +       u32 prog_ret;
> >> +
> >> +       prog = rcu_dereference(tun->steering_prog);
> >> +       if (!prog)
> >> +               return false;
> >>
> >>          numqueues = READ_ONCE(tun->numqueues);
> >> -       if (!numqueues)
> >> -               return 0;
> >> +       if (!numqueues) {
> >> +               *ret = 0;
> >> +               return true;
> >> +       }
> >>
> >> -       prog = rcu_dereference(tun->steering_prog);
> >> -       if (prog)
> >> -               ret = bpf_prog_run_clear_cb(prog->prog, skb);
> >> +       prog_ret = bpf_prog_run_clear_cb(prog->prog, skb);
> >> +       if (prog_ret == TUN_STEERINGEBPF_FALLBACK)
> >> +               return false;
> >
> > This seems to break the uAPI. So I think we need a new ioctl to enable
> > the behaviour
>
> I assumed it is fine to repurpose one of the 32-bit integer values since
> 32-bit integer is too big to specify the queue number, but it may not be
> fine. I don't have a concrete use case either.
>
> Perhaps it is safer to note that TUNSETSTEERINGEBPF takes precedence
> over TUNSETVNETRSS to allow such an extension in the future (but without
> implementing one now).

Yes, that's my original point. Let's start from something that is
simpler and safer.

Thanks
diff mbox series

Patch

diff --git a/Documentation/networking/tuntap.rst b/Documentation/networking/tuntap.rst
index 4d7087f727be..86b4ae8caa8a 100644
--- a/Documentation/networking/tuntap.rst
+++ b/Documentation/networking/tuntap.rst
@@ -206,6 +206,13 @@  enable is true we enable it, otherwise we disable it::
       return ioctl(fd, TUNSETQUEUE, (void *)&ifr);
   }
 
+3.4 Reference
+-------------
+
+``linux/if_tun.h`` defines the interface described below:
+
+.. kernel-doc:: include/uapi/linux/if_tun.h
+
 Universal TUN/TAP device driver Frequently Asked Question
 =========================================================
 
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index d8f4d3e996a7..9133ab9ed3f5 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -476,21 +476,29 @@  static u16 tun_automq_select_queue(struct tun_struct *tun, struct sk_buff *skb)
 	return txq;
 }
 
-static u16 tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb)
+static bool tun_ebpf_select_queue(struct tun_struct *tun, struct sk_buff *skb,
+				  u16 *ret)
 {
 	struct tun_prog *prog;
 	u32 numqueues;
-	u16 ret = 0;
+	u32 prog_ret;
+
+	prog = rcu_dereference(tun->steering_prog);
+	if (!prog)
+		return false;
 
 	numqueues = READ_ONCE(tun->numqueues);
-	if (!numqueues)
-		return 0;
+	if (!numqueues) {
+		*ret = 0;
+		return true;
+	}
 
-	prog = rcu_dereference(tun->steering_prog);
-	if (prog)
-		ret = bpf_prog_run_clear_cb(prog->prog, skb);
+	prog_ret = bpf_prog_run_clear_cb(prog->prog, skb);
+	if (prog_ret == TUN_STEERINGEBPF_FALLBACK)
+		return false;
 
-	return ret % numqueues;
+	*ret = (u16)prog_ret % numqueues;
+	return true;
 }
 
 static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
@@ -500,9 +508,7 @@  static u16 tun_select_queue(struct net_device *dev, struct sk_buff *skb,
 	u16 ret;
 
 	rcu_read_lock();
-	if (rcu_dereference(tun->steering_prog))
-		ret = tun_ebpf_select_queue(tun, skb);
-	else
+	if (!tun_ebpf_select_queue(tun, skb, &ret))
 		ret = tun_automq_select_queue(tun, skb);
 	rcu_read_unlock();
 
diff --git a/include/uapi/linux/if_tun.h b/include/uapi/linux/if_tun.h
index 287cdc81c939..980de74724fc 100644
--- a/include/uapi/linux/if_tun.h
+++ b/include/uapi/linux/if_tun.h
@@ -115,4 +115,13 @@  struct tun_filter {
 	__u8   addr[][ETH_ALEN];
 };
 
+/**
+ * define TUN_STEERINGEBPF_FALLBACK - A steering eBPF return value to fall back
+ *
+ * A steering eBPF program may return this value to fall back to the steering
+ * algorithm that should have been used if the program was not set. This allows
+ * selectively overriding the steering decision.
+ */
+#define TUN_STEERINGEBPF_FALLBACK -1
+
 #endif /* _UAPI__IF_TUN_H */