
[net-next,v11,08/23] ovpn: implement basic TX path (UDP)

Message ID 20241029-b4-ovpn-v11-8-de4698c73a25@openvpn.net
State New
Series Introducing OpenVPN Data Channel Offload

Commit Message

Antonio Quartulli Oct. 29, 2024, 10:47 a.m. UTC
Packets sent over the ovpn interface are processed and transmitted to the
connected peer, if any.

Implementation is UDP only. TCP will be added by a later patch.

Note: no crypto/encapsulation exists yet. Packets are just captured and
sent.

Signed-off-by: Antonio Quartulli <antonio@openvpn.net>
---
 drivers/net/ovpn/io.c   | 138 +++++++++++++++++++++++++++-
 drivers/net/ovpn/peer.c |  37 +++++++-
 drivers/net/ovpn/peer.h |   4 +
 drivers/net/ovpn/skb.h  |  51 +++++++++++
 drivers/net/ovpn/udp.c  | 232 ++++++++++++++++++++++++++++++++++++++++++++++++
 drivers/net/ovpn/udp.h  |   8 ++
 6 files changed, 468 insertions(+), 2 deletions(-)

Comments

Sabrina Dubroca Oct. 30, 2024, 5:14 p.m. UTC | #1
2024-10-29, 11:47:21 +0100, Antonio Quartulli wrote:
> +static void ovpn_send(struct ovpn_struct *ovpn, struct sk_buff *skb,
> +		      struct ovpn_peer *peer)
> +{
> +	struct sk_buff *curr, *next;
> +
> +	if (likely(!peer))
> +		/* retrieve peer serving the destination IP of this packet */
> +		peer = ovpn_peer_get_by_dst(ovpn, skb);
> +	if (unlikely(!peer)) {
> +		net_dbg_ratelimited("%s: no peer to send data to\n",
> +				    ovpn->dev->name);
> +		dev_core_stats_tx_dropped_inc(ovpn->dev);
> +		goto drop;
> +	}
> +
> +	/* this might be a GSO-segmented skb list: process each skb
> +	 * independently
> +	 */
> +	skb_list_walk_safe(skb, curr, next)

nit (if you end up reposting): there should probably be some braces
around the (multi-line) loop body.

> +		if (unlikely(!ovpn_encrypt_one(peer, curr))) {
> +			dev_core_stats_tx_dropped_inc(ovpn->dev);
> +			kfree_skb(curr);
> +		}
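
I.e. something like this (style only, no functional change):

	skb_list_walk_safe(skb, curr, next) {
		if (unlikely(!ovpn_encrypt_one(peer, curr))) {
			dev_core_stats_tx_dropped_inc(ovpn->dev);
			kfree_skb(curr);
		}
	}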

> +void ovpn_udp_send_skb(struct ovpn_struct *ovpn, struct ovpn_peer *peer,
> +		       struct sk_buff *skb)
> +{
[...]
> +	/* crypto layer -> transport (UDP) */
> +	pkt_len = skb->len;
> +	ret = ovpn_udp_output(ovpn, bind, &peer->dst_cache, sock->sk, skb);
> +
> +out_unlock:
> +	rcu_read_unlock();
> +out:
> +	if (unlikely(ret < 0)) {
> +		dev_core_stats_tx_dropped_inc(ovpn->dev);
> +		kfree_skb(skb);
> +		return;
> +	}
> +
> +	dev_sw_netstats_tx_add(ovpn->dev, 1, pkt_len);

If I'm following things correctly, that's already been counted:

ovpn_udp_output -> ovpn_udp4_output -> udp_tunnel_xmit_skb
                                    -> iptunnel_xmit
                                    -> iptunnel_xmit_stats

which does (on success) the same thing as dev_sw_netstats_tx_add. On
failure it increments a different tx_dropped counter than
dev_core_stats_tx_dropped_inc does, but they should get summed in the end.

> +}
Antonio Quartulli Oct. 30, 2024, 8:58 p.m. UTC | #2
On 30/10/2024 18:14, Sabrina Dubroca wrote:
> 2024-10-29, 11:47:21 +0100, Antonio Quartulli wrote:
>> +static void ovpn_send(struct ovpn_struct *ovpn, struct sk_buff *skb,
>> +		      struct ovpn_peer *peer)
>> +{
>> +	struct sk_buff *curr, *next;
>> +
>> +	if (likely(!peer))
>> +		/* retrieve peer serving the destination IP of this packet */
>> +		peer = ovpn_peer_get_by_dst(ovpn, skb);
>> +	if (unlikely(!peer)) {
>> +		net_dbg_ratelimited("%s: no peer to send data to\n",
>> +				    ovpn->dev->name);
>> +		dev_core_stats_tx_dropped_inc(ovpn->dev);
>> +		goto drop;
>> +	}
>> +
>> +	/* this might be a GSO-segmented skb list: process each skb
>> +	 * independently
>> +	 */
>> +	skb_list_walk_safe(skb, curr, next)
> 
> nit (if you end up reposting): there should probably be some braces
> around the (multi-line) loop body.

ACK

> 
>> +		if (unlikely(!ovpn_encrypt_one(peer, curr))) {
>> +			dev_core_stats_tx_dropped_inc(ovpn->dev);
>> +			kfree_skb(curr);
>> +		}
> 
>> +void ovpn_udp_send_skb(struct ovpn_struct *ovpn, struct ovpn_peer *peer,
>> +		       struct sk_buff *skb)
>> +{
> [...]
>> +	/* crypto layer -> transport (UDP) */
>> +	pkt_len = skb->len;
>> +	ret = ovpn_udp_output(ovpn, bind, &peer->dst_cache, sock->sk, skb);
>> +
>> +out_unlock:
>> +	rcu_read_unlock();
>> +out:
>> +	if (unlikely(ret < 0)) {
>> +		dev_core_stats_tx_dropped_inc(ovpn->dev);
>> +		kfree_skb(skb);
>> +		return;
>> +	}
>> +
>> +	dev_sw_netstats_tx_add(ovpn->dev, 1, pkt_len);
> 
> If I'm following things correctly, that's already been counted:
> 
> ovpn_udp_output -> ovpn_udp4_output -> udp_tunnel_xmit_skb
>                                      -> iptunnel_xmit
>                                      -> iptunnel_xmit_stats
> 
> which does (on success) the same thing as dev_sw_netstats_tx_add. On

Right. This means we can remove that call to tx_add().

> failure it increments a different tx_dropped counter than what
> dev_core_stats_tx_dropped_inc, but they should get summed in the end.

It seems they are summed up in dev_get_tstats64(), therefore I should 
remove the tx_dropped_inc() call to avoid double counting.
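
Concretely, the tail of ovpn_udp_send_skb() would then reduce to roughly
this (just a sketch of the planned change, not the posted code):

	/* crypto layer -> transport (UDP) */
	ret = ovpn_udp_output(ovpn, bind, &peer->dst_cache, sock->sk, skb);

out_unlock:
	rcu_read_unlock();
out:
	/* success/drop statistics are already accounted by
	 * iptunnel_xmit_stats() down the udp_tunnel_xmit_skb() call chain
	 */
	if (unlikely(ret < 0))
		kfree_skb(skb);
}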

Thanks!

Cheers,

> 
>> +}
>
Sergey Ryazanov Nov. 10, 2024, 10:32 p.m. UTC | #3
On 29.10.2024 12:47, Antonio Quartulli wrote:
> Packets sent over the ovpn interface are processed and transmitted to the
> connected peer, if any.
> 
> Implementation is UDP only. TCP will be added by a later patch.
> 
> Note: no crypto/encapsulation exists yet. packets are just captured and
> sent.
> 
> Signed-off-by: Antonio Quartulli <antonio@openvpn.net>
> ---
>   drivers/net/ovpn/io.c   | 138 +++++++++++++++++++++++++++-
>   drivers/net/ovpn/peer.c |  37 +++++++-
>   drivers/net/ovpn/peer.h |   4 +
>   drivers/net/ovpn/skb.h  |  51 +++++++++++
>   drivers/net/ovpn/udp.c  | 232 ++++++++++++++++++++++++++++++++++++++++++++++++
>   drivers/net/ovpn/udp.h  |   8 ++
>   6 files changed, 468 insertions(+), 2 deletions(-)
> 
> diff --git a/drivers/net/ovpn/io.c b/drivers/net/ovpn/io.c
> index ad3813419c33cbdfe7e8ad6f5c8b444a3540a69f..77ba4d33ae0bd2f52e8bd1c06a182d24285297b4 100644
> --- a/drivers/net/ovpn/io.c
> +++ b/drivers/net/ovpn/io.c
> @@ -9,14 +9,150 @@
>   
>   #include <linux/netdevice.h>
>   #include <linux/skbuff.h>
> +#include <net/gso.h>
>   
>   #include "io.h"
> +#include "ovpnstruct.h"
> +#include "peer.h"
> +#include "udp.h"
> +#include "skb.h"
> +#include "socket.h"
> +
> +static void ovpn_encrypt_post(struct sk_buff *skb, int ret)
> +{
> +	struct ovpn_peer *peer = ovpn_skb_cb(skb)->peer;
> +
> +	if (unlikely(ret < 0))
> +		goto err;
> +
> +	skb_mark_not_on_list(skb);
> +
> +	switch (peer->sock->sock->sk->sk_protocol) {
> +	case IPPROTO_UDP:
> +		ovpn_udp_send_skb(peer->ovpn, peer, skb);
> +		break;
> +	default:
> +		/* no transport configured yet */
> +		goto err;
> +	}

Did you consider calling the protocol-specific sending function indirectly? 
E.g.:

         peer->sock->send(peer, skb);
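
For illustration only (the hook and its signature are made up here, not
part of the posted series):

	struct ovpn_socket {
		struct socket *sock;
		/* hypothetical per-transport TX hook, assigned when the
		 * socket is attached, so callers don't switch on sk_protocol
		 */
		void (*send)(struct ovpn_peer *peer, struct sk_buff *skb);
		/* ... existing members ... */
	};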

> +	/* skb passed down the stack - don't free it */
> +	skb = NULL;
> +err:
> +	if (unlikely(skb))
> +		dev_core_stats_tx_dropped_inc(peer->ovpn->dev);
> +	ovpn_peer_put(peer);
> +	kfree_skb(skb);
> +}
> +
> +static bool ovpn_encrypt_one(struct ovpn_peer *peer, struct sk_buff *skb)
> +{
> +	ovpn_skb_cb(skb)->peer = peer;
> +
> +	/* take a reference to the peer because the crypto code may run async.
> +	 * ovpn_encrypt_post() will release it upon completion
> +	 */
> +	if (unlikely(!ovpn_peer_hold(peer))) {
> +		DEBUG_NET_WARN_ON_ONCE(1);
> +		return false;
> +	}
> +
> +	ovpn_encrypt_post(skb, 0);
> +	return true;
> +}
> +
> +/* send skb to connected peer, if any */
> +static void ovpn_send(struct ovpn_struct *ovpn, struct sk_buff *skb,
> +		      struct ovpn_peer *peer)
> +{
> +	struct sk_buff *curr, *next;
> +
> +	if (likely(!peer))
> +		/* retrieve peer serving the destination IP of this packet */
> +		peer = ovpn_peer_get_by_dst(ovpn, skb);
> +	if (unlikely(!peer)) {
> +		net_dbg_ratelimited("%s: no peer to send data to\n",
> +				    ovpn->dev->name);
> +		dev_core_stats_tx_dropped_inc(ovpn->dev);
> +		goto drop;
> +	}

The function is called only from ovpn_xmit_special() and from 
ovpn_net_xmit(). The keepalive always provides a peer object, while 
ovpn_net_xmit() never does. If we move the peer lookup call into 
ovpn_net_xmit() then we can eliminate all the above peer checks.

> +
> +	/* this might be a GSO-segmented skb list: process each skb
> +	 * independently
> +	 */
> +	skb_list_walk_safe(skb, curr, next)
> +		if (unlikely(!ovpn_encrypt_one(peer, curr))) {
> +			dev_core_stats_tx_dropped_inc(ovpn->dev);
> +			kfree_skb(curr);
> +		}
> +
> +	/* skb passed over, no need to free */
> +	skb = NULL;
> +drop:
> +	if (likely(peer))
> +		ovpn_peer_put(peer);
> +	kfree_skb_list(skb);
> +}
>   
>   /* Send user data to the network
>    */
>   netdev_tx_t ovpn_net_xmit(struct sk_buff *skb, struct net_device *dev)
>   {
> +	struct ovpn_struct *ovpn = netdev_priv(dev);
> +	struct sk_buff *segments, *curr, *next;
> +	struct sk_buff_head skb_list;
> +	__be16 proto;
> +	int ret;
> +
> +	/* reset netfilter state */
> +	nf_reset_ct(skb);
> +
> +	/* verify IP header size in network packet */
> +	proto = ovpn_ip_check_protocol(skb);
> +	if (unlikely(!proto || skb->protocol != proto)) {
> +		net_err_ratelimited("%s: dropping malformed payload packet\n",
> +				    dev->name);
> +		dev_core_stats_tx_dropped_inc(ovpn->dev);
> +		goto drop;
> +	}
> +
> +	if (skb_is_gso(skb)) {
> +		segments = skb_gso_segment(skb, 0);
> +		if (IS_ERR(segments)) {
> +			ret = PTR_ERR(segments);
> +			net_err_ratelimited("%s: cannot segment packet: %d\n",
> +					    dev->name, ret);
> +			dev_core_stats_tx_dropped_inc(ovpn->dev);
> +			goto drop;
> +		}
> +
> +		consume_skb(skb);
> +		skb = segments;
> +	}
> +
> +	/* from this moment on, "skb" might be a list */
> +
> +	__skb_queue_head_init(&skb_list);
> +	skb_list_walk_safe(skb, curr, next) {
> +		skb_mark_not_on_list(curr);
> +
> +		curr = skb_share_check(curr, GFP_ATOMIC);
> +		if (unlikely(!curr)) {
> +			net_err_ratelimited("%s: skb_share_check failed\n",
> +					    dev->name);
> +			dev_core_stats_tx_dropped_inc(ovpn->dev);
> +			continue;
> +		}
> +
> +		__skb_queue_tail(&skb_list, curr);
> +	}
> +	skb_list.prev->next = NULL;
> +

I believe the peer lookup should be done here to call ovpn_send() with 
a proper peer object and simplify it.

> +	ovpn_send(ovpn, skb_list.next, NULL);
> +
> +	return NETDEV_TX_OK;
> +
> +drop:
>   	skb_tx_error(skb);
> -	kfree_skb(skb);
> +	kfree_skb_list(skb);
>   	return NET_XMIT_DROP;
>   }
> diff --git a/drivers/net/ovpn/peer.c b/drivers/net/ovpn/peer.c
> index d9788a0cc99b5839c466c35d1b2266cc6b95fb72..aff3e9e99b7d2dd2fa68484d9a396d43f75a6d0b 100644
> --- a/drivers/net/ovpn/peer.c
> +++ b/drivers/net/ovpn/peer.c
> @@ -16,6 +16,7 @@
>   #include "main.h"
>   #include "netlink.h"
>   #include "peer.h"
> +#include "socket.h"
>   
>   /**
>    * ovpn_peer_new - allocate and initialize a new peer object
> @@ -64,8 +65,10 @@ struct ovpn_peer *ovpn_peer_new(struct ovpn_struct *ovpn, u32 id)
>    */
>   static void ovpn_peer_release(struct ovpn_peer *peer)
>   {
> -	ovpn_bind_reset(peer, NULL);
> +	if (peer->sock)
> +		ovpn_socket_put(peer->sock);
>   
> +	ovpn_bind_reset(peer, NULL);
>   	dst_cache_destroy(&peer->dst_cache);
>   	netdev_put(peer->ovpn->dev, &peer->ovpn->dev_tracker);
>   	kfree_rcu(peer, rcu);
> @@ -243,6 +246,38 @@ struct ovpn_peer *ovpn_peer_get_by_id(struct ovpn_struct *ovpn, u32 peer_id)
>   	return peer;
>   }
>   
> +/**
> + * ovpn_peer_get_by_dst - Lookup peer to send skb to
> + * @ovpn: the private data representing the current VPN session
> + * @skb: the skb to extract the destination address from
> + *
> + * This function takes a tunnel packet and looks up the peer to send it to
> + * after encapsulation. The skb is expected to be the in-tunnel packet, without
> + * any OpenVPN related header.
> + *
> + * Assume that the IP header is accessible in the skb data.
> + *
> + * Return: the peer if found or NULL otherwise.
> + */
> +struct ovpn_peer *ovpn_peer_get_by_dst(struct ovpn_struct *ovpn,
> +				       struct sk_buff *skb)
> +{
> +	struct ovpn_peer *peer = NULL;
> +
> +	/* in P2P mode, no matter the destination, packets are always sent to
> +	 * the single peer listening on the other side
> +	 */
> +	if (ovpn->mode == OVPN_MODE_P2P) {
> +		rcu_read_lock();
> +		peer = rcu_dereference(ovpn->peer);
> +		if (unlikely(peer && !ovpn_peer_hold(peer)))
> +			peer = NULL;
> +		rcu_read_unlock();
> +	}
> +
> +	return peer;
> +}
> +
>   /**
>    * ovpn_peer_add_p2p - add peer to related tables in a P2P instance
>    * @ovpn: the instance to add the peer to
> diff --git a/drivers/net/ovpn/peer.h b/drivers/net/ovpn/peer.h
> index 6e0c6b14559de886d0677117f5a7ae029214e1f8..51955aa39f1aa85ce541e289c60e9635cadb9c48 100644
> --- a/drivers/net/ovpn/peer.h
> +++ b/drivers/net/ovpn/peer.h
> @@ -19,6 +19,7 @@
>    * @vpn_addrs: IP addresses assigned over the tunnel
>    * @vpn_addrs.ipv4: IPv4 assigned to peer on the tunnel
>    * @vpn_addrs.ipv6: IPv6 assigned to peer on the tunnel
> + * @sock: the socket being used to talk to this peer
>    * @dst_cache: cache for dst_entry used to send to peer
>    * @bind: remote peer binding
>    * @halt: true if ovpn_peer_mark_delete was called
> @@ -35,6 +36,7 @@ struct ovpn_peer {
>   		struct in_addr ipv4;
>   		struct in6_addr ipv6;
>   	} vpn_addrs;
> +	struct ovpn_socket *sock;
>   	struct dst_cache dst_cache;
>   	struct ovpn_bind __rcu *bind;
>   	bool halt;
> @@ -75,5 +77,7 @@ void ovpn_peer_release_p2p(struct ovpn_struct *ovpn);
>   struct ovpn_peer *ovpn_peer_get_by_transp_addr(struct ovpn_struct *ovpn,
>   					       struct sk_buff *skb);
>   struct ovpn_peer *ovpn_peer_get_by_id(struct ovpn_struct *ovpn, u32 peer_id);
> +struct ovpn_peer *ovpn_peer_get_by_dst(struct ovpn_struct *ovpn,
> +				       struct sk_buff *skb);
>   
>   #endif /* _NET_OVPN_OVPNPEER_H_ */
> diff --git a/drivers/net/ovpn/skb.h b/drivers/net/ovpn/skb.h
> new file mode 100644
> index 0000000000000000000000000000000000000000..e070fe6f448c0b7a9631394ebef4554f6348ef44
> --- /dev/null
> +++ b/drivers/net/ovpn/skb.h
> @@ -0,0 +1,51 @@
> +/* SPDX-License-Identifier: GPL-2.0-only */
> +/*  OpenVPN data channel offload
> + *
> + *  Copyright (C) 2020-2024 OpenVPN, Inc.
> + *
> + *  Author:	Antonio Quartulli <antonio@openvpn.net>
> + *		James Yonan <james@openvpn.net>
> + */
> +
> +#ifndef _NET_OVPN_SKB_H_
> +#define _NET_OVPN_SKB_H_
> +
> +#include <linux/in.h>
> +#include <linux/in6.h>
> +#include <linux/ip.h>
> +#include <linux/skbuff.h>
> +#include <linux/socket.h>
> +#include <linux/types.h>
> +
> +struct ovpn_cb {
> +	struct ovpn_peer *peer;
> +};
> +
> +static inline struct ovpn_cb *ovpn_skb_cb(struct sk_buff *skb)
> +{
> +	BUILD_BUG_ON(sizeof(struct ovpn_cb) > sizeof(skb->cb));
> +	return (struct ovpn_cb *)skb->cb;
> +}
> +
> +/* Return IP protocol version from skb header.
> + * Return 0 if protocol is not IPv4/IPv6 or cannot be read.
> + */
> +static inline __be16 ovpn_ip_check_protocol(struct sk_buff *skb)
> +{
> +	__be16 proto = 0;
> +
> +	/* skb could be non-linear,
> +	 * make sure IP header is in non-fragmented part
> +	 */
> +	if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
> +		return 0;
> +
> +	if (ip_hdr(skb)->version == 4)
> +		proto = htons(ETH_P_IP);
> +	else if (ip_hdr(skb)->version == 6)
> +		proto = htons(ETH_P_IPV6);
> +
> +	return proto;
> +}
> +
> +#endif /* _NET_OVPN_SKB_H_ */
> diff --git a/drivers/net/ovpn/udp.c b/drivers/net/ovpn/udp.c
> index c10474d252e19a0626d17a6f5dd328a5e5811551..d26d7566e9c8dfe91fa77f49c34fb179a9fb2239 100644
> --- a/drivers/net/ovpn/udp.c
> +++ b/drivers/net/ovpn/udp.c
> @@ -7,14 +7,246 @@
>    */
>   
>   #include <linux/netdevice.h>
> +#include <linux/inetdevice.h>
>   #include <linux/socket.h>
> +#include <net/addrconf.h>
> +#include <net/dst_cache.h>
> +#include <net/route.h>
> +#include <net/ipv6_stubs.h>
>   #include <net/udp.h>
> +#include <net/udp_tunnel.h>
>   
>   #include "ovpnstruct.h"
>   #include "main.h"
> +#include "bind.h"
> +#include "io.h"
> +#include "peer.h"
>   #include "socket.h"
>   #include "udp.h"
>   
> +/**
> + * ovpn_udp4_output - send IPv4 packet over udp socket
> + * @ovpn: the openvpn instance
> + * @bind: the binding related to the destination peer
> + * @cache: dst cache
> + * @sk: the socket to send the packet over
> + * @skb: the packet to send
> + *
> + * Return: 0 on success or a negative error code otherwise
> + */
> +static int ovpn_udp4_output(struct ovpn_struct *ovpn, struct ovpn_bind *bind,
> +			    struct dst_cache *cache, struct sock *sk,
> +			    struct sk_buff *skb)
> +{
> +	struct rtable *rt;
> +	struct flowi4 fl = {
> +		.saddr = bind->local.ipv4.s_addr,
> +		.daddr = bind->remote.in4.sin_addr.s_addr,
> +		.fl4_sport = inet_sk(sk)->inet_sport,
> +		.fl4_dport = bind->remote.in4.sin_port,
> +		.flowi4_proto = sk->sk_protocol,
> +		.flowi4_mark = sk->sk_mark,
> +	};
> +	int ret;
> +
> +	local_bh_disable();
> +	rt = dst_cache_get_ip4(cache, &fl.saddr);
> +	if (rt)
> +		goto transmit;
> +
> +	if (unlikely(!inet_confirm_addr(sock_net(sk), NULL, 0, fl.saddr,
> +					RT_SCOPE_HOST))) {
> +		/* we may end up here when the cached address is not usable
> +		 * anymore. In this case we reset address/cache and perform a
> +		 * new look up
> +		 */
> +		fl.saddr = 0;
> +		bind->local.ipv4.s_addr = 0;
> +		dst_cache_reset(cache);
> +	}
> +
> +	rt = ip_route_output_flow(sock_net(sk), &fl, sk);
> +	if (IS_ERR(rt) && PTR_ERR(rt) == -EINVAL) {
> +		fl.saddr = 0;
> +		bind->local.ipv4.s_addr = 0;
> +		dst_cache_reset(cache);
> +
> +		rt = ip_route_output_flow(sock_net(sk), &fl, sk);
> +	}
> +
> +	if (IS_ERR(rt)) {
> +		ret = PTR_ERR(rt);
> +		net_dbg_ratelimited("%s: no route to host %pISpc: %d\n",
> +				    ovpn->dev->name, &bind->remote.in4, ret);
> +		goto err;
> +	}
> +	dst_cache_set_ip4(cache, &rt->dst, fl.saddr);
> +
> +transmit:
> +	udp_tunnel_xmit_skb(rt, sk, skb, fl.saddr, fl.daddr, 0,
> +			    ip4_dst_hoplimit(&rt->dst), 0, fl.fl4_sport,
> +			    fl.fl4_dport, false, sk->sk_no_check_tx);
> +	ret = 0;
> +err:
> +	local_bh_enable();
> +	return ret;
> +}
> +
> +#if IS_ENABLED(CONFIG_IPV6)
> +/**
> + * ovpn_udp6_output - send IPv6 packet over udp socket
> + * @ovpn: the openvpn instance
> + * @bind: the binding related to the destination peer
> + * @cache: dst cache
> + * @sk: the socket to send the packet over
> + * @skb: the packet to send
> + *
> + * Return: 0 on success or a negative error code otherwise
> + */
> +static int ovpn_udp6_output(struct ovpn_struct *ovpn, struct ovpn_bind *bind,
> +			    struct dst_cache *cache, struct sock *sk,
> +			    struct sk_buff *skb)
> +{
> +	struct dst_entry *dst;
> +	int ret;
> +
> +	struct flowi6 fl = {
> +		.saddr = bind->local.ipv6,
> +		.daddr = bind->remote.in6.sin6_addr,
> +		.fl6_sport = inet_sk(sk)->inet_sport,
> +		.fl6_dport = bind->remote.in6.sin6_port,
> +		.flowi6_proto = sk->sk_protocol,
> +		.flowi6_mark = sk->sk_mark,
> +		.flowi6_oif = bind->remote.in6.sin6_scope_id,
> +	};
> +
> +	local_bh_disable();
> +	dst = dst_cache_get_ip6(cache, &fl.saddr);
> +	if (dst)
> +		goto transmit;
> +
> +	if (unlikely(!ipv6_chk_addr(sock_net(sk), &fl.saddr, NULL, 0))) {
> +		/* we may end up here when the cached address is not usable
> +		 * anymore. In this case we reset address/cache and perform a
> +		 * new look up
> +		 */
> +		fl.saddr = in6addr_any;
> +		bind->local.ipv6 = in6addr_any;
> +		dst_cache_reset(cache);
> +	}
> +
> +	dst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(sk), sk, &fl, NULL);
> +	if (IS_ERR(dst)) {
> +		ret = PTR_ERR(dst);
> +		net_dbg_ratelimited("%s: no route to host %pISpc: %d\n",
> +				    ovpn->dev->name, &bind->remote.in6, ret);
> +		goto err;
> +	}
> +	dst_cache_set_ip6(cache, dst, &fl.saddr);
> +
> +transmit:
> +	udp_tunnel6_xmit_skb(dst, sk, skb, skb->dev, &fl.saddr, &fl.daddr, 0,
> +			     ip6_dst_hoplimit(dst), 0, fl.fl6_sport,
> +			     fl.fl6_dport, udp_get_no_check6_tx(sk));
> +	ret = 0;
> +err:
> +	local_bh_enable();
> +	return ret;
> +}
> +#endif
> +
> +/**
> + * ovpn_udp_output - transmit skb using udp-tunnel
> + * @ovpn: the openvpn instance
> + * @bind: the binding related to the destination peer
> + * @cache: dst cache
> + * @sk: the socket to send the packet over
> + * @skb: the packet to send
> + *
> + * rcu_read_lock should be held on entry.
> + * On return, the skb is consumed.
> + *
> + * Return: 0 on success or a negative error code otherwise
> + */
> +static int ovpn_udp_output(struct ovpn_struct *ovpn, struct ovpn_bind *bind,
> +			   struct dst_cache *cache, struct sock *sk,
> +			   struct sk_buff *skb)
> +{
> +	int ret;
> +
> +	/* set sk to null if skb is already orphaned */
> +	if (!skb->destructor)
> +		skb->sk = NULL;
> +
> +	/* always permit openvpn-created packets to be (outside) fragmented */
> +	skb->ignore_df = 1;
> +
> +	switch (bind->remote.in4.sin_family) {
> +	case AF_INET:
> +		ret = ovpn_udp4_output(ovpn, bind, cache, sk, skb);
> +		break;
> +#if IS_ENABLED(CONFIG_IPV6)
> +	case AF_INET6:
> +		ret = ovpn_udp6_output(ovpn, bind, cache, sk, skb);
> +		break;
> +#endif
> +	default:
> +		ret = -EAFNOSUPPORT;
> +		break;
> +	}
> +
> +	return ret;
> +}
> +
> +/**
> + * ovpn_udp_send_skb - prepare skb and send it over via UDP
> + * @ovpn: the openvpn instance
> + * @peer: the destination peer
> + * @skb: the packet to send
> + */
> +void ovpn_udp_send_skb(struct ovpn_struct *ovpn, struct ovpn_peer *peer,
> +		       struct sk_buff *skb)
> +{
> +	struct ovpn_bind *bind;
> +	unsigned int pkt_len;
> +	struct socket *sock;
> +	int ret = -1;
> +
> +	skb->dev = ovpn->dev;
> +	/* no checksum performed at this layer */
> +	skb->ip_summed = CHECKSUM_NONE;
> +
> +	/* get socket info */
> +	sock = peer->sock->sock;
> +	if (unlikely(!sock)) {
> +		net_warn_ratelimited("%s: no sock for remote peer\n", __func__);

If we do not have netdev_{err,warn,etc}_ratelimited() helper functions, 
can we at least emulate them like this:

net_warn_ratelimited("%s: no UDP sock for remote peer #%u\n",
                      netdev_name(ovpn->dev), peer->id);

or just use netdev_warn_once(...) since the condition looks more 
speculative than expected.

Peer id and interface name are more informative than just a function name.

> +		goto out;
> +	}
> +
> +	rcu_read_lock();
> +	/* get binding */
> +	bind = rcu_dereference(peer->bind);
> +	if (unlikely(!bind)) {
> +		net_warn_ratelimited("%s: no bind for remote peer\n", __func__);

Ditto

> +		goto out_unlock;
> +	}
> +
> +	/* crypto layer -> transport (UDP) */
> +	pkt_len = skb->len;
> +	ret = ovpn_udp_output(ovpn, bind, &peer->dst_cache, sock->sk, skb);
> +
> +out_unlock:
> +	rcu_read_unlock();
> +out:
> +	if (unlikely(ret < 0)) {
> +		dev_core_stats_tx_dropped_inc(ovpn->dev);
> +		kfree_skb(skb);
> +		return;
> +	}
> +
> +	dev_sw_netstats_tx_add(ovpn->dev, 1, pkt_len);
> +}
> +
>   /**
>    * ovpn_udp_socket_attach - set udp-tunnel CBs on socket and link it to ovpn
>    * @sock: socket to configure
> diff --git a/drivers/net/ovpn/udp.h b/drivers/net/ovpn/udp.h
> index f2507f8f2c71ea9d5e5ac5446801e2d56f86700f..e60f8cd2b4ac8f910aabcf8ed546af59d6ca4be4 100644
> --- a/drivers/net/ovpn/udp.h
> +++ b/drivers/net/ovpn/udp.h
> @@ -9,9 +9,17 @@
>   #ifndef _NET_OVPN_UDP_H_
>   #define _NET_OVPN_UDP_H_
>   
> +#include <linux/skbuff.h>
> +#include <net/sock.h>
> +
> +struct ovpn_peer;
>   struct ovpn_struct;
> +struct sk_buff;

This declaration looks odd since we already have skbuff.h included above.

>   struct socket;
>   
>   int ovpn_udp_socket_attach(struct socket *sock, struct ovpn_struct *ovpn);
>   
> +void ovpn_udp_send_skb(struct ovpn_struct *ovpn, struct ovpn_peer *peer,
> +		       struct sk_buff *skb);
> +
>   #endif /* _NET_OVPN_UDP_H_ */
>
Sergey Ryazanov Nov. 10, 2024, 11:54 p.m. UTC | #4
One more forgotten question, sorry about this. Please find the 
question inlined.

On 29.10.2024 12:47, Antonio Quartulli wrote:
>   /* Send user data to the network
>    */
>   netdev_tx_t ovpn_net_xmit(struct sk_buff *skb, struct net_device *dev)
>   {
> +	struct ovpn_struct *ovpn = netdev_priv(dev);
> +	struct sk_buff *segments, *curr, *next;
> +	struct sk_buff_head skb_list;
> +	__be16 proto;
> +	int ret;
> +
> +	/* reset netfilter state */
> +	nf_reset_ct(skb);
> +
> +	/* verify IP header size in network packet */
> +	proto = ovpn_ip_check_protocol(skb);
> +	if (unlikely(!proto || skb->protocol != proto)) {
> +		net_err_ratelimited("%s: dropping malformed payload packet\n",
> +				    dev->name);
> +		dev_core_stats_tx_dropped_inc(ovpn->dev);
> +		goto drop;
> +	}

The above check implies that the kernel can feed a network device with an 
skb->protocol value that mismatches the actual skb content. Can you share 
an example of such a case?

If you just want to be sure that the user packet is either IPv4 or IPv6, 
then it can be done like this, without error messages:

/* Support only IPv4 or IPv6 traffic */
if (unlikely(skb->protocol != htons(ETH_P_IP) &&
	     skb->protocol != htons(ETH_P_IPV6)))
     goto drop;

> +
> +	if (skb_is_gso(skb)) {
> +		segments = skb_gso_segment(skb, 0);
> +		if (IS_ERR(segments)) {
> +			ret = PTR_ERR(segments);
> +			net_err_ratelimited("%s: cannot segment packet: %d\n",
> +					    dev->name, ret);
> +			dev_core_stats_tx_dropped_inc(ovpn->dev);
> +			goto drop;
> +		}
> +
> +		consume_skb(skb);
> +		skb = segments;
> +	}
> +
> +	/* from this moment on, "skb" might be a list */
> +
> +	__skb_queue_head_init(&skb_list);
> +	skb_list_walk_safe(skb, curr, next) {
> +		skb_mark_not_on_list(curr);
> +
> +		curr = skb_share_check(curr, GFP_ATOMIC);
> +		if (unlikely(!curr)) {
> +			net_err_ratelimited("%s: skb_share_check failed\n",
> +					    dev->name);
> +			dev_core_stats_tx_dropped_inc(ovpn->dev);
> +			continue;
> +		}
> +
> +		__skb_queue_tail(&skb_list, curr);
> +	}
> +	skb_list.prev->next = NULL;
> +
> +	ovpn_send(ovpn, skb_list.next, NULL);
> +
> +	return NETDEV_TX_OK;
> +
> +drop:
>   	skb_tx_error(skb);
> -	kfree_skb(skb);
> +	kfree_skb_list(skb);
>   	return NET_XMIT_DROP;
>   }

--
Sergey
Sabrina Dubroca Nov. 12, 2024, 5:28 p.m. UTC | #5
2024-11-11, 00:32:51 +0200, Sergey Ryazanov wrote:
> On 29.10.2024 12:47, Antonio Quartulli wrote:
> > +static void ovpn_encrypt_post(struct sk_buff *skb, int ret)
> > +{
> > +	struct ovpn_peer *peer = ovpn_skb_cb(skb)->peer;
> > +
> > +	if (unlikely(ret < 0))
> > +		goto err;
> > +
> > +	skb_mark_not_on_list(skb);
> > +
> > +	switch (peer->sock->sock->sk->sk_protocol) {
> > +	case IPPROTO_UDP:
> > +		ovpn_udp_send_skb(peer->ovpn, peer, skb);
> > +		break;
> > +	default:
> > +		/* no transport configured yet */
> > +		goto err;
> > +	}
> 
> Did you consider calling protocol specific sending function indirectly?
> E.g.:
> 
>         peer->sock->send(peer, skb);

In a case where
 - only 2 implementations exist
 - no other implementation is likely to be added in the future
 - both implementations are part of the same module

I don't think indirect calls are beneficial (especially after the
meltdown/etc mitigations, see for example 4f24ed77dec9 ("udp: use
indirect call wrappers for GRO socket lookup"), 0e219ae48c3b ("net:
use indirect calls helpers for L3 handler hooks"), and many other
similar patches).
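
(When a callback really is unavoidable, the usual mitigation is the
indirect-call wrapper; a purely illustrative sketch, reusing the
hypothetical peer->sock->send() hook mentioned earlier -- neither that hook
nor the UDP/TCP helpers named below exist in this series:)

	#include <linux/indirect_call_wrapper.h>

	/* compare against the expected targets first and call them
	 * directly, falling back to the indirect call otherwise
	 */
	INDIRECT_CALL_2(peer->sock->send, ovpn_udp_send, ovpn_tcp_send,
			peer, skb);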


[...]
> > +	ovpn_send(ovpn, skb_list.next, NULL);
> > +
> > +	return NETDEV_TX_OK;
> > +
> > +drop:
> >   	skb_tx_error(skb);
> > -	kfree_skb(skb);
> > +	kfree_skb_list(skb);
> >   	return NET_XMIT_DROP;
> >   }
> > diff --git a/drivers/net/ovpn/peer.c b/drivers/net/ovpn/peer.c
> > index d9788a0cc99b5839c466c35d1b2266cc6b95fb72..aff3e9e99b7d2dd2fa68484d9a396d43f75a6d0b 100644
> > --- a/drivers/net/ovpn/peer.c
> > +++ b/drivers/net/ovpn/peer.c

[very long chunk of Antonio's patch quoted without comments]

Please trim your replies to only the necessary context.
Antonio Quartulli Nov. 14, 2024, 3:25 p.m. UTC | #6
On 10/11/2024 23:32, Sergey Ryazanov wrote:
[...]
>> +/* send skb to connected peer, if any */
>> +static void ovpn_send(struct ovpn_struct *ovpn, struct sk_buff *skb,
>> +              struct ovpn_peer *peer)
>> +{
>> +    struct sk_buff *curr, *next;
>> +
>> +    if (likely(!peer))
>> +        /* retrieve peer serving the destination IP of this packet */
>> +        peer = ovpn_peer_get_by_dst(ovpn, skb);
>> +    if (unlikely(!peer)) {
>> +        net_dbg_ratelimited("%s: no peer to send data to\n",
>> +                    ovpn->dev->name);
>> +        dev_core_stats_tx_dropped_inc(ovpn->dev);
>> +        goto drop;
>> +    }
> 
> The function is called only from ovpn_xmit_special() and from 
> ovpn_net_xmit(). The keepalive always provides a peer object, while 
> ovpn_net_xmit() never does. If we move the peer lookup call into 
> ovpn_net_xmit() then we can eliminate all the above peer checks.

yeah, I think that's a good idea! See below..

> 
>> +
>> +    /* this might be a GSO-segmented skb list: process each skb
>> +     * independently
>> +     */
>> +    skb_list_walk_safe(skb, curr, next)
>> +        if (unlikely(!ovpn_encrypt_one(peer, curr))) {
>> +            dev_core_stats_tx_dropped_inc(ovpn->dev);
>> +            kfree_skb(curr);
>> +        }
>> +
>> +    /* skb passed over, no need to free */
>> +    skb = NULL;
>> +drop:
>> +    if (likely(peer))
>> +        ovpn_peer_put(peer);
>> +    kfree_skb_list(skb);
>> +}

..because this error path disappears as well.

And I can move the stats increment to ovpn_net_xmit() in order to avoid 
counting keepalive packets as vpn data.
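
Roughly like this (an illustrative sketch only; the error paths and the
peer reference handling would need care):

	/* hypothetical: look the peer up in ovpn_net_xmit() and pass it
	 * down, so ovpn_send() no longer needs the NULL-peer fallback
	 */
	peer = ovpn_peer_get_by_dst(ovpn, skb);
	if (unlikely(!peer)) {
		net_dbg_ratelimited("%s: no peer to send data to\n",
				    dev->name);
		dev_core_stats_tx_dropped_inc(ovpn->dev);
		goto drop;
	}

	/* ... GSO segmentation and skb_share_check() loop as before ... */

	ovpn_send(ovpn, skb_list.next, peer);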

>>   /* Send user data to the network
>>    */
>>   netdev_tx_t ovpn_net_xmit(struct sk_buff *skb, struct net_device *dev)
>>   {
>> +    struct ovpn_struct *ovpn = netdev_priv(dev);
>> +    struct sk_buff *segments, *curr, *next;
>> +    struct sk_buff_head skb_list;
>> +    __be16 proto;
>> +    int ret;
>> +
>> +    /* reset netfilter state */
>> +    nf_reset_ct(skb);
>> +
>> +    /* verify IP header size in network packet */
>> +    proto = ovpn_ip_check_protocol(skb);
>> +    if (unlikely(!proto || skb->protocol != proto)) {
>> +        net_err_ratelimited("%s: dropping malformed payload packet\n",
>> +                    dev->name);
>> +        dev_core_stats_tx_dropped_inc(ovpn->dev);
>> +        goto drop;
>> +    }
>> +
>> +    if (skb_is_gso(skb)) {
>> +        segments = skb_gso_segment(skb, 0);
>> +        if (IS_ERR(segments)) {
>> +            ret = PTR_ERR(segments);
>> +            net_err_ratelimited("%s: cannot segment packet: %d\n",
>> +                        dev->name, ret);
>> +            dev_core_stats_tx_dropped_inc(ovpn->dev);
>> +            goto drop;
>> +        }
>> +
>> +        consume_skb(skb);
>> +        skb = segments;
>> +    }
>> +
>> +    /* from this moment on, "skb" might be a list */
>> +
>> +    __skb_queue_head_init(&skb_list);
>> +    skb_list_walk_safe(skb, curr, next) {
>> +        skb_mark_not_on_list(curr);
>> +
>> +        curr = skb_share_check(curr, GFP_ATOMIC);
>> +        if (unlikely(!curr)) {
>> +            net_err_ratelimited("%s: skb_share_check failed\n",
>> +                        dev->name);
>> +            dev_core_stats_tx_dropped_inc(ovpn->dev);
>> +            continue;
>> +        }
>> +
>> +        __skb_queue_tail(&skb_list, curr);
>> +    }
>> +    skb_list.prev->next = NULL;
>> +
> 
> I believe the peer lookup should be done here to call ovpn_send() with 
> a proper peer object and simplify it.

ACK

> 
>> +    ovpn_send(ovpn, skb_list.next, NULL);
>> +
>> +    return NETDEV_TX_OK;
>> +
>> +drop:
>>       skb_tx_error(skb);
>> -    kfree_skb(skb);
>> +    kfree_skb_list(skb);
>>       return NET_XMIT_DROP;
>>   }

[...]

>> +/**
>> + * ovpn_udp_send_skb - prepare skb and send it over via UDP
>> + * @ovpn: the openvpn instance
>> + * @peer: the destination peer
>> + * @skb: the packet to send
>> + */
>> +void ovpn_udp_send_skb(struct ovpn_struct *ovpn, struct ovpn_peer *peer,
>> +               struct sk_buff *skb)
>> +{
>> +    struct ovpn_bind *bind;
>> +    unsigned int pkt_len;
>> +    struct socket *sock;
>> +    int ret = -1;
>> +
>> +    skb->dev = ovpn->dev;
>> +    /* no checksum performed at this layer */
>> +    skb->ip_summed = CHECKSUM_NONE;
>> +
>> +    /* get socket info */
>> +    sock = peer->sock->sock;
>> +    if (unlikely(!sock)) {
>> +        net_warn_ratelimited("%s: no sock for remote peer\n", __func__);
> 
> If we do not have netdev_{err,warn,etc}_ratelimited() helper functions, 
> can we at least emulate it like this:
> 
> net_warn_ratelimited("%s: no UDP sock for remote peer #%u\n",
>                       netdev_name(ovpn->dev), peer->id);

that's what I try to do, but some prints have escaped my axe.
Will fix that, thanks!

> 
> or just use netdev_warn_once(...) since the condition looks more 
> speculative than expected.
> 
> Peer id and interface name are more informative than just a function name.

Yeah, I use the function name in some debug messages, although it's not 
extremely useful.

Will make sure the iface name is always printed (there are similar 
occurrences like this)

> 
>> +        goto out;
>> +    }
>> +
>> +    rcu_read_lock();
>> +    /* get binding */
>> +    bind = rcu_dereference(peer->bind);
>> +    if (unlikely(!bind)) {
>> +        net_warn_ratelimited("%s: no bind for remote peer\n", __func__);
> 
> Ditto
> 
>> +        goto out_unlock;
>> +    }
>> +
>> +    /* crypto layer -> transport (UDP) */
>> +    pkt_len = skb->len;
>> +    ret = ovpn_udp_output(ovpn, bind, &peer->dst_cache, sock->sk, skb);
>> +
>> +out_unlock:
>> +    rcu_read_unlock();
>> +out:
>> +    if (unlikely(ret < 0)) {
>> +        dev_core_stats_tx_dropped_inc(ovpn->dev);
>> +        kfree_skb(skb);
>> +        return;
>> +    }
>> +
>> +    dev_sw_netstats_tx_add(ovpn->dev, 1, pkt_len);
>> +}
>> +
>>   /**
>>    * ovpn_udp_socket_attach - set udp-tunnel CBs on socket and link it 
>> to ovpn
>>    * @sock: socket to configure
>> diff --git a/drivers/net/ovpn/udp.h b/drivers/net/ovpn/udp.h
>> index 
>> f2507f8f2c71ea9d5e5ac5446801e2d56f86700f..e60f8cd2b4ac8f910aabcf8ed546af59d6ca4be4 100644
>> --- a/drivers/net/ovpn/udp.h
>> +++ b/drivers/net/ovpn/udp.h
>> @@ -9,9 +9,17 @@
>>   #ifndef _NET_OVPN_UDP_H_
>>   #define _NET_OVPN_UDP_H_
>> +#include <linux/skbuff.h>
>> +#include <net/sock.h>
>> +
>> +struct ovpn_peer;
>>   struct ovpn_struct;
>> +struct sk_buff;
> 
> This declaration looks odd since we already have skbuff.h included above.

I believe originally there was no include and then I needed to add that.
Will double check.
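
I.e. the top of udp.h would presumably end up as just (sketch):

	#include <linux/skbuff.h>
	#include <net/sock.h>

	struct ovpn_peer;
	struct ovpn_struct;
	struct socket;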

Thanks a lot!
Regards,
Antonio Quartulli Nov. 15, 2024, 2:39 p.m. UTC | #7
On 11/11/2024 00:54, Sergey Ryazanov wrote:
> Another one forgotten question, sorry about this. Please find the 
> question inlined.
> 
> On 29.10.2024 12:47, Antonio Quartulli wrote:
>>   /* Send user data to the network
>>    */
>>   netdev_tx_t ovpn_net_xmit(struct sk_buff *skb, struct net_device *dev)
>>   {
>> +    struct ovpn_struct *ovpn = netdev_priv(dev);
>> +    struct sk_buff *segments, *curr, *next;
>> +    struct sk_buff_head skb_list;
>> +    __be16 proto;
>> +    int ret;
>> +
>> +    /* reset netfilter state */
>> +    nf_reset_ct(skb);
>> +
>> +    /* verify IP header size in network packet */
>> +    proto = ovpn_ip_check_protocol(skb);
>> +    if (unlikely(!proto || skb->protocol != proto)) {
>> +        net_err_ratelimited("%s: dropping malformed payload packet\n",
>> +                    dev->name);
>> +        dev_core_stats_tx_dropped_inc(ovpn->dev);
>> +        goto drop;
>> +    }
> 
> The above check implies that the kernel can feed a network device with an
> skb->protocol value that mismatches the actual skb content. Can you share
> an example of such a case?
> 
> If you just want to be sure that the user packet is either IPv4 or IPv6 
> then it can be done like this and without error messages:
> 
> /* Support only IPv4 or IPv6 traffic */
> if (unlikely(skb->protocol != htons(ETH_P_IP) &&
>              skb->protocol != htons(ETH_P_IPV6)))
>      goto drop;

It looks good, but I will still increase the drop counter, because 
something entered the interface and we are trashing it.

Why not print a message? The interface is not Ethernet based, so I 
think we should not expect anything other than v4 or v6, no?
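
Something along these lines, perhaps (a sketch combining both points, not
posted code):

	/* only IPv4/IPv6 is expected on this L3 interface: log the
	 * anomaly (ratelimited) and account the drop
	 */
	if (unlikely(skb->protocol != htons(ETH_P_IP) &&
		     skb->protocol != htons(ETH_P_IPV6))) {
		net_err_ratelimited("%s: dropping non-IP packet\n",
				    dev->name);
		dev_core_stats_tx_dropped_inc(ovpn->dev);
		goto drop;
	}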

Thanks.

Regards,

Patch

diff --git a/drivers/net/ovpn/io.c b/drivers/net/ovpn/io.c
index ad3813419c33cbdfe7e8ad6f5c8b444a3540a69f..77ba4d33ae0bd2f52e8bd1c06a182d24285297b4 100644
--- a/drivers/net/ovpn/io.c
+++ b/drivers/net/ovpn/io.c
@@ -9,14 +9,150 @@ 
 
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
+#include <net/gso.h>
 
 #include "io.h"
+#include "ovpnstruct.h"
+#include "peer.h"
+#include "udp.h"
+#include "skb.h"
+#include "socket.h"
+
+static void ovpn_encrypt_post(struct sk_buff *skb, int ret)
+{
+	struct ovpn_peer *peer = ovpn_skb_cb(skb)->peer;
+
+	if (unlikely(ret < 0))
+		goto err;
+
+	skb_mark_not_on_list(skb);
+
+	switch (peer->sock->sock->sk->sk_protocol) {
+	case IPPROTO_UDP:
+		ovpn_udp_send_skb(peer->ovpn, peer, skb);
+		break;
+	default:
+		/* no transport configured yet */
+		goto err;
+	}
+	/* skb passed down the stack - don't free it */
+	skb = NULL;
+err:
+	if (unlikely(skb))
+		dev_core_stats_tx_dropped_inc(peer->ovpn->dev);
+	ovpn_peer_put(peer);
+	kfree_skb(skb);
+}
+
+static bool ovpn_encrypt_one(struct ovpn_peer *peer, struct sk_buff *skb)
+{
+	ovpn_skb_cb(skb)->peer = peer;
+
+	/* take a reference to the peer because the crypto code may run async.
+	 * ovpn_encrypt_post() will release it upon completion
+	 */
+	if (unlikely(!ovpn_peer_hold(peer))) {
+		DEBUG_NET_WARN_ON_ONCE(1);
+		return false;
+	}
+
+	ovpn_encrypt_post(skb, 0);
+	return true;
+}
+
+/* send skb to connected peer, if any */
+static void ovpn_send(struct ovpn_struct *ovpn, struct sk_buff *skb,
+		      struct ovpn_peer *peer)
+{
+	struct sk_buff *curr, *next;
+
+	if (likely(!peer))
+		/* retrieve peer serving the destination IP of this packet */
+		peer = ovpn_peer_get_by_dst(ovpn, skb);
+	if (unlikely(!peer)) {
+		net_dbg_ratelimited("%s: no peer to send data to\n",
+				    ovpn->dev->name);
+		dev_core_stats_tx_dropped_inc(ovpn->dev);
+		goto drop;
+	}
+
+	/* this might be a GSO-segmented skb list: process each skb
+	 * independently
+	 */
+	skb_list_walk_safe(skb, curr, next)
+		if (unlikely(!ovpn_encrypt_one(peer, curr))) {
+			dev_core_stats_tx_dropped_inc(ovpn->dev);
+			kfree_skb(curr);
+		}
+
+	/* skb passed over, no need to free */
+	skb = NULL;
+drop:
+	if (likely(peer))
+		ovpn_peer_put(peer);
+	kfree_skb_list(skb);
+}
 
 /* Send user data to the network
  */
 netdev_tx_t ovpn_net_xmit(struct sk_buff *skb, struct net_device *dev)
 {
+	struct ovpn_struct *ovpn = netdev_priv(dev);
+	struct sk_buff *segments, *curr, *next;
+	struct sk_buff_head skb_list;
+	__be16 proto;
+	int ret;
+
+	/* reset netfilter state */
+	nf_reset_ct(skb);
+
+	/* verify IP header size in network packet */
+	proto = ovpn_ip_check_protocol(skb);
+	if (unlikely(!proto || skb->protocol != proto)) {
+		net_err_ratelimited("%s: dropping malformed payload packet\n",
+				    dev->name);
+		dev_core_stats_tx_dropped_inc(ovpn->dev);
+		goto drop;
+	}
+
+	if (skb_is_gso(skb)) {
+		segments = skb_gso_segment(skb, 0);
+		if (IS_ERR(segments)) {
+			ret = PTR_ERR(segments);
+			net_err_ratelimited("%s: cannot segment packet: %d\n",
+					    dev->name, ret);
+			dev_core_stats_tx_dropped_inc(ovpn->dev);
+			goto drop;
+		}
+
+		consume_skb(skb);
+		skb = segments;
+	}
+
+	/* from this moment on, "skb" might be a list */
+
+	__skb_queue_head_init(&skb_list);
+	skb_list_walk_safe(skb, curr, next) {
+		skb_mark_not_on_list(curr);
+
+		curr = skb_share_check(curr, GFP_ATOMIC);
+		if (unlikely(!curr)) {
+			net_err_ratelimited("%s: skb_share_check failed\n",
+					    dev->name);
+			dev_core_stats_tx_dropped_inc(ovpn->dev);
+			continue;
+		}
+
+		__skb_queue_tail(&skb_list, curr);
+	}
+	skb_list.prev->next = NULL;
+
+	ovpn_send(ovpn, skb_list.next, NULL);
+
+	return NETDEV_TX_OK;
+
+drop:
 	skb_tx_error(skb);
-	kfree_skb(skb);
+	kfree_skb_list(skb);
 	return NET_XMIT_DROP;
 }
diff --git a/drivers/net/ovpn/peer.c b/drivers/net/ovpn/peer.c
index d9788a0cc99b5839c466c35d1b2266cc6b95fb72..aff3e9e99b7d2dd2fa68484d9a396d43f75a6d0b 100644
--- a/drivers/net/ovpn/peer.c
+++ b/drivers/net/ovpn/peer.c
@@ -16,6 +16,7 @@ 
 #include "main.h"
 #include "netlink.h"
 #include "peer.h"
+#include "socket.h"
 
 /**
  * ovpn_peer_new - allocate and initialize a new peer object
@@ -64,8 +65,10 @@  struct ovpn_peer *ovpn_peer_new(struct ovpn_struct *ovpn, u32 id)
  */
 static void ovpn_peer_release(struct ovpn_peer *peer)
 {
-	ovpn_bind_reset(peer, NULL);
+	if (peer->sock)
+		ovpn_socket_put(peer->sock);
 
+	ovpn_bind_reset(peer, NULL);
 	dst_cache_destroy(&peer->dst_cache);
 	netdev_put(peer->ovpn->dev, &peer->ovpn->dev_tracker);
 	kfree_rcu(peer, rcu);
@@ -243,6 +246,38 @@  struct ovpn_peer *ovpn_peer_get_by_id(struct ovpn_struct *ovpn, u32 peer_id)
 	return peer;
 }
 
+/**
+ * ovpn_peer_get_by_dst - Lookup peer to send skb to
+ * @ovpn: the private data representing the current VPN session
+ * @skb: the skb to extract the destination address from
+ *
+ * This function takes a tunnel packet and looks up the peer to send it to
+ * after encapsulation. The skb is expected to be the in-tunnel packet, without
+ * any OpenVPN related header.
+ *
+ * Assume that the IP header is accessible in the skb data.
+ *
+ * Return: the peer if found or NULL otherwise.
+ */
+struct ovpn_peer *ovpn_peer_get_by_dst(struct ovpn_struct *ovpn,
+				       struct sk_buff *skb)
+{
+	struct ovpn_peer *peer = NULL;
+
+	/* in P2P mode, no matter the destination, packets are always sent to
+	 * the single peer listening on the other side
+	 */
+	if (ovpn->mode == OVPN_MODE_P2P) {
+		rcu_read_lock();
+		peer = rcu_dereference(ovpn->peer);
+		if (unlikely(peer && !ovpn_peer_hold(peer)))
+			peer = NULL;
+		rcu_read_unlock();
+	}
+
+	return peer;
+}
+
 /**
  * ovpn_peer_add_p2p - add peer to related tables in a P2P instance
  * @ovpn: the instance to add the peer to
diff --git a/drivers/net/ovpn/peer.h b/drivers/net/ovpn/peer.h
index 6e0c6b14559de886d0677117f5a7ae029214e1f8..51955aa39f1aa85ce541e289c60e9635cadb9c48 100644
--- a/drivers/net/ovpn/peer.h
+++ b/drivers/net/ovpn/peer.h
@@ -19,6 +19,7 @@ 
  * @vpn_addrs: IP addresses assigned over the tunnel
  * @vpn_addrs.ipv4: IPv4 assigned to peer on the tunnel
  * @vpn_addrs.ipv6: IPv6 assigned to peer on the tunnel
+ * @sock: the socket being used to talk to this peer
  * @dst_cache: cache for dst_entry used to send to peer
  * @bind: remote peer binding
  * @halt: true if ovpn_peer_mark_delete was called
@@ -35,6 +36,7 @@  struct ovpn_peer {
 		struct in_addr ipv4;
 		struct in6_addr ipv6;
 	} vpn_addrs;
+	struct ovpn_socket *sock;
 	struct dst_cache dst_cache;
 	struct ovpn_bind __rcu *bind;
 	bool halt;
@@ -75,5 +77,7 @@  void ovpn_peer_release_p2p(struct ovpn_struct *ovpn);
 struct ovpn_peer *ovpn_peer_get_by_transp_addr(struct ovpn_struct *ovpn,
 					       struct sk_buff *skb);
 struct ovpn_peer *ovpn_peer_get_by_id(struct ovpn_struct *ovpn, u32 peer_id);
+struct ovpn_peer *ovpn_peer_get_by_dst(struct ovpn_struct *ovpn,
+				       struct sk_buff *skb);
 
 #endif /* _NET_OVPN_OVPNPEER_H_ */
diff --git a/drivers/net/ovpn/skb.h b/drivers/net/ovpn/skb.h
new file mode 100644
index 0000000000000000000000000000000000000000..e070fe6f448c0b7a9631394ebef4554f6348ef44
--- /dev/null
+++ b/drivers/net/ovpn/skb.h
@@ -0,0 +1,51 @@ 
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*  OpenVPN data channel offload
+ *
+ *  Copyright (C) 2020-2024 OpenVPN, Inc.
+ *
+ *  Author:	Antonio Quartulli <antonio@openvpn.net>
+ *		James Yonan <james@openvpn.net>
+ */
+
+#ifndef _NET_OVPN_SKB_H_
+#define _NET_OVPN_SKB_H_
+
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+#include <linux/skbuff.h>
+#include <linux/socket.h>
+#include <linux/types.h>
+
+struct ovpn_cb {
+	struct ovpn_peer *peer;
+};
+
+static inline struct ovpn_cb *ovpn_skb_cb(struct sk_buff *skb)
+{
+	BUILD_BUG_ON(sizeof(struct ovpn_cb) > sizeof(skb->cb));
+	return (struct ovpn_cb *)skb->cb;
+}
+
+/* Return IP protocol version from skb header.
+ * Return 0 if protocol is not IPv4/IPv6 or cannot be read.
+ */
+static inline __be16 ovpn_ip_check_protocol(struct sk_buff *skb)
+{
+	__be16 proto = 0;
+
+	/* skb could be non-linear,
+	 * make sure IP header is in non-fragmented part
+	 */
+	if (!pskb_network_may_pull(skb, sizeof(struct iphdr)))
+		return 0;
+
+	if (ip_hdr(skb)->version == 4)
+		proto = htons(ETH_P_IP);
+	else if (ip_hdr(skb)->version == 6)
+		proto = htons(ETH_P_IPV6);
+
+	return proto;
+}
+
+#endif /* _NET_OVPN_SKB_H_ */
diff --git a/drivers/net/ovpn/udp.c b/drivers/net/ovpn/udp.c
index c10474d252e19a0626d17a6f5dd328a5e5811551..d26d7566e9c8dfe91fa77f49c34fb179a9fb2239 100644
--- a/drivers/net/ovpn/udp.c
+++ b/drivers/net/ovpn/udp.c
@@ -7,14 +7,246 @@ 
  */
 
 #include <linux/netdevice.h>
+#include <linux/inetdevice.h>
 #include <linux/socket.h>
+#include <net/addrconf.h>
+#include <net/dst_cache.h>
+#include <net/route.h>
+#include <net/ipv6_stubs.h>
 #include <net/udp.h>
+#include <net/udp_tunnel.h>
 
 #include "ovpnstruct.h"
 #include "main.h"
+#include "bind.h"
+#include "io.h"
+#include "peer.h"
 #include "socket.h"
 #include "udp.h"
 
+/**
+ * ovpn_udp4_output - send IPv4 packet over udp socket
+ * @ovpn: the openvpn instance
+ * @bind: the binding related to the destination peer
+ * @cache: dst cache
+ * @sk: the socket to send the packet over
+ * @skb: the packet to send
+ *
+ * Return: 0 on success or a negative error code otherwise
+ */
+static int ovpn_udp4_output(struct ovpn_struct *ovpn, struct ovpn_bind *bind,
+			    struct dst_cache *cache, struct sock *sk,
+			    struct sk_buff *skb)
+{
+	struct rtable *rt;
+	struct flowi4 fl = {
+		.saddr = bind->local.ipv4.s_addr,
+		.daddr = bind->remote.in4.sin_addr.s_addr,
+		.fl4_sport = inet_sk(sk)->inet_sport,
+		.fl4_dport = bind->remote.in4.sin_port,
+		.flowi4_proto = sk->sk_protocol,
+		.flowi4_mark = sk->sk_mark,
+	};
+	int ret;
+
+	local_bh_disable();
+	rt = dst_cache_get_ip4(cache, &fl.saddr);
+	if (rt)
+		goto transmit;
+
+	if (unlikely(!inet_confirm_addr(sock_net(sk), NULL, 0, fl.saddr,
+					RT_SCOPE_HOST))) {
+		/* we may end up here when the cached address is not usable
+		 * anymore. In this case we reset address/cache and perform a
+		 * new look up
+		 */
+		fl.saddr = 0;
+		bind->local.ipv4.s_addr = 0;
+		dst_cache_reset(cache);
+	}
+
+	rt = ip_route_output_flow(sock_net(sk), &fl, sk);
+	if (IS_ERR(rt) && PTR_ERR(rt) == -EINVAL) {
+		fl.saddr = 0;
+		bind->local.ipv4.s_addr = 0;
+		dst_cache_reset(cache);
+
+		rt = ip_route_output_flow(sock_net(sk), &fl, sk);
+	}
+
+	if (IS_ERR(rt)) {
+		ret = PTR_ERR(rt);
+		net_dbg_ratelimited("%s: no route to host %pISpc: %d\n",
+				    ovpn->dev->name, &bind->remote.in4, ret);
+		goto err;
+	}
+	dst_cache_set_ip4(cache, &rt->dst, fl.saddr);
+
+transmit:
+	udp_tunnel_xmit_skb(rt, sk, skb, fl.saddr, fl.daddr, 0,
+			    ip4_dst_hoplimit(&rt->dst), 0, fl.fl4_sport,
+			    fl.fl4_dport, false, sk->sk_no_check_tx);
+	ret = 0;
+err:
+	local_bh_enable();
+	return ret;
+}
+
+#if IS_ENABLED(CONFIG_IPV6)
+/**
+ * ovpn_udp6_output - send IPv6 packet over udp socket
+ * @ovpn: the openvpn instance
+ * @bind: the binding related to the destination peer
+ * @cache: dst cache
+ * @sk: the socket to send the packet over
+ * @skb: the packet to send
+ *
+ * Return: 0 on success or a negative error code otherwise
+ */
+static int ovpn_udp6_output(struct ovpn_struct *ovpn, struct ovpn_bind *bind,
+			    struct dst_cache *cache, struct sock *sk,
+			    struct sk_buff *skb)
+{
+	struct dst_entry *dst;
+	int ret;
+
+	struct flowi6 fl = {
+		.saddr = bind->local.ipv6,
+		.daddr = bind->remote.in6.sin6_addr,
+		.fl6_sport = inet_sk(sk)->inet_sport,
+		.fl6_dport = bind->remote.in6.sin6_port,
+		.flowi6_proto = sk->sk_protocol,
+		.flowi6_mark = sk->sk_mark,
+		.flowi6_oif = bind->remote.in6.sin6_scope_id,
+	};
+
+	local_bh_disable();
+	dst = dst_cache_get_ip6(cache, &fl.saddr);
+	if (dst)
+		goto transmit;
+
+	if (unlikely(!ipv6_chk_addr(sock_net(sk), &fl.saddr, NULL, 0))) {
+		/* we may end up here when the cached address is not usable
+		 * anymore. In this case we reset address/cache and perform a
+		 * new look up
+		 */
+		fl.saddr = in6addr_any;
+		bind->local.ipv6 = in6addr_any;
+		dst_cache_reset(cache);
+	}
+
+	dst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(sk), sk, &fl, NULL);
+	if (IS_ERR(dst)) {
+		ret = PTR_ERR(dst);
+		net_dbg_ratelimited("%s: no route to host %pISpc: %d\n",
+				    ovpn->dev->name, &bind->remote.in6, ret);
+		goto err;
+	}
+	dst_cache_set_ip6(cache, dst, &fl.saddr);
+
+transmit:
+	udp_tunnel6_xmit_skb(dst, sk, skb, skb->dev, &fl.saddr, &fl.daddr, 0,
+			     ip6_dst_hoplimit(dst), 0, fl.fl6_sport,
+			     fl.fl6_dport, udp_get_no_check6_tx(sk));
+	ret = 0;
+err:
+	local_bh_enable();
+	return ret;
+}
+#endif
+
+/**
+ * ovpn_udp_output - transmit skb using udp-tunnel
+ * @ovpn: the openvpn instance
+ * @bind: the binding related to the destination peer
+ * @cache: dst cache
+ * @sk: the socket to send the packet over
+ * @skb: the packet to send
+ *
+ * rcu_read_lock should be held on entry.
+ * On return, the skb is consumed.
+ *
+ * Return: 0 on success or a negative error code otherwise
+ */
+static int ovpn_udp_output(struct ovpn_struct *ovpn, struct ovpn_bind *bind,
+			   struct dst_cache *cache, struct sock *sk,
+			   struct sk_buff *skb)
+{
+	int ret;
+
+	/* set sk to null if skb is already orphaned */
+	if (!skb->destructor)
+		skb->sk = NULL;
+
+	/* always permit openvpn-created packets to be (outside) fragmented */
+	skb->ignore_df = 1;
+
+	switch (bind->remote.in4.sin_family) {
+	case AF_INET:
+		ret = ovpn_udp4_output(ovpn, bind, cache, sk, skb);
+		break;
+#if IS_ENABLED(CONFIG_IPV6)
+	case AF_INET6:
+		ret = ovpn_udp6_output(ovpn, bind, cache, sk, skb);
+		break;
+#endif
+	default:
+		ret = -EAFNOSUPPORT;
+		break;
+	}
+
+	return ret;
+}
+
+/**
+ * ovpn_udp_send_skb - prepare skb and send it over via UDP
+ * @ovpn: the openvpn instance
+ * @peer: the destination peer
+ * @skb: the packet to send
+ */
+void ovpn_udp_send_skb(struct ovpn_struct *ovpn, struct ovpn_peer *peer,
+		       struct sk_buff *skb)
+{
+	struct ovpn_bind *bind;
+	unsigned int pkt_len;
+	struct socket *sock;
+	int ret = -1;
+
+	skb->dev = ovpn->dev;
+	/* no checksum performed at this layer */
+	skb->ip_summed = CHECKSUM_NONE;
+
+	/* get socket info */
+	sock = peer->sock->sock;
+	if (unlikely(!sock)) {
+		net_warn_ratelimited("%s: no sock for remote peer\n", __func__);
+		goto out;
+	}
+
+	rcu_read_lock();
+	/* get binding */
+	bind = rcu_dereference(peer->bind);
+	if (unlikely(!bind)) {
+		net_warn_ratelimited("%s: no bind for remote peer\n", __func__);
+		goto out_unlock;
+	}
+
+	/* crypto layer -> transport (UDP) */
+	pkt_len = skb->len;
+	ret = ovpn_udp_output(ovpn, bind, &peer->dst_cache, sock->sk, skb);
+
+out_unlock:
+	rcu_read_unlock();
+out:
+	if (unlikely(ret < 0)) {
+		dev_core_stats_tx_dropped_inc(ovpn->dev);
+		kfree_skb(skb);
+		return;
+	}
+
+	dev_sw_netstats_tx_add(ovpn->dev, 1, pkt_len);
+}
+
 /**
  * ovpn_udp_socket_attach - set udp-tunnel CBs on socket and link it to ovpn
  * @sock: socket to configure
diff --git a/drivers/net/ovpn/udp.h b/drivers/net/ovpn/udp.h
index f2507f8f2c71ea9d5e5ac5446801e2d56f86700f..e60f8cd2b4ac8f910aabcf8ed546af59d6ca4be4 100644
--- a/drivers/net/ovpn/udp.h
+++ b/drivers/net/ovpn/udp.h
@@ -9,9 +9,17 @@ 
 #ifndef _NET_OVPN_UDP_H_
 #define _NET_OVPN_UDP_H_
 
+#include <linux/skbuff.h>
+#include <net/sock.h>
+
+struct ovpn_peer;
 struct ovpn_struct;
+struct sk_buff;
 struct socket;
 
 int ovpn_udp_socket_attach(struct socket *sock, struct ovpn_struct *ovpn);
 
+void ovpn_udp_send_skb(struct ovpn_struct *ovpn, struct ovpn_peer *peer,
+		       struct sk_buff *skb);
+
 #endif /* _NET_OVPN_UDP_H_ */