diff mbox series

[net-next,v11,15/23] ovpn: implement keepalive mechanism

Message ID 20241029-b4-ovpn-v11-15-de4698c73a25@openvpn.net
State New
Headers show
Series Introducing OpenVPN Data Channel Offload | expand

Commit Message

Antonio Quartulli Oct. 29, 2024, 10:47 a.m. UTC
OpenVPN supports configuring a periodic keepalive packet.
message to allow the remote endpoint detect link failures.

This change implements the keepalive sending and timer expiring logic.

Signed-off-by: Antonio Quartulli <antonio@openvpn.net>
---
 drivers/net/ovpn/io.c         |  77 +++++++++++++++++
 drivers/net/ovpn/io.h         |   5 ++
 drivers/net/ovpn/main.c       |   3 +
 drivers/net/ovpn/ovpnstruct.h |   2 +
 drivers/net/ovpn/peer.c       | 188 ++++++++++++++++++++++++++++++++++++++++++
 drivers/net/ovpn/peer.h       |  15 ++++
 drivers/net/ovpn/proto.h      |   2 -
 7 files changed, 290 insertions(+), 2 deletions(-)

Comments

Sabrina Dubroca Nov. 5, 2024, 6:10 p.m. UTC | #1
2024-10-29, 11:47:28 +0100, Antonio Quartulli wrote:
> @@ -105,6 +132,9 @@ void ovpn_decrypt_post(void *data, int ret)
>  		goto drop;
>  	}
>  
> +	/* keep track of last received authenticated packet for keepalive */
> +	peer->last_recv = ktime_get_real_seconds();

It doesn't look like we're locking the peer here so that should be a
WRITE_ONCE() (and READ_ONCE(peer->last_recv) for all reads).

> +
>  	/* point to encapsulated IP packet */
>  	__skb_pull(skb, payload_offset);
>  
> @@ -121,6 +151,12 @@ void ovpn_decrypt_post(void *data, int ret)
>  			goto drop;
>  		}
>  
> +		if (ovpn_is_keepalive(skb)) {
> +			net_dbg_ratelimited("%s: ping received from peer %u\n",
> +					    peer->ovpn->dev->name, peer->id);
> +			goto drop;

To help with debugging connectivity issues, maybe keepalives shouldn't
be counted as drops? (consume_skb instead of kfree_skb, and not
incrementing rx_dropped)
The packet was successfully received and did all it had to do.

> +		}
> +
>  		net_info_ratelimited("%s: unsupported protocol received from peer %u\n",
>  				     peer->ovpn->dev->name, peer->id);
>  		goto drop;
> @@ -221,6 +257,10 @@ void ovpn_encrypt_post(void *data, int ret)
>  		/* no transport configured yet */
>  		goto err;
>  	}
> +
> +	/* keep track of last sent packet for keepalive */
> +	peer->last_sent = ktime_get_real_seconds();

And another WRITE_ONCE() here (also paired with READ_ONCE() on the
read side).


> +static int ovpn_peer_del_nolock(struct ovpn_peer *peer,
> +				enum ovpn_del_peer_reason reason)
> +{
> +	switch (peer->ovpn->mode) {
> +	case OVPN_MODE_MP:

I think it would be nice to add

    lockdep_assert_held(&peer->ovpn->peers->lock);

> +		return ovpn_peer_del_mp(peer, reason);
> +	case OVPN_MODE_P2P:

and here

    lockdep_assert_held(&peer->ovpn->lock);

(I had to check that ovpn_peer_del_nolock is indeed called with those
locks held since they're taken by ovpn_peer_keepalive_work_{mp,p2p},
adding these assertions would make it clear that ovpn_peer_del_nolock
is not an unsafe version of ovpn_peer_del)

> +		return ovpn_peer_del_p2p(peer, reason);
> +	default:
> +		return -EOPNOTSUPP;
> +	}
> +}
> +
>  /**
>   * ovpn_peers_free - free all peers in the instance
>   * @ovpn: the instance whose peers should be released
> @@ -830,3 +871,150 @@ void ovpn_peers_free(struct ovpn_struct *ovpn)
>  		ovpn_peer_unhash(peer, OVPN_DEL_PEER_REASON_TEARDOWN);
>  	spin_unlock_bh(&ovpn->peers->lock);
>  }
> +
> +static time64_t ovpn_peer_keepalive_work_single(struct ovpn_peer *peer,
> +						time64_t now)
> +{
> +	time64_t next_run1, next_run2, delta;
> +	unsigned long timeout, interval;
> +	bool expired;
> +
> +	spin_lock_bh(&peer->lock);
> +	/* we expect both timers to be configured at the same time,
> +	 * therefore bail out if either is not set
> +	 */
> +	if (!peer->keepalive_timeout || !peer->keepalive_interval) {
> +		spin_unlock_bh(&peer->lock);
> +		return 0;
> +	}
> +
> +	/* check for peer timeout */
> +	expired = false;
> +	timeout = peer->keepalive_timeout;
> +	delta = now - peer->last_recv;

I'm not sure that's always > 0 if we finish decrypting a packet just
as the workqueue starts:

  ovpn_peer_keepalive_work
    now = ...

                                       ovpn_decrypt_post
                                         peer->last_recv = ...

  ovpn_peer_keepalive_work_single
    delta: now < peer->last_recv



> +	if (delta < timeout) {
> +		peer->keepalive_recv_exp = now + timeout - delta;

I'd shorten that to

    peer->keepalive_recv_exp = peer->last_recv + timeout;

it's a bit more readable to my eyes and avoids risks of wrapping
values.

So I'd probably get rid of delta and go with:

    last_recv = READ_ONCE(peer->last_recv)
    if (now < last_recv + timeout) {
    	peer->keepalive_recv_exp = last_recv + timeout;
    	next_run1 = peer->keepalive_recv_exp;
    } else if ...

> +		next_run1 = peer->keepalive_recv_exp;
> +	} else if (peer->keepalive_recv_exp > now) {
> +		next_run1 = peer->keepalive_recv_exp;
> +	} else {
> +		expired = true;
> +	}

[...]
> +	/* check for peer keepalive */
> +	expired = false;
> +	interval = peer->keepalive_interval;
> +	delta = now - peer->last_sent;
> +	if (delta < interval) {
> +		peer->keepalive_xmit_exp = now + interval - delta;
> +		next_run2 = peer->keepalive_xmit_exp;

and same here
Antonio Quartulli Nov. 12, 2024, 1:20 p.m. UTC | #2
On 05/11/2024 19:10, Sabrina Dubroca wrote:
> 2024-10-29, 11:47:28 +0100, Antonio Quartulli wrote:
>> @@ -105,6 +132,9 @@ void ovpn_decrypt_post(void *data, int ret)
>>   		goto drop;
>>   	}
>>   
>> +	/* keep track of last received authenticated packet for keepalive */
>> +	peer->last_recv = ktime_get_real_seconds();
> 
> It doesn't look like we're locking the peer here so that should be a
> WRITE_ONCE() (and READ_ONCE(peer->last_recv) for all reads).

Is that because last_recv is 64 bit long (and might be more than one 
word on certain architectures)?

I don't remember having to do so for reading/writing 32 bit long integers.

I presume we need a WRITE_ONCE also upon initialization in 
ovpn_peer_keepalive_set() right?
We still want to coordinate that with other reads/writes.

> 
>> +
>>   	/* point to encapsulated IP packet */
>>   	__skb_pull(skb, payload_offset);
>>   
>> @@ -121,6 +151,12 @@ void ovpn_decrypt_post(void *data, int ret)
>>   			goto drop;
>>   		}
>>   
>> +		if (ovpn_is_keepalive(skb)) {
>> +			net_dbg_ratelimited("%s: ping received from peer %u\n",
>> +					    peer->ovpn->dev->name, peer->id);
>> +			goto drop;
> 
> To help with debugging connectivity issues, maybe keepalives shouldn't
> be counted as drops? (consume_skb instead of kfree_skb, and not
> incrementing rx_dropped)
> The packet was successfully received and did all it had to do.

you're absolutely right. Will change that.

> 
>> +		}
>> +
>>   		net_info_ratelimited("%s: unsupported protocol received from peer %u\n",
>>   				     peer->ovpn->dev->name, peer->id);
>>   		goto drop;
>> @@ -221,6 +257,10 @@ void ovpn_encrypt_post(void *data, int ret)
>>   		/* no transport configured yet */
>>   		goto err;
>>   	}
>> +
>> +	/* keep track of last sent packet for keepalive */
>> +	peer->last_sent = ktime_get_real_seconds();
> 
> And another WRITE_ONCE() here (also paired with READ_ONCE() on the
> read side).

Yap

> 
> 
>> +static int ovpn_peer_del_nolock(struct ovpn_peer *peer,
>> +				enum ovpn_del_peer_reason reason)
>> +{
>> +	switch (peer->ovpn->mode) {
>> +	case OVPN_MODE_MP:
> 
> I think it would be nice to add
> 
>      lockdep_assert_held(&peer->ovpn->peers->lock);
> 
>> +		return ovpn_peer_del_mp(peer, reason);
>> +	case OVPN_MODE_P2P:
> 
> and here
> 
>      lockdep_assert_held(&peer->ovpn->lock);

Yeah, good idea.
__must_hold() can't work here, so lockdep_assert_held is definitely the 
way to go.

> 
> (I had to check that ovpn_peer_del_nolock is indeed called with those
> locks held since they're taken by ovpn_peer_keepalive_work_{mp,p2p},
> adding these assertions would make it clear that ovpn_peer_del_nolock
> is not an unsafe version of ovpn_peer_del)

Right, it makes sense.

> 
>> +		return ovpn_peer_del_p2p(peer, reason);
>> +	default:
>> +		return -EOPNOTSUPP;
>> +	}
>> +}
>> +
>>   /**
>>    * ovpn_peers_free - free all peers in the instance
>>    * @ovpn: the instance whose peers should be released
>> @@ -830,3 +871,150 @@ void ovpn_peers_free(struct ovpn_struct *ovpn)
>>   		ovpn_peer_unhash(peer, OVPN_DEL_PEER_REASON_TEARDOWN);
>>   	spin_unlock_bh(&ovpn->peers->lock);
>>   }
>> +
>> +static time64_t ovpn_peer_keepalive_work_single(struct ovpn_peer *peer,
>> +						time64_t now)
>> +{
>> +	time64_t next_run1, next_run2, delta;
>> +	unsigned long timeout, interval;
>> +	bool expired;
>> +
>> +	spin_lock_bh(&peer->lock);
>> +	/* we expect both timers to be configured at the same time,
>> +	 * therefore bail out if either is not set
>> +	 */
>> +	if (!peer->keepalive_timeout || !peer->keepalive_interval) {
>> +		spin_unlock_bh(&peer->lock);
>> +		return 0;
>> +	}
>> +
>> +	/* check for peer timeout */
>> +	expired = false;
>> +	timeout = peer->keepalive_timeout;
>> +	delta = now - peer->last_recv;
> 
> I'm not sure that's always > 0 if we finish decrypting a packet just
> as the workqueue starts:
> 
>    ovpn_peer_keepalive_work
>      now = ...
> 
>                                         ovpn_decrypt_post
>                                           peer->last_recv = ...
> 
>    ovpn_peer_keepalive_work_single
>      delta: now < peer->last_recv
> 

Yeah, there is nothing preventing this from happening...but is this 
truly a problem? The math should still work, no?

However:

> 
> 
>> +	if (delta < timeout) {
>> +		peer->keepalive_recv_exp = now + timeout - delta;
> 
> I'd shorten that to
> 
>      peer->keepalive_recv_exp = peer->last_recv + timeout;
> 
> it's a bit more readable to my eyes and avoids risks of wrapping
> values.
> 
> So I'd probably get rid of delta and go with:
> 
>      last_recv = READ_ONCE(peer->last_recv)
>      if (now < last_recv + timeout) {
>      	peer->keepalive_recv_exp = last_recv + timeout;
>      	next_run1 = peer->keepalive_recv_exp;
>      } else if ...
> 
>> +		next_run1 = peer->keepalive_recv_exp;
>> +	} else if (peer->keepalive_recv_exp > now) {
>> +		next_run1 = peer->keepalive_recv_exp;
>> +	} else {
>> +		expired = true;
>> +	}

I agree this is simpler to read and gets rid of some extra operations.

[note: I took inspiration from nat_keepalive_work_single() - it could be 
simplified as well I guess]

> 
> [...]
>> +	/* check for peer keepalive */
>> +	expired = false;
>> +	interval = peer->keepalive_interval;
>> +	delta = now - peer->last_sent;
>> +	if (delta < interval) {
>> +		peer->keepalive_xmit_exp = now + interval - delta;
>> +		next_run2 = peer->keepalive_xmit_exp;
> 
> and same here

Yeah, will change both. Thanks!


Regards,
Sabrina Dubroca Nov. 13, 2024, 10:36 a.m. UTC | #3
2024-11-12, 14:20:45 +0100, Antonio Quartulli wrote:
> On 05/11/2024 19:10, Sabrina Dubroca wrote:
> > 2024-10-29, 11:47:28 +0100, Antonio Quartulli wrote:
> > > @@ -105,6 +132,9 @@ void ovpn_decrypt_post(void *data, int ret)
> > >   		goto drop;
> > >   	}
> > > +	/* keep track of last received authenticated packet for keepalive */
> > > +	peer->last_recv = ktime_get_real_seconds();
> > 
> > It doesn't look like we're locking the peer here so that should be a
> > WRITE_ONCE() (and READ_ONCE(peer->last_recv) for all reads).
> 
> Is that because last_recv is 64 bit long (and might be more than one word on
> certain architectures)?
> 
> I don't remember having to do so for reading/writing 32 bit long integers.

AFAIK it's not just that. The compiler is free to do the read/write in
any way it wants when you don't specify _ONCE. On the read side, it
could read from memory a single time or multiple times (getting
possibly different values each time), or maybe split the load
(possibly reading chunks from different values being written in
parallel).

> I presume we need a WRITE_ONCE also upon initialization in
> ovpn_peer_keepalive_set() right?
> We still want to coordinate that with other reads/writes.

I think it makes sense, yes.

> > > +
> > >   	/* point to encapsulated IP packet */
> > >   	__skb_pull(skb, payload_offset);
> > > @@ -121,6 +151,12 @@ void ovpn_decrypt_post(void *data, int ret)
> > >   			goto drop;
> > >   		}
> > > +		if (ovpn_is_keepalive(skb)) {
> > > +			net_dbg_ratelimited("%s: ping received from peer %u\n",
> > > +					    peer->ovpn->dev->name, peer->id);
> > > +			goto drop;
> > 
> > To help with debugging connectivity issues, maybe keepalives shouldn't
> > be counted as drops? (consume_skb instead of kfree_skb, and not
> > incrementing rx_dropped)
> > The packet was successfully received and did all it had to do.
> 
> you're absolutely right. Will change that.

Thanks.

> > > +	/* check for peer timeout */
> > > +	expired = false;
> > > +	timeout = peer->keepalive_timeout;
> > > +	delta = now - peer->last_recv;
> > 
> > I'm not sure that's always > 0 if we finish decrypting a packet just
> > as the workqueue starts:
> > 
> >    ovpn_peer_keepalive_work
> >      now = ...
> > 
> >                                         ovpn_decrypt_post
> >                                           peer->last_recv = ...
> > 
> >    ovpn_peer_keepalive_work_single
> >      delta: now < peer->last_recv
> > 
> 
> Yeah, there is nothing preventing this from happening...but is this truly a
> problem? The math should still work, no?

We'll fail "delta < timeout" (which we shouldn't), so we'll end up
either in the "expired = true" case, or not updating
keepalive_recv_exp. Both of these seem not ideal.

> 
> However:
> 
> > 
> > 
> > > +	if (delta < timeout) {
> > > +		peer->keepalive_recv_exp = now + timeout - delta;
> > 
> > I'd shorten that to
> > 
> >      peer->keepalive_recv_exp = peer->last_recv + timeout;
> > 
> > it's a bit more readable to my eyes and avoids risks of wrapping
> > values.
> > 
> > So I'd probably get rid of delta and go with:
> > 
> >      last_recv = READ_ONCE(peer->last_recv)
> >      if (now < last_recv + timeout) {
> >      	peer->keepalive_recv_exp = last_recv + timeout;
> >      	next_run1 = peer->keepalive_recv_exp;
> >      } else if ...
> > 
> > > +		next_run1 = peer->keepalive_recv_exp;
> > > +	} else if (peer->keepalive_recv_exp > now) {
> > > +		next_run1 = peer->keepalive_recv_exp;
> > > +	} else {
> > > +		expired = true;
> > > +	}
> 
> I agree this is simpler to read and gets rid of some extra operations.
> 
> [note: I took inspiration from nat_keepalive_work_single() - it could be
> simplified as well I guess]

Ah, ok. I wanted to review this code when it was posted but didn't
have time :(

> > 
> > [...]
> > > +	/* check for peer keepalive */
> > > +	expired = false;
> > > +	interval = peer->keepalive_interval;
> > > +	delta = now - peer->last_sent;
> > > +	if (delta < interval) {
> > > +		peer->keepalive_xmit_exp = now + interval - delta;
> > > +		next_run2 = peer->keepalive_xmit_exp;
> > 
> > and same here
> 
> Yeah, will change both. Thanks!

Thanks.
Antonio Quartulli Nov. 14, 2024, 8:12 a.m. UTC | #4
On 13/11/2024 11:36, Sabrina Dubroca wrote:
> 2024-11-12, 14:20:45 +0100, Antonio Quartulli wrote:
>> On 05/11/2024 19:10, Sabrina Dubroca wrote:
>>> 2024-10-29, 11:47:28 +0100, Antonio Quartulli wrote:
>>>> @@ -105,6 +132,9 @@ void ovpn_decrypt_post(void *data, int ret)
>>>>    		goto drop;
>>>>    	}
>>>> +	/* keep track of last received authenticated packet for keepalive */
>>>> +	peer->last_recv = ktime_get_real_seconds();
>>>
>>> It doesn't look like we're locking the peer here so that should be a
>>> WRITE_ONCE() (and READ_ONCE(peer->last_recv) for all reads).
>>
>> Is that because last_recv is 64 bit long (and might be more than one word on
>> certain architectures)?
>>
>> I don't remember having to do so for reading/writing 32 bit long integers.
> 
> AFAIK it's not just that. The compiler is free to do the read/write in
> any way it wants when you don't specify _ONCE. On the read side, it
> could read from memory a single time or multiple times (getting
> possibly different values each time), or maybe split the load
> (possibly reading chunks from different values being written in
> parallel).

Ok, thanks. Will switch to WRITE/READ_ONE then.

> 
>> I presume we need a WRITE_ONCE also upon initialization in
>> ovpn_peer_keepalive_set() right?
>> We still want to coordinate that with other reads/writes.
> 
> I think it makes sense, yes.

ACK

[...]
> 
>>>> +	/* check for peer timeout */
>>>> +	expired = false;
>>>> +	timeout = peer->keepalive_timeout;
>>>> +	delta = now - peer->last_recv;
>>>
>>> I'm not sure that's always > 0 if we finish decrypting a packet just
>>> as the workqueue starts:
>>>
>>>     ovpn_peer_keepalive_work
>>>       now = ...
>>>
>>>                                          ovpn_decrypt_post
>>>                                            peer->last_recv = ...
>>>
>>>     ovpn_peer_keepalive_work_single
>>>       delta: now < peer->last_recv
>>>
>>
>> Yeah, there is nothing preventing this from happening...but is this truly a
>> problem? The math should still work, no?
> 
> We'll fail "delta < timeout" (which we shouldn't), so we'll end up
> either in the "expired = true" case, or not updating
> keepalive_recv_exp. Both of these seem not ideal.

delta is signed, so it'll end up being a negative value and "delta < 
timeout" should not fail then. Unless I am missing something.

Anyway, this was just an exercise to understand what was going on.
I already changed the code as per your suggestion (the fact that we are 
still discussing this chunk proves that it needed to be simplified :))

> 
>>
>> However:
>>
>>>
>>>
>>>> +	if (delta < timeout) {
>>>> +		peer->keepalive_recv_exp = now + timeout - delta;
>>>
>>> I'd shorten that to
>>>
>>>       peer->keepalive_recv_exp = peer->last_recv + timeout;
>>>
>>> it's a bit more readable to my eyes and avoids risks of wrapping
>>> values.
>>>
>>> So I'd probably get rid of delta and go with:
>>>
>>>       last_recv = READ_ONCE(peer->last_recv)
>>>       if (now < last_recv + timeout) {
>>>       	peer->keepalive_recv_exp = last_recv + timeout;
>>>       	next_run1 = peer->keepalive_recv_exp;
>>>       } else if ...
>>>
>>>> +		next_run1 = peer->keepalive_recv_exp;
>>>> +	} else if (peer->keepalive_recv_exp > now) {
>>>> +		next_run1 = peer->keepalive_recv_exp;
>>>> +	} else {
>>>> +		expired = true;
>>>> +	}
>>
>> I agree this is simpler to read and gets rid of some extra operations.
>>
>> [note: I took inspiration from nat_keepalive_work_single() - it could be
>> simplified as well I guess]
> 
> Ah, ok. I wanted to review this code when it was posted but didn't
> have time :(

It can still be fixed ;)


Thanks.
Regards,
Sabrina Dubroca Nov. 14, 2024, 9:03 a.m. UTC | #5
2024-11-14, 09:12:01 +0100, Antonio Quartulli wrote:
> On 13/11/2024 11:36, Sabrina Dubroca wrote:
> > 2024-11-12, 14:20:45 +0100, Antonio Quartulli wrote:
> > > On 05/11/2024 19:10, Sabrina Dubroca wrote:
> > > > 2024-10-29, 11:47:28 +0100, Antonio Quartulli wrote:
> > > > > +	/* check for peer timeout */
> > > > > +	expired = false;
> > > > > +	timeout = peer->keepalive_timeout;
> > > > > +	delta = now - peer->last_recv;
> > > > 
> > > > I'm not sure that's always > 0 if we finish decrypting a packet just
> > > > as the workqueue starts:
> > > > 
> > > >     ovpn_peer_keepalive_work
> > > >       now = ...
> > > > 
> > > >                                          ovpn_decrypt_post
> > > >                                            peer->last_recv = ...
> > > > 
> > > >     ovpn_peer_keepalive_work_single
> > > >       delta: now < peer->last_recv
> > > > 
> > > 
> > > Yeah, there is nothing preventing this from happening...but is this truly a
> > > problem? The math should still work, no?
> > 
> > We'll fail "delta < timeout" (which we shouldn't), so we'll end up
> > either in the "expired = true" case, or not updating
> > keepalive_recv_exp. Both of these seem not ideal.
> 
> delta is signed, so it'll end up being a negative value and "delta <
> timeout" should not fail then. Unless I am missing something.

But timeout is "unsigned long", so the comparison will be done as
unsigned.

> Anyway, this was just an exercise to understand what was going on.
> I already changed the code as per your suggestion (the fact that we are
> still discussing this chunk proves that it needed to be simplified :))

:)
Sabrina Dubroca Nov. 22, 2024, 4:18 p.m. UTC | #6
2024-11-22, 10:41:26 +0100, Antonio Quartulli wrote:
> On 12/11/2024 14:20, Antonio Quartulli wrote:
> [...]
> > > > +static int ovpn_peer_del_nolock(struct ovpn_peer *peer,
> > > > +                enum ovpn_del_peer_reason reason)
> > > > +{
> > > > +    switch (peer->ovpn->mode) {
> > > > +    case OVPN_MODE_MP:
> > > 
> > > I think it would be nice to add
> > > 
> > >      lockdep_assert_held(&peer->ovpn->peers->lock);
> 
> Sabrina, in other places I have used the sparse notation __must_hold()
> instead.
> Is there any preference in regards to lockdep vs sparse?
> 
> I could switch them all to lockdep_assert_held if needed.

__must_hold has the advantage of being checked at compile time (though
I'm not sure it's that reliable [1]), so you don't need to run a test
that actually hits that particular code path.

In this case I see lockdep_assert_held as mainly documenting that the
locking that makes ovpn_peer_del_nolock safe (as safe as
ovpn_peer_del) is provided by its caller. The splat for incorrect use
on debug kernels is a bonus. Sprinkling lockdep_assert_held all over
ovpn might be bloating the code too much, but I'm not opposed to
adding them if it helps.

[1] I ran sparse on drivers/net/ovpn/peer.c before/after removing the
locking from ovpn_peer_del and didn't get any warnings. sparse is good
to detect imbalances (function that locks without unlocking), but
maybe don't trust __must_hold for more than documenting expectations.

[note: if you end up merging ovpn->peers->lock with ovpn->lock as
we've discussed somewhere else, the locking around keepalive and
ovpn_peer_del becomes a bit less hairy]
Antonio Quartulli Nov. 24, 2024, 12:28 a.m. UTC | #7
On 22/11/2024 17:18, Sabrina Dubroca wrote:
> 2024-11-22, 10:41:26 +0100, Antonio Quartulli wrote:
>> On 12/11/2024 14:20, Antonio Quartulli wrote:
>> [...]
>>>>> +static int ovpn_peer_del_nolock(struct ovpn_peer *peer,
>>>>> +                enum ovpn_del_peer_reason reason)
>>>>> +{
>>>>> +    switch (peer->ovpn->mode) {
>>>>> +    case OVPN_MODE_MP:
>>>>
>>>> I think it would be nice to add
>>>>
>>>>       lockdep_assert_held(&peer->ovpn->peers->lock);
>>
>> Sabrina, in other places I have used the sparse notation __must_hold()
>> instead.
>> Is there any preference in regards to lockdep vs sparse?
>>
>> I could switch them all to lockdep_assert_held if needed.
> 
> __must_hold has the advantage of being checked at compile time (though
> I'm not sure it's that reliable [1]), so you don't need to run a test
> that actually hits that particular code path.
> 
> In this case I see lockdep_assert_held as mainly documenting that the
> locking that makes ovpn_peer_del_nolock safe (as safe as
> ovpn_peer_del) is provided by its caller. The splat for incorrect use
> on debug kernels is a bonus. Sprinkling lockdep_assert_held all over
> ovpn might be bloating the code too much, but I'm not opposed to
> adding them if it helps.
> 
> [1] I ran sparse on drivers/net/ovpn/peer.c before/after removing the
> locking from ovpn_peer_del and didn't get any warnings. sparse is good
> to detect imbalances (function that locks without unlocking), but
> maybe don't trust __must_hold for more than documenting expectations.

Same here. I didn't expect that.
Then I think it's better to rely on lockdep_assert_held() for this kind 
of assumptions.

> 
> [note: if you end up merging ovpn->peers->lock with ovpn->lock as
> we've discussed somewhere else, the locking around keepalive and
> ovpn_peer_del becomes a bit less hairy]

Yeah, this is happening.

Thanks a lot!

Regards,

>
diff mbox series

Patch

diff --git a/drivers/net/ovpn/io.c b/drivers/net/ovpn/io.c
index deda19ab87391f86964ba43088b7847d22420eee..63c140138bf98e5d1df79a2565b666d86513323d 100644
--- a/drivers/net/ovpn/io.c
+++ b/drivers/net/ovpn/io.c
@@ -27,6 +27,33 @@ 
 #include "skb.h"
 #include "socket.h"
 
+const unsigned char ovpn_keepalive_message[OVPN_KEEPALIVE_SIZE] = {
+	0x2a, 0x18, 0x7b, 0xf3, 0x64, 0x1e, 0xb4, 0xcb,
+	0x07, 0xed, 0x2d, 0x0a, 0x98, 0x1f, 0xc7, 0x48
+};
+
+/**
+ * ovpn_is_keepalive - check if skb contains a keepalive message
+ * @skb: packet to check
+ *
+ * Assumes that the first byte of skb->data is defined.
+ *
+ * Return: true if skb contains a keepalive or false otherwise
+ */
+static bool ovpn_is_keepalive(struct sk_buff *skb)
+{
+	if (*skb->data != ovpn_keepalive_message[0])
+		return false;
+
+	if (skb->len != OVPN_KEEPALIVE_SIZE)
+		return false;
+
+	if (!pskb_may_pull(skb, OVPN_KEEPALIVE_SIZE))
+		return false;
+
+	return !memcmp(skb->data, ovpn_keepalive_message, OVPN_KEEPALIVE_SIZE);
+}
+
 /* Called after decrypt to write the IP packet to the device.
  * This method is expected to manage/free the skb.
  */
@@ -105,6 +132,9 @@  void ovpn_decrypt_post(void *data, int ret)
 		goto drop;
 	}
 
+	/* keep track of last received authenticated packet for keepalive */
+	peer->last_recv = ktime_get_real_seconds();
+
 	/* point to encapsulated IP packet */
 	__skb_pull(skb, payload_offset);
 
@@ -121,6 +151,12 @@  void ovpn_decrypt_post(void *data, int ret)
 			goto drop;
 		}
 
+		if (ovpn_is_keepalive(skb)) {
+			net_dbg_ratelimited("%s: ping received from peer %u\n",
+					    peer->ovpn->dev->name, peer->id);
+			goto drop;
+		}
+
 		net_info_ratelimited("%s: unsupported protocol received from peer %u\n",
 				     peer->ovpn->dev->name, peer->id);
 		goto drop;
@@ -221,6 +257,10 @@  void ovpn_encrypt_post(void *data, int ret)
 		/* no transport configured yet */
 		goto err;
 	}
+
+	/* keep track of last sent packet for keepalive */
+	peer->last_sent = ktime_get_real_seconds();
+
 	/* skb passed down the stack - don't free it */
 	skb = NULL;
 err:
@@ -361,3 +401,40 @@  netdev_tx_t ovpn_net_xmit(struct sk_buff *skb, struct net_device *dev)
 	kfree_skb_list(skb);
 	return NET_XMIT_DROP;
 }
+
+/**
+ * ovpn_xmit_special - encrypt and transmit an out-of-band message to peer
+ * @peer: peer to send the message to
+ * @data: message content
+ * @len: message length
+ *
+ * Assumes that caller holds a reference to peer
+ */
+void ovpn_xmit_special(struct ovpn_peer *peer, const void *data,
+		       const unsigned int len)
+{
+	struct ovpn_struct *ovpn;
+	struct sk_buff *skb;
+
+	ovpn = peer->ovpn;
+	if (unlikely(!ovpn))
+		return;
+
+	skb = alloc_skb(256 + len, GFP_ATOMIC);
+	if (unlikely(!skb))
+		return;
+
+	skb_reserve(skb, 128);
+	skb->priority = TC_PRIO_BESTEFFORT;
+	__skb_put_data(skb, data, len);
+
+	/* increase reference counter when passing peer to sending queue */
+	if (!ovpn_peer_hold(peer)) {
+		netdev_dbg(ovpn->dev, "%s: cannot hold peer reference for sending special packet\n",
+			   __func__);
+		kfree_skb(skb);
+		return;
+	}
+
+	ovpn_send(ovpn, skb, peer);
+}
diff --git a/drivers/net/ovpn/io.h b/drivers/net/ovpn/io.h
index ad81dd86924689309b3299573575a1705eddaf99..eb224114152c29f42aadf026212e8d278006b490 100644
--- a/drivers/net/ovpn/io.h
+++ b/drivers/net/ovpn/io.h
@@ -10,9 +10,14 @@ 
 #ifndef _NET_OVPN_OVPN_H_
 #define _NET_OVPN_OVPN_H_
 
+#define OVPN_KEEPALIVE_SIZE 16
+extern const unsigned char ovpn_keepalive_message[OVPN_KEEPALIVE_SIZE];
+
 netdev_tx_t ovpn_net_xmit(struct sk_buff *skb, struct net_device *dev);
 
 void ovpn_recv(struct ovpn_peer *peer, struct sk_buff *skb);
+void ovpn_xmit_special(struct ovpn_peer *peer, const void *data,
+		       const unsigned int len);
 
 void ovpn_encrypt_post(void *data, int ret);
 void ovpn_decrypt_post(void *data, int ret);
diff --git a/drivers/net/ovpn/main.c b/drivers/net/ovpn/main.c
index c7453127ab640d7268c1ce919a87cc5419fac9ee..1bd563e3f16f49dd01c897fbe79cbd90f4b8e9aa 100644
--- a/drivers/net/ovpn/main.c
+++ b/drivers/net/ovpn/main.c
@@ -191,6 +191,7 @@  static int ovpn_newlink(struct net *src_net, struct net_device *dev,
 	ovpn->dev = dev;
 	ovpn->mode = mode;
 	spin_lock_init(&ovpn->lock);
+	INIT_DELAYED_WORK(&ovpn->keepalive_work, ovpn_peer_keepalive_work);
 
 	err = ovpn_mp_alloc(ovpn);
 	if (err < 0)
@@ -244,6 +245,8 @@  static int ovpn_netdev_notifier_call(struct notifier_block *nb,
 		netif_carrier_off(dev);
 		ovpn->registered = false;
 
+		cancel_delayed_work_sync(&ovpn->keepalive_work);
+
 		switch (ovpn->mode) {
 		case OVPN_MODE_P2P:
 			ovpn_peer_release_p2p(ovpn);
diff --git a/drivers/net/ovpn/ovpnstruct.h b/drivers/net/ovpn/ovpnstruct.h
index 12ed5e22c2108c9f143d1984048eb40c887cac63..4ac00d550ecb9f84c6c132dd2bdc0a3fc0ab342c 100644
--- a/drivers/net/ovpn/ovpnstruct.h
+++ b/drivers/net/ovpn/ovpnstruct.h
@@ -43,6 +43,7 @@  struct ovpn_peer_collection {
  * @peer: in P2P mode, this is the only remote peer
  * @dev_list: entry for the module wide device list
  * @gro_cells: pointer to the Generic Receive Offload cell
+ * @keepalive_work: struct used to schedule keepalive periodic job
  */
 struct ovpn_struct {
 	struct net_device *dev;
@@ -54,6 +55,7 @@  struct ovpn_struct {
 	struct ovpn_peer __rcu *peer;
 	struct list_head dev_list;
 	struct gro_cells gro_cells;
+	struct delayed_work keepalive_work;
 };
 
 #endif /* _NET_OVPN_OVPNSTRUCT_H_ */
diff --git a/drivers/net/ovpn/peer.c b/drivers/net/ovpn/peer.c
index c7dc9032c2b55fd42befc1f3e7a0eca893a96576..e8a42212af391916b5321e729f7e8a864d0a541f 100644
--- a/drivers/net/ovpn/peer.c
+++ b/drivers/net/ovpn/peer.c
@@ -22,6 +22,34 @@ 
 #include "peer.h"
 #include "socket.h"
 
+/**
+ * ovpn_peer_keepalive_set - configure keepalive values for peer
+ * @peer: the peer to configure
+ * @interval: outgoing keepalive interval
+ * @timeout: incoming keepalive timeout
+ */
+void ovpn_peer_keepalive_set(struct ovpn_peer *peer, u32 interval, u32 timeout)
+{
+	time64_t now = ktime_get_real_seconds();
+
+	netdev_dbg(peer->ovpn->dev,
+		   "%s: scheduling keepalive for peer %u: interval=%u timeout=%u\n",
+		   __func__, peer->id, interval, timeout);
+
+	peer->keepalive_interval = interval;
+	peer->last_sent = now;
+	peer->keepalive_xmit_exp = now + interval;
+
+	peer->keepalive_timeout = timeout;
+	peer->last_recv = now;
+	peer->keepalive_recv_exp = now + timeout;
+
+	/* now that interval and timeout have been changed, kick
+	 * off the worker so that the next delay can be recomputed
+	 */
+	mod_delayed_work(system_wq, &peer->ovpn->keepalive_work, 0);
+}
+
 /**
  * ovpn_peer_new - allocate and initialize a new peer object
  * @ovpn: the openvpn instance inside which the peer should be created
@@ -815,6 +843,19 @@  int ovpn_peer_del(struct ovpn_peer *peer, enum ovpn_del_peer_reason reason)
 	}
 }
 
+static int ovpn_peer_del_nolock(struct ovpn_peer *peer,
+				enum ovpn_del_peer_reason reason)
+{
+	switch (peer->ovpn->mode) {
+	case OVPN_MODE_MP:
+		return ovpn_peer_del_mp(peer, reason);
+	case OVPN_MODE_P2P:
+		return ovpn_peer_del_p2p(peer, reason);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 /**
  * ovpn_peers_free - free all peers in the instance
  * @ovpn: the instance whose peers should be released
@@ -830,3 +871,150 @@  void ovpn_peers_free(struct ovpn_struct *ovpn)
 		ovpn_peer_unhash(peer, OVPN_DEL_PEER_REASON_TEARDOWN);
 	spin_unlock_bh(&ovpn->peers->lock);
 }
+
+static time64_t ovpn_peer_keepalive_work_single(struct ovpn_peer *peer,
+						time64_t now)
+{
+	time64_t next_run1, next_run2, delta;
+	unsigned long timeout, interval;
+	bool expired;
+
+	spin_lock_bh(&peer->lock);
+	/* we expect both timers to be configured at the same time,
+	 * therefore bail out if either is not set
+	 */
+	if (!peer->keepalive_timeout || !peer->keepalive_interval) {
+		spin_unlock_bh(&peer->lock);
+		return 0;
+	}
+
+	/* check for peer timeout */
+	expired = false;
+	timeout = peer->keepalive_timeout;
+	delta = now - peer->last_recv;
+	if (delta < timeout) {
+		peer->keepalive_recv_exp = now + timeout - delta;
+		next_run1 = peer->keepalive_recv_exp;
+	} else if (peer->keepalive_recv_exp > now) {
+		next_run1 = peer->keepalive_recv_exp;
+	} else {
+		expired = true;
+	}
+
+	if (expired) {
+		/* peer is dead -> kill it and move on */
+		spin_unlock_bh(&peer->lock);
+		netdev_dbg(peer->ovpn->dev, "peer %u expired\n",
+			   peer->id);
+		ovpn_peer_del_nolock(peer, OVPN_DEL_PEER_REASON_EXPIRED);
+		return 0;
+	}
+
+	/* check for peer keepalive */
+	expired = false;
+	interval = peer->keepalive_interval;
+	delta = now - peer->last_sent;
+	if (delta < interval) {
+		peer->keepalive_xmit_exp = now + interval - delta;
+		next_run2 = peer->keepalive_xmit_exp;
+	} else if (peer->keepalive_xmit_exp > now) {
+		next_run2 = peer->keepalive_xmit_exp;
+	} else {
+		expired = true;
+		next_run2 = now + interval;
+	}
+	spin_unlock_bh(&peer->lock);
+
+	if (expired) {
+		/* a keepalive packet is required */
+		netdev_dbg(peer->ovpn->dev,
+			   "sending keepalive to peer %u\n",
+			   peer->id);
+		ovpn_xmit_special(peer, ovpn_keepalive_message,
+				  sizeof(ovpn_keepalive_message));
+	}
+
+	if (next_run1 < next_run2)
+		return next_run1;
+
+	return next_run2;
+}
+
+static time64_t ovpn_peer_keepalive_work_mp(struct ovpn_struct *ovpn,
+					    time64_t now)
+{
+	time64_t tmp_next_run, next_run = 0;
+	struct hlist_node *tmp;
+	struct ovpn_peer *peer;
+	int bkt;
+
+	spin_lock_bh(&ovpn->peers->lock);
+	hash_for_each_safe(ovpn->peers->by_id, bkt, tmp, peer, hash_entry_id) {
+		tmp_next_run = ovpn_peer_keepalive_work_single(peer, now);
+		if (!tmp_next_run)
+			continue;
+
+		/* the next worker run will be scheduled based on the shortest
+		 * required interval across all peers
+		 */
+		if (!next_run || tmp_next_run < next_run)
+			next_run = tmp_next_run;
+	}
+	spin_unlock_bh(&ovpn->peers->lock);
+
+	return next_run;
+}
+
+static time64_t ovpn_peer_keepalive_work_p2p(struct ovpn_struct *ovpn,
+					     time64_t now)
+{
+	struct ovpn_peer *peer;
+	time64_t next_run = 0;
+
+	spin_lock_bh(&ovpn->lock);
+	peer = rcu_dereference_protected(ovpn->peer,
+					 lockdep_is_held(&ovpn->lock));
+	if (peer)
+		next_run = ovpn_peer_keepalive_work_single(peer, now);
+	spin_unlock_bh(&ovpn->lock);
+
+	return next_run;
+}
+
+/**
+ * ovpn_peer_keepalive_work - run keepalive logic on each known peer
+ * @work: pointer to the work member of the related ovpn object
+ *
+ * Each peer has two timers (if configured):
+ * 1. peer timeout: when no data is received for a certain interval,
+ *    the peer is considered dead and it gets killed.
+ * 2. peer keepalive: when no data is sent to a certain peer for a
+ *    certain interval, a special 'keepalive' packet is explicitly sent.
+ *
+ * This function iterates across the whole peer collection while
+ * checking the timers described above.
+ */
+void ovpn_peer_keepalive_work(struct work_struct *work)
+{
+	struct ovpn_struct *ovpn = container_of(work, struct ovpn_struct,
+						keepalive_work.work);
+	time64_t next_run = 0, now = ktime_get_real_seconds();
+
+	switch (ovpn->mode) {
+	case OVPN_MODE_MP:
+		next_run = ovpn_peer_keepalive_work_mp(ovpn, now);
+		break;
+	case OVPN_MODE_P2P:
+		next_run = ovpn_peer_keepalive_work_p2p(ovpn, now);
+		break;
+	}
+
+	/* prevent rearming if the interface is being destroyed */
+	if (next_run > 0 && ovpn->registered) {
+		netdev_dbg(ovpn->dev,
+			   "scheduling keepalive work: now=%llu next_run=%llu delta=%llu\n",
+			   next_run, now, next_run - now);
+		schedule_delayed_work(&ovpn->keepalive_work,
+				      (next_run - now) * HZ);
+	}
+}
diff --git a/drivers/net/ovpn/peer.h b/drivers/net/ovpn/peer.h
index 942b90c84a0fb9e6fbb96f6df7f7842a9f738caf..952927ae78a3ab753aaf2c6cc6f77121bdac34be 100644
--- a/drivers/net/ovpn/peer.h
+++ b/drivers/net/ovpn/peer.h
@@ -43,6 +43,12 @@ 
  * @crypto: the crypto configuration (ciphers, keys, etc..)
  * @dst_cache: cache for dst_entry used to send to peer
  * @bind: remote peer binding
+ * @keepalive_interval: seconds after which a new keepalive should be sent
+ * @keepalive_xmit_exp: future timestamp when next keepalive should be sent
+ * @last_sent: timestamp of the last successfully sent packet
+ * @keepalive_timeout: seconds after which an inactive peer is considered dead
+ * @keepalive_recv_exp: future timestamp when the peer should expire
+ * @last_recv: timestamp of the last authenticated received packet
  * @halt: true if ovpn_peer_mark_delete was called
  * @vpn_stats: per-peer in-VPN TX/RX stays
  * @link_stats: per-peer link/transport TX/RX stats
@@ -91,6 +97,12 @@  struct ovpn_peer {
 	struct ovpn_crypto_state crypto;
 	struct dst_cache dst_cache;
 	struct ovpn_bind __rcu *bind;
+	unsigned long keepalive_interval;
+	unsigned long keepalive_xmit_exp;
+	time64_t last_sent;
+	unsigned long keepalive_timeout;
+	unsigned long keepalive_recv_exp;
+	time64_t last_recv;
 	bool halt;
 	struct ovpn_peer_stats vpn_stats;
 	struct ovpn_peer_stats link_stats;
@@ -137,4 +149,7 @@  struct ovpn_peer *ovpn_peer_get_by_dst(struct ovpn_struct *ovpn,
 bool ovpn_peer_check_by_src(struct ovpn_struct *ovpn, struct sk_buff *skb,
 			    struct ovpn_peer *peer);
 
+void ovpn_peer_keepalive_set(struct ovpn_peer *peer, u32 interval, u32 timeout);
+void ovpn_peer_keepalive_work(struct work_struct *work);
+
 #endif /* _NET_OVPN_OVPNPEER_H_ */
diff --git a/drivers/net/ovpn/proto.h b/drivers/net/ovpn/proto.h
index 32af6b8e574381fb719a1b3b9de3ae1071cc4846..0de8bafadc89ebb85ce40de95ef394588738a4ad 100644
--- a/drivers/net/ovpn/proto.h
+++ b/drivers/net/ovpn/proto.h
@@ -35,8 +35,6 @@ 
 #define OVPN_OP_SIZE_V2	4
 #define OVPN_PEER_ID_MASK 0x00FFFFFF
 #define OVPN_PEER_ID_UNDEF 0x00FFFFFF
-/* first byte of keepalive message */
-#define OVPN_KEEPALIVE_FIRST_BYTE 0x2a
 /* first byte of exit message */
 #define OVPN_EXPLICIT_EXIT_NOTIFY_FIRST_BYTE 0x28