diff mbox series

[rfc,2/3] virtio-net: support receive timestamp

Message ID 20201228162233.2032571-3-willemdebruijn.kernel@gmail.com
State New
Headers show
Series virtio-net: add tx-hash, rx-tstamp and tx-tstamp | expand

Commit Message

Willem de Bruijn Dec. 28, 2020, 4:22 p.m. UTC
From: Willem de Bruijn <willemb@google.com>

Add optional PTP hardware timestamp offload for virtio-net.

Accurate RTT measurement requires timestamps close to the wire.
Introduce virtio feature VIRTIO_NET_F_RX_TSTAMP. If negotiated, the
virtio-net header is expanded with room for a timestamp. A host may
pass receive timestamps for all or some packets. A timestamp is valid
if non-zero.

The timestamp straddles (virtual) hardware domains. Like PTP, use
international atomic time (CLOCK_TAI) as global clock base. It is
guest responsibility to sync with host, e.g., through kvm-clock.

Signed-off-by: Willem de Bruijn <willemb@google.com>
---
 drivers/net/virtio_net.c        | 20 +++++++++++++++++++-
 include/uapi/linux/virtio_net.h | 12 ++++++++++++
 2 files changed, 31 insertions(+), 1 deletion(-)

Comments

Michael S. Tsirkin Dec. 28, 2020, 5:28 p.m. UTC | #1
On Mon, Dec 28, 2020 at 11:22:32AM -0500, Willem de Bruijn wrote:
> From: Willem de Bruijn <willemb@google.com>
> 
> Add optional PTP hardware timestamp offload for virtio-net.
> 
> Accurate RTT measurement requires timestamps close to the wire.
> Introduce virtio feature VIRTIO_NET_F_RX_TSTAMP. If negotiated, the
> virtio-net header is expanded with room for a timestamp. A host may
> pass receive timestamps for all or some packets. A timestamp is valid
> if non-zero.
> 
> The timestamp straddles (virtual) hardware domains. Like PTP, use
> international atomic time (CLOCK_TAI) as global clock base. It is
> guest responsibility to sync with host, e.g., through kvm-clock.
> 
> Signed-off-by: Willem de Bruijn <willemb@google.com>
> ---
>  drivers/net/virtio_net.c        | 20 +++++++++++++++++++-
>  include/uapi/linux/virtio_net.h | 12 ++++++++++++
>  2 files changed, 31 insertions(+), 1 deletion(-)
> 
> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
> index b917b7333928..57744bb6a141 100644
> --- a/drivers/net/virtio_net.c
> +++ b/drivers/net/virtio_net.c
> @@ -204,6 +204,9 @@ struct virtnet_info {
>  	/* Guest will pass tx path info to the host */
>  	bool has_tx_hash;
>  
> +	/* Host will pass CLOCK_TAI receive time to the guest */
> +	bool has_rx_tstamp;
> +
>  	/* Has control virtqueue */
>  	bool has_cvq;
>  
> @@ -292,6 +295,13 @@ static inline struct virtio_net_hdr_mrg_rxbuf *skb_vnet_hdr(struct sk_buff *skb)
>  	return (struct virtio_net_hdr_mrg_rxbuf *)skb->cb;
>  }
>  
> +static inline struct virtio_net_hdr_v12 *skb_vnet_hdr_12(struct sk_buff *skb)
> +{
> +	BUILD_BUG_ON(sizeof(struct virtio_net_hdr_v12) > sizeof(skb->cb));
> +
> +	return (void *)skb->cb;
> +}
> +
>  /*
>   * private is used to chain pages for big packets, put the whole
>   * most recent used list in the beginning for reuse
> @@ -1082,6 +1092,9 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
>  		goto frame_err;
>  	}
>  
> +	if (vi->has_rx_tstamp)
> +		skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(skb_vnet_hdr_12(skb)->tstamp);
> +
>  	skb_record_rx_queue(skb, vq2rxq(rq->vq));
>  	skb->protocol = eth_type_trans(skb, dev);
>  	pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
> @@ -3071,6 +3084,11 @@ static int virtnet_probe(struct virtio_device *vdev)
>  		vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash);
>  	}
>  
> +	if (virtio_has_feature(vdev, VIRTIO_NET_F_RX_TSTAMP)) {
> +		vi->has_rx_tstamp = true;
> +		vi->hdr_len = sizeof(struct virtio_net_hdr_v12);
> +	}
> +
>  	if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) ||
>  	    virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
>  		vi->any_header_sg = true;
> @@ -3261,7 +3279,7 @@ static struct virtio_device_id id_table[] = {
>  	VIRTIO_NET_F_CTRL_MAC_ADDR, \
>  	VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
>  	VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \
> -	VIRTIO_NET_F_TX_HASH
> +	VIRTIO_NET_F_TX_HASH, VIRTIO_NET_F_RX_TSTAMP
>  
>  static unsigned int features[] = {
>  	VIRTNET_FEATURES,
> diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
> index f6881b5b77ee..0ffe2eeebd4a 100644
> --- a/include/uapi/linux/virtio_net.h
> +++ b/include/uapi/linux/virtio_net.h
> @@ -57,6 +57,7 @@
>  					 * Steering */
>  #define VIRTIO_NET_F_CTRL_MAC_ADDR 23	/* Set MAC address */
>  
> +#define VIRTIO_NET_F_RX_TSTAMP	  55	/* Host sends TAI receive time */
>  #define VIRTIO_NET_F_TX_HASH	  56	/* Guest sends hash report */
>  #define VIRTIO_NET_F_HASH_REPORT  57	/* Supports hash report */
>  #define VIRTIO_NET_F_RSS	  60	/* Supports RSS RX steering */
> @@ -182,6 +183,17 @@ struct virtio_net_hdr_v1_hash {
>  	};
>  };
>  
> +struct virtio_net_hdr_v12 {
> +	struct virtio_net_hdr_v1 hdr;
> +	struct {
> +		__le32 value;
> +		__le16 report;
> +		__le16 flow_state;
> +	} hash;
> +	__virtio32 reserved;
> +	__virtio64 tstamp;
> +};
> +
>  #ifndef VIRTIO_NET_NO_LEGACY
>  /* This header comes first in the scatter-gather list.
>   * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, it must


So it looks like VIRTIO_NET_F_RX_TSTAMP should depend on both
VIRTIO_NET_F_RX_TSTAMP and VIRTIO_NET_F_HASH_REPORT then?

I am not sure what does v12 mean here.

virtio_net_hdr_v1 is just with VIRTIO_F_VERSION_1,
virtio_net_hdr_v1_hash is with VIRTIO_F_VERSION_1 and
VIRTIO_NET_F_HASH_REPORT.

So this one is virtio_net_hdr_hash_tstamp I guess?


> -- 
> 2.29.2.729.g45daf8777d-goog
Michael S. Tsirkin Dec. 28, 2020, 9:32 p.m. UTC | #2
On Mon, Dec 28, 2020 at 02:30:31PM -0500, Willem de Bruijn wrote:
> On Mon, Dec 28, 2020 at 12:29 PM Michael S. Tsirkin <mst@redhat.com> wrote:
> >
> > On Mon, Dec 28, 2020 at 11:22:32AM -0500, Willem de Bruijn wrote:
> > > From: Willem de Bruijn <willemb@google.com>
> > >
> > > Add optional PTP hardware timestamp offload for virtio-net.
> > >
> > > Accurate RTT measurement requires timestamps close to the wire.
> > > Introduce virtio feature VIRTIO_NET_F_RX_TSTAMP. If negotiated, the
> > > virtio-net header is expanded with room for a timestamp. A host may
> > > pass receive timestamps for all or some packets. A timestamp is valid
> > > if non-zero.
> > >
> > > The timestamp straddles (virtual) hardware domains. Like PTP, use
> > > international atomic time (CLOCK_TAI) as global clock base. It is
> > > guest responsibility to sync with host, e.g., through kvm-clock.
> > >
> > > Signed-off-by: Willem de Bruijn <willemb@google.com>
> > > diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
> > > index f6881b5b77ee..0ffe2eeebd4a 100644
> > > --- a/include/uapi/linux/virtio_net.h
> > > +++ b/include/uapi/linux/virtio_net.h
> > > @@ -57,6 +57,7 @@
> > >                                        * Steering */
> > >  #define VIRTIO_NET_F_CTRL_MAC_ADDR 23        /* Set MAC address */
> > >
> > > +#define VIRTIO_NET_F_RX_TSTAMP         55    /* Host sends TAI receive time */
> > >  #define VIRTIO_NET_F_TX_HASH   56    /* Guest sends hash report */
> > >  #define VIRTIO_NET_F_HASH_REPORT  57 /* Supports hash report */
> > >  #define VIRTIO_NET_F_RSS       60    /* Supports RSS RX steering */
> > > @@ -182,6 +183,17 @@ struct virtio_net_hdr_v1_hash {
> > >       };
> > >  };
> > >
> > > +struct virtio_net_hdr_v12 {
> > > +     struct virtio_net_hdr_v1 hdr;
> > > +     struct {
> > > +             __le32 value;
> > > +             __le16 report;
> > > +             __le16 flow_state;
> > > +     } hash;
> > > +     __virtio32 reserved;
> > > +     __virtio64 tstamp;
> > > +};
> > > +
> > >  #ifndef VIRTIO_NET_NO_LEGACY
> > >  /* This header comes first in the scatter-gather list.
> > >   * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, it must
> >
> >
> > So it looks like VIRTIO_NET_F_RX_TSTAMP should depend on both
> > VIRTIO_NET_F_RX_TSTAMP and VIRTIO_NET_F_HASH_REPORT then?
> 
> Do you mean VIRTIO_NET_F_TX_TSTAMP depends on VIRTIO_NET_F_RX_TSTAMP?
> 
> I think if either is enabled we need to enable the extended layout.
> Regardless of whether TX_HASH or HASH_REPORT are enabled. If they are
> not, then those fields are ignored.

I mean do we waste the 8 bytes for hash if TSTAMP is set but HASH is not?
If TSTAMP depends on HASH then point is moot.



> > I am not sure what does v12 mean here.
> >
> > virtio_net_hdr_v1 is just with VIRTIO_F_VERSION_1,
> > virtio_net_hdr_v1_hash is with VIRTIO_F_VERSION_1 and
> > VIRTIO_NET_F_HASH_REPORT.
> >
> > So this one is virtio_net_hdr_hash_tstamp I guess?
> 
> Sounds better, yes, will change that.
> 
> This was an attempt at identifying the layout with the likely next
> generation of the spec, 1.2. Similar to virtio_net_hdr_v1. But that is
> both premature and not very helpful.
Jakub Kicinski Dec. 28, 2020, 10:59 p.m. UTC | #3
On Mon, 28 Dec 2020 11:22:32 -0500 Willem de Bruijn wrote:
> From: Willem de Bruijn <willemb@google.com>
> 
> Add optional PTP hardware timestamp offload for virtio-net.
> 
> Accurate RTT measurement requires timestamps close to the wire.
> Introduce virtio feature VIRTIO_NET_F_RX_TSTAMP. If negotiated, the
> virtio-net header is expanded with room for a timestamp. A host may
> pass receive timestamps for all or some packets. A timestamp is valid
> if non-zero.
> 
> The timestamp straddles (virtual) hardware domains. Like PTP, use
> international atomic time (CLOCK_TAI) as global clock base. It is
> guest responsibility to sync with host, e.g., through kvm-clock.

Would this not be confusing to some user space SW to have a NIC with 
no PHC deliver HW stamps?

I'd CC Richard on this, unless you already discussed with him offline.
Willem de Bruijn Dec. 29, 2020, 12:57 a.m. UTC | #4
On Mon, Dec 28, 2020 at 5:59 PM Jakub Kicinski <kuba@kernel.org> wrote:
>

> On Mon, 28 Dec 2020 11:22:32 -0500 Willem de Bruijn wrote:

> > From: Willem de Bruijn <willemb@google.com>

> >

> > Add optional PTP hardware timestamp offload for virtio-net.

> >

> > Accurate RTT measurement requires timestamps close to the wire.

> > Introduce virtio feature VIRTIO_NET_F_RX_TSTAMP. If negotiated, the

> > virtio-net header is expanded with room for a timestamp. A host may

> > pass receive timestamps for all or some packets. A timestamp is valid

> > if non-zero.

> >

> > The timestamp straddles (virtual) hardware domains. Like PTP, use

> > international atomic time (CLOCK_TAI) as global clock base. It is

> > guest responsibility to sync with host, e.g., through kvm-clock.

>

> Would this not be confusing to some user space SW to have a NIC with

> no PHC deliver HW stamps?

>

> I'd CC Richard on this, unless you already discussed with him offline.


Thanks, good point. I should have included Richard.

There is a well understood method for synchronizing guest and host
clock in KVM using ptp_kvm. For virtual environments without NIC
hardware offload, the when host timestamps in software, this suffices.

Syncing host with NIC is assumed if the host advertises the feature
and implements using real hardware timestamps.
Willem de Bruijn Dec. 29, 2020, 1:05 a.m. UTC | #5
On Mon, Dec 28, 2020 at 7:55 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>

> On Mon, Dec 28, 2020 at 02:30:31PM -0500, Willem de Bruijn wrote:

> > On Mon, Dec 28, 2020 at 12:29 PM Michael S. Tsirkin <mst@redhat.com> wrote:

> > >

> > > On Mon, Dec 28, 2020 at 11:22:32AM -0500, Willem de Bruijn wrote:

> > > > From: Willem de Bruijn <willemb@google.com>

> > > >

> > > > Add optional PTP hardware timestamp offload for virtio-net.

> > > >

> > > > Accurate RTT measurement requires timestamps close to the wire.

> > > > Introduce virtio feature VIRTIO_NET_F_RX_TSTAMP. If negotiated, the

> > > > virtio-net header is expanded with room for a timestamp. A host may

> > > > pass receive timestamps for all or some packets. A timestamp is valid

> > > > if non-zero.

> > > >

> > > > The timestamp straddles (virtual) hardware domains. Like PTP, use

> > > > international atomic time (CLOCK_TAI) as global clock base. It is

> > > > guest responsibility to sync with host, e.g., through kvm-clock.

> > > >

> > > > Signed-off-by: Willem de Bruijn <willemb@google.com>

> > > > diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h

> > > > index f6881b5b77ee..0ffe2eeebd4a 100644

> > > > --- a/include/uapi/linux/virtio_net.h

> > > > +++ b/include/uapi/linux/virtio_net.h

> > > > @@ -57,6 +57,7 @@

> > > >                                        * Steering */

> > > >  #define VIRTIO_NET_F_CTRL_MAC_ADDR 23        /* Set MAC address */

> > > >

> > > > +#define VIRTIO_NET_F_RX_TSTAMP         55    /* Host sends TAI receive time */

> > > >  #define VIRTIO_NET_F_TX_HASH   56    /* Guest sends hash report */

> > > >  #define VIRTIO_NET_F_HASH_REPORT  57 /* Supports hash report */

> > > >  #define VIRTIO_NET_F_RSS       60    /* Supports RSS RX steering */

> > > > @@ -182,6 +183,17 @@ struct virtio_net_hdr_v1_hash {

> > > >       };

> > > >  };

> > > >

> > > > +struct virtio_net_hdr_v12 {

> > > > +     struct virtio_net_hdr_v1 hdr;

> > > > +     struct {

> > > > +             __le32 value;

> > > > +             __le16 report;

> > > > +             __le16 flow_state;

> > > > +     } hash;

> > > > +     __virtio32 reserved;

> > > > +     __virtio64 tstamp;

> > > > +};

> > > > +

> > > >  #ifndef VIRTIO_NET_NO_LEGACY

> > > >  /* This header comes first in the scatter-gather list.

> > > >   * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, it must

> > >

> > >

> > > So it looks like VIRTIO_NET_F_RX_TSTAMP should depend on both

> > > VIRTIO_NET_F_RX_TSTAMP and VIRTIO_NET_F_HASH_REPORT then?

> >

> > Do you mean VIRTIO_NET_F_TX_TSTAMP depends on VIRTIO_NET_F_RX_TSTAMP?

> >

> > I think if either is enabled we need to enable the extended layout.

> > Regardless of whether TX_HASH or HASH_REPORT are enabled. If they are

> > not, then those fields are ignored.

>

> I mean do we waste the 8 bytes for hash if TSTAMP is set but HASH is not?

> If TSTAMP depends on HASH then point is moot.


True, but the two features really are independent.

I did consider using configurable metadata layout depending on
features negotiated. If there are tons of optional extensions, that
makes sense. But it is more complex and parsing error prone. With a
handful of options each of a few bytes, that did not seem worth the
cost to me at this point.

And importantly, such a mode can always be added later as a separate
VIRTIO_NET_F_PACKED_HEADER option.

If anything, perhaps if we increase the virtio_net_hdr_* allocation,
we should allocate some additional reserved space now? As each new
size introduces quite a bit of boilerplate. Also, e.g., in qemu just
to pass the sizes between virtio-net driver and vhost-net device.
Jason Wang Dec. 29, 2020, 9:17 a.m. UTC | #6
----- Original Message -----
> On Mon, Dec 28, 2020 at 7:55 PM Michael S. Tsirkin <mst@redhat.com> wrote:

> >

> > On Mon, Dec 28, 2020 at 02:30:31PM -0500, Willem de Bruijn wrote:

> > > On Mon, Dec 28, 2020 at 12:29 PM Michael S. Tsirkin <mst@redhat.com>

> > > wrote:

> > > >

> > > > On Mon, Dec 28, 2020 at 11:22:32AM -0500, Willem de Bruijn wrote:

> > > > > From: Willem de Bruijn <willemb@google.com>

> > > > >

> > > > > Add optional PTP hardware timestamp offload for virtio-net.

> > > > >

> > > > > Accurate RTT measurement requires timestamps close to the wire.

> > > > > Introduce virtio feature VIRTIO_NET_F_RX_TSTAMP. If negotiated, the

> > > > > virtio-net header is expanded with room for a timestamp. A host may

> > > > > pass receive timestamps for all or some packets. A timestamp is valid

> > > > > if non-zero.

> > > > >

> > > > > The timestamp straddles (virtual) hardware domains. Like PTP, use

> > > > > international atomic time (CLOCK_TAI) as global clock base. It is

> > > > > guest responsibility to sync with host, e.g., through kvm-clock.

> > > > >

> > > > > Signed-off-by: Willem de Bruijn <willemb@google.com>

> > > > > diff --git a/include/uapi/linux/virtio_net.h

> > > > > b/include/uapi/linux/virtio_net.h

> > > > > index f6881b5b77ee..0ffe2eeebd4a 100644

> > > > > --- a/include/uapi/linux/virtio_net.h

> > > > > +++ b/include/uapi/linux/virtio_net.h

> > > > > @@ -57,6 +57,7 @@

> > > > >                                        * Steering */

> > > > >  #define VIRTIO_NET_F_CTRL_MAC_ADDR 23        /* Set MAC address */

> > > > >

> > > > > +#define VIRTIO_NET_F_RX_TSTAMP         55    /* Host sends TAI

> > > > > receive time */

> > > > >  #define VIRTIO_NET_F_TX_HASH   56    /* Guest sends hash report */

> > > > >  #define VIRTIO_NET_F_HASH_REPORT  57 /* Supports hash report */

> > > > >  #define VIRTIO_NET_F_RSS       60    /* Supports RSS RX steering */

> > > > > @@ -182,6 +183,17 @@ struct virtio_net_hdr_v1_hash {

> > > > >       };

> > > > >  };

> > > > >

> > > > > +struct virtio_net_hdr_v12 {

> > > > > +     struct virtio_net_hdr_v1 hdr;

> > > > > +     struct {

> > > > > +             __le32 value;

> > > > > +             __le16 report;

> > > > > +             __le16 flow_state;

> > > > > +     } hash;

> > > > > +     __virtio32 reserved;

> > > > > +     __virtio64 tstamp;

> > > > > +};

> > > > > +

> > > > >  #ifndef VIRTIO_NET_NO_LEGACY

> > > > >  /* This header comes first in the scatter-gather list.

> > > > >   * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, it

> > > > >   must

> > > >

> > > >

> > > > So it looks like VIRTIO_NET_F_RX_TSTAMP should depend on both

> > > > VIRTIO_NET_F_RX_TSTAMP and VIRTIO_NET_F_HASH_REPORT then?

> > >

> > > Do you mean VIRTIO_NET_F_TX_TSTAMP depends on VIRTIO_NET_F_RX_TSTAMP?

> > >

> > > I think if either is enabled we need to enable the extended layout.

> > > Regardless of whether TX_HASH or HASH_REPORT are enabled. If they are

> > > not, then those fields are ignored.

> >

> > I mean do we waste the 8 bytes for hash if TSTAMP is set but HASH is not?

> > If TSTAMP depends on HASH then point is moot.

> 

> True, but the two features really are independent.

> 

> I did consider using configurable metadata layout depending on

> features negotiated. If there are tons of optional extensions, that

> makes sense. But it is more complex and parsing error prone. With a

> handful of options each of a few bytes, that did not seem worth the

> cost to me at this point.


Consider NFV workloads(64B) packet. Most fields of current vnet header
is even a burdern. It might be the time to introduce configurable or
self-descriptive vnet header.


> 

> And importantly, such a mode can always be added later as a separate

> VIRTIO_NET_F_PACKED_HEADER option.


What will do feature provide?

Thanks

> 

> If anything, perhaps if we increase the virtio_net_hdr_* allocation,

> we should allocate some additional reserved space now? As each new

> size introduces quite a bit of boilerplate. Also, e.g., in qemu just

> to pass the sizes between virtio-net driver and vhost-net device.

> 

>
Willem de Bruijn Dec. 29, 2020, 2:20 p.m. UTC | #7
On Tue, Dec 29, 2020 at 4:23 AM Jason Wang <jasowang@redhat.com> wrote:
>

>

>

> ----- Original Message -----

> > On Mon, Dec 28, 2020 at 7:55 PM Michael S. Tsirkin <mst@redhat.com> wrote:

> > >

> > > On Mon, Dec 28, 2020 at 02:30:31PM -0500, Willem de Bruijn wrote:

> > > > On Mon, Dec 28, 2020 at 12:29 PM Michael S. Tsirkin <mst@redhat.com>

> > > > wrote:

> > > > >

> > > > > On Mon, Dec 28, 2020 at 11:22:32AM -0500, Willem de Bruijn wrote:

> > > > > > From: Willem de Bruijn <willemb@google.com>

> > > > > >

> > > > > > Add optional PTP hardware timestamp offload for virtio-net.

> > > > > >

> > > > > > Accurate RTT measurement requires timestamps close to the wire.

> > > > > > Introduce virtio feature VIRTIO_NET_F_RX_TSTAMP. If negotiated, the

> > > > > > virtio-net header is expanded with room for a timestamp. A host may

> > > > > > pass receive timestamps for all or some packets. A timestamp is valid

> > > > > > if non-zero.

> > > > > >

> > > > > > The timestamp straddles (virtual) hardware domains. Like PTP, use

> > > > > > international atomic time (CLOCK_TAI) as global clock base. It is

> > > > > > guest responsibility to sync with host, e.g., through kvm-clock.

> > > > > >

> > > > > > Signed-off-by: Willem de Bruijn <willemb@google.com>

> > > > > > diff --git a/include/uapi/linux/virtio_net.h

> > > > > > b/include/uapi/linux/virtio_net.h

> > > > > > index f6881b5b77ee..0ffe2eeebd4a 100644

> > > > > > --- a/include/uapi/linux/virtio_net.h

> > > > > > +++ b/include/uapi/linux/virtio_net.h

> > > > > > @@ -57,6 +57,7 @@

> > > > > >                                        * Steering */

> > > > > >  #define VIRTIO_NET_F_CTRL_MAC_ADDR 23        /* Set MAC address */

> > > > > >

> > > > > > +#define VIRTIO_NET_F_RX_TSTAMP         55    /* Host sends TAI

> > > > > > receive time */

> > > > > >  #define VIRTIO_NET_F_TX_HASH   56    /* Guest sends hash report */

> > > > > >  #define VIRTIO_NET_F_HASH_REPORT  57 /* Supports hash report */

> > > > > >  #define VIRTIO_NET_F_RSS       60    /* Supports RSS RX steering */

> > > > > > @@ -182,6 +183,17 @@ struct virtio_net_hdr_v1_hash {

> > > > > >       };

> > > > > >  };

> > > > > >

> > > > > > +struct virtio_net_hdr_v12 {

> > > > > > +     struct virtio_net_hdr_v1 hdr;

> > > > > > +     struct {

> > > > > > +             __le32 value;

> > > > > > +             __le16 report;

> > > > > > +             __le16 flow_state;

> > > > > > +     } hash;

> > > > > > +     __virtio32 reserved;

> > > > > > +     __virtio64 tstamp;

> > > > > > +};

> > > > > > +

> > > > > >  #ifndef VIRTIO_NET_NO_LEGACY

> > > > > >  /* This header comes first in the scatter-gather list.

> > > > > >   * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, it

> > > > > >   must

> > > > >

> > > > >

> > > > > So it looks like VIRTIO_NET_F_RX_TSTAMP should depend on both

> > > > > VIRTIO_NET_F_RX_TSTAMP and VIRTIO_NET_F_HASH_REPORT then?

> > > >

> > > > Do you mean VIRTIO_NET_F_TX_TSTAMP depends on VIRTIO_NET_F_RX_TSTAMP?

> > > >

> > > > I think if either is enabled we need to enable the extended layout.

> > > > Regardless of whether TX_HASH or HASH_REPORT are enabled. If they are

> > > > not, then those fields are ignored.

> > >

> > > I mean do we waste the 8 bytes for hash if TSTAMP is set but HASH is not?

> > > If TSTAMP depends on HASH then point is moot.

> >

> > True, but the two features really are independent.

> >

> > I did consider using configurable metadata layout depending on

> > features negotiated. If there are tons of optional extensions, that

> > makes sense. But it is more complex and parsing error prone. With a

> > handful of options each of a few bytes, that did not seem worth the

> > cost to me at this point.

>

> Consider NFV workloads(64B) packet. Most fields of current vnet header

> is even a burdern.


Such workloads will not negotiate these features, I imagine.

I think this could only become an issue if a small packet workload
would want to negotiate one optional feature, without the others.

Even then, the actual cost of a few untouched bytes is negligible, as
long as the headers don't span cache-lines. I expect it to be cheaper
than having to parse a dynamic layout.

> It might be the time to introduce configurable or

> self-descriptive vnet header.


I did briefly toy with a type-val encoding. Not type-val-len, as all
options have fixed length (so far), so length can be left implicit.
But as each feature is negotiated before hand, the type can be left
implicit too. Leaving just the data elements tightly packed. As long
as order is defined.

> > And importantly, such a mode can always be added later as a separate

> > VIRTIO_NET_F_PACKED_HEADER option.

>

> What will do feature provide?


The tightly packed data elements. Strip out the elements for non
negotiated features. So if either tstamp option is negotiated, but
hash is not, exclude the 64b of hash fields. Note that I'm not
actually arguing for this (at this point).
Jason Wang Dec. 30, 2020, 8:38 a.m. UTC | #8
On 2020/12/29 下午10:20, Willem de Bruijn wrote:
> On Tue, Dec 29, 2020 at 4:23 AM Jason Wang <jasowang@redhat.com> wrote:

>>

>>

>> ----- Original Message -----

>>> On Mon, Dec 28, 2020 at 7:55 PM Michael S. Tsirkin <mst@redhat.com> wrote:

>>>> On Mon, Dec 28, 2020 at 02:30:31PM -0500, Willem de Bruijn wrote:

>>>>> On Mon, Dec 28, 2020 at 12:29 PM Michael S. Tsirkin <mst@redhat.com>

>>>>> wrote:

>>>>>> On Mon, Dec 28, 2020 at 11:22:32AM -0500, Willem de Bruijn wrote:

>>>>>>> From: Willem de Bruijn <willemb@google.com>

>>>>>>>

>>>>>>> Add optional PTP hardware timestamp offload for virtio-net.

>>>>>>>

>>>>>>> Accurate RTT measurement requires timestamps close to the wire.

>>>>>>> Introduce virtio feature VIRTIO_NET_F_RX_TSTAMP. If negotiated, the

>>>>>>> virtio-net header is expanded with room for a timestamp. A host may

>>>>>>> pass receive timestamps for all or some packets. A timestamp is valid

>>>>>>> if non-zero.

>>>>>>>

>>>>>>> The timestamp straddles (virtual) hardware domains. Like PTP, use

>>>>>>> international atomic time (CLOCK_TAI) as global clock base. It is

>>>>>>> guest responsibility to sync with host, e.g., through kvm-clock.

>>>>>>>

>>>>>>> Signed-off-by: Willem de Bruijn <willemb@google.com>

>>>>>>> diff --git a/include/uapi/linux/virtio_net.h

>>>>>>> b/include/uapi/linux/virtio_net.h

>>>>>>> index f6881b5b77ee..0ffe2eeebd4a 100644

>>>>>>> --- a/include/uapi/linux/virtio_net.h

>>>>>>> +++ b/include/uapi/linux/virtio_net.h

>>>>>>> @@ -57,6 +57,7 @@

>>>>>>>                                         * Steering */

>>>>>>>   #define VIRTIO_NET_F_CTRL_MAC_ADDR 23        /* Set MAC address */

>>>>>>>

>>>>>>> +#define VIRTIO_NET_F_RX_TSTAMP         55    /* Host sends TAI

>>>>>>> receive time */

>>>>>>>   #define VIRTIO_NET_F_TX_HASH   56    /* Guest sends hash report */

>>>>>>>   #define VIRTIO_NET_F_HASH_REPORT  57 /* Supports hash report */

>>>>>>>   #define VIRTIO_NET_F_RSS       60    /* Supports RSS RX steering */

>>>>>>> @@ -182,6 +183,17 @@ struct virtio_net_hdr_v1_hash {

>>>>>>>        };

>>>>>>>   };

>>>>>>>

>>>>>>> +struct virtio_net_hdr_v12 {

>>>>>>> +     struct virtio_net_hdr_v1 hdr;

>>>>>>> +     struct {

>>>>>>> +             __le32 value;

>>>>>>> +             __le16 report;

>>>>>>> +             __le16 flow_state;

>>>>>>> +     } hash;

>>>>>>> +     __virtio32 reserved;

>>>>>>> +     __virtio64 tstamp;

>>>>>>> +};

>>>>>>> +

>>>>>>>   #ifndef VIRTIO_NET_NO_LEGACY

>>>>>>>   /* This header comes first in the scatter-gather list.

>>>>>>>    * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, it

>>>>>>>    must

>>>>>>

>>>>>> So it looks like VIRTIO_NET_F_RX_TSTAMP should depend on both

>>>>>> VIRTIO_NET_F_RX_TSTAMP and VIRTIO_NET_F_HASH_REPORT then?

>>>>> Do you mean VIRTIO_NET_F_TX_TSTAMP depends on VIRTIO_NET_F_RX_TSTAMP?

>>>>>

>>>>> I think if either is enabled we need to enable the extended layout.

>>>>> Regardless of whether TX_HASH or HASH_REPORT are enabled. If they are

>>>>> not, then those fields are ignored.

>>>> I mean do we waste the 8 bytes for hash if TSTAMP is set but HASH is not?

>>>> If TSTAMP depends on HASH then point is moot.

>>> True, but the two features really are independent.

>>>

>>> I did consider using configurable metadata layout depending on

>>> features negotiated. If there are tons of optional extensions, that

>>> makes sense. But it is more complex and parsing error prone. With a

>>> handful of options each of a few bytes, that did not seem worth the

>>> cost to me at this point.

>> Consider NFV workloads(64B) packet. Most fields of current vnet header

>> is even a burdern.

> Such workloads will not negotiate these features, I imagine.

>

> I think this could only become an issue if a small packet workload

> would want to negotiate one optional feature, without the others.



Yes.


> Even then, the actual cost of a few untouched bytes is negligible, as

> long as the headers don't span cache-lines.



Right. It looks to me with hash report support the current header has 
exceeds 32 bytes which is the cacheline size for some archs. And it can 
be imagined that after two or more features it could be larger than 64 
bytes.


>   I expect it to be cheaper

> than having to parse a dynamic layout.



The only overhead is probably analyzing "type" which should be cheap I 
guess.


>

>> It might be the time to introduce configurable or

>> self-descriptive vnet header.

> I did briefly toy with a type-val encoding. Not type-val-len, as all

> options have fixed length (so far), so length can be left implicit.

> But as each feature is negotiated before hand, the type can be left

> implicit too. Leaving just the data elements tightly packed. As long

> as order is defined.



Right, so in this case there should be even no overhead.


>

>>> And importantly, such a mode can always be added later as a separate

>>> VIRTIO_NET_F_PACKED_HEADER option.

>> What will do feature provide?

> The tightly packed data elements. Strip out the elements for non

> negotiated features. So if either tstamp option is negotiated, but

> hash is not, exclude the 64b of hash fields. Note that I'm not

> actually arguing for this (at this point).



I second for this.

Thanks
Jason Wang Dec. 30, 2020, 8:44 a.m. UTC | #9
On 2020/12/29 上午8:57, Willem de Bruijn wrote:
> On Mon, Dec 28, 2020 at 5:59 PM Jakub Kicinski <kuba@kernel.org> wrote:

>> On Mon, 28 Dec 2020 11:22:32 -0500 Willem de Bruijn wrote:

>>> From: Willem de Bruijn <willemb@google.com>

>>>

>>> Add optional PTP hardware timestamp offload for virtio-net.

>>>

>>> Accurate RTT measurement requires timestamps close to the wire.

>>> Introduce virtio feature VIRTIO_NET_F_RX_TSTAMP. If negotiated, the

>>> virtio-net header is expanded with room for a timestamp. A host may

>>> pass receive timestamps for all or some packets. A timestamp is valid

>>> if non-zero.

>>>

>>> The timestamp straddles (virtual) hardware domains. Like PTP, use

>>> international atomic time (CLOCK_TAI) as global clock base. It is

>>> guest responsibility to sync with host, e.g., through kvm-clock.

>> Would this not be confusing to some user space SW to have a NIC with

>> no PHC deliver HW stamps?

>>

>> I'd CC Richard on this, unless you already discussed with him offline.

> Thanks, good point. I should have included Richard.

>

> There is a well understood method for synchronizing guest and host

> clock in KVM using ptp_kvm. For virtual environments without NIC

> hardware offload, the when host timestamps in software, this suffices.

>

> Syncing host with NIC is assumed if the host advertises the feature

> and implements using real hardware timestamps.



Or it could be useful for virtio hardware when there's no KVM that 
provides PTP.

Thanks


>
Richard Cochran Dec. 30, 2020, 12:30 p.m. UTC | #10
On Mon, Dec 28, 2020 at 07:57:20PM -0500, Willem de Bruijn wrote:
> There is a well understood method for synchronizing guest and host

> clock in KVM using ptp_kvm. For virtual environments without NIC

> hardware offload, the when host timestamps in software, this suffices.

> 

> Syncing host with NIC is assumed if the host advertises the feature

> and implements using real hardware timestamps.


Sounds reasonable.

Thanks,
Richard
kernel test robot Feb. 2, 2021, 1:05 p.m. UTC | #11
Hi Willem,

Thank you for the patch! Perhaps something to improve:

[auto build test WARNING on ipvs/master]
[also build test WARNING on linus/master v5.11-rc6 next-20210125]
[If your patch is applied to the wrong git tree, kindly drop us a note.
And when submitting patch, we suggest to use '--base' as documented in
https://git-scm.com/docs/git-format-patch]

url:    https://github.com/0day-ci/linux/commits/Willem-de-Bruijn/virtio-net-add-tx-hash-rx-tstamp-and-tx-tstamp/20201229-002604
base:   https://git.kernel.org/pub/scm/linux/kernel/git/horms/ipvs.git master
config: x86_64-randconfig-s021-20201228 (attached as .config)
compiler: gcc-9 (Debian 9.3.0-15) 9.3.0
reproduce:
        # apt-get install sparse
        # sparse version: v0.6.3-215-g0fb77bb6-dirty
        # https://github.com/0day-ci/linux/commit/d309db6857fa35b0d7a11cc5229436d6d71ab274
        git remote add linux-review https://github.com/0day-ci/linux
        git fetch --no-tags linux-review Willem-de-Bruijn/virtio-net-add-tx-hash-rx-tstamp-and-tx-tstamp/20201229-002604
        git checkout d309db6857fa35b0d7a11cc5229436d6d71ab274
        # save the attached .config to linux build tree
        make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=x86_64 

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot <lkp@intel.com>


"sparse warnings: (new ones prefixed by >>)"
>> drivers/net/virtio_net.c:1096:80: sparse: sparse: incorrect type in argument 1 (different base types) @@     expected unsigned long long [usertype] ns @@     got restricted __virtio64 [usertype] tstamp @@

   drivers/net/virtio_net.c:1096:80: sparse:     expected unsigned long long [usertype] ns
   drivers/net/virtio_net.c:1096:80: sparse:     got restricted __virtio64 [usertype] tstamp
   drivers/net/virtio_net.c:1580:32: sparse: sparse: incorrect type in assignment (different base types) @@     expected restricted __le32 [usertype] hash_value @@     got restricted __virtio32 @@
   drivers/net/virtio_net.c:1580:32: sparse:     expected restricted __le32 [usertype] hash_value
   drivers/net/virtio_net.c:1580:32: sparse:     got restricted __virtio32
   drivers/net/virtio_net.c:1581:33: sparse: sparse: incorrect type in assignment (different base types) @@     expected restricted __le16 [usertype] hash_report @@     got int @@
   drivers/net/virtio_net.c:1581:33: sparse:     expected restricted __le16 [usertype] hash_report
   drivers/net/virtio_net.c:1581:33: sparse:     got int

vim +1096 drivers/net/virtio_net.c

  1048	
  1049	static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
  1050				void *buf, unsigned int len, void **ctx,
  1051				unsigned int *xdp_xmit,
  1052				struct virtnet_rq_stats *stats)
  1053	{
  1054		struct net_device *dev = vi->dev;
  1055		struct sk_buff *skb;
  1056		struct virtio_net_hdr_mrg_rxbuf *hdr;
  1057	
  1058		if (unlikely(len < vi->hdr_len + ETH_HLEN)) {
  1059			pr_debug("%s: short packet %i\n", dev->name, len);
  1060			dev->stats.rx_length_errors++;
  1061			if (vi->mergeable_rx_bufs) {
  1062				put_page(virt_to_head_page(buf));
  1063			} else if (vi->big_packets) {
  1064				give_pages(rq, buf);
  1065			} else {
  1066				put_page(virt_to_head_page(buf));
  1067			}
  1068			return;
  1069		}
  1070	
  1071		if (vi->mergeable_rx_bufs)
  1072			skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit,
  1073						stats);
  1074		else if (vi->big_packets)
  1075			skb = receive_big(dev, vi, rq, buf, len, stats);
  1076		else
  1077			skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats);
  1078	
  1079		if (unlikely(!skb))
  1080			return;
  1081	
  1082		hdr = skb_vnet_hdr(skb);
  1083	
  1084		if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID)
  1085			skb->ip_summed = CHECKSUM_UNNECESSARY;
  1086	
  1087		if (virtio_net_hdr_to_skb(skb, &hdr->hdr,
  1088					  virtio_is_little_endian(vi->vdev))) {
  1089			net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n",
  1090					     dev->name, hdr->hdr.gso_type,
  1091					     hdr->hdr.gso_size);
  1092			goto frame_err;
  1093		}
  1094	
  1095		if (vi->has_rx_tstamp)
> 1096			skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(skb_vnet_hdr_12(skb)->tstamp);

  1097	
  1098		skb_record_rx_queue(skb, vq2rxq(rq->vq));
  1099		skb->protocol = eth_type_trans(skb, dev);
  1100		pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
  1101			 ntohs(skb->protocol), skb->len, skb->pkt_type);
  1102	
  1103		napi_gro_receive(&rq->napi, skb);
  1104		return;
  1105	
  1106	frame_err:
  1107		dev->stats.rx_frame_errors++;
  1108		dev_kfree_skb(skb);
  1109	}
  1110	

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
Michael S. Tsirkin Feb. 2, 2021, 2:08 p.m. UTC | #12
On Mon, Dec 28, 2020 at 11:22:32AM -0500, Willem de Bruijn wrote:
> From: Willem de Bruijn <willemb@google.com>

> 

> Add optional PTP hardware timestamp offload for virtio-net.

> 

> Accurate RTT measurement requires timestamps close to the wire.

> Introduce virtio feature VIRTIO_NET_F_RX_TSTAMP. If negotiated, the

> virtio-net header is expanded with room for a timestamp. A host may

> pass receive timestamps for all or some packets. A timestamp is valid

> if non-zero.

> 

> The timestamp straddles (virtual) hardware domains. Like PTP, use

> international atomic time (CLOCK_TAI) as global clock base. It is

> guest responsibility to sync with host, e.g., through kvm-clock.

> 

> Signed-off-by: Willem de Bruijn <willemb@google.com>

> ---

>  drivers/net/virtio_net.c        | 20 +++++++++++++++++++-

>  include/uapi/linux/virtio_net.h | 12 ++++++++++++

>  2 files changed, 31 insertions(+), 1 deletion(-)

> 

> diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c

> index b917b7333928..57744bb6a141 100644

> --- a/drivers/net/virtio_net.c

> +++ b/drivers/net/virtio_net.c

> @@ -204,6 +204,9 @@ struct virtnet_info {

>  	/* Guest will pass tx path info to the host */

>  	bool has_tx_hash;

>  

> +	/* Host will pass CLOCK_TAI receive time to the guest */

> +	bool has_rx_tstamp;

> +

>  	/* Has control virtqueue */

>  	bool has_cvq;

>  

> @@ -292,6 +295,13 @@ static inline struct virtio_net_hdr_mrg_rxbuf *skb_vnet_hdr(struct sk_buff *skb)

>  	return (struct virtio_net_hdr_mrg_rxbuf *)skb->cb;

>  }

>  

> +static inline struct virtio_net_hdr_v12 *skb_vnet_hdr_12(struct sk_buff *skb)

> +{

> +	BUILD_BUG_ON(sizeof(struct virtio_net_hdr_v12) > sizeof(skb->cb));

> +

> +	return (void *)skb->cb;

> +}

> +

>  /*

>   * private is used to chain pages for big packets, put the whole

>   * most recent used list in the beginning for reuse

> @@ -1082,6 +1092,9 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,

>  		goto frame_err;

>  	}

>  

> +	if (vi->has_rx_tstamp)

> +		skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(skb_vnet_hdr_12(skb)->tstamp);

> +

>  	skb_record_rx_queue(skb, vq2rxq(rq->vq));

>  	skb->protocol = eth_type_trans(skb, dev);

>  	pr_debug("Receiving skb proto 0x%04x len %i type %i\n",

> @@ -3071,6 +3084,11 @@ static int virtnet_probe(struct virtio_device *vdev)

>  		vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash);

>  	}

>  

> +	if (virtio_has_feature(vdev, VIRTIO_NET_F_RX_TSTAMP)) {

> +		vi->has_rx_tstamp = true;

> +		vi->hdr_len = sizeof(struct virtio_net_hdr_v12);

> +	}

> +

>  	if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) ||

>  	    virtio_has_feature(vdev, VIRTIO_F_VERSION_1))

>  		vi->any_header_sg = true;

> @@ -3261,7 +3279,7 @@ static struct virtio_device_id id_table[] = {

>  	VIRTIO_NET_F_CTRL_MAC_ADDR, \

>  	VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \

>  	VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \

> -	VIRTIO_NET_F_TX_HASH

> +	VIRTIO_NET_F_TX_HASH, VIRTIO_NET_F_RX_TSTAMP

>  

>  static unsigned int features[] = {

>  	VIRTNET_FEATURES,

> diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h

> index f6881b5b77ee..0ffe2eeebd4a 100644

> --- a/include/uapi/linux/virtio_net.h

> +++ b/include/uapi/linux/virtio_net.h

> @@ -57,6 +57,7 @@

>  					 * Steering */

>  #define VIRTIO_NET_F_CTRL_MAC_ADDR 23	/* Set MAC address */

>  

> +#define VIRTIO_NET_F_RX_TSTAMP	  55	/* Host sends TAI receive time */

>  #define VIRTIO_NET_F_TX_HASH	  56	/* Guest sends hash report */

>  #define VIRTIO_NET_F_HASH_REPORT  57	/* Supports hash report */

>  #define VIRTIO_NET_F_RSS	  60	/* Supports RSS RX steering */

> @@ -182,6 +183,17 @@ struct virtio_net_hdr_v1_hash {

>  	};

>  };

>  

> +struct virtio_net_hdr_v12 {

> +	struct virtio_net_hdr_v1 hdr;

> +	struct {

> +		__le32 value;

> +		__le16 report;

> +		__le16 flow_state;

> +	} hash;

> +	__virtio32 reserved;



Does endian-ness matter? If not - just u32?

> +	__virtio64 tstamp;

> +};

> +


Given it's only available in modern devices, I think we
can make this __le64 tstamp.

>  #ifndef VIRTIO_NET_NO_LEGACY

>  /* This header comes first in the scatter-gather list.

>   * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, it must

> -- 

> 2.29.2.729.g45daf8777d-goog
Willem de Bruijn Feb. 2, 2021, 10:17 p.m. UTC | #13
On Tue, Feb 2, 2021 at 9:08 AM Michael S. Tsirkin <mst@redhat.com> wrote:
>

> On Mon, Dec 28, 2020 at 11:22:32AM -0500, Willem de Bruijn wrote:

> > From: Willem de Bruijn <willemb@google.com>

> >

> > Add optional PTP hardware timestamp offload for virtio-net.

> >

> > Accurate RTT measurement requires timestamps close to the wire.

> > Introduce virtio feature VIRTIO_NET_F_RX_TSTAMP. If negotiated, the

> > virtio-net header is expanded with room for a timestamp. A host may

> > pass receive timestamps for all or some packets. A timestamp is valid

> > if non-zero.

> >

> > The timestamp straddles (virtual) hardware domains. Like PTP, use

> > international atomic time (CLOCK_TAI) as global clock base. It is

> > guest responsibility to sync with host, e.g., through kvm-clock.

> >

> > Signed-off-by: Willem de Bruijn <willemb@google.com>

> > ---

> >  drivers/net/virtio_net.c        | 20 +++++++++++++++++++-

> >  include/uapi/linux/virtio_net.h | 12 ++++++++++++

> >  2 files changed, 31 insertions(+), 1 deletion(-)

> >

> > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c

> > index b917b7333928..57744bb6a141 100644

> > --- a/drivers/net/virtio_net.c

> > +++ b/drivers/net/virtio_net.c

> > @@ -204,6 +204,9 @@ struct virtnet_info {

> >       /* Guest will pass tx path info to the host */

> >       bool has_tx_hash;

> >

> > +     /* Host will pass CLOCK_TAI receive time to the guest */

> > +     bool has_rx_tstamp;

> > +

> >       /* Has control virtqueue */

> >       bool has_cvq;

> >

> > @@ -292,6 +295,13 @@ static inline struct virtio_net_hdr_mrg_rxbuf *skb_vnet_hdr(struct sk_buff *skb)

> >       return (struct virtio_net_hdr_mrg_rxbuf *)skb->cb;

> >  }

> >

> > +static inline struct virtio_net_hdr_v12 *skb_vnet_hdr_12(struct sk_buff *skb)

> > +{

> > +     BUILD_BUG_ON(sizeof(struct virtio_net_hdr_v12) > sizeof(skb->cb));

> > +

> > +     return (void *)skb->cb;

> > +}

> > +

> >  /*

> >   * private is used to chain pages for big packets, put the whole

> >   * most recent used list in the beginning for reuse

> > @@ -1082,6 +1092,9 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,

> >               goto frame_err;

> >       }

> >

> > +     if (vi->has_rx_tstamp)

> > +             skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(skb_vnet_hdr_12(skb)->tstamp);

> > +

> >       skb_record_rx_queue(skb, vq2rxq(rq->vq));

> >       skb->protocol = eth_type_trans(skb, dev);

> >       pr_debug("Receiving skb proto 0x%04x len %i type %i\n",

> > @@ -3071,6 +3084,11 @@ static int virtnet_probe(struct virtio_device *vdev)

> >               vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash);

> >       }

> >

> > +     if (virtio_has_feature(vdev, VIRTIO_NET_F_RX_TSTAMP)) {

> > +             vi->has_rx_tstamp = true;

> > +             vi->hdr_len = sizeof(struct virtio_net_hdr_v12);

> > +     }

> > +

> >       if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) ||

> >           virtio_has_feature(vdev, VIRTIO_F_VERSION_1))

> >               vi->any_header_sg = true;

> > @@ -3261,7 +3279,7 @@ static struct virtio_device_id id_table[] = {

> >       VIRTIO_NET_F_CTRL_MAC_ADDR, \

> >       VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \

> >       VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \

> > -     VIRTIO_NET_F_TX_HASH

> > +     VIRTIO_NET_F_TX_HASH, VIRTIO_NET_F_RX_TSTAMP

> >

> >  static unsigned int features[] = {

> >       VIRTNET_FEATURES,

> > diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h

> > index f6881b5b77ee..0ffe2eeebd4a 100644

> > --- a/include/uapi/linux/virtio_net.h

> > +++ b/include/uapi/linux/virtio_net.h

> > @@ -57,6 +57,7 @@

> >                                        * Steering */

> >  #define VIRTIO_NET_F_CTRL_MAC_ADDR 23        /* Set MAC address */

> >

> > +#define VIRTIO_NET_F_RX_TSTAMP         55    /* Host sends TAI receive time */

> >  #define VIRTIO_NET_F_TX_HASH   56    /* Guest sends hash report */

> >  #define VIRTIO_NET_F_HASH_REPORT  57 /* Supports hash report */

> >  #define VIRTIO_NET_F_RSS       60    /* Supports RSS RX steering */

> > @@ -182,6 +183,17 @@ struct virtio_net_hdr_v1_hash {

> >       };

> >  };

> >

> > +struct virtio_net_hdr_v12 {

> > +     struct virtio_net_hdr_v1 hdr;

> > +     struct {

> > +             __le32 value;

> > +             __le16 report;

> > +             __le16 flow_state;

> > +     } hash;

> > +     __virtio32 reserved;

>

>

> Does endian-ness matter? If not - just u32?


I suppose it does not matter as long as this is reserved. Should it be
__le32, at least?

> > +     __virtio64 tstamp;

> > +};

> > +

>

> Given it's only available in modern devices, I think we

> can make this __le64 tstamp.


Actually, would it be possible to make new features available on
legacy devices? There is nothing in the features bits precluding it.

I have a revised patchset almost ready. I suppose I should send it as
RFC again, and simultaneously file an OASIS ballot for each feature?
Michael S. Tsirkin Feb. 2, 2021, 11:02 p.m. UTC | #14
On Tue, Feb 02, 2021 at 05:17:13PM -0500, Willem de Bruijn wrote:
> On Tue, Feb 2, 2021 at 9:08 AM Michael S. Tsirkin <mst@redhat.com> wrote:

> >

> > On Mon, Dec 28, 2020 at 11:22:32AM -0500, Willem de Bruijn wrote:

> > > From: Willem de Bruijn <willemb@google.com>

> > >

> > > Add optional PTP hardware timestamp offload for virtio-net.

> > >

> > > Accurate RTT measurement requires timestamps close to the wire.

> > > Introduce virtio feature VIRTIO_NET_F_RX_TSTAMP. If negotiated, the

> > > virtio-net header is expanded with room for a timestamp. A host may

> > > pass receive timestamps for all or some packets. A timestamp is valid

> > > if non-zero.

> > >

> > > The timestamp straddles (virtual) hardware domains. Like PTP, use

> > > international atomic time (CLOCK_TAI) as global clock base. It is

> > > guest responsibility to sync with host, e.g., through kvm-clock.

> > >

> > > Signed-off-by: Willem de Bruijn <willemb@google.com>

> > > ---

> > >  drivers/net/virtio_net.c        | 20 +++++++++++++++++++-

> > >  include/uapi/linux/virtio_net.h | 12 ++++++++++++

> > >  2 files changed, 31 insertions(+), 1 deletion(-)

> > >

> > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c

> > > index b917b7333928..57744bb6a141 100644

> > > --- a/drivers/net/virtio_net.c

> > > +++ b/drivers/net/virtio_net.c

> > > @@ -204,6 +204,9 @@ struct virtnet_info {

> > >       /* Guest will pass tx path info to the host */

> > >       bool has_tx_hash;

> > >

> > > +     /* Host will pass CLOCK_TAI receive time to the guest */

> > > +     bool has_rx_tstamp;

> > > +

> > >       /* Has control virtqueue */

> > >       bool has_cvq;

> > >

> > > @@ -292,6 +295,13 @@ static inline struct virtio_net_hdr_mrg_rxbuf *skb_vnet_hdr(struct sk_buff *skb)

> > >       return (struct virtio_net_hdr_mrg_rxbuf *)skb->cb;

> > >  }

> > >

> > > +static inline struct virtio_net_hdr_v12 *skb_vnet_hdr_12(struct sk_buff *skb)

> > > +{

> > > +     BUILD_BUG_ON(sizeof(struct virtio_net_hdr_v12) > sizeof(skb->cb));

> > > +

> > > +     return (void *)skb->cb;

> > > +}

> > > +

> > >  /*

> > >   * private is used to chain pages for big packets, put the whole

> > >   * most recent used list in the beginning for reuse

> > > @@ -1082,6 +1092,9 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,

> > >               goto frame_err;

> > >       }

> > >

> > > +     if (vi->has_rx_tstamp)

> > > +             skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(skb_vnet_hdr_12(skb)->tstamp);

> > > +

> > >       skb_record_rx_queue(skb, vq2rxq(rq->vq));

> > >       skb->protocol = eth_type_trans(skb, dev);

> > >       pr_debug("Receiving skb proto 0x%04x len %i type %i\n",

> > > @@ -3071,6 +3084,11 @@ static int virtnet_probe(struct virtio_device *vdev)

> > >               vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash);

> > >       }

> > >

> > > +     if (virtio_has_feature(vdev, VIRTIO_NET_F_RX_TSTAMP)) {

> > > +             vi->has_rx_tstamp = true;

> > > +             vi->hdr_len = sizeof(struct virtio_net_hdr_v12);

> > > +     }

> > > +

> > >       if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) ||

> > >           virtio_has_feature(vdev, VIRTIO_F_VERSION_1))

> > >               vi->any_header_sg = true;

> > > @@ -3261,7 +3279,7 @@ static struct virtio_device_id id_table[] = {

> > >       VIRTIO_NET_F_CTRL_MAC_ADDR, \

> > >       VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \

> > >       VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \

> > > -     VIRTIO_NET_F_TX_HASH

> > > +     VIRTIO_NET_F_TX_HASH, VIRTIO_NET_F_RX_TSTAMP

> > >

> > >  static unsigned int features[] = {

> > >       VIRTNET_FEATURES,

> > > diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h

> > > index f6881b5b77ee..0ffe2eeebd4a 100644

> > > --- a/include/uapi/linux/virtio_net.h

> > > +++ b/include/uapi/linux/virtio_net.h

> > > @@ -57,6 +57,7 @@

> > >                                        * Steering */

> > >  #define VIRTIO_NET_F_CTRL_MAC_ADDR 23        /* Set MAC address */

> > >

> > > +#define VIRTIO_NET_F_RX_TSTAMP         55    /* Host sends TAI receive time */

> > >  #define VIRTIO_NET_F_TX_HASH   56    /* Guest sends hash report */

> > >  #define VIRTIO_NET_F_HASH_REPORT  57 /* Supports hash report */

> > >  #define VIRTIO_NET_F_RSS       60    /* Supports RSS RX steering */

> > > @@ -182,6 +183,17 @@ struct virtio_net_hdr_v1_hash {

> > >       };

> > >  };

> > >

> > > +struct virtio_net_hdr_v12 {

> > > +     struct virtio_net_hdr_v1 hdr;

> > > +     struct {

> > > +             __le32 value;

> > > +             __le16 report;

> > > +             __le16 flow_state;

> > > +     } hash;

> > > +     __virtio32 reserved;

> >

> >

> > Does endian-ness matter? If not - just u32?

> 

> I suppose it does not matter as long as this is reserved. Should it be

> __le32, at least?


One can safely assign 0 to any value.


> > > +     __virtio64 tstamp;

> > > +};

> > > +

> >

> > Given it's only available in modern devices, I think we

> > can make this __le64 tstamp.

> 

> Actually, would it be possible to make new features available on

> legacy devices? There is nothing in the features bits precluding it.


I think it won't be possible: you are using feature bit 55,
legacy devices have up to 32 feature bits. And of course the
header looks a bit differently for legacy, you would have to add special
code to handle that when mergeable buffers are off.

> 

> I have a revised patchset almost ready. I suppose I should send it as

> RFC again, and simultaneously file an OASIS ballot for each feature?


that would be great.

-- 
MST
Willem de Bruijn Feb. 2, 2021, 11:43 p.m. UTC | #15
On Tue, Feb 2, 2021 at 6:06 PM Michael S. Tsirkin <mst@redhat.com> wrote:
>

> On Tue, Feb 02, 2021 at 05:17:13PM -0500, Willem de Bruijn wrote:

> > On Tue, Feb 2, 2021 at 9:08 AM Michael S. Tsirkin <mst@redhat.com> wrote:

> > >

> > > On Mon, Dec 28, 2020 at 11:22:32AM -0500, Willem de Bruijn wrote:

> > > > From: Willem de Bruijn <willemb@google.com>

> > > >

> > > > Add optional PTP hardware timestamp offload for virtio-net.

> > > >

> > > > Accurate RTT measurement requires timestamps close to the wire.

> > > > Introduce virtio feature VIRTIO_NET_F_RX_TSTAMP. If negotiated, the

> > > > virtio-net header is expanded with room for a timestamp. A host may

> > > > pass receive timestamps for all or some packets. A timestamp is valid

> > > > if non-zero.

> > > >

> > > > The timestamp straddles (virtual) hardware domains. Like PTP, use

> > > > international atomic time (CLOCK_TAI) as global clock base. It is

> > > > guest responsibility to sync with host, e.g., through kvm-clock.

> > > >

> > > > Signed-off-by: Willem de Bruijn <willemb@google.com>

> > > > ---

> > > >  drivers/net/virtio_net.c        | 20 +++++++++++++++++++-

> > > >  include/uapi/linux/virtio_net.h | 12 ++++++++++++

> > > >  2 files changed, 31 insertions(+), 1 deletion(-)

> > > >

> > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c

> > > > index b917b7333928..57744bb6a141 100644

> > > > --- a/drivers/net/virtio_net.c

> > > > +++ b/drivers/net/virtio_net.c

> > > > @@ -204,6 +204,9 @@ struct virtnet_info {

> > > >       /* Guest will pass tx path info to the host */

> > > >       bool has_tx_hash;

> > > >

> > > > +     /* Host will pass CLOCK_TAI receive time to the guest */

> > > > +     bool has_rx_tstamp;

> > > > +

> > > >       /* Has control virtqueue */

> > > >       bool has_cvq;

> > > >

> > > > @@ -292,6 +295,13 @@ static inline struct virtio_net_hdr_mrg_rxbuf *skb_vnet_hdr(struct sk_buff *skb)

> > > >       return (struct virtio_net_hdr_mrg_rxbuf *)skb->cb;

> > > >  }

> > > >

> > > > +static inline struct virtio_net_hdr_v12 *skb_vnet_hdr_12(struct sk_buff *skb)

> > > > +{

> > > > +     BUILD_BUG_ON(sizeof(struct virtio_net_hdr_v12) > sizeof(skb->cb));

> > > > +

> > > > +     return (void *)skb->cb;

> > > > +}

> > > > +

> > > >  /*

> > > >   * private is used to chain pages for big packets, put the whole

> > > >   * most recent used list in the beginning for reuse

> > > > @@ -1082,6 +1092,9 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,

> > > >               goto frame_err;

> > > >       }

> > > >

> > > > +     if (vi->has_rx_tstamp)

> > > > +             skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(skb_vnet_hdr_12(skb)->tstamp);

> > > > +

> > > >       skb_record_rx_queue(skb, vq2rxq(rq->vq));

> > > >       skb->protocol = eth_type_trans(skb, dev);

> > > >       pr_debug("Receiving skb proto 0x%04x len %i type %i\n",

> > > > @@ -3071,6 +3084,11 @@ static int virtnet_probe(struct virtio_device *vdev)

> > > >               vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash);

> > > >       }

> > > >

> > > > +     if (virtio_has_feature(vdev, VIRTIO_NET_F_RX_TSTAMP)) {

> > > > +             vi->has_rx_tstamp = true;

> > > > +             vi->hdr_len = sizeof(struct virtio_net_hdr_v12);

> > > > +     }

> > > > +

> > > >       if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) ||

> > > >           virtio_has_feature(vdev, VIRTIO_F_VERSION_1))

> > > >               vi->any_header_sg = true;

> > > > @@ -3261,7 +3279,7 @@ static struct virtio_device_id id_table[] = {

> > > >       VIRTIO_NET_F_CTRL_MAC_ADDR, \

> > > >       VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \

> > > >       VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \

> > > > -     VIRTIO_NET_F_TX_HASH

> > > > +     VIRTIO_NET_F_TX_HASH, VIRTIO_NET_F_RX_TSTAMP

> > > >

> > > >  static unsigned int features[] = {

> > > >       VIRTNET_FEATURES,

> > > > diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h

> > > > index f6881b5b77ee..0ffe2eeebd4a 100644

> > > > --- a/include/uapi/linux/virtio_net.h

> > > > +++ b/include/uapi/linux/virtio_net.h

> > > > @@ -57,6 +57,7 @@

> > > >                                        * Steering */

> > > >  #define VIRTIO_NET_F_CTRL_MAC_ADDR 23        /* Set MAC address */

> > > >

> > > > +#define VIRTIO_NET_F_RX_TSTAMP         55    /* Host sends TAI receive time */

> > > >  #define VIRTIO_NET_F_TX_HASH   56    /* Guest sends hash report */

> > > >  #define VIRTIO_NET_F_HASH_REPORT  57 /* Supports hash report */

> > > >  #define VIRTIO_NET_F_RSS       60    /* Supports RSS RX steering */

> > > > @@ -182,6 +183,17 @@ struct virtio_net_hdr_v1_hash {

> > > >       };

> > > >  };

> > > >

> > > > +struct virtio_net_hdr_v12 {

> > > > +     struct virtio_net_hdr_v1 hdr;

> > > > +     struct {

> > > > +             __le32 value;

> > > > +             __le16 report;

> > > > +             __le16 flow_state;

> > > > +     } hash;

> > > > +     __virtio32 reserved;

> > >

> > >

> > > Does endian-ness matter? If not - just u32?

> >

> > I suppose it does not matter as long as this is reserved. Should it be

> > __le32, at least?

>

> One can safely assign 0 to any value.


Ack.

>

> > > > +     __virtio64 tstamp;

> > > > +};

> > > > +

> > >

> > > Given it's only available in modern devices, I think we

> > > can make this __le64 tstamp.

> >

> > Actually, would it be possible to make new features available on

> > legacy devices? There is nothing in the features bits precluding it.

>

> I think it won't be possible: you are using feature bit 55,

> legacy devices have up to 32 feature bits. And of course the

> header looks a bit differently for legacy, you would have to add special

> code to handle that when mergeable buffers are off.


I think I can make the latter work. I did start without a dependency
on the v1 header initially.

Feature bit array length I had not considered. Good point. Need to
think about that. It would be very appealing if in particular the
tx-hash feature could work in legacy mode.

> >

> > I have a revised patchset almost ready. I suppose I should send it as

> > RFC again, and simultaneously file an OASIS ballot for each feature?

>

> that would be great.


Will do, thanks.
diff mbox series

Patch

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index b917b7333928..57744bb6a141 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -204,6 +204,9 @@  struct virtnet_info {
 	/* Guest will pass tx path info to the host */
 	bool has_tx_hash;
 
+	/* Host will pass CLOCK_TAI receive time to the guest */
+	bool has_rx_tstamp;
+
 	/* Has control virtqueue */
 	bool has_cvq;
 
@@ -292,6 +295,13 @@  static inline struct virtio_net_hdr_mrg_rxbuf *skb_vnet_hdr(struct sk_buff *skb)
 	return (struct virtio_net_hdr_mrg_rxbuf *)skb->cb;
 }
 
+static inline struct virtio_net_hdr_v12 *skb_vnet_hdr_12(struct sk_buff *skb)
+{
+	BUILD_BUG_ON(sizeof(struct virtio_net_hdr_v12) > sizeof(skb->cb));
+
+	return (void *)skb->cb;
+}
+
 /*
  * private is used to chain pages for big packets, put the whole
  * most recent used list in the beginning for reuse
@@ -1082,6 +1092,9 @@  static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq,
 		goto frame_err;
 	}
 
+	if (vi->has_rx_tstamp)
+		skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(skb_vnet_hdr_12(skb)->tstamp);
+
 	skb_record_rx_queue(skb, vq2rxq(rq->vq));
 	skb->protocol = eth_type_trans(skb, dev);
 	pr_debug("Receiving skb proto 0x%04x len %i type %i\n",
@@ -3071,6 +3084,11 @@  static int virtnet_probe(struct virtio_device *vdev)
 		vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash);
 	}
 
+	if (virtio_has_feature(vdev, VIRTIO_NET_F_RX_TSTAMP)) {
+		vi->has_rx_tstamp = true;
+		vi->hdr_len = sizeof(struct virtio_net_hdr_v12);
+	}
+
 	if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) ||
 	    virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
 		vi->any_header_sg = true;
@@ -3261,7 +3279,7 @@  static struct virtio_device_id id_table[] = {
 	VIRTIO_NET_F_CTRL_MAC_ADDR, \
 	VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \
 	VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \
-	VIRTIO_NET_F_TX_HASH
+	VIRTIO_NET_F_TX_HASH, VIRTIO_NET_F_RX_TSTAMP
 
 static unsigned int features[] = {
 	VIRTNET_FEATURES,
diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h
index f6881b5b77ee..0ffe2eeebd4a 100644
--- a/include/uapi/linux/virtio_net.h
+++ b/include/uapi/linux/virtio_net.h
@@ -57,6 +57,7 @@ 
 					 * Steering */
 #define VIRTIO_NET_F_CTRL_MAC_ADDR 23	/* Set MAC address */
 
+#define VIRTIO_NET_F_RX_TSTAMP	  55	/* Host sends TAI receive time */
 #define VIRTIO_NET_F_TX_HASH	  56	/* Guest sends hash report */
 #define VIRTIO_NET_F_HASH_REPORT  57	/* Supports hash report */
 #define VIRTIO_NET_F_RSS	  60	/* Supports RSS RX steering */
@@ -182,6 +183,17 @@  struct virtio_net_hdr_v1_hash {
 	};
 };
 
+struct virtio_net_hdr_v12 {
+	struct virtio_net_hdr_v1 hdr;
+	struct {
+		__le32 value;
+		__le16 report;
+		__le16 flow_state;
+	} hash;
+	__virtio32 reserved;
+	__virtio64 tstamp;
+};
+
 #ifndef VIRTIO_NET_NO_LEGACY
 /* This header comes first in the scatter-gather list.
  * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, it must