Message ID | 20201228162233.2032571-3-willemdebruijn.kernel@gmail.com |
---|---|
State | New |
Headers | show |
Series | virtio-net: add tx-hash, rx-tstamp and tx-tstamp | expand |
On Mon, Dec 28, 2020 at 11:22:32AM -0500, Willem de Bruijn wrote: > From: Willem de Bruijn <willemb@google.com> > > Add optional PTP hardware timestamp offload for virtio-net. > > Accurate RTT measurement requires timestamps close to the wire. > Introduce virtio feature VIRTIO_NET_F_RX_TSTAMP. If negotiated, the > virtio-net header is expanded with room for a timestamp. A host may > pass receive timestamps for all or some packets. A timestamp is valid > if non-zero. > > The timestamp straddles (virtual) hardware domains. Like PTP, use > international atomic time (CLOCK_TAI) as global clock base. It is > guest responsibility to sync with host, e.g., through kvm-clock. > > Signed-off-by: Willem de Bruijn <willemb@google.com> > --- > drivers/net/virtio_net.c | 20 +++++++++++++++++++- > include/uapi/linux/virtio_net.h | 12 ++++++++++++ > 2 files changed, 31 insertions(+), 1 deletion(-) > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > index b917b7333928..57744bb6a141 100644 > --- a/drivers/net/virtio_net.c > +++ b/drivers/net/virtio_net.c > @@ -204,6 +204,9 @@ struct virtnet_info { > /* Guest will pass tx path info to the host */ > bool has_tx_hash; > > + /* Host will pass CLOCK_TAI receive time to the guest */ > + bool has_rx_tstamp; > + > /* Has control virtqueue */ > bool has_cvq; > > @@ -292,6 +295,13 @@ static inline struct virtio_net_hdr_mrg_rxbuf *skb_vnet_hdr(struct sk_buff *skb) > return (struct virtio_net_hdr_mrg_rxbuf *)skb->cb; > } > > +static inline struct virtio_net_hdr_v12 *skb_vnet_hdr_12(struct sk_buff *skb) > +{ > + BUILD_BUG_ON(sizeof(struct virtio_net_hdr_v12) > sizeof(skb->cb)); > + > + return (void *)skb->cb; > +} > + > /* > * private is used to chain pages for big packets, put the whole > * most recent used list in the beginning for reuse > @@ -1082,6 +1092,9 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, > goto frame_err; > } > > + if (vi->has_rx_tstamp) > + skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(skb_vnet_hdr_12(skb)->tstamp); > + > skb_record_rx_queue(skb, vq2rxq(rq->vq)); > skb->protocol = eth_type_trans(skb, dev); > pr_debug("Receiving skb proto 0x%04x len %i type %i\n", > @@ -3071,6 +3084,11 @@ static int virtnet_probe(struct virtio_device *vdev) > vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash); > } > > + if (virtio_has_feature(vdev, VIRTIO_NET_F_RX_TSTAMP)) { > + vi->has_rx_tstamp = true; > + vi->hdr_len = sizeof(struct virtio_net_hdr_v12); > + } > + > if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) || > virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) > vi->any_header_sg = true; > @@ -3261,7 +3279,7 @@ static struct virtio_device_id id_table[] = { > VIRTIO_NET_F_CTRL_MAC_ADDR, \ > VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ > VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ > - VIRTIO_NET_F_TX_HASH > + VIRTIO_NET_F_TX_HASH, VIRTIO_NET_F_RX_TSTAMP > > static unsigned int features[] = { > VIRTNET_FEATURES, > diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h > index f6881b5b77ee..0ffe2eeebd4a 100644 > --- a/include/uapi/linux/virtio_net.h > +++ b/include/uapi/linux/virtio_net.h > @@ -57,6 +57,7 @@ > * Steering */ > #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ > > +#define VIRTIO_NET_F_RX_TSTAMP 55 /* Host sends TAI receive time */ > #define VIRTIO_NET_F_TX_HASH 56 /* Guest sends hash report */ > #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ > #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ > @@ -182,6 +183,17 @@ struct virtio_net_hdr_v1_hash { > }; > }; > > +struct virtio_net_hdr_v12 { > + struct virtio_net_hdr_v1 hdr; > + struct { > + __le32 value; > + __le16 report; > + __le16 flow_state; > + } hash; > + __virtio32 reserved; > + __virtio64 tstamp; > +}; > + > #ifndef VIRTIO_NET_NO_LEGACY > /* This header comes first in the scatter-gather list. > * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, it must So it looks like VIRTIO_NET_F_RX_TSTAMP should depend on both VIRTIO_NET_F_RX_TSTAMP and VIRTIO_NET_F_HASH_REPORT then? I am not sure what does v12 mean here. virtio_net_hdr_v1 is just with VIRTIO_F_VERSION_1, virtio_net_hdr_v1_hash is with VIRTIO_F_VERSION_1 and VIRTIO_NET_F_HASH_REPORT. So this one is virtio_net_hdr_hash_tstamp I guess? > -- > 2.29.2.729.g45daf8777d-goog
On Mon, Dec 28, 2020 at 02:30:31PM -0500, Willem de Bruijn wrote: > On Mon, Dec 28, 2020 at 12:29 PM Michael S. Tsirkin <mst@redhat.com> wrote: > > > > On Mon, Dec 28, 2020 at 11:22:32AM -0500, Willem de Bruijn wrote: > > > From: Willem de Bruijn <willemb@google.com> > > > > > > Add optional PTP hardware timestamp offload for virtio-net. > > > > > > Accurate RTT measurement requires timestamps close to the wire. > > > Introduce virtio feature VIRTIO_NET_F_RX_TSTAMP. If negotiated, the > > > virtio-net header is expanded with room for a timestamp. A host may > > > pass receive timestamps for all or some packets. A timestamp is valid > > > if non-zero. > > > > > > The timestamp straddles (virtual) hardware domains. Like PTP, use > > > international atomic time (CLOCK_TAI) as global clock base. It is > > > guest responsibility to sync with host, e.g., through kvm-clock. > > > > > > Signed-off-by: Willem de Bruijn <willemb@google.com> > > > diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h > > > index f6881b5b77ee..0ffe2eeebd4a 100644 > > > --- a/include/uapi/linux/virtio_net.h > > > +++ b/include/uapi/linux/virtio_net.h > > > @@ -57,6 +57,7 @@ > > > * Steering */ > > > #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ > > > > > > +#define VIRTIO_NET_F_RX_TSTAMP 55 /* Host sends TAI receive time */ > > > #define VIRTIO_NET_F_TX_HASH 56 /* Guest sends hash report */ > > > #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ > > > #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ > > > @@ -182,6 +183,17 @@ struct virtio_net_hdr_v1_hash { > > > }; > > > }; > > > > > > +struct virtio_net_hdr_v12 { > > > + struct virtio_net_hdr_v1 hdr; > > > + struct { > > > + __le32 value; > > > + __le16 report; > > > + __le16 flow_state; > > > + } hash; > > > + __virtio32 reserved; > > > + __virtio64 tstamp; > > > +}; > > > + > > > #ifndef VIRTIO_NET_NO_LEGACY > > > /* This header comes first in the scatter-gather list. > > > * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, it must > > > > > > So it looks like VIRTIO_NET_F_RX_TSTAMP should depend on both > > VIRTIO_NET_F_RX_TSTAMP and VIRTIO_NET_F_HASH_REPORT then? > > Do you mean VIRTIO_NET_F_TX_TSTAMP depends on VIRTIO_NET_F_RX_TSTAMP? > > I think if either is enabled we need to enable the extended layout. > Regardless of whether TX_HASH or HASH_REPORT are enabled. If they are > not, then those fields are ignored. I mean do we waste the 8 bytes for hash if TSTAMP is set but HASH is not? If TSTAMP depends on HASH then point is moot. > > I am not sure what does v12 mean here. > > > > virtio_net_hdr_v1 is just with VIRTIO_F_VERSION_1, > > virtio_net_hdr_v1_hash is with VIRTIO_F_VERSION_1 and > > VIRTIO_NET_F_HASH_REPORT. > > > > So this one is virtio_net_hdr_hash_tstamp I guess? > > Sounds better, yes, will change that. > > This was an attempt at identifying the layout with the likely next > generation of the spec, 1.2. Similar to virtio_net_hdr_v1. But that is > both premature and not very helpful.
On Mon, 28 Dec 2020 11:22:32 -0500 Willem de Bruijn wrote: > From: Willem de Bruijn <willemb@google.com> > > Add optional PTP hardware timestamp offload for virtio-net. > > Accurate RTT measurement requires timestamps close to the wire. > Introduce virtio feature VIRTIO_NET_F_RX_TSTAMP. If negotiated, the > virtio-net header is expanded with room for a timestamp. A host may > pass receive timestamps for all or some packets. A timestamp is valid > if non-zero. > > The timestamp straddles (virtual) hardware domains. Like PTP, use > international atomic time (CLOCK_TAI) as global clock base. It is > guest responsibility to sync with host, e.g., through kvm-clock. Would this not be confusing to some user space SW to have a NIC with no PHC deliver HW stamps? I'd CC Richard on this, unless you already discussed with him offline.
On Mon, Dec 28, 2020 at 5:59 PM Jakub Kicinski <kuba@kernel.org> wrote: > > On Mon, 28 Dec 2020 11:22:32 -0500 Willem de Bruijn wrote: > > From: Willem de Bruijn <willemb@google.com> > > > > Add optional PTP hardware timestamp offload for virtio-net. > > > > Accurate RTT measurement requires timestamps close to the wire. > > Introduce virtio feature VIRTIO_NET_F_RX_TSTAMP. If negotiated, the > > virtio-net header is expanded with room for a timestamp. A host may > > pass receive timestamps for all or some packets. A timestamp is valid > > if non-zero. > > > > The timestamp straddles (virtual) hardware domains. Like PTP, use > > international atomic time (CLOCK_TAI) as global clock base. It is > > guest responsibility to sync with host, e.g., through kvm-clock. > > Would this not be confusing to some user space SW to have a NIC with > no PHC deliver HW stamps? > > I'd CC Richard on this, unless you already discussed with him offline. Thanks, good point. I should have included Richard. There is a well understood method for synchronizing guest and host clock in KVM using ptp_kvm. For virtual environments without NIC hardware offload, the when host timestamps in software, this suffices. Syncing host with NIC is assumed if the host advertises the feature and implements using real hardware timestamps.
On Mon, Dec 28, 2020 at 7:55 PM Michael S. Tsirkin <mst@redhat.com> wrote: > > On Mon, Dec 28, 2020 at 02:30:31PM -0500, Willem de Bruijn wrote: > > On Mon, Dec 28, 2020 at 12:29 PM Michael S. Tsirkin <mst@redhat.com> wrote: > > > > > > On Mon, Dec 28, 2020 at 11:22:32AM -0500, Willem de Bruijn wrote: > > > > From: Willem de Bruijn <willemb@google.com> > > > > > > > > Add optional PTP hardware timestamp offload for virtio-net. > > > > > > > > Accurate RTT measurement requires timestamps close to the wire. > > > > Introduce virtio feature VIRTIO_NET_F_RX_TSTAMP. If negotiated, the > > > > virtio-net header is expanded with room for a timestamp. A host may > > > > pass receive timestamps for all or some packets. A timestamp is valid > > > > if non-zero. > > > > > > > > The timestamp straddles (virtual) hardware domains. Like PTP, use > > > > international atomic time (CLOCK_TAI) as global clock base. It is > > > > guest responsibility to sync with host, e.g., through kvm-clock. > > > > > > > > Signed-off-by: Willem de Bruijn <willemb@google.com> > > > > diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h > > > > index f6881b5b77ee..0ffe2eeebd4a 100644 > > > > --- a/include/uapi/linux/virtio_net.h > > > > +++ b/include/uapi/linux/virtio_net.h > > > > @@ -57,6 +57,7 @@ > > > > * Steering */ > > > > #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ > > > > > > > > +#define VIRTIO_NET_F_RX_TSTAMP 55 /* Host sends TAI receive time */ > > > > #define VIRTIO_NET_F_TX_HASH 56 /* Guest sends hash report */ > > > > #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ > > > > #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ > > > > @@ -182,6 +183,17 @@ struct virtio_net_hdr_v1_hash { > > > > }; > > > > }; > > > > > > > > +struct virtio_net_hdr_v12 { > > > > + struct virtio_net_hdr_v1 hdr; > > > > + struct { > > > > + __le32 value; > > > > + __le16 report; > > > > + __le16 flow_state; > > > > + } hash; > > > > + __virtio32 reserved; > > > > + __virtio64 tstamp; > > > > +}; > > > > + > > > > #ifndef VIRTIO_NET_NO_LEGACY > > > > /* This header comes first in the scatter-gather list. > > > > * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, it must > > > > > > > > > So it looks like VIRTIO_NET_F_RX_TSTAMP should depend on both > > > VIRTIO_NET_F_RX_TSTAMP and VIRTIO_NET_F_HASH_REPORT then? > > > > Do you mean VIRTIO_NET_F_TX_TSTAMP depends on VIRTIO_NET_F_RX_TSTAMP? > > > > I think if either is enabled we need to enable the extended layout. > > Regardless of whether TX_HASH or HASH_REPORT are enabled. If they are > > not, then those fields are ignored. > > I mean do we waste the 8 bytes for hash if TSTAMP is set but HASH is not? > If TSTAMP depends on HASH then point is moot. True, but the two features really are independent. I did consider using configurable metadata layout depending on features negotiated. If there are tons of optional extensions, that makes sense. But it is more complex and parsing error prone. With a handful of options each of a few bytes, that did not seem worth the cost to me at this point. And importantly, such a mode can always be added later as a separate VIRTIO_NET_F_PACKED_HEADER option. If anything, perhaps if we increase the virtio_net_hdr_* allocation, we should allocate some additional reserved space now? As each new size introduces quite a bit of boilerplate. Also, e.g., in qemu just to pass the sizes between virtio-net driver and vhost-net device.
----- Original Message ----- > On Mon, Dec 28, 2020 at 7:55 PM Michael S. Tsirkin <mst@redhat.com> wrote: > > > > On Mon, Dec 28, 2020 at 02:30:31PM -0500, Willem de Bruijn wrote: > > > On Mon, Dec 28, 2020 at 12:29 PM Michael S. Tsirkin <mst@redhat.com> > > > wrote: > > > > > > > > On Mon, Dec 28, 2020 at 11:22:32AM -0500, Willem de Bruijn wrote: > > > > > From: Willem de Bruijn <willemb@google.com> > > > > > > > > > > Add optional PTP hardware timestamp offload for virtio-net. > > > > > > > > > > Accurate RTT measurement requires timestamps close to the wire. > > > > > Introduce virtio feature VIRTIO_NET_F_RX_TSTAMP. If negotiated, the > > > > > virtio-net header is expanded with room for a timestamp. A host may > > > > > pass receive timestamps for all or some packets. A timestamp is valid > > > > > if non-zero. > > > > > > > > > > The timestamp straddles (virtual) hardware domains. Like PTP, use > > > > > international atomic time (CLOCK_TAI) as global clock base. It is > > > > > guest responsibility to sync with host, e.g., through kvm-clock. > > > > > > > > > > Signed-off-by: Willem de Bruijn <willemb@google.com> > > > > > diff --git a/include/uapi/linux/virtio_net.h > > > > > b/include/uapi/linux/virtio_net.h > > > > > index f6881b5b77ee..0ffe2eeebd4a 100644 > > > > > --- a/include/uapi/linux/virtio_net.h > > > > > +++ b/include/uapi/linux/virtio_net.h > > > > > @@ -57,6 +57,7 @@ > > > > > * Steering */ > > > > > #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ > > > > > > > > > > +#define VIRTIO_NET_F_RX_TSTAMP 55 /* Host sends TAI > > > > > receive time */ > > > > > #define VIRTIO_NET_F_TX_HASH 56 /* Guest sends hash report */ > > > > > #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ > > > > > #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ > > > > > @@ -182,6 +183,17 @@ struct virtio_net_hdr_v1_hash { > > > > > }; > > > > > }; > > > > > > > > > > +struct virtio_net_hdr_v12 { > > > > > + struct virtio_net_hdr_v1 hdr; > > > > > + struct { > > > > > + __le32 value; > > > > > + __le16 report; > > > > > + __le16 flow_state; > > > > > + } hash; > > > > > + __virtio32 reserved; > > > > > + __virtio64 tstamp; > > > > > +}; > > > > > + > > > > > #ifndef VIRTIO_NET_NO_LEGACY > > > > > /* This header comes first in the scatter-gather list. > > > > > * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, it > > > > > must > > > > > > > > > > > > So it looks like VIRTIO_NET_F_RX_TSTAMP should depend on both > > > > VIRTIO_NET_F_RX_TSTAMP and VIRTIO_NET_F_HASH_REPORT then? > > > > > > Do you mean VIRTIO_NET_F_TX_TSTAMP depends on VIRTIO_NET_F_RX_TSTAMP? > > > > > > I think if either is enabled we need to enable the extended layout. > > > Regardless of whether TX_HASH or HASH_REPORT are enabled. If they are > > > not, then those fields are ignored. > > > > I mean do we waste the 8 bytes for hash if TSTAMP is set but HASH is not? > > If TSTAMP depends on HASH then point is moot. > > True, but the two features really are independent. > > I did consider using configurable metadata layout depending on > features negotiated. If there are tons of optional extensions, that > makes sense. But it is more complex and parsing error prone. With a > handful of options each of a few bytes, that did not seem worth the > cost to me at this point. Consider NFV workloads(64B) packet. Most fields of current vnet header is even a burdern. It might be the time to introduce configurable or self-descriptive vnet header. > > And importantly, such a mode can always be added later as a separate > VIRTIO_NET_F_PACKED_HEADER option. What will do feature provide? Thanks > > If anything, perhaps if we increase the virtio_net_hdr_* allocation, > we should allocate some additional reserved space now? As each new > size introduces quite a bit of boilerplate. Also, e.g., in qemu just > to pass the sizes between virtio-net driver and vhost-net device. > >
On Tue, Dec 29, 2020 at 4:23 AM Jason Wang <jasowang@redhat.com> wrote: > > > > ----- Original Message ----- > > On Mon, Dec 28, 2020 at 7:55 PM Michael S. Tsirkin <mst@redhat.com> wrote: > > > > > > On Mon, Dec 28, 2020 at 02:30:31PM -0500, Willem de Bruijn wrote: > > > > On Mon, Dec 28, 2020 at 12:29 PM Michael S. Tsirkin <mst@redhat.com> > > > > wrote: > > > > > > > > > > On Mon, Dec 28, 2020 at 11:22:32AM -0500, Willem de Bruijn wrote: > > > > > > From: Willem de Bruijn <willemb@google.com> > > > > > > > > > > > > Add optional PTP hardware timestamp offload for virtio-net. > > > > > > > > > > > > Accurate RTT measurement requires timestamps close to the wire. > > > > > > Introduce virtio feature VIRTIO_NET_F_RX_TSTAMP. If negotiated, the > > > > > > virtio-net header is expanded with room for a timestamp. A host may > > > > > > pass receive timestamps for all or some packets. A timestamp is valid > > > > > > if non-zero. > > > > > > > > > > > > The timestamp straddles (virtual) hardware domains. Like PTP, use > > > > > > international atomic time (CLOCK_TAI) as global clock base. It is > > > > > > guest responsibility to sync with host, e.g., through kvm-clock. > > > > > > > > > > > > Signed-off-by: Willem de Bruijn <willemb@google.com> > > > > > > diff --git a/include/uapi/linux/virtio_net.h > > > > > > b/include/uapi/linux/virtio_net.h > > > > > > index f6881b5b77ee..0ffe2eeebd4a 100644 > > > > > > --- a/include/uapi/linux/virtio_net.h > > > > > > +++ b/include/uapi/linux/virtio_net.h > > > > > > @@ -57,6 +57,7 @@ > > > > > > * Steering */ > > > > > > #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ > > > > > > > > > > > > +#define VIRTIO_NET_F_RX_TSTAMP 55 /* Host sends TAI > > > > > > receive time */ > > > > > > #define VIRTIO_NET_F_TX_HASH 56 /* Guest sends hash report */ > > > > > > #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ > > > > > > #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ > > > > > > @@ -182,6 +183,17 @@ struct virtio_net_hdr_v1_hash { > > > > > > }; > > > > > > }; > > > > > > > > > > > > +struct virtio_net_hdr_v12 { > > > > > > + struct virtio_net_hdr_v1 hdr; > > > > > > + struct { > > > > > > + __le32 value; > > > > > > + __le16 report; > > > > > > + __le16 flow_state; > > > > > > + } hash; > > > > > > + __virtio32 reserved; > > > > > > + __virtio64 tstamp; > > > > > > +}; > > > > > > + > > > > > > #ifndef VIRTIO_NET_NO_LEGACY > > > > > > /* This header comes first in the scatter-gather list. > > > > > > * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, it > > > > > > must > > > > > > > > > > > > > > > So it looks like VIRTIO_NET_F_RX_TSTAMP should depend on both > > > > > VIRTIO_NET_F_RX_TSTAMP and VIRTIO_NET_F_HASH_REPORT then? > > > > > > > > Do you mean VIRTIO_NET_F_TX_TSTAMP depends on VIRTIO_NET_F_RX_TSTAMP? > > > > > > > > I think if either is enabled we need to enable the extended layout. > > > > Regardless of whether TX_HASH or HASH_REPORT are enabled. If they are > > > > not, then those fields are ignored. > > > > > > I mean do we waste the 8 bytes for hash if TSTAMP is set but HASH is not? > > > If TSTAMP depends on HASH then point is moot. > > > > True, but the two features really are independent. > > > > I did consider using configurable metadata layout depending on > > features negotiated. If there are tons of optional extensions, that > > makes sense. But it is more complex and parsing error prone. With a > > handful of options each of a few bytes, that did not seem worth the > > cost to me at this point. > > Consider NFV workloads(64B) packet. Most fields of current vnet header > is even a burdern. Such workloads will not negotiate these features, I imagine. I think this could only become an issue if a small packet workload would want to negotiate one optional feature, without the others. Even then, the actual cost of a few untouched bytes is negligible, as long as the headers don't span cache-lines. I expect it to be cheaper than having to parse a dynamic layout. > It might be the time to introduce configurable or > self-descriptive vnet header. I did briefly toy with a type-val encoding. Not type-val-len, as all options have fixed length (so far), so length can be left implicit. But as each feature is negotiated before hand, the type can be left implicit too. Leaving just the data elements tightly packed. As long as order is defined. > > And importantly, such a mode can always be added later as a separate > > VIRTIO_NET_F_PACKED_HEADER option. > > What will do feature provide? The tightly packed data elements. Strip out the elements for non negotiated features. So if either tstamp option is negotiated, but hash is not, exclude the 64b of hash fields. Note that I'm not actually arguing for this (at this point).
On 2020/12/29 下午10:20, Willem de Bruijn wrote: > On Tue, Dec 29, 2020 at 4:23 AM Jason Wang <jasowang@redhat.com> wrote: >> >> >> ----- Original Message ----- >>> On Mon, Dec 28, 2020 at 7:55 PM Michael S. Tsirkin <mst@redhat.com> wrote: >>>> On Mon, Dec 28, 2020 at 02:30:31PM -0500, Willem de Bruijn wrote: >>>>> On Mon, Dec 28, 2020 at 12:29 PM Michael S. Tsirkin <mst@redhat.com> >>>>> wrote: >>>>>> On Mon, Dec 28, 2020 at 11:22:32AM -0500, Willem de Bruijn wrote: >>>>>>> From: Willem de Bruijn <willemb@google.com> >>>>>>> >>>>>>> Add optional PTP hardware timestamp offload for virtio-net. >>>>>>> >>>>>>> Accurate RTT measurement requires timestamps close to the wire. >>>>>>> Introduce virtio feature VIRTIO_NET_F_RX_TSTAMP. If negotiated, the >>>>>>> virtio-net header is expanded with room for a timestamp. A host may >>>>>>> pass receive timestamps for all or some packets. A timestamp is valid >>>>>>> if non-zero. >>>>>>> >>>>>>> The timestamp straddles (virtual) hardware domains. Like PTP, use >>>>>>> international atomic time (CLOCK_TAI) as global clock base. It is >>>>>>> guest responsibility to sync with host, e.g., through kvm-clock. >>>>>>> >>>>>>> Signed-off-by: Willem de Bruijn <willemb@google.com> >>>>>>> diff --git a/include/uapi/linux/virtio_net.h >>>>>>> b/include/uapi/linux/virtio_net.h >>>>>>> index f6881b5b77ee..0ffe2eeebd4a 100644 >>>>>>> --- a/include/uapi/linux/virtio_net.h >>>>>>> +++ b/include/uapi/linux/virtio_net.h >>>>>>> @@ -57,6 +57,7 @@ >>>>>>> * Steering */ >>>>>>> #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ >>>>>>> >>>>>>> +#define VIRTIO_NET_F_RX_TSTAMP 55 /* Host sends TAI >>>>>>> receive time */ >>>>>>> #define VIRTIO_NET_F_TX_HASH 56 /* Guest sends hash report */ >>>>>>> #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ >>>>>>> #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ >>>>>>> @@ -182,6 +183,17 @@ struct virtio_net_hdr_v1_hash { >>>>>>> }; >>>>>>> }; >>>>>>> >>>>>>> +struct virtio_net_hdr_v12 { >>>>>>> + struct virtio_net_hdr_v1 hdr; >>>>>>> + struct { >>>>>>> + __le32 value; >>>>>>> + __le16 report; >>>>>>> + __le16 flow_state; >>>>>>> + } hash; >>>>>>> + __virtio32 reserved; >>>>>>> + __virtio64 tstamp; >>>>>>> +}; >>>>>>> + >>>>>>> #ifndef VIRTIO_NET_NO_LEGACY >>>>>>> /* This header comes first in the scatter-gather list. >>>>>>> * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, it >>>>>>> must >>>>>> >>>>>> So it looks like VIRTIO_NET_F_RX_TSTAMP should depend on both >>>>>> VIRTIO_NET_F_RX_TSTAMP and VIRTIO_NET_F_HASH_REPORT then? >>>>> Do you mean VIRTIO_NET_F_TX_TSTAMP depends on VIRTIO_NET_F_RX_TSTAMP? >>>>> >>>>> I think if either is enabled we need to enable the extended layout. >>>>> Regardless of whether TX_HASH or HASH_REPORT are enabled. If they are >>>>> not, then those fields are ignored. >>>> I mean do we waste the 8 bytes for hash if TSTAMP is set but HASH is not? >>>> If TSTAMP depends on HASH then point is moot. >>> True, but the two features really are independent. >>> >>> I did consider using configurable metadata layout depending on >>> features negotiated. If there are tons of optional extensions, that >>> makes sense. But it is more complex and parsing error prone. With a >>> handful of options each of a few bytes, that did not seem worth the >>> cost to me at this point. >> Consider NFV workloads(64B) packet. Most fields of current vnet header >> is even a burdern. > Such workloads will not negotiate these features, I imagine. > > I think this could only become an issue if a small packet workload > would want to negotiate one optional feature, without the others. Yes. > Even then, the actual cost of a few untouched bytes is negligible, as > long as the headers don't span cache-lines. Right. It looks to me with hash report support the current header has exceeds 32 bytes which is the cacheline size for some archs. And it can be imagined that after two or more features it could be larger than 64 bytes. > I expect it to be cheaper > than having to parse a dynamic layout. The only overhead is probably analyzing "type" which should be cheap I guess. > >> It might be the time to introduce configurable or >> self-descriptive vnet header. > I did briefly toy with a type-val encoding. Not type-val-len, as all > options have fixed length (so far), so length can be left implicit. > But as each feature is negotiated before hand, the type can be left > implicit too. Leaving just the data elements tightly packed. As long > as order is defined. Right, so in this case there should be even no overhead. > >>> And importantly, such a mode can always be added later as a separate >>> VIRTIO_NET_F_PACKED_HEADER option. >> What will do feature provide? > The tightly packed data elements. Strip out the elements for non > negotiated features. So if either tstamp option is negotiated, but > hash is not, exclude the 64b of hash fields. Note that I'm not > actually arguing for this (at this point). I second for this. Thanks
On 2020/12/29 上午8:57, Willem de Bruijn wrote: > On Mon, Dec 28, 2020 at 5:59 PM Jakub Kicinski <kuba@kernel.org> wrote: >> On Mon, 28 Dec 2020 11:22:32 -0500 Willem de Bruijn wrote: >>> From: Willem de Bruijn <willemb@google.com> >>> >>> Add optional PTP hardware timestamp offload for virtio-net. >>> >>> Accurate RTT measurement requires timestamps close to the wire. >>> Introduce virtio feature VIRTIO_NET_F_RX_TSTAMP. If negotiated, the >>> virtio-net header is expanded with room for a timestamp. A host may >>> pass receive timestamps for all or some packets. A timestamp is valid >>> if non-zero. >>> >>> The timestamp straddles (virtual) hardware domains. Like PTP, use >>> international atomic time (CLOCK_TAI) as global clock base. It is >>> guest responsibility to sync with host, e.g., through kvm-clock. >> Would this not be confusing to some user space SW to have a NIC with >> no PHC deliver HW stamps? >> >> I'd CC Richard on this, unless you already discussed with him offline. > Thanks, good point. I should have included Richard. > > There is a well understood method for synchronizing guest and host > clock in KVM using ptp_kvm. For virtual environments without NIC > hardware offload, the when host timestamps in software, this suffices. > > Syncing host with NIC is assumed if the host advertises the feature > and implements using real hardware timestamps. Or it could be useful for virtio hardware when there's no KVM that provides PTP. Thanks >
On Mon, Dec 28, 2020 at 07:57:20PM -0500, Willem de Bruijn wrote: > There is a well understood method for synchronizing guest and host > clock in KVM using ptp_kvm. For virtual environments without NIC > hardware offload, the when host timestamps in software, this suffices. > > Syncing host with NIC is assumed if the host advertises the feature > and implements using real hardware timestamps. Sounds reasonable. Thanks, Richard
Hi Willem, Thank you for the patch! Perhaps something to improve: [auto build test WARNING on ipvs/master] [also build test WARNING on linus/master v5.11-rc6 next-20210125] [If your patch is applied to the wrong git tree, kindly drop us a note. And when submitting patch, we suggest to use '--base' as documented in https://git-scm.com/docs/git-format-patch] url: https://github.com/0day-ci/linux/commits/Willem-de-Bruijn/virtio-net-add-tx-hash-rx-tstamp-and-tx-tstamp/20201229-002604 base: https://git.kernel.org/pub/scm/linux/kernel/git/horms/ipvs.git master config: x86_64-randconfig-s021-20201228 (attached as .config) compiler: gcc-9 (Debian 9.3.0-15) 9.3.0 reproduce: # apt-get install sparse # sparse version: v0.6.3-215-g0fb77bb6-dirty # https://github.com/0day-ci/linux/commit/d309db6857fa35b0d7a11cc5229436d6d71ab274 git remote add linux-review https://github.com/0day-ci/linux git fetch --no-tags linux-review Willem-de-Bruijn/virtio-net-add-tx-hash-rx-tstamp-and-tx-tstamp/20201229-002604 git checkout d309db6857fa35b0d7a11cc5229436d6d71ab274 # save the attached .config to linux build tree make W=1 C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' ARCH=x86_64 If you fix the issue, kindly add following tag as appropriate Reported-by: kernel test robot <lkp@intel.com> "sparse warnings: (new ones prefixed by >>)" >> drivers/net/virtio_net.c:1096:80: sparse: sparse: incorrect type in argument 1 (different base types) @@ expected unsigned long long [usertype] ns @@ got restricted __virtio64 [usertype] tstamp @@ drivers/net/virtio_net.c:1096:80: sparse: expected unsigned long long [usertype] ns drivers/net/virtio_net.c:1096:80: sparse: got restricted __virtio64 [usertype] tstamp drivers/net/virtio_net.c:1580:32: sparse: sparse: incorrect type in assignment (different base types) @@ expected restricted __le32 [usertype] hash_value @@ got restricted __virtio32 @@ drivers/net/virtio_net.c:1580:32: sparse: expected restricted __le32 [usertype] hash_value drivers/net/virtio_net.c:1580:32: sparse: got restricted __virtio32 drivers/net/virtio_net.c:1581:33: sparse: sparse: incorrect type in assignment (different base types) @@ expected restricted __le16 [usertype] hash_report @@ got int @@ drivers/net/virtio_net.c:1581:33: sparse: expected restricted __le16 [usertype] hash_report drivers/net/virtio_net.c:1581:33: sparse: got int vim +1096 drivers/net/virtio_net.c 1048 1049 static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, 1050 void *buf, unsigned int len, void **ctx, 1051 unsigned int *xdp_xmit, 1052 struct virtnet_rq_stats *stats) 1053 { 1054 struct net_device *dev = vi->dev; 1055 struct sk_buff *skb; 1056 struct virtio_net_hdr_mrg_rxbuf *hdr; 1057 1058 if (unlikely(len < vi->hdr_len + ETH_HLEN)) { 1059 pr_debug("%s: short packet %i\n", dev->name, len); 1060 dev->stats.rx_length_errors++; 1061 if (vi->mergeable_rx_bufs) { 1062 put_page(virt_to_head_page(buf)); 1063 } else if (vi->big_packets) { 1064 give_pages(rq, buf); 1065 } else { 1066 put_page(virt_to_head_page(buf)); 1067 } 1068 return; 1069 } 1070 1071 if (vi->mergeable_rx_bufs) 1072 skb = receive_mergeable(dev, vi, rq, buf, ctx, len, xdp_xmit, 1073 stats); 1074 else if (vi->big_packets) 1075 skb = receive_big(dev, vi, rq, buf, len, stats); 1076 else 1077 skb = receive_small(dev, vi, rq, buf, ctx, len, xdp_xmit, stats); 1078 1079 if (unlikely(!skb)) 1080 return; 1081 1082 hdr = skb_vnet_hdr(skb); 1083 1084 if (hdr->hdr.flags & VIRTIO_NET_HDR_F_DATA_VALID) 1085 skb->ip_summed = CHECKSUM_UNNECESSARY; 1086 1087 if (virtio_net_hdr_to_skb(skb, &hdr->hdr, 1088 virtio_is_little_endian(vi->vdev))) { 1089 net_warn_ratelimited("%s: bad gso: type: %u, size: %u\n", 1090 dev->name, hdr->hdr.gso_type, 1091 hdr->hdr.gso_size); 1092 goto frame_err; 1093 } 1094 1095 if (vi->has_rx_tstamp) > 1096 skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(skb_vnet_hdr_12(skb)->tstamp); 1097 1098 skb_record_rx_queue(skb, vq2rxq(rq->vq)); 1099 skb->protocol = eth_type_trans(skb, dev); 1100 pr_debug("Receiving skb proto 0x%04x len %i type %i\n", 1101 ntohs(skb->protocol), skb->len, skb->pkt_type); 1102 1103 napi_gro_receive(&rq->napi, skb); 1104 return; 1105 1106 frame_err: 1107 dev->stats.rx_frame_errors++; 1108 dev_kfree_skb(skb); 1109 } 1110 --- 0-DAY CI Kernel Test Service, Intel Corporation https://lists.01.org/hyperkitty/list/kbuild-all@lists.01.org
On Mon, Dec 28, 2020 at 11:22:32AM -0500, Willem de Bruijn wrote: > From: Willem de Bruijn <willemb@google.com> > > Add optional PTP hardware timestamp offload for virtio-net. > > Accurate RTT measurement requires timestamps close to the wire. > Introduce virtio feature VIRTIO_NET_F_RX_TSTAMP. If negotiated, the > virtio-net header is expanded with room for a timestamp. A host may > pass receive timestamps for all or some packets. A timestamp is valid > if non-zero. > > The timestamp straddles (virtual) hardware domains. Like PTP, use > international atomic time (CLOCK_TAI) as global clock base. It is > guest responsibility to sync with host, e.g., through kvm-clock. > > Signed-off-by: Willem de Bruijn <willemb@google.com> > --- > drivers/net/virtio_net.c | 20 +++++++++++++++++++- > include/uapi/linux/virtio_net.h | 12 ++++++++++++ > 2 files changed, 31 insertions(+), 1 deletion(-) > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > index b917b7333928..57744bb6a141 100644 > --- a/drivers/net/virtio_net.c > +++ b/drivers/net/virtio_net.c > @@ -204,6 +204,9 @@ struct virtnet_info { > /* Guest will pass tx path info to the host */ > bool has_tx_hash; > > + /* Host will pass CLOCK_TAI receive time to the guest */ > + bool has_rx_tstamp; > + > /* Has control virtqueue */ > bool has_cvq; > > @@ -292,6 +295,13 @@ static inline struct virtio_net_hdr_mrg_rxbuf *skb_vnet_hdr(struct sk_buff *skb) > return (struct virtio_net_hdr_mrg_rxbuf *)skb->cb; > } > > +static inline struct virtio_net_hdr_v12 *skb_vnet_hdr_12(struct sk_buff *skb) > +{ > + BUILD_BUG_ON(sizeof(struct virtio_net_hdr_v12) > sizeof(skb->cb)); > + > + return (void *)skb->cb; > +} > + > /* > * private is used to chain pages for big packets, put the whole > * most recent used list in the beginning for reuse > @@ -1082,6 +1092,9 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, > goto frame_err; > } > > + if (vi->has_rx_tstamp) > + skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(skb_vnet_hdr_12(skb)->tstamp); > + > skb_record_rx_queue(skb, vq2rxq(rq->vq)); > skb->protocol = eth_type_trans(skb, dev); > pr_debug("Receiving skb proto 0x%04x len %i type %i\n", > @@ -3071,6 +3084,11 @@ static int virtnet_probe(struct virtio_device *vdev) > vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash); > } > > + if (virtio_has_feature(vdev, VIRTIO_NET_F_RX_TSTAMP)) { > + vi->has_rx_tstamp = true; > + vi->hdr_len = sizeof(struct virtio_net_hdr_v12); > + } > + > if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) || > virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) > vi->any_header_sg = true; > @@ -3261,7 +3279,7 @@ static struct virtio_device_id id_table[] = { > VIRTIO_NET_F_CTRL_MAC_ADDR, \ > VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ > VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ > - VIRTIO_NET_F_TX_HASH > + VIRTIO_NET_F_TX_HASH, VIRTIO_NET_F_RX_TSTAMP > > static unsigned int features[] = { > VIRTNET_FEATURES, > diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h > index f6881b5b77ee..0ffe2eeebd4a 100644 > --- a/include/uapi/linux/virtio_net.h > +++ b/include/uapi/linux/virtio_net.h > @@ -57,6 +57,7 @@ > * Steering */ > #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ > > +#define VIRTIO_NET_F_RX_TSTAMP 55 /* Host sends TAI receive time */ > #define VIRTIO_NET_F_TX_HASH 56 /* Guest sends hash report */ > #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ > #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ > @@ -182,6 +183,17 @@ struct virtio_net_hdr_v1_hash { > }; > }; > > +struct virtio_net_hdr_v12 { > + struct virtio_net_hdr_v1 hdr; > + struct { > + __le32 value; > + __le16 report; > + __le16 flow_state; > + } hash; > + __virtio32 reserved; Does endian-ness matter? If not - just u32? > + __virtio64 tstamp; > +}; > + Given it's only available in modern devices, I think we can make this __le64 tstamp. > #ifndef VIRTIO_NET_NO_LEGACY > /* This header comes first in the scatter-gather list. > * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, it must > -- > 2.29.2.729.g45daf8777d-goog
On Tue, Feb 2, 2021 at 9:08 AM Michael S. Tsirkin <mst@redhat.com> wrote: > > On Mon, Dec 28, 2020 at 11:22:32AM -0500, Willem de Bruijn wrote: > > From: Willem de Bruijn <willemb@google.com> > > > > Add optional PTP hardware timestamp offload for virtio-net. > > > > Accurate RTT measurement requires timestamps close to the wire. > > Introduce virtio feature VIRTIO_NET_F_RX_TSTAMP. If negotiated, the > > virtio-net header is expanded with room for a timestamp. A host may > > pass receive timestamps for all or some packets. A timestamp is valid > > if non-zero. > > > > The timestamp straddles (virtual) hardware domains. Like PTP, use > > international atomic time (CLOCK_TAI) as global clock base. It is > > guest responsibility to sync with host, e.g., through kvm-clock. > > > > Signed-off-by: Willem de Bruijn <willemb@google.com> > > --- > > drivers/net/virtio_net.c | 20 +++++++++++++++++++- > > include/uapi/linux/virtio_net.h | 12 ++++++++++++ > > 2 files changed, 31 insertions(+), 1 deletion(-) > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > > index b917b7333928..57744bb6a141 100644 > > --- a/drivers/net/virtio_net.c > > +++ b/drivers/net/virtio_net.c > > @@ -204,6 +204,9 @@ struct virtnet_info { > > /* Guest will pass tx path info to the host */ > > bool has_tx_hash; > > > > + /* Host will pass CLOCK_TAI receive time to the guest */ > > + bool has_rx_tstamp; > > + > > /* Has control virtqueue */ > > bool has_cvq; > > > > @@ -292,6 +295,13 @@ static inline struct virtio_net_hdr_mrg_rxbuf *skb_vnet_hdr(struct sk_buff *skb) > > return (struct virtio_net_hdr_mrg_rxbuf *)skb->cb; > > } > > > > +static inline struct virtio_net_hdr_v12 *skb_vnet_hdr_12(struct sk_buff *skb) > > +{ > > + BUILD_BUG_ON(sizeof(struct virtio_net_hdr_v12) > sizeof(skb->cb)); > > + > > + return (void *)skb->cb; > > +} > > + > > /* > > * private is used to chain pages for big packets, put the whole > > * most recent used list in the beginning for reuse > > @@ -1082,6 +1092,9 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, > > goto frame_err; > > } > > > > + if (vi->has_rx_tstamp) > > + skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(skb_vnet_hdr_12(skb)->tstamp); > > + > > skb_record_rx_queue(skb, vq2rxq(rq->vq)); > > skb->protocol = eth_type_trans(skb, dev); > > pr_debug("Receiving skb proto 0x%04x len %i type %i\n", > > @@ -3071,6 +3084,11 @@ static int virtnet_probe(struct virtio_device *vdev) > > vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash); > > } > > > > + if (virtio_has_feature(vdev, VIRTIO_NET_F_RX_TSTAMP)) { > > + vi->has_rx_tstamp = true; > > + vi->hdr_len = sizeof(struct virtio_net_hdr_v12); > > + } > > + > > if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) || > > virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) > > vi->any_header_sg = true; > > @@ -3261,7 +3279,7 @@ static struct virtio_device_id id_table[] = { > > VIRTIO_NET_F_CTRL_MAC_ADDR, \ > > VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ > > VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ > > - VIRTIO_NET_F_TX_HASH > > + VIRTIO_NET_F_TX_HASH, VIRTIO_NET_F_RX_TSTAMP > > > > static unsigned int features[] = { > > VIRTNET_FEATURES, > > diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h > > index f6881b5b77ee..0ffe2eeebd4a 100644 > > --- a/include/uapi/linux/virtio_net.h > > +++ b/include/uapi/linux/virtio_net.h > > @@ -57,6 +57,7 @@ > > * Steering */ > > #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ > > > > +#define VIRTIO_NET_F_RX_TSTAMP 55 /* Host sends TAI receive time */ > > #define VIRTIO_NET_F_TX_HASH 56 /* Guest sends hash report */ > > #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ > > #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ > > @@ -182,6 +183,17 @@ struct virtio_net_hdr_v1_hash { > > }; > > }; > > > > +struct virtio_net_hdr_v12 { > > + struct virtio_net_hdr_v1 hdr; > > + struct { > > + __le32 value; > > + __le16 report; > > + __le16 flow_state; > > + } hash; > > + __virtio32 reserved; > > > Does endian-ness matter? If not - just u32? I suppose it does not matter as long as this is reserved. Should it be __le32, at least? > > + __virtio64 tstamp; > > +}; > > + > > Given it's only available in modern devices, I think we > can make this __le64 tstamp. Actually, would it be possible to make new features available on legacy devices? There is nothing in the features bits precluding it. I have a revised patchset almost ready. I suppose I should send it as RFC again, and simultaneously file an OASIS ballot for each feature?
On Tue, Feb 02, 2021 at 05:17:13PM -0500, Willem de Bruijn wrote: > On Tue, Feb 2, 2021 at 9:08 AM Michael S. Tsirkin <mst@redhat.com> wrote: > > > > On Mon, Dec 28, 2020 at 11:22:32AM -0500, Willem de Bruijn wrote: > > > From: Willem de Bruijn <willemb@google.com> > > > > > > Add optional PTP hardware timestamp offload for virtio-net. > > > > > > Accurate RTT measurement requires timestamps close to the wire. > > > Introduce virtio feature VIRTIO_NET_F_RX_TSTAMP. If negotiated, the > > > virtio-net header is expanded with room for a timestamp. A host may > > > pass receive timestamps for all or some packets. A timestamp is valid > > > if non-zero. > > > > > > The timestamp straddles (virtual) hardware domains. Like PTP, use > > > international atomic time (CLOCK_TAI) as global clock base. It is > > > guest responsibility to sync with host, e.g., through kvm-clock. > > > > > > Signed-off-by: Willem de Bruijn <willemb@google.com> > > > --- > > > drivers/net/virtio_net.c | 20 +++++++++++++++++++- > > > include/uapi/linux/virtio_net.h | 12 ++++++++++++ > > > 2 files changed, 31 insertions(+), 1 deletion(-) > > > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > > > index b917b7333928..57744bb6a141 100644 > > > --- a/drivers/net/virtio_net.c > > > +++ b/drivers/net/virtio_net.c > > > @@ -204,6 +204,9 @@ struct virtnet_info { > > > /* Guest will pass tx path info to the host */ > > > bool has_tx_hash; > > > > > > + /* Host will pass CLOCK_TAI receive time to the guest */ > > > + bool has_rx_tstamp; > > > + > > > /* Has control virtqueue */ > > > bool has_cvq; > > > > > > @@ -292,6 +295,13 @@ static inline struct virtio_net_hdr_mrg_rxbuf *skb_vnet_hdr(struct sk_buff *skb) > > > return (struct virtio_net_hdr_mrg_rxbuf *)skb->cb; > > > } > > > > > > +static inline struct virtio_net_hdr_v12 *skb_vnet_hdr_12(struct sk_buff *skb) > > > +{ > > > + BUILD_BUG_ON(sizeof(struct virtio_net_hdr_v12) > sizeof(skb->cb)); > > > + > > > + return (void *)skb->cb; > > > +} > > > + > > > /* > > > * private is used to chain pages for big packets, put the whole > > > * most recent used list in the beginning for reuse > > > @@ -1082,6 +1092,9 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, > > > goto frame_err; > > > } > > > > > > + if (vi->has_rx_tstamp) > > > + skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(skb_vnet_hdr_12(skb)->tstamp); > > > + > > > skb_record_rx_queue(skb, vq2rxq(rq->vq)); > > > skb->protocol = eth_type_trans(skb, dev); > > > pr_debug("Receiving skb proto 0x%04x len %i type %i\n", > > > @@ -3071,6 +3084,11 @@ static int virtnet_probe(struct virtio_device *vdev) > > > vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash); > > > } > > > > > > + if (virtio_has_feature(vdev, VIRTIO_NET_F_RX_TSTAMP)) { > > > + vi->has_rx_tstamp = true; > > > + vi->hdr_len = sizeof(struct virtio_net_hdr_v12); > > > + } > > > + > > > if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) || > > > virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) > > > vi->any_header_sg = true; > > > @@ -3261,7 +3279,7 @@ static struct virtio_device_id id_table[] = { > > > VIRTIO_NET_F_CTRL_MAC_ADDR, \ > > > VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ > > > VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ > > > - VIRTIO_NET_F_TX_HASH > > > + VIRTIO_NET_F_TX_HASH, VIRTIO_NET_F_RX_TSTAMP > > > > > > static unsigned int features[] = { > > > VIRTNET_FEATURES, > > > diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h > > > index f6881b5b77ee..0ffe2eeebd4a 100644 > > > --- a/include/uapi/linux/virtio_net.h > > > +++ b/include/uapi/linux/virtio_net.h > > > @@ -57,6 +57,7 @@ > > > * Steering */ > > > #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ > > > > > > +#define VIRTIO_NET_F_RX_TSTAMP 55 /* Host sends TAI receive time */ > > > #define VIRTIO_NET_F_TX_HASH 56 /* Guest sends hash report */ > > > #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ > > > #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ > > > @@ -182,6 +183,17 @@ struct virtio_net_hdr_v1_hash { > > > }; > > > }; > > > > > > +struct virtio_net_hdr_v12 { > > > + struct virtio_net_hdr_v1 hdr; > > > + struct { > > > + __le32 value; > > > + __le16 report; > > > + __le16 flow_state; > > > + } hash; > > > + __virtio32 reserved; > > > > > > Does endian-ness matter? If not - just u32? > > I suppose it does not matter as long as this is reserved. Should it be > __le32, at least? One can safely assign 0 to any value. > > > + __virtio64 tstamp; > > > +}; > > > + > > > > Given it's only available in modern devices, I think we > > can make this __le64 tstamp. > > Actually, would it be possible to make new features available on > legacy devices? There is nothing in the features bits precluding it. I think it won't be possible: you are using feature bit 55, legacy devices have up to 32 feature bits. And of course the header looks a bit differently for legacy, you would have to add special code to handle that when mergeable buffers are off. > > I have a revised patchset almost ready. I suppose I should send it as > RFC again, and simultaneously file an OASIS ballot for each feature? that would be great. -- MST
On Tue, Feb 2, 2021 at 6:06 PM Michael S. Tsirkin <mst@redhat.com> wrote: > > On Tue, Feb 02, 2021 at 05:17:13PM -0500, Willem de Bruijn wrote: > > On Tue, Feb 2, 2021 at 9:08 AM Michael S. Tsirkin <mst@redhat.com> wrote: > > > > > > On Mon, Dec 28, 2020 at 11:22:32AM -0500, Willem de Bruijn wrote: > > > > From: Willem de Bruijn <willemb@google.com> > > > > > > > > Add optional PTP hardware timestamp offload for virtio-net. > > > > > > > > Accurate RTT measurement requires timestamps close to the wire. > > > > Introduce virtio feature VIRTIO_NET_F_RX_TSTAMP. If negotiated, the > > > > virtio-net header is expanded with room for a timestamp. A host may > > > > pass receive timestamps for all or some packets. A timestamp is valid > > > > if non-zero. > > > > > > > > The timestamp straddles (virtual) hardware domains. Like PTP, use > > > > international atomic time (CLOCK_TAI) as global clock base. It is > > > > guest responsibility to sync with host, e.g., through kvm-clock. > > > > > > > > Signed-off-by: Willem de Bruijn <willemb@google.com> > > > > --- > > > > drivers/net/virtio_net.c | 20 +++++++++++++++++++- > > > > include/uapi/linux/virtio_net.h | 12 ++++++++++++ > > > > 2 files changed, 31 insertions(+), 1 deletion(-) > > > > > > > > diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c > > > > index b917b7333928..57744bb6a141 100644 > > > > --- a/drivers/net/virtio_net.c > > > > +++ b/drivers/net/virtio_net.c > > > > @@ -204,6 +204,9 @@ struct virtnet_info { > > > > /* Guest will pass tx path info to the host */ > > > > bool has_tx_hash; > > > > > > > > + /* Host will pass CLOCK_TAI receive time to the guest */ > > > > + bool has_rx_tstamp; > > > > + > > > > /* Has control virtqueue */ > > > > bool has_cvq; > > > > > > > > @@ -292,6 +295,13 @@ static inline struct virtio_net_hdr_mrg_rxbuf *skb_vnet_hdr(struct sk_buff *skb) > > > > return (struct virtio_net_hdr_mrg_rxbuf *)skb->cb; > > > > } > > > > > > > > +static inline struct virtio_net_hdr_v12 *skb_vnet_hdr_12(struct sk_buff *skb) > > > > +{ > > > > + BUILD_BUG_ON(sizeof(struct virtio_net_hdr_v12) > sizeof(skb->cb)); > > > > + > > > > + return (void *)skb->cb; > > > > +} > > > > + > > > > /* > > > > * private is used to chain pages for big packets, put the whole > > > > * most recent used list in the beginning for reuse > > > > @@ -1082,6 +1092,9 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, > > > > goto frame_err; > > > > } > > > > > > > > + if (vi->has_rx_tstamp) > > > > + skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(skb_vnet_hdr_12(skb)->tstamp); > > > > + > > > > skb_record_rx_queue(skb, vq2rxq(rq->vq)); > > > > skb->protocol = eth_type_trans(skb, dev); > > > > pr_debug("Receiving skb proto 0x%04x len %i type %i\n", > > > > @@ -3071,6 +3084,11 @@ static int virtnet_probe(struct virtio_device *vdev) > > > > vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash); > > > > } > > > > > > > > + if (virtio_has_feature(vdev, VIRTIO_NET_F_RX_TSTAMP)) { > > > > + vi->has_rx_tstamp = true; > > > > + vi->hdr_len = sizeof(struct virtio_net_hdr_v12); > > > > + } > > > > + > > > > if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) || > > > > virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) > > > > vi->any_header_sg = true; > > > > @@ -3261,7 +3279,7 @@ static struct virtio_device_id id_table[] = { > > > > VIRTIO_NET_F_CTRL_MAC_ADDR, \ > > > > VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ > > > > VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ > > > > - VIRTIO_NET_F_TX_HASH > > > > + VIRTIO_NET_F_TX_HASH, VIRTIO_NET_F_RX_TSTAMP > > > > > > > > static unsigned int features[] = { > > > > VIRTNET_FEATURES, > > > > diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h > > > > index f6881b5b77ee..0ffe2eeebd4a 100644 > > > > --- a/include/uapi/linux/virtio_net.h > > > > +++ b/include/uapi/linux/virtio_net.h > > > > @@ -57,6 +57,7 @@ > > > > * Steering */ > > > > #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ > > > > > > > > +#define VIRTIO_NET_F_RX_TSTAMP 55 /* Host sends TAI receive time */ > > > > #define VIRTIO_NET_F_TX_HASH 56 /* Guest sends hash report */ > > > > #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ > > > > #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ > > > > @@ -182,6 +183,17 @@ struct virtio_net_hdr_v1_hash { > > > > }; > > > > }; > > > > > > > > +struct virtio_net_hdr_v12 { > > > > + struct virtio_net_hdr_v1 hdr; > > > > + struct { > > > > + __le32 value; > > > > + __le16 report; > > > > + __le16 flow_state; > > > > + } hash; > > > > + __virtio32 reserved; > > > > > > > > > Does endian-ness matter? If not - just u32? > > > > I suppose it does not matter as long as this is reserved. Should it be > > __le32, at least? > > One can safely assign 0 to any value. Ack. > > > > > + __virtio64 tstamp; > > > > +}; > > > > + > > > > > > Given it's only available in modern devices, I think we > > > can make this __le64 tstamp. > > > > Actually, would it be possible to make new features available on > > legacy devices? There is nothing in the features bits precluding it. > > I think it won't be possible: you are using feature bit 55, > legacy devices have up to 32 feature bits. And of course the > header looks a bit differently for legacy, you would have to add special > code to handle that when mergeable buffers are off. I think I can make the latter work. I did start without a dependency on the v1 header initially. Feature bit array length I had not considered. Good point. Need to think about that. It would be very appealing if in particular the tx-hash feature could work in legacy mode. > > > > I have a revised patchset almost ready. I suppose I should send it as > > RFC again, and simultaneously file an OASIS ballot for each feature? > > that would be great. Will do, thanks.
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index b917b7333928..57744bb6a141 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -204,6 +204,9 @@ struct virtnet_info { /* Guest will pass tx path info to the host */ bool has_tx_hash; + /* Host will pass CLOCK_TAI receive time to the guest */ + bool has_rx_tstamp; + /* Has control virtqueue */ bool has_cvq; @@ -292,6 +295,13 @@ static inline struct virtio_net_hdr_mrg_rxbuf *skb_vnet_hdr(struct sk_buff *skb) return (struct virtio_net_hdr_mrg_rxbuf *)skb->cb; } +static inline struct virtio_net_hdr_v12 *skb_vnet_hdr_12(struct sk_buff *skb) +{ + BUILD_BUG_ON(sizeof(struct virtio_net_hdr_v12) > sizeof(skb->cb)); + + return (void *)skb->cb; +} + /* * private is used to chain pages for big packets, put the whole * most recent used list in the beginning for reuse @@ -1082,6 +1092,9 @@ static void receive_buf(struct virtnet_info *vi, struct receive_queue *rq, goto frame_err; } + if (vi->has_rx_tstamp) + skb_hwtstamps(skb)->hwtstamp = ns_to_ktime(skb_vnet_hdr_12(skb)->tstamp); + skb_record_rx_queue(skb, vq2rxq(rq->vq)); skb->protocol = eth_type_trans(skb, dev); pr_debug("Receiving skb proto 0x%04x len %i type %i\n", @@ -3071,6 +3084,11 @@ static int virtnet_probe(struct virtio_device *vdev) vi->hdr_len = sizeof(struct virtio_net_hdr_v1_hash); } + if (virtio_has_feature(vdev, VIRTIO_NET_F_RX_TSTAMP)) { + vi->has_rx_tstamp = true; + vi->hdr_len = sizeof(struct virtio_net_hdr_v12); + } + if (virtio_has_feature(vdev, VIRTIO_F_ANY_LAYOUT) || virtio_has_feature(vdev, VIRTIO_F_VERSION_1)) vi->any_header_sg = true; @@ -3261,7 +3279,7 @@ static struct virtio_device_id id_table[] = { VIRTIO_NET_F_CTRL_MAC_ADDR, \ VIRTIO_NET_F_MTU, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, \ VIRTIO_NET_F_SPEED_DUPLEX, VIRTIO_NET_F_STANDBY, \ - VIRTIO_NET_F_TX_HASH + VIRTIO_NET_F_TX_HASH, VIRTIO_NET_F_RX_TSTAMP static unsigned int features[] = { VIRTNET_FEATURES, diff --git a/include/uapi/linux/virtio_net.h b/include/uapi/linux/virtio_net.h index f6881b5b77ee..0ffe2eeebd4a 100644 --- a/include/uapi/linux/virtio_net.h +++ b/include/uapi/linux/virtio_net.h @@ -57,6 +57,7 @@ * Steering */ #define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ +#define VIRTIO_NET_F_RX_TSTAMP 55 /* Host sends TAI receive time */ #define VIRTIO_NET_F_TX_HASH 56 /* Guest sends hash report */ #define VIRTIO_NET_F_HASH_REPORT 57 /* Supports hash report */ #define VIRTIO_NET_F_RSS 60 /* Supports RSS RX steering */ @@ -182,6 +183,17 @@ struct virtio_net_hdr_v1_hash { }; }; +struct virtio_net_hdr_v12 { + struct virtio_net_hdr_v1 hdr; + struct { + __le32 value; + __le16 report; + __le16 flow_state; + } hash; + __virtio32 reserved; + __virtio64 tstamp; +}; + #ifndef VIRTIO_NET_NO_LEGACY /* This header comes first in the scatter-gather list. * For legacy virtio, if VIRTIO_F_ANY_LAYOUT is not negotiated, it must