diff mbox series

[net-next,1/3] net: devmem: support single IOV with sendmsg

Message ID 20250520203044.2689904-1-stfomichev@gmail.com
State New
Headers show
Series [net-next,1/3] net: devmem: support single IOV with sendmsg | expand

Commit Message

Stanislav Fomichev May 20, 2025, 8:30 p.m. UTC
sendmsg() with a single iov becomes ITER_UBUF, sendmsg() with multiple
iovs becomes ITER_IOVEC. iter_iov_len does not return correct
value for UBUF, so teach to treat UBUF differently.

Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Pavel Begunkov <asml.silence@gmail.com>
Cc: Mina Almasry <almasrymina@google.com>
Fixes: bd61848900bf ("net: devmem: Implement TX path")
Signed-off-by: Stanislav Fomichev <stfomichev@gmail.com>
---
 include/linux/uio.h | 8 +++++++-
 net/core/datagram.c | 3 ++-
 2 files changed, 9 insertions(+), 2 deletions(-)

Comments

Mina Almasry May 21, 2025, 5:21 p.m. UTC | #1
On Tue, May 20, 2025 at 1:30 PM Stanislav Fomichev <stfomichev@gmail.com> wrote:
>
> Add new -z argument to specify max IOV size. By default, use
> single large IOV.
>
> Signed-off-by: Stanislav Fomichev <stfomichev@gmail.com>
> ---
>  .../selftests/drivers/net/hw/ncdevmem.c       | 49 +++++++++++--------
>  1 file changed, 29 insertions(+), 20 deletions(-)
>
> diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
> index ca723722a810..fc7ba7d71502 100644
> --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c
> +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
> @@ -82,6 +82,9 @@
>  #define MSG_SOCK_DEVMEM 0x2000000
>  #endif
>
> +#define MAX_IOV 1024
> +
> +static size_t max_chunk;
>  static char *server_ip;
>  static char *client_ip;
>  static char *port;
> @@ -834,10 +837,10 @@ static int do_client(struct memory_buffer *mem)
>         struct sockaddr_in6 server_sin;
>         struct sockaddr_in6 client_sin;
>         struct ynl_sock *ys = NULL;
> +       struct iovec iov[MAX_IOV];
>         struct msghdr msg = {};
>         ssize_t line_size = 0;
>         struct cmsghdr *cmsg;
> -       struct iovec iov[2];
>         char *line = NULL;
>         unsigned long mid;
>         size_t len = 0;
> @@ -893,27 +896,29 @@ static int do_client(struct memory_buffer *mem)
>                 if (line_size < 0)
>                         break;
>
> -               mid = (line_size / 2) + 1;
> -
> -               iov[0].iov_base = (void *)1;
> -               iov[0].iov_len = mid;
> -               iov[1].iov_base = (void *)(mid + 2);
> -               iov[1].iov_len = line_size - mid;
> +               if (max_chunk) {
> +                       msg.msg_iovlen =
> +                               (line_size + max_chunk - 1) / max_chunk;
> +                       if (msg.msg_iovlen > MAX_IOV)
> +                               error(1, 0,
> +                                     "can't partition %zd bytes into maximum of %d chunks",
> +                                     line_size, MAX_IOV);
>
> -               provider->memcpy_to_device(mem, (size_t)iov[0].iov_base, line,
> -                                          iov[0].iov_len);
> -               provider->memcpy_to_device(mem, (size_t)iov[1].iov_base,
> -                                          line + iov[0].iov_len,
> -                                          iov[1].iov_len);
> +                       for (int i = 0; i < msg.msg_iovlen; i++) {
> +                               iov[i].iov_base = (void *)(i * max_chunk);
> +                               iov[i].iov_len = max_chunk;

Isn't the last iov going to be truncated in the case where line_size
is not exactly divisible with max_chunk?

> +                       }
>
> -               fprintf(stderr,
> -                       "read line_size=%ld iov[0].iov_base=%lu, iov[0].iov_len=%lu, iov[1].iov_base=%lu, iov[1].iov_len=%lu\n",
> -                       line_size, (unsigned long)iov[0].iov_base,
> -                       iov[0].iov_len, (unsigned long)iov[1].iov_base,
> -                       iov[1].iov_len);
> +                       iov[msg.msg_iovlen - 1].iov_len =
> +                               line_size - (msg.msg_iovlen - 1) * max_chunk;
> +               } else {
> +                       iov[0].iov_base = 0;
> +                       iov[0].iov_len = line_size;
> +                       msg.msg_iovlen = 1;
> +               }

Do you need to special case this? Shouldn't this be the same as max_chunk==1?

>
>                 msg.msg_iov = iov;
> -               msg.msg_iovlen = 2;
> +               provider->memcpy_to_device(mem, 0, line, line_size);
>
>                 msg.msg_control = ctrl_data;
>                 msg.msg_controllen = sizeof(ctrl_data);
> @@ -934,7 +939,8 @@ static int do_client(struct memory_buffer *mem)
>                 fprintf(stderr, "sendmsg_ret=%d\n", ret);
>
>                 if (ret != line_size)
> -                       error(1, errno, "Did not send all bytes");
> +                       error(1, errno, "Did not send all bytes %d vs %zd", ret,
> +                             line_size);
>
>                 wait_compl(socket_fd);
>         }
> @@ -956,7 +962,7 @@ int main(int argc, char *argv[])
>         int is_server = 0, opt;
>         int ret;
>
> -       while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:")) != -1) {
> +       while ((opt = getopt(argc, argv, "ls:c:p:v:q:t:f:z:")) != -1) {
>                 switch (opt) {
>                 case 'l':
>                         is_server = 1;
> @@ -982,6 +988,9 @@ int main(int argc, char *argv[])
>                 case 'f':
>                         ifname = optarg;
>                         break;
> +               case 'z':
> +                       max_chunk = atoi(optarg);
> +                       break;
>                 case '?':
>                         fprintf(stderr, "unknown option: %c\n", optopt);
>                         break;
> --
> 2.49.0
>
Stanislav Fomichev May 21, 2025, 5:33 p.m. UTC | #2
On 05/21, Mina Almasry wrote:
> On Tue, May 20, 2025 at 1:30 PM Stanislav Fomichev <stfomichev@gmail.com> wrote:
> >
> > sendmsg() with a single iov becomes ITER_UBUF, sendmsg() with multiple
> > iovs becomes ITER_IOVEC. iter_iov_len does not return correct
> > value for UBUF, so teach to treat UBUF differently.
> >
> > Cc: Al Viro <viro@zeniv.linux.org.uk>
> > Cc: Pavel Begunkov <asml.silence@gmail.com>
> > Cc: Mina Almasry <almasrymina@google.com>
> > Fixes: bd61848900bf ("net: devmem: Implement TX path")
> > Signed-off-by: Stanislav Fomichev <stfomichev@gmail.com>
> > ---
> >  include/linux/uio.h | 8 +++++++-
> >  net/core/datagram.c | 3 ++-
> >  2 files changed, 9 insertions(+), 2 deletions(-)
> >
> > diff --git a/include/linux/uio.h b/include/linux/uio.h
> > index 49ece9e1888f..393d0622cc28 100644
> > --- a/include/linux/uio.h
> > +++ b/include/linux/uio.h
> > @@ -99,7 +99,13 @@ static inline const struct iovec *iter_iov(const struct iov_iter *iter)
> >  }
> >
> >  #define iter_iov_addr(iter)    (iter_iov(iter)->iov_base + (iter)->iov_offset)
> > -#define iter_iov_len(iter)     (iter_iov(iter)->iov_len - (iter)->iov_offset)
> > +
> > +static inline size_t iter_iov_len(const struct iov_iter *i)
> > +{
> > +       if (i->iter_type == ITER_UBUF)
> > +               return i->count;
> > +       return iter_iov(i)->iov_len - i->iov_offset;
> > +}
> >
> 
> This change looks good to me from devmem perspective, but aren't you
> potentially breaking all these existing callers to iter_iov_len?
> 
> ackc -i iter_iov_len
> fs/read_write.c
> 846:                                            iter_iov_len(iter), ppos);
> 849:                                            iter_iov_len(iter), ppos);
> 858:            if (nr != iter_iov_len(iter))
> 
> mm/madvise.c
> 1808:           size_t len_in = iter_iov_len(iter);
> 1838:           iov_iter_advance(iter, iter_iov_len(iter));
> 
> io_uring/rw.c
> 710:                    len = iter_iov_len(iter);
> 
> Or are you confident this change is compatible with these callers for
> some reason?
 
Pavel did go over all callers, see:
https://lore.kernel.org/netdev/7f06216e-1e66-433e-a247-2445dac22498@gmail.com/

> Maybe better to handle this locally in zerocopy_fill_skb_from_devmem,
> and then follow up with a more ambitious change that streamlines how
> all the iters behave.

Yes, I can definitely do that, but it seems a bit strange that the
callers need to distinguish between IOVEC and UBUF (which is a 1-entry
IOVEC), so having working iter_iov_len seems a bit cleaner.
Mina Almasry May 21, 2025, 6:44 p.m. UTC | #3
On Wed, May 21, 2025 at 10:33 AM Stanislav Fomichev
<stfomichev@gmail.com> wrote:
>
> On 05/21, Mina Almasry wrote:
> > On Tue, May 20, 2025 at 1:30 PM Stanislav Fomichev <stfomichev@gmail.com> wrote:
> > >
> > > sendmsg() with a single iov becomes ITER_UBUF, sendmsg() with multiple
> > > iovs becomes ITER_IOVEC. iter_iov_len does not return correct
> > > value for UBUF, so teach to treat UBUF differently.
> > >
> > > Cc: Al Viro <viro@zeniv.linux.org.uk>
> > > Cc: Pavel Begunkov <asml.silence@gmail.com>
> > > Cc: Mina Almasry <almasrymina@google.com>
> > > Fixes: bd61848900bf ("net: devmem: Implement TX path")
> > > Signed-off-by: Stanislav Fomichev <stfomichev@gmail.com>
> > > ---
> > >  include/linux/uio.h | 8 +++++++-
> > >  net/core/datagram.c | 3 ++-
> > >  2 files changed, 9 insertions(+), 2 deletions(-)
> > >
> > > diff --git a/include/linux/uio.h b/include/linux/uio.h
> > > index 49ece9e1888f..393d0622cc28 100644
> > > --- a/include/linux/uio.h
> > > +++ b/include/linux/uio.h
> > > @@ -99,7 +99,13 @@ static inline const struct iovec *iter_iov(const struct iov_iter *iter)
> > >  }
> > >
> > >  #define iter_iov_addr(iter)    (iter_iov(iter)->iov_base + (iter)->iov_offset)
> > > -#define iter_iov_len(iter)     (iter_iov(iter)->iov_len - (iter)->iov_offset)
> > > +
> > > +static inline size_t iter_iov_len(const struct iov_iter *i)
> > > +{
> > > +       if (i->iter_type == ITER_UBUF)
> > > +               return i->count;
> > > +       return iter_iov(i)->iov_len - i->iov_offset;
> > > +}
> > >
> >
> > This change looks good to me from devmem perspective, but aren't you
> > potentially breaking all these existing callers to iter_iov_len?
> >
> > ackc -i iter_iov_len
> > fs/read_write.c
> > 846:                                            iter_iov_len(iter), ppos);
> > 849:                                            iter_iov_len(iter), ppos);
> > 858:            if (nr != iter_iov_len(iter))
> >
> > mm/madvise.c
> > 1808:           size_t len_in = iter_iov_len(iter);
> > 1838:           iov_iter_advance(iter, iter_iov_len(iter));
> >
> > io_uring/rw.c
> > 710:                    len = iter_iov_len(iter);
> >
> > Or are you confident this change is compatible with these callers for
> > some reason?
>
> Pavel did go over all callers, see:
> https://lore.kernel.org/netdev/7f06216e-1e66-433e-a247-2445dac22498@gmail.com/
>

Thanks, I'm not confident enough in my understanding of the other call
sites of iter_iov_len to provide a reviewed-by, but this looks fine to
me for devmem TCP, so I think acked-by is appropriate.

Acked-by: Mina Almasry <almasrymina@google.com>

Thanks!
diff mbox series

Patch

diff --git a/include/linux/uio.h b/include/linux/uio.h
index 49ece9e1888f..393d0622cc28 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -99,7 +99,13 @@  static inline const struct iovec *iter_iov(const struct iov_iter *iter)
 }
 
 #define iter_iov_addr(iter)	(iter_iov(iter)->iov_base + (iter)->iov_offset)
-#define iter_iov_len(iter)	(iter_iov(iter)->iov_len - (iter)->iov_offset)
+
+static inline size_t iter_iov_len(const struct iov_iter *i)
+{
+	if (i->iter_type == ITER_UBUF)
+		return i->count;
+	return iter_iov(i)->iov_len - i->iov_offset;
+}
 
 static inline enum iter_type iov_iter_type(const struct iov_iter *i)
 {
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 9ef5442536f5..c44f1d2b70a4 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -706,7 +706,8 @@  zerocopy_fill_skb_from_devmem(struct sk_buff *skb, struct iov_iter *from,
 	 * iov_addrs are interpreted as an offset in bytes into the dma-buf to
 	 * send from. We do not support other iter types.
 	 */
-	if (iov_iter_type(from) != ITER_IOVEC)
+	if (iov_iter_type(from) != ITER_IOVEC &&
+	    iov_iter_type(from) != ITER_UBUF)
 		return -EFAULT;
 
 	while (length && iov_iter_count(from)) {