From: Trond Myklebust <trondmy@hammerspace.com>
To: David Howells <dhowells@redhat.com>
Cc: Matthew Wilcox <willy@infradead.org>,
	"David S. Miller" <davem@davemloft.net>,
	Eric Dumazet <edumazet@google.com>,
	Jakub Kicinski <kuba@kernel.org>, Paolo Abeni <pabeni@redhat.com>,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	Christoph Hellwig <hch@infradead.org>,
	Jens Axboe <axboe@kernel.dk>, Jeffrey Layton <jlayton@kernel.org>,
	Christian Brauner <brauner@kernel.org>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	"netdev@vger.kernel.org" <netdev@vger.kernel.org>,
	"linux-fsdevel@vger.kernel.org" <linux-fsdevel@vger.kernel.org>,
	"linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"linux-mm@kvack.org" <linux-mm@kvack.org>,
	Trond Myklebust <trondmy@hammerspace.com>,
	Anna Schumaker <anna@kernel.org>,
	Charles Edward Lever <chuck.lever@oracle.com>,
	"linux-nfs@vger.kernel.org" <linux-nfs@vger.kernel.org>
Subject: Re: [RFC PATCH 27/28] sunrpc: Use sendmsg(MSG_SPLICE_PAGES) rather then sendpage
Date: Thu, 16 Mar 2023 16:17:52 +0000	[thread overview]
Message-ID: <3DFBF27C-A62B-4AFE-87FD-3DF53FC39E8E@hammerspace.com> (raw)
In-Reply-To: <20230316152618.711970-28-dhowells@redhat.com>



> On Mar 16, 2023, at 11:26, David Howells <dhowells@redhat.com> wrote:
> 
> When transmitting data, call down into TCP using a single sendmsg with
> MSG_SPLICE_PAGES to indicate that content should be spliced rather than
> performing several sendmsg and sendpage calls to transmit header, data
> pages and trailer.
> 
> To make this work, the data is assembled in a bio_vec array and attached to
> a BVEC-type iterator.  The bio_vec array has two extra slots before the
> first for headers and one after the last for a trailer.  The headers and
> trailer are copied into memory acquired from zcopy_alloc() which just
> breaks a page up into small pieces that can be freed with put_page().
> 
> Signed-off-by: David Howells <dhowells@redhat.com>
> cc: Trond Myklebust <trond.myklebust@hammerspace.com>
> cc: Anna Schumaker <anna@kernel.org>
> cc: Chuck Lever <chuck.lever@oracle.com>
> cc: Jeff Layton <jlayton@kernel.org>
> cc: "David S. Miller" <davem@davemloft.net>
> cc: Eric Dumazet <edumazet@google.com>
> cc: Jakub Kicinski <kuba@kernel.org>
> cc: Paolo Abeni <pabeni@redhat.com>
> cc: Jens Axboe <axboe@kernel.dk>
> cc: Matthew Wilcox <willy@infradead.org>
> cc: linux-nfs@vger.kernel.org
> cc: netdev@vger.kernel.org
> ---
> net/sunrpc/svcsock.c | 70 ++++++++++++--------------------------------
> net/sunrpc/xdr.c     | 24 ++++++++++++---
> 2 files changed, 38 insertions(+), 56 deletions(-)
> 
> diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
> index 03a4f5615086..1fa41ddbc40e 100644
> --- a/net/sunrpc/svcsock.c
> +++ b/net/sunrpc/svcsock.c
> @@ -36,6 +36,7 @@
> #include <linux/skbuff.h>
> #include <linux/file.h>
> #include <linux/freezer.h>
> +#include <linux/zcopy_alloc.h>
> #include <net/sock.h>
> #include <net/checksum.h>
> #include <net/ip.h>
> @@ -1060,16 +1061,8 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
> 	return 0;	/* record not complete */
> }
> 
> -static int svc_tcp_send_kvec(struct socket *sock, const struct kvec *vec,
> -			     int flags)
> -{
> -	return kernel_sendpage(sock, virt_to_page(vec->iov_base),
> -			       offset_in_page(vec->iov_base),
> -			       vec->iov_len, flags);
> -}
> -
> /*
> - * kernel_sendpage() is used exclusively to reduce the number of
> + * MSG_SPLICE_PAGES is used exclusively to reduce the number of
>  * copy operations in this path. Therefore the caller must ensure
>  * that the pages backing @xdr are unchanging.
>  *
> @@ -1081,65 +1074,38 @@ static int svc_tcp_sendmsg(struct socket *sock, struct xdr_buf *xdr,
> {
> 	const struct kvec *head = xdr->head;
> 	const struct kvec *tail = xdr->tail;
> -	struct kvec rm = {
> -		.iov_base = &marker,
> -		.iov_len = sizeof(marker),
> -	};
> 	struct msghdr msg = {
> -		.msg_flags = 0,
> +		.msg_flags = MSG_SPLICE_PAGES,
> 	};
> -	int ret;
> +	int ret, n = xdr_buf_pagecount(xdr), size;
> 
> 	*sentp = 0;
> 	ret = xdr_alloc_bvec(xdr, GFP_KERNEL);
> 	if (ret < 0)
> 		return ret;
> 
> -	ret = kernel_sendmsg(sock, &msg, &rm, 1, rm.iov_len);
> +	ret = zcopy_memdup(sizeof(marker), &marker, &xdr->bvec[-2], GFP_KERNEL);
> 	if (ret < 0)
> 		return ret;
> -	*sentp += ret;
> -	if (ret != rm.iov_len)
> -		return -EAGAIN;
> 
> -	ret = svc_tcp_send_kvec(sock, head, 0);
> +	ret = zcopy_memdup(head->iov_len, head->iov_base, &xdr->bvec[-1], GFP_KERNEL);
> 	if (ret < 0)
> 		return ret;
> -	*sentp += ret;
> -	if (ret != head->iov_len)
> -		goto out;
> 
> -	if (xdr->page_len) {
> -		unsigned int offset, len, remaining;
> -		struct bio_vec *bvec;
> -
> -		bvec = xdr->bvec + (xdr->page_base >> PAGE_SHIFT);
> -		offset = offset_in_page(xdr->page_base);
> -		remaining = xdr->page_len;
> -		while (remaining > 0) {
> -			len = min(remaining, bvec->bv_len - offset);
> -			ret = kernel_sendpage(sock, bvec->bv_page,
> -					      bvec->bv_offset + offset,
> -					      len, 0);
> -			if (ret < 0)
> -				return ret;
> -			*sentp += ret;
> -			if (ret != len)
> -				goto out;
> -			remaining -= len;
> -			offset = 0;
> -			bvec++;
> -		}
> -	}
> +	ret = zcopy_memdup(tail->iov_len, tail->iov_base, &xdr->bvec[n], GFP_KERNEL);
> +	if (ret < 0)
> +		return ret;
> 
> -	if (tail->iov_len) {
> -		ret = svc_tcp_send_kvec(sock, tail, 0);
> -		if (ret < 0)
> -			return ret;
> -		*sentp += ret;
> -	}
> +	size = sizeof(marker) + head->iov_len + xdr->page_len + tail->iov_len;
> +	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, xdr->bvec - 2, n + 3, size);
> 
> -out:
> +	ret = sock_sendmsg(sock, &msg);
> +	if (ret < 0)
> +		return ret;
> +	if (ret > 0)
> +		*sentp = ret;
> +	if (ret != size)
> +		return -EAGAIN;
> 	return 0;
> }
> 
> 
> diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
> index 36835b2f5446..6dff0b4f17b8 100644
> --- a/net/sunrpc/xdr.c
> +++ b/net/sunrpc/xdr.c
> @@ -145,14 +145,19 @@ xdr_alloc_bvec(struct xdr_buf *buf, gfp_t gfp)
> {
> 	size_t i, n = xdr_buf_pagecount(buf);
> 
> -	if (n != 0 && buf->bvec == NULL) {
> -		buf->bvec = kmalloc_array(n, sizeof(buf->bvec[0]), gfp);
> +	if (buf->bvec == NULL) {
> +		/* Allow for two headers and a trailer to be attached */
> +		buf->bvec = kmalloc_array(n + 3, sizeof(buf->bvec[0]), gfp);
> 		if (!buf->bvec)
> 			return -ENOMEM;
> +		buf->bvec += 2;
> +		buf->bvec[-2].bv_page = NULL;
> +		buf->bvec[-1].bv_page = NULL;

NACK.

> 		for (i = 0; i < n; i++) {
> 			bvec_set_page(&buf->bvec[i], buf->pages[i], PAGE_SIZE,
> 				      0);
> 		}
> +		buf->bvec[n].bv_page = NULL;
> 	}
> 	return 0;
> }
> @@ -160,8 +165,19 @@ xdr_alloc_bvec(struct xdr_buf *buf, gfp_t gfp)
> void
> xdr_free_bvec(struct xdr_buf *buf)
> {
> -	kfree(buf->bvec);
> -	buf->bvec = NULL;
> +	if (buf->bvec) {
> +		size_t n = xdr_buf_pagecount(buf);
> +
> +		if (buf->bvec[-2].bv_page)
> +			put_page(buf->bvec[-2].bv_page);
> +		if (buf->bvec[-1].bv_page)
> +			put_page(buf->bvec[-1].bv_page);
> +		if (buf->bvec[n].bv_page)
> +			put_page(buf->bvec[n].bv_page);
> +		buf->bvec -= 2;
> +		kfree(buf->bvec);
> +		buf->bvec = NULL;
> +	}
> }
> 
> /**
> 

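[Editor's note: for readers skimming the quoted description, below is a minimal sketch of the single-sendmsg pattern it describes. It is not code from the patch: send_marked_pages() is a hypothetical helper, the header and trailer are assumed to sit in directly mapped, page-backed memory that may safely be spliced (the patch itself copies them via zcopy_memdup() for exactly this reason), and MSG_SPLICE_PAGES is the sendmsg flag introduced earlier in this series.]

/*
 * Illustrative sketch only, not part of the patch: transmit one header,
 * a run of full data pages and a trailer as a single sendmsg() call with
 * MSG_SPLICE_PAGES.  The caller must guarantee that the pages and the
 * header/trailer memory remain unchanged and page-backed until the data
 * has been transmitted.
 */
#include <linux/bvec.h>
#include <linux/net.h>
#include <linux/slab.h>
#include <linux/socket.h>
#include <linux/uio.h>

static int send_marked_pages(struct socket *sock,
			     void *hdr, size_t hdrlen,
			     struct page **pages, unsigned int npages,
			     void *trailer, size_t tlen)
{
	struct msghdr msg = { .msg_flags = MSG_SPLICE_PAGES, };
	struct bio_vec *bvec;
	size_t size = hdrlen + tlen + (size_t)npages * PAGE_SIZE;
	unsigned int i, n = 0;
	int ret;

	/* One slot for the header, one per data page, one for the trailer. */
	bvec = kcalloc(npages + 2, sizeof(*bvec), GFP_KERNEL);
	if (!bvec)
		return -ENOMEM;

	bvec_set_virt(&bvec[n++], hdr, hdrlen);
	for (i = 0; i < npages; i++)
		bvec_set_page(&bvec[n++], pages[i], PAGE_SIZE, 0);
	bvec_set_virt(&bvec[n++], trailer, tlen);

	/* Describe the whole message with one BVEC iterator and hand it to TCP. */
	iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, bvec, n, size);
	ret = sock_sendmsg(sock, &msg);

	kfree(bvec);
	return ret;
}

The patch cannot point the bio_vec slots at the caller's head/tail kvecs directly, since those may live in slab memory; hence the zcopy_memdup() copies into dedicated page fragments that can be freed with put_page() once the socket is done with them.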


Thread overview: 50+ messages
2023-03-16 15:25 [RFC PATCH 00/28] splice, net: Replace sendpage with sendmsg(MSG_SPLICE_PAGES) David Howells
2023-03-16 15:25 ` [RFC PATCH 01/28] net: Declare MSG_SPLICE_PAGES internal sendmsg() flag David Howells
2023-03-16 15:25 ` [RFC PATCH 02/28] Add a special allocator for staging netfs protocol to MSG_SPLICE_PAGES David Howells
2023-03-16 17:28   ` Matthew Wilcox
2023-03-16 18:00   ` David Howells
2023-03-16 15:25 ` [RFC PATCH 03/28] tcp: Support MSG_SPLICE_PAGES David Howells
2023-03-16 18:37   ` Willem de Bruijn
2023-03-16 18:44   ` David Howells
2023-03-16 19:00     ` Willem de Bruijn
2023-03-21  0:38     ` David Howells
2023-03-21 14:22       ` Willem de Bruijn
2023-03-16 15:25 ` [RFC PATCH 04/28] tcp: Convert do_tcp_sendpages() to use MSG_SPLICE_PAGES David Howells
2023-03-16 15:25 ` [RFC PATCH 05/28] tcp_bpf: Inline do_tcp_sendpages as it's now a wrapper around tcp_sendmsg David Howells
2023-03-16 15:25 ` [RFC PATCH 06/28] espintcp: Inline do_tcp_sendpages() David Howells
2023-03-16 15:25 ` [RFC PATCH 07/28] tls: " David Howells
2023-03-16 15:25 ` [RFC PATCH 08/28] siw: " David Howells
2023-03-16 15:25 ` [RFC PATCH 09/28] tcp: Fold do_tcp_sendpages() into tcp_sendpage_locked() David Howells
2023-03-16 15:26 ` [RFC PATCH 10/28] ip, udp: Support MSG_SPLICE_PAGES David Howells
2023-03-16 15:26 ` [RFC PATCH 11/28] udp: Convert udp_sendpage() to use MSG_SPLICE_PAGES David Howells
2023-03-16 15:26 ` [RFC PATCH 12/28] af_unix: Support MSG_SPLICE_PAGES David Howells
2023-03-16 15:26 ` [RFC PATCH 13/28] crypto: af_alg: Indent the loop in af_alg_sendmsg() David Howells
2023-03-16 15:26 ` [RFC PATCH 14/28] crypto: af_alg: Support MSG_SPLICE_PAGES David Howells
2023-03-16 15:26 ` [RFC PATCH 15/28] crypto: af_alg: Convert af_alg_sendpage() to use MSG_SPLICE_PAGES David Howells
2023-03-16 15:26 ` [RFC PATCH 16/28] splice, net: Use sendmsg(MSG_SPLICE_PAGES) rather than ->sendpage() David Howells
2023-03-16 15:26 ` [RFC PATCH 17/28] Remove file->f_op->sendpage David Howells
2023-03-16 15:26 ` [RFC PATCH 18/28] siw: Use sendmsg(MSG_SPLICE_PAGES) rather than sendpage to transmit David Howells
2023-03-16 15:26 ` [RFC PATCH 19/28] ceph: Use sendmsg(MSG_SPLICE_PAGES) rather than sendpage David Howells
2023-03-16 15:26 ` [RFC PATCH 20/28] iscsi: " David Howells
2023-03-16 15:26 ` [RFC PATCH 21/28] tcp_bpf: Make tcp_bpf_sendpage() go through tcp_bpf_sendmsg(MSG_SPLICE_PAGES) David Howells
2023-03-16 15:26 ` [RFC PATCH 22/28] net: Use sendmsg(MSG_SPLICE_PAGES) not sendpage in skb_send_sock() David Howells
2023-03-16 15:26 ` [RFC PATCH 23/28] algif: Remove hash_sendpage*() David Howells
2023-03-16 15:26 ` [RFC PATCH 24/28] ceph: Use sendmsg(MSG_SPLICE_PAGES) rather than sendpage() David Howells
2023-03-16 15:26 ` [RFC PATCH 25/28] rds: Use sendmsg(MSG_SPLICE_PAGES) rather than sendpage David Howells
2023-03-16 15:26 ` [RFC PATCH 26/28] dlm: " David Howells
2023-03-16 15:26 ` [RFC PATCH 27/28] sunrpc: Use sendmsg(MSG_SPLICE_PAGES) rather then sendpage David Howells
2023-03-16 16:17   ` Trond Myklebust [this message]
2023-03-16 17:10     ` Chuck Lever III
2023-03-16 17:28     ` David Howells
2023-03-16 17:41       ` Chuck Lever III
2023-03-16 21:21     ` David Howells
2023-03-17 15:29       ` Chuck Lever III
2023-03-16 16:24   ` David Howells
2023-03-16 17:23     ` Trond Myklebust
2023-03-16 18:06     ` David Howells
2023-03-16 19:01       ` Trond Myklebust
2023-03-22 13:10       ` David Howells
2023-03-22 18:15       ` [RFC PATCH] iov_iter: Add an iterator-of-iterators David Howells
2023-03-22 18:47         ` Trond Myklebust
2023-03-22 18:49         ` Matthew Wilcox
     [not found] ` <20230316152618.711970-29-dhowells@redhat.com>
2023-03-16 15:57   ` [RFC PATCH 28/28] sock: Remove ->sendpage*() in favour of sendmsg(MSG_SPLICE_PAGES) Marc Kleine-Budde
