From: David Howells <dhowells@redhat.com>
To: Jens Axboe <axboe@kernel.dk>, Al Viro <viro@zeniv.linux.org.uk>,
Christoph Hellwig <hch@infradead.org>
Cc: David Howells <dhowells@redhat.com>,
Matthew Wilcox <willy@infradead.org>, Jan Kara <jack@suse.cz>,
Jeff Layton <jlayton@kernel.org>,
David Hildenbrand <david@redhat.com>,
Jason Gunthorpe <jgg@nvidia.com>,
Logan Gunthorpe <logang@deltatee.com>,
Hillf Danton <hdanton@sina.com>,
Christian Brauner <brauner@kernel.org>,
linux-fsdevel@vger.kernel.org, linux-block@vger.kernel.org,
linux-kernel@vger.kernel.org, linux-mm@kvack.org,
Dominique Martinet <asmadeus@codewreck.org>,
Eric Van Hensbergen <ericvh@gmail.com>,
Latchesar Ionkov <lucho@ionkov.net>,
Christian Schoenebeck <linux_oss@crudebyte.com>,
v9fs-developer@lists.sourceforge.net
Subject: [RFC PATCH 10/11] 9p: Pin pages rather than ref'ing if appropriate
Date: Fri, 30 Jun 2023 16:25:23 +0100 [thread overview]
Message-ID: <20230630152524.661208-11-dhowells@redhat.com> (raw)
In-Reply-To: <20230630152524.661208-1-dhowells@redhat.com>
Convert the 9p filesystem to use iov_iter_extract_pages() instead of
iov_iter_get_pages_alloc2(). Depending on the type of iterator, this will
either pin the pages or leave them unaltered, rather than taking a ref on them.
The pages need to be pinned for DIO-read rather than having refs taken on
them to prevent VM copy-on-write from malfunctioning during a concurrent
fork() (the result of the I/O would otherwise end up only visible to the
child process and not the parent).
Signed-off-by: David Howells <dhowells@redhat.com>
cc: Dominique Martinet <asmadeus@codewreck.org>
cc: Eric Van Hensbergen <ericvh@gmail.com>
cc: Latchesar Ionkov <lucho@ionkov.net>
cc: Christian Schoenebeck <linux_oss@crudebyte.com>
cc: v9fs-developer@lists.sourceforge.net
---
net/9p/trans_common.c | 8 ++--
net/9p/trans_common.h | 2 +-
net/9p/trans_virtio.c | 92 ++++++++++++++-----------------------------
3 files changed, 34 insertions(+), 68 deletions(-)
diff --git a/net/9p/trans_common.c b/net/9p/trans_common.c
index c827f694551c..4342de18f08b 100644
--- a/net/9p/trans_common.c
+++ b/net/9p/trans_common.c
@@ -9,16 +9,16 @@
#include "trans_common.h"
/**
- * p9_release_pages - Release pages after the transaction.
+ * p9_unpin_pages - Unpin pages after the transaction.
* @pages: array of pages to be put
* @nr_pages: size of array
*/
-void p9_release_pages(struct page **pages, int nr_pages)
+void p9_unpin_pages(struct page **pages, int nr_pages)
{
int i;
for (i = 0; i < nr_pages; i++)
if (pages[i])
- put_page(pages[i]);
+ unpin_user_page(pages[i]);
}
-EXPORT_SYMBOL(p9_release_pages);
+EXPORT_SYMBOL(p9_unpin_pages);
diff --git a/net/9p/trans_common.h b/net/9p/trans_common.h
index 32134db6abf3..fd94c48aba5b 100644
--- a/net/9p/trans_common.h
+++ b/net/9p/trans_common.h
@@ -4,4 +4,4 @@
* Author Venkateswararao Jujjuri <jvrao@linux.vnet.ibm.com>
*/
-void p9_release_pages(struct page **pages, int nr_pages);
+void p9_unpin_pages(struct page **pages, int nr_pages);
diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c
index 3c27ffb781e3..93569de2bdba 100644
--- a/net/9p/trans_virtio.c
+++ b/net/9p/trans_virtio.c
@@ -310,71 +310,35 @@ static int p9_get_mapped_pages(struct virtio_chan *chan,
struct iov_iter *data,
int count,
size_t *offs,
- int *need_drop)
+ bool *need_unpin,
+ iov_iter_extraction_t extraction_flags)
{
int nr_pages;
int err;
+ int n;
if (!iov_iter_count(data))
return 0;
- if (!iov_iter_is_kvec(data)) {
- int n;
- /*
- * We allow only p9_max_pages pinned. We wait for the
- * Other zc request to finish here
- */
- if (atomic_read(&vp_pinned) >= chan->p9_max_pages) {
- err = wait_event_killable(vp_wq,
- (atomic_read(&vp_pinned) < chan->p9_max_pages));
- if (err == -ERESTARTSYS)
- return err;
- }
- n = iov_iter_get_pages_alloc2(data, pages, count, offs);
- if (n < 0)
- return n;
- *need_drop = 1;
- nr_pages = DIV_ROUND_UP(n + *offs, PAGE_SIZE);
- atomic_add(nr_pages, &vp_pinned);
- return n;
- } else {
- /* kernel buffer, no need to pin pages */
- int index;
- size_t len;
- void *p;
-
- /* we'd already checked that it's non-empty */
- while (1) {
- len = iov_iter_single_seg_count(data);
- if (likely(len)) {
- p = data->kvec->iov_base + data->iov_offset;
- break;
- }
- iov_iter_advance(data, 0);
- }
- if (len > count)
- len = count;
-
- nr_pages = DIV_ROUND_UP((unsigned long)p + len, PAGE_SIZE) -
- (unsigned long)p / PAGE_SIZE;
-
- *pages = kmalloc_array(nr_pages, sizeof(struct page *),
- GFP_NOFS);
- if (!*pages)
- return -ENOMEM;
-
- *need_drop = 0;
- p -= (*offs = offset_in_page(p));
- for (index = 0; index < nr_pages; index++) {
- if (is_vmalloc_addr(p))
- (*pages)[index] = vmalloc_to_page(p);
- else
- (*pages)[index] = kmap_to_page(p);
- p += PAGE_SIZE;
- }
- iov_iter_advance(data, len);
- return len;
+ /*
+ * We allow only p9_max_pages pinned. We wait for the
+ * Other zc request to finish here
+ */
+ if (atomic_read(&vp_pinned) >= chan->p9_max_pages) {
+ err = wait_event_killable(vp_wq,
+ (atomic_read(&vp_pinned) < chan->p9_max_pages));
+ if (err == -ERESTARTSYS)
+ return err;
}
+
+ n = iov_iter_extract_pages(data, pages, count, INT_MAX,
+ extraction_flags, offs);
+ if (n < 0)
+ return n;
+ *need_unpin = iov_iter_extract_will_pin(data);
+ nr_pages = DIV_ROUND_UP(n + *offs, PAGE_SIZE);
+ atomic_add(nr_pages, &vp_pinned);
+ return n;
}
static void handle_rerror(struct p9_req_t *req, int in_hdr_len,
@@ -429,7 +393,7 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
struct virtio_chan *chan = client->trans;
struct scatterlist *sgs[4];
size_t offs;
- int need_drop = 0;
+ bool need_unpin;
int kicked = 0;
p9_debug(P9_DEBUG_TRANS, "virtio request\n");
@@ -437,7 +401,8 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
if (uodata) {
__le32 sz;
int n = p9_get_mapped_pages(chan, &out_pages, uodata,
- outlen, &offs, &need_drop);
+ outlen, &offs, &need_unpin,
+ WRITE_FROM_ITER);
if (n < 0) {
err = n;
goto err_out;
@@ -456,7 +421,8 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
memcpy(&req->tc.sdata[0], &sz, sizeof(sz));
} else if (uidata) {
int n = p9_get_mapped_pages(chan, &in_pages, uidata,
- inlen, &offs, &need_drop);
+ inlen, &offs, &need_unpin,
+ READ_INTO_ITER);
if (n < 0) {
err = n;
goto err_out;
@@ -542,13 +508,13 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
* Non kernel buffers are pinned, unpin them
*/
err_out:
- if (need_drop) {
+ if (need_unpin) {
if (in_pages) {
- p9_release_pages(in_pages, in_nr_pages);
+ p9_unpin_pages(in_pages, in_nr_pages);
atomic_sub(in_nr_pages, &vp_pinned);
}
if (out_pages) {
- p9_release_pages(out_pages, out_nr_pages);
+ p9_unpin_pages(out_pages, out_nr_pages);
atomic_sub(out_nr_pages, &vp_pinned);
}
/* wakeup anybody waiting for slots to pin pages */
next prev parent reply other threads:[~2023-06-30 15:26 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-06-30 15:25 [RFC PATCH 00/11] iov_iter: Use I/O direction from kiocb, iomap & request rather than iov_iter David Howells
2023-06-30 15:25 ` [RFC PATCH 01/11] iov_iter: Fix comment refs to iov_iter_get_pages/pages_alloc() David Howells
2023-07-06 15:21 ` Christoph Hellwig
2023-06-30 15:25 ` [RFC PATCH 02/11] vfs: Set IOCB_WRITE in iocbs that we're going to write from David Howells
2023-07-06 15:22 ` Christoph Hellwig
2023-06-30 15:25 ` [RFC PATCH 03/11] vfs: Use init_kiocb() to initialise new IOCBs David Howells
2023-06-30 15:39 ` Jens Axboe
2023-06-30 16:00 ` David Howells
2023-06-30 16:05 ` Jens Axboe
2023-07-06 15:29 ` Christoph Hellwig
2023-06-30 15:25 ` [RFC PATCH 04/11] iov_iter: Use IOCB_WRITE rather than iterator direction David Howells
2023-06-30 15:25 ` [RFC PATCH 05/11] iov_iter: Use IOMAP_WRITE " David Howells
2023-07-06 15:30 ` Christoph Hellwig
2023-06-30 15:25 ` [RFC PATCH 06/11] iov_iter: Use op_is_write() " David Howells
2023-07-06 15:30 ` Christoph Hellwig
2023-06-30 15:25 ` [RFC PATCH 07/11] cifs: Drop the check using iov_iter_rw() David Howells
2023-06-30 15:25 ` [RFC PATCH 08/11] iov_iter: Drop iov_iter_rw() and fold in last user David Howells
2023-07-06 15:31 ` Christoph Hellwig
2023-06-30 15:25 ` [RFC PATCH 09/11] iov_iter: Use I/O dir flags with iov_iter_extract_pages() David Howells
2023-06-30 15:25 ` David Howells [this message]
2023-06-30 15:25 ` [RFC PATCH 11/11] scsi: Use extract_iter_to_sg() David Howells
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230630152524.661208-11-dhowells@redhat.com \
--to=dhowells@redhat.com \
--cc=asmadeus@codewreck.org \
--cc=axboe@kernel.dk \
--cc=brauner@kernel.org \
--cc=david@redhat.com \
--cc=ericvh@gmail.com \
--cc=hch@infradead.org \
--cc=hdanton@sina.com \
--cc=jack@suse.cz \
--cc=jgg@nvidia.com \
--cc=jlayton@kernel.org \
--cc=linux-block@vger.kernel.org \
--cc=linux-fsdevel@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=linux_oss@crudebyte.com \
--cc=logang@deltatee.com \
--cc=lucho@ionkov.net \
--cc=v9fs-developer@lists.sourceforge.net \
--cc=viro@zeniv.linux.org.uk \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox