linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: John Hubbard <jhubbard@nvidia.com>
To: Jens Axboe <axboe@kernel.dk>, Jan Kara <jack@suse.cz>,
	Christoph Hellwig <hch@infradead.org>,
	Dave Chinner <dchinner@redhat.com>,
	"Darrick J . Wong" <djwong@kernel.org>,
	Theodore Ts'o <tytso@mit.edu>,
	Alexander Viro <viro@zeniv.linux.org.uk>,
	Miklos Szeredi <miklos@szeredi.hu>,
	Andrew Morton <akpm@linux-foundation.org>,
	Chaitanya Kulkarni <kch@nvidia.com>
Cc: <linux-block@vger.kernel.org>, <linux-fsdevel@vger.kernel.org>,
	<linux-xfs@vger.kernel.org>, <linux-mm@kvack.org>,
	LKML <linux-kernel@vger.kernel.org>,
	John Hubbard <jhubbard@nvidia.com>
Subject: [RFC PATCH 2/7] block: add dio_w_*() wrappers for pin, unpin user pages
Date: Fri, 25 Feb 2022 00:50:20 -0800	[thread overview]
Message-ID: <20220225085025.3052894-3-jhubbard@nvidia.com> (raw)
In-Reply-To: <20220225085025.3052894-1-jhubbard@nvidia.com>

Add a new config parameter, CONFIG_BLK_USE_PIN_USER_PAGES_FOR_DIO, and
dio_w_*() wrapper functions. Together, these allow developers to choose
between the following two sets of routines for Direct IO code paths:

a) pin_user_pages_fast()
   pin_user_page()
   unpin_user_page()

b) get_user_pages_fast()
   get_page()
   put_page()

CONFIG_BLK_USE_PIN_USER_PAGES_FOR_DIO is a temporary setting, and will
be deleted once the conversion is complete. In the meantime, developers
can enable this in order to try out each filesystem.

More information: The Direct IO part of the block infrastructure is
being changed to use pin_user_page*() and unpin_user_page*() calls, in
place of a mix of get_user_pages_fast(), get_page(), and put_page().
These have to be changed over all at the same time, for block, bio, and
all filesystems.

While that changeover is in progress (and disabled by default via this
new CONFIG option), kernel developers need a way to test their changes.
The steps are:

a) Enable CONFIG_BLK_USE_PIN_USER_PAGES_FOR_DIO

b) Monitor these /proc/vmstat items:

nr_foll_pin_acquired
nr_foll_pin_released

...to ensure that they remain equal, when "at rest".

Signed-off-by: John Hubbard <jhubbard@nvidia.com>
---
 block/Kconfig        | 25 +++++++++++++++++++++++++
 include/linux/bvec.h | 11 +++++++++++
 2 files changed, 36 insertions(+)

diff --git a/block/Kconfig b/block/Kconfig
index 168b873eb666..f6ca5e9597e4 100644
--- a/block/Kconfig
+++ b/block/Kconfig
@@ -50,6 +50,31 @@ config BLK_DEV_BSG_COMMON
 config BLK_ICQ
 	bool
 
+config BLK_USE_PIN_USER_PAGES_FOR_DIO
+	bool "DEVELOPERS ONLY: Enable pin_user_pages() for Direct IO" if EXPERT
+	default n
+	help
+	  For Direct IO code, retain the pages via calls to
+	  pin_user_pages_fast(), instead of via get_user_pages_fast().
+	  Likewise, use pin_user_page() instead of get_page(). And then
+	  release such pages via unpin_user_page(), instead of
+	  put_page().
+
+	  This is a temporary setting, which will be deleted once the
+	  conversion is completed, reviewed, and tested. In the meantime,
+	  developers can enable this in order to try out each filesystem.
+	  For that, it's best to monitor these /proc/vmstat items:
+
+		nr_foll_pin_acquired
+		nr_foll_pin_released
+
+	  ...to ensure that they remain equal, when "at rest".
+
+	  Say yes here ONLY if you are actively developing or testing the
+	  block layer or filesystems with pin_user_pages_fast().
+	  Otherwise, this is just a way to throw off the refcounting of
+	  pages in the system.
+
 config BLK_DEV_BSGLIB
 	bool "Block layer SG support v4 helper lib"
 	select BLK_DEV_BSG_COMMON
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index 35c25dff651a..a96a68c687f6 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -241,4 +241,15 @@ static inline void *bvec_virt(struct bio_vec *bvec)
 	return page_address(bvec->bv_page) + bvec->bv_offset;
 }
 
+#ifdef CONFIG_BLK_USE_PIN_USER_PAGES_FOR_DIO
+#define dio_w_pin_user_pages_fast(s, n, p, f)	pin_user_pages_fast(s, n, p, f)
+#define dio_w_pin_user_page(p)			pin_user_page(p)
+#define dio_w_unpin_user_page(p)		unpin_user_page(p)
+
+#else
+#define dio_w_pin_user_pages_fast(s, n, p, f)	get_user_pages_fast(s, n, p, f)
+#define dio_w_pin_user_page(p)			get_page(p)
+#define dio_w_unpin_user_page(p)		put_page(p)
+#endif
+
 #endif /* __LINUX_BVEC_H */
-- 
2.35.1



  parent reply	other threads:[~2022-02-25  8:50 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-02-25  8:50 [RFC PATCH 0/7] block, fs: convert Direct IO to FOLL_PIN John Hubbard
2022-02-25  8:50 ` [RFC PATCH 1/7] mm/gup: introduce pin_user_page() John Hubbard
2022-02-28 13:27   ` David Hildenbrand
2022-02-28 21:14     ` John Hubbard
2022-03-01  8:11       ` David Hildenbrand
2022-03-01  8:40         ` John Hubbard
2022-03-01  9:30           ` David Hildenbrand
2022-02-25  8:50 ` John Hubbard [this message]
2022-02-25  8:50 ` [RFC PATCH 3/7] block, fs: assert that key paths use iovecs, and nothing else John Hubbard
2022-02-25  8:50 ` [RFC PATCH 4/7] block, bio, fs: initial pin_user_pages_fast() changes John Hubbard
2022-02-25  8:50 ` [RFC PATCH 5/7] NFS: direct-io: convert to FOLL_PIN pages John Hubbard
2022-02-25  8:50 ` [RFC PATCH 6/7] fuse: convert direct IO paths to use FOLL_PIN John Hubbard
2022-02-25  8:50 ` [RFC PATCH 7/7] block, direct-io: flip the switch: use pin_user_pages_fast() John Hubbard
2022-02-25 12:05 ` [RFC PATCH 0/7] block, fs: convert Direct IO to FOLL_PIN Jan Kara
2022-02-25 16:14   ` Chaitanya Kulkarni
2022-02-25 16:40     ` Jan Kara
2022-02-25 19:36   ` John Hubbard
2022-02-25 22:20     ` John Hubbard
2022-02-25 13:12 ` David Hildenbrand
2022-02-25 21:10   ` John Hubbard

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220225085025.3052894-3-jhubbard@nvidia.com \
    --to=jhubbard@nvidia.com \
    --cc=akpm@linux-foundation.org \
    --cc=axboe@kernel.dk \
    --cc=dchinner@redhat.com \
    --cc=djwong@kernel.org \
    --cc=hch@infradead.org \
    --cc=jack@suse.cz \
    --cc=kch@nvidia.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=miklos@szeredi.hu \
    --cc=tytso@mit.edu \
    --cc=viro@zeniv.linux.org.uk \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox