linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Nadav Amit <nadav.amit@gmail.com>
To: linux-mm@kvack.org
Cc: Andrew Morton <akpm@linux-foundation.org>,
	Nadav Amit <namit@vmware.com>,
	Mike Kravetz <mike.kravetz@oracle.com>,
	Hugh Dickins <hughd@google.com>,
	Axel Rasmussen <axelrasmussen@google.com>,
	Peter Xu <peterx@redhat.com>,
	David Hildenbrand <david@redhat.com>,
	Mike Rapoport <rppt@linux.ibm.com>
Subject: [PATCH v2 3/5] userfaultfd: introduce write-likely mode for uffd operations
Date: Mon, 18 Jul 2022 04:47:46 -0700	[thread overview]
Message-ID: <20220718114748.2623-4-namit@vmware.com> (raw)
In-Reply-To: <20220718114748.2623-1-namit@vmware.com>

From: Nadav Amit <namit@vmware.com>

Introduce write-likely hints for uffd. A future patch will use these
hints to decide whether to attempt to map pages in the page table or to
only mark them logically as writable. This lets userspace choose
between making a page faster to access and, potentially, allowing the
page to be removed later without writeback and a TLB flush.

Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Nadav Amit <namit@vmware.com>
---
 fs/userfaultfd.c                 | 32 ++++++++++++++++++++++++--------
 include/linux/userfaultfd_k.h    |  1 +
 include/uapi/linux/userfaultfd.h | 13 ++++++++++++-
 3 files changed, 37 insertions(+), 9 deletions(-)

diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 8d8792b27c53..3027d228550a 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1709,7 +1709,8 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
 	if (uffdio_copy.src + uffdio_copy.len <= uffdio_copy.src)
 		goto out;
 	if (uffdio_copy.mode & ~(UFFDIO_COPY_MODE_DONTWAKE|UFFDIO_COPY_MODE_WP|
-				 UFFDIO_COPY_MODE_ACCESS_LIKELY))
+				 UFFDIO_COPY_MODE_ACCESS_LIKELY|
+				 UFFDIO_COPY_MODE_WRITE_LIKELY))
 		goto out;
 
 	mode_wp = uffdio_copy.mode & UFFDIO_COPY_MODE_WP;
@@ -1719,8 +1720,11 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
 	if (ctx->features & UFFD_FEATURE_ACCESS_HINTS) {
 		if (uffdio_copy.mode & UFFDIO_COPY_MODE_ACCESS_LIKELY)
 			uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY;
+		if (uffdio_copy.mode & UFFDIO_COPY_MODE_WRITE_LIKELY)
+			uffd_flags |= UFFD_FLAGS_WRITE_LIKELY;
 	} else {
-		uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY;
+		uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY |
+			      UFFD_FLAGS_WRITE_LIKELY;
 	}
 
 	if (mmget_not_zero(ctx->mm)) {
@@ -1774,14 +1778,18 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
 		goto out;
 	ret = -EINVAL;
 	if (uffdio_zeropage.mode & ~(UFFDIO_ZEROPAGE_MODE_DONTWAKE|
-				     UFFDIO_ZEROPAGE_MODE_ACCESS_LIKELY))
+				     UFFDIO_ZEROPAGE_MODE_ACCESS_LIKELY|
+				     UFFDIO_ZEROPAGE_MODE_WRITE_LIKELY))
 		goto out;
 
 	if (ctx->features & UFFD_FEATURE_ACCESS_HINTS) {
 		if (uffdio_zeropage.mode & UFFDIO_ZEROPAGE_MODE_ACCESS_LIKELY)
 			uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY;
+		if (uffdio_zeropage.mode & UFFDIO_ZEROPAGE_MODE_WRITE_LIKELY)
+			uffd_flags |= UFFD_FLAGS_WRITE_LIKELY;
 	} else {
-		uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY;
+		uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY |
+			      UFFD_FLAGS_WRITE_LIKELY;
 	}
 
 	if (mmget_not_zero(ctx->mm)) {
@@ -1834,7 +1842,8 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx,
 
 	if (uffdio_wp.mode & ~(UFFDIO_WRITEPROTECT_MODE_DONTWAKE |
 			       UFFDIO_WRITEPROTECT_MODE_WP |
-			       UFFDIO_WRITEPROTECT_MODE_ACCESS_LIKELY))
+			       UFFDIO_WRITEPROTECT_MODE_ACCESS_LIKELY |
+			       UFFDIO_WRITEPROTECT_MODE_WRITE_LIKELY))
 		return -EINVAL;
 
 	mode_wp = uffdio_wp.mode & UFFDIO_WRITEPROTECT_MODE_WP;
@@ -1847,8 +1856,11 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx,
 	if (ctx->features & UFFD_FEATURE_ACCESS_HINTS) {
 		if (uffdio_wp.mode & UFFDIO_WRITEPROTECT_MODE_ACCESS_LIKELY)
 			uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY;
+		if (uffdio_wp.mode & UFFDIO_WRITEPROTECT_MODE_WRITE_LIKELY)
+			uffd_flags |= UFFD_FLAGS_WRITE_LIKELY;
 	} else {
-		uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY;
+		uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY |
+			      UFFD_FLAGS_WRITE_LIKELY;
 	}
 
 	if (mmget_not_zero(ctx->mm)) {
@@ -1903,14 +1915,18 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg)
 		goto out;
 	}
 	if (uffdio_continue.mode & ~(UFFDIO_CONTINUE_MODE_DONTWAKE|
-				     UFFDIO_CONTINUE_MODE_ACCESS_LIKELY))
+				     UFFDIO_CONTINUE_MODE_ACCESS_LIKELY|
+				     UFFDIO_CONTINUE_MODE_WRITE_LIKELY))
 		goto out;
 
 	if (ctx->features & UFFD_FEATURE_ACCESS_HINTS) {
 		if (uffdio_continue.mode & UFFDIO_CONTINUE_MODE_ACCESS_LIKELY)
 			uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY;
+		if (uffdio_continue.mode & UFFDIO_CONTINUE_MODE_WRITE_LIKELY)
+			uffd_flags |= UFFD_FLAGS_WRITE_LIKELY;
 	} else {
-		uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY;
+		uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY |
+			      UFFD_FLAGS_WRITE_LIKELY;
 	}
 
 	if (mmget_not_zero(ctx->mm)) {
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index b326798b5677..4968c86938b2 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -60,6 +60,7 @@ typedef unsigned int __bitwise uffd_flags_t;
 #define UFFD_FLAGS_NONE			((__force uffd_flags_t)0)
 #define UFFD_FLAGS_WP			((__force uffd_flags_t)BIT(0))
 #define UFFD_FLAGS_ACCESS_LIKELY	((__force uffd_flags_t)BIT(1))
+#define UFFD_FLAGS_WRITE_LIKELY		((__force uffd_flags_t)BIT(2))
 
 extern int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
 				    struct vm_area_struct *dst_vma,
diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index 02e0c1f56939..f52cbe4c9c44 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -202,7 +202,7 @@ struct uffdio_api {
 	 * write-protection mode is supported on both shmem and hugetlbfs.
 	 *
 	 * UFFD_FEATURE_ACCESS_HINTS indicates that the ioctl operations
-	 * support the UFFDIO_*_MODE_ACCESS_LIKELY hints.
+	 * support the UFFDIO_*_MODE_[ACCESS|WRITE]_LIKELY hints.
 	 */
 #define UFFD_FEATURE_PAGEFAULT_FLAG_WP		(1<<0)
 #define UFFD_FEATURE_EVENT_FORK			(1<<1)
@@ -257,9 +257,13 @@ struct uffdio_copy {
 	 * page is likely to be access in the near future. Providing the hint
 	 * properly can improve performance.
 	 *
+	 * UFFDIO_COPY_MODE_WRITE_LIKELY provides a hint to the kernel that the
+	 * page is likely to be written in the near future. Providing the hint
+	 * properly can improve performance.
 	 */
 #define UFFDIO_COPY_MODE_WP			((__u64)1<<1)
 #define UFFDIO_COPY_MODE_ACCESS_LIKELY		((__u64)1<<2)
+#define UFFDIO_COPY_MODE_WRITE_LIKELY		((__u64)1<<3)
 	__u64 mode;
 
 	/*
@@ -273,6 +277,7 @@ struct uffdio_zeropage {
 	struct uffdio_range range;
 #define UFFDIO_ZEROPAGE_MODE_DONTWAKE		((__u64)1<<0)
 #define UFFDIO_ZEROPAGE_MODE_ACCESS_LIKELY	((__u64)1<<1)
+#define UFFDIO_ZEROPAGE_MODE_WRITE_LIKELY	((__u64)1<<2)
 	__u64 mode;
 
 	/*
@@ -296,6 +301,10 @@ struct uffdio_writeprotect {
  * that the page is likely to be access in the near future. Providing
  * the hint properly can improve performance.
  *
+ * UFFDIO_WRITEPROTECT_MODE_WRITE_LIKELY: provides a hint to the kernel
+ * that the page is likely to be written in the near future. Providing
+ * the hint properly can improve performance.
+ *
  * NOTE: Write protecting a region (WP=1) is unrelated to page faults,
  * therefore DONTWAKE flag is meaningless with WP=1.  Removing write
  * protection (WP=0) in response to a page fault wakes the faulting
@@ -304,6 +313,7 @@ struct uffdio_writeprotect {
 #define UFFDIO_WRITEPROTECT_MODE_WP		((__u64)1<<0)
 #define UFFDIO_WRITEPROTECT_MODE_DONTWAKE	((__u64)1<<1)
 #define UFFDIO_WRITEPROTECT_MODE_ACCESS_LIKELY	((__u64)1<<2)
+#define UFFDIO_WRITEPROTECT_MODE_WRITE_LIKELY	((__u64)1<<3)
 	__u64 mode;
 };
 
@@ -311,6 +321,7 @@ struct uffdio_continue {
 	struct uffdio_range range;
 #define UFFDIO_CONTINUE_MODE_DONTWAKE		((__u64)1<<0)
 #define UFFDIO_CONTINUE_MODE_ACCESS_LIKELY	((__u64)1<<1)
+#define UFFDIO_CONTINUE_MODE_WRITE_LIKELY	((__u64)1<<2)
 	__u64 mode;
 
 	/*
-- 
2.25.1



  parent reply	other threads:[~2022-07-18 19:22 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-07-18 11:47 [PATCH v2 0/5] userfaultfd: support access/write hints Nadav Amit
2022-07-18 11:47 ` [PATCH v2 2/5] userfaultfd: introduce access-likely mode for common operations Nadav Amit
2022-07-18 20:05   ` Peter Xu
2022-07-18 20:59     ` Nadav Amit
2022-07-18 21:21       ` Peter Xu
2022-07-23  9:16   ` Mike Rapoport
2022-07-25 17:18     ` Nadav Amit
2022-07-26 16:02       ` Mike Rapoport
2022-07-18 11:47 ` Nadav Amit [this message]
2022-07-18 20:12   ` [PATCH v2 3/5] userfaultfd: introduce write-likely mode for uffd operations Peter Xu
2022-07-18 20:25     ` Nadav Amit
2022-07-18 21:27       ` Peter Xu
2022-07-18 11:47 ` [PATCH v2 4/5] userfaultfd: zero access/write hints Nadav Amit
2022-07-22  7:47   ` David Hildenbrand
2022-07-18 11:47 ` [PATCH v2 5/5] selftest/userfaultfd: test read/write hints Nadav Amit
     [not found] ` <20220718114748.2623-2-namit@vmware.com>
2022-07-18 20:05   ` [PATCH v2 1/5] userfaultfd: introduce uffd_flags Peter Xu
2022-07-22  7:54   ` David Hildenbrand
2022-07-22 18:47     ` Nadav Amit
2022-07-23  9:12   ` Mike Rapoport
2022-07-25 17:23     ` Nadav Amit

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220718114748.2623-4-namit@vmware.com \
    --to=nadav.amit@gmail.com \
    --cc=akpm@linux-foundation.org \
    --cc=axelrasmussen@google.com \
    --cc=david@redhat.com \
    --cc=hughd@google.com \
    --cc=linux-mm@kvack.org \
    --cc=mike.kravetz@oracle.com \
    --cc=namit@vmware.com \
    --cc=peterx@redhat.com \
    --cc=rppt@linux.ibm.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox