From: Nadav Amit <nadav.amit@gmail.com>
To: linux-mm@kvack.org
Cc: Andrew Morton <akpm@linux-foundation.org>,
Nadav Amit <namit@vmware.com>,
Mike Kravetz <mike.kravetz@oracle.com>,
Hugh Dickins <hughd@google.com>,
Axel Rasmussen <axelrasmussen@google.com>,
Peter Xu <peterx@redhat.com>,
David Hildenbrand <david@redhat.com>,
Mike Rapoport <rppt@linux.ibm.com>
Subject: [PATCH v2 3/5] userfaultfd: introduce write-likely mode for uffd operations
Date: Mon, 18 Jul 2022 04:47:46 -0700 [thread overview]
Message-ID: <20220718114748.2623-4-namit@vmware.com> (raw)
In-Reply-To: <20220718114748.2623-1-namit@vmware.com>
From: Nadav Amit <namit@vmware.com>
Introduce write-likely hints for uffd operations. A later patch will use
these hints to decide whether to attempt to map pages in the page table
or only to mark them logically as writable. This lets userspace choose,
per operation, between making the next access to the page faster and
keeping the page cheap to remove later, potentially without writeback
or a TLB flush.
Cc: Mike Kravetz <mike.kravetz@oracle.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Axel Rasmussen <axelrasmussen@google.com>
Cc: Peter Xu <peterx@redhat.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Mike Rapoport <rppt@linux.ibm.com>
Signed-off-by: Nadav Amit <namit@vmware.com>
---
fs/userfaultfd.c | 32 ++++++++++++++++++++++++--------
include/linux/userfaultfd_k.h | 1 +
include/uapi/linux/userfaultfd.h | 13 ++++++++++++-
3 files changed, 37 insertions(+), 9 deletions(-)
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 8d8792b27c53..3027d228550a 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -1709,7 +1709,8 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
if (uffdio_copy.src + uffdio_copy.len <= uffdio_copy.src)
goto out;
if (uffdio_copy.mode & ~(UFFDIO_COPY_MODE_DONTWAKE|UFFDIO_COPY_MODE_WP|
- UFFDIO_COPY_MODE_ACCESS_LIKELY))
+ UFFDIO_COPY_MODE_ACCESS_LIKELY|
+ UFFDIO_COPY_MODE_WRITE_LIKELY))
goto out;
mode_wp = uffdio_copy.mode & UFFDIO_COPY_MODE_WP;
@@ -1719,8 +1720,11 @@ static int userfaultfd_copy(struct userfaultfd_ctx *ctx,
if (ctx->features & UFFD_FEATURE_ACCESS_HINTS) {
if (uffdio_copy.mode & UFFDIO_COPY_MODE_ACCESS_LIKELY)
uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY;
+ if (uffdio_copy.mode & UFFDIO_COPY_MODE_WRITE_LIKELY)
+ uffd_flags |= UFFD_FLAGS_WRITE_LIKELY;
} else {
- uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY;
+ uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY |
+ UFFD_FLAGS_WRITE_LIKELY;
}
if (mmget_not_zero(ctx->mm)) {
@@ -1774,14 +1778,18 @@ static int userfaultfd_zeropage(struct userfaultfd_ctx *ctx,
goto out;
ret = -EINVAL;
if (uffdio_zeropage.mode & ~(UFFDIO_ZEROPAGE_MODE_DONTWAKE|
- UFFDIO_ZEROPAGE_MODE_ACCESS_LIKELY))
+ UFFDIO_ZEROPAGE_MODE_ACCESS_LIKELY|
+ UFFDIO_ZEROPAGE_MODE_WRITE_LIKELY))
goto out;
if (ctx->features & UFFD_FEATURE_ACCESS_HINTS) {
if (uffdio_zeropage.mode & UFFDIO_ZEROPAGE_MODE_ACCESS_LIKELY)
uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY;
+ if (uffdio_zeropage.mode & UFFDIO_ZEROPAGE_MODE_WRITE_LIKELY)
+ uffd_flags |= UFFD_FLAGS_WRITE_LIKELY;
} else {
- uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY;
+ uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY |
+ UFFD_FLAGS_WRITE_LIKELY;
}
if (mmget_not_zero(ctx->mm)) {
@@ -1834,7 +1842,8 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx,
if (uffdio_wp.mode & ~(UFFDIO_WRITEPROTECT_MODE_DONTWAKE |
UFFDIO_WRITEPROTECT_MODE_WP |
- UFFDIO_WRITEPROTECT_MODE_ACCESS_LIKELY))
+ UFFDIO_WRITEPROTECT_MODE_ACCESS_LIKELY |
+ UFFDIO_WRITEPROTECT_MODE_WRITE_LIKELY))
return -EINVAL;
mode_wp = uffdio_wp.mode & UFFDIO_WRITEPROTECT_MODE_WP;
@@ -1847,8 +1856,11 @@ static int userfaultfd_writeprotect(struct userfaultfd_ctx *ctx,
if (ctx->features & UFFD_FEATURE_ACCESS_HINTS) {
if (uffdio_wp.mode & UFFDIO_WRITEPROTECT_MODE_ACCESS_LIKELY)
uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY;
+ if (uffdio_wp.mode & UFFDIO_WRITEPROTECT_MODE_WRITE_LIKELY)
+ uffd_flags |= UFFD_FLAGS_WRITE_LIKELY;
} else {
- uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY;
+ uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY |
+ UFFD_FLAGS_WRITE_LIKELY;
}
if (mmget_not_zero(ctx->mm)) {
@@ -1903,14 +1915,18 @@ static int userfaultfd_continue(struct userfaultfd_ctx *ctx, unsigned long arg)
goto out;
}
if (uffdio_continue.mode & ~(UFFDIO_CONTINUE_MODE_DONTWAKE|
- UFFDIO_CONTINUE_MODE_ACCESS_LIKELY))
+ UFFDIO_CONTINUE_MODE_ACCESS_LIKELY|
+ UFFDIO_CONTINUE_MODE_WRITE_LIKELY))
goto out;
if (ctx->features & UFFD_FEATURE_ACCESS_HINTS) {
if (uffdio_continue.mode & UFFDIO_CONTINUE_MODE_ACCESS_LIKELY)
uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY;
+ if (uffdio_continue.mode & UFFDIO_CONTINUE_MODE_WRITE_LIKELY)
+ uffd_flags |= UFFD_FLAGS_WRITE_LIKELY;
} else {
- uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY;
+ uffd_flags |= UFFD_FLAGS_ACCESS_LIKELY |
+ UFFD_FLAGS_WRITE_LIKELY;
}
if (mmget_not_zero(ctx->mm)) {
diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h
index b326798b5677..4968c86938b2 100644
--- a/include/linux/userfaultfd_k.h
+++ b/include/linux/userfaultfd_k.h
@@ -60,6 +60,7 @@ typedef unsigned int __bitwise uffd_flags_t;
#define UFFD_FLAGS_NONE ((__force uffd_flags_t)0)
#define UFFD_FLAGS_WP ((__force uffd_flags_t)BIT(0))
#define UFFD_FLAGS_ACCESS_LIKELY ((__force uffd_flags_t)BIT(1))
+#define UFFD_FLAGS_WRITE_LIKELY ((__force uffd_flags_t)BIT(2))
extern int mfill_atomic_install_pte(struct mm_struct *dst_mm, pmd_t *dst_pmd,
struct vm_area_struct *dst_vma,
diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h
index 02e0c1f56939..f52cbe4c9c44 100644
--- a/include/uapi/linux/userfaultfd.h
+++ b/include/uapi/linux/userfaultfd.h
@@ -202,7 +202,7 @@ struct uffdio_api {
* write-protection mode is supported on both shmem and hugetlbfs.
*
* UFFD_FEATURE_ACCESS_HINTS indicates that the ioctl operations
- * support the UFFDIO_*_MODE_ACCESS_LIKELY hints.
+ * support the UFFDIO_*_MODE_[ACCESS|WRITE]_LIKELY hints.
*/
#define UFFD_FEATURE_PAGEFAULT_FLAG_WP (1<<0)
#define UFFD_FEATURE_EVENT_FORK (1<<1)
@@ -257,9 +257,13 @@ struct uffdio_copy {
* page is likely to be access in the near future. Providing the hint
* properly can improve performance.
*
+ * UFFDIO_COPY_MODE_WRITE_LIKELY provides a hint to the kernel that the
+ * page is likely to be written in the near future. Providing the hint
+ * properly can improve performance.
*/
#define UFFDIO_COPY_MODE_WP ((__u64)1<<1)
#define UFFDIO_COPY_MODE_ACCESS_LIKELY ((__u64)1<<2)
+#define UFFDIO_COPY_MODE_WRITE_LIKELY ((__u64)1<<3)
__u64 mode;
/*
@@ -273,6 +277,7 @@ struct uffdio_zeropage {
struct uffdio_range range;
#define UFFDIO_ZEROPAGE_MODE_DONTWAKE ((__u64)1<<0)
#define UFFDIO_ZEROPAGE_MODE_ACCESS_LIKELY ((__u64)1<<1)
+#define UFFDIO_ZEROPAGE_MODE_WRITE_LIKELY ((__u64)1<<2)
__u64 mode;
/*
@@ -296,6 +301,10 @@ struct uffdio_writeprotect {
* that the page is likely to be access in the near future. Providing
* the hint properly can improve performance.
*
+ * UFFDIO_WRITEPROTECT_MODE_WRITE_LIKELY: provides a hint to the kernel
+ * that the page is likely to be written in the near future. Providing
+ * the hint properly can improve performance.
+ *
* NOTE: Write protecting a region (WP=1) is unrelated to page faults,
* therefore DONTWAKE flag is meaningless with WP=1. Removing write
* protection (WP=0) in response to a page fault wakes the faulting
@@ -304,6 +313,7 @@ struct uffdio_writeprotect {
#define UFFDIO_WRITEPROTECT_MODE_WP ((__u64)1<<0)
#define UFFDIO_WRITEPROTECT_MODE_DONTWAKE ((__u64)1<<1)
#define UFFDIO_WRITEPROTECT_MODE_ACCESS_LIKELY ((__u64)1<<2)
+#define UFFDIO_WRITEPROTECT_MODE_WRITE_LIKELY ((__u64)1<<3)
__u64 mode;
};
@@ -311,6 +321,7 @@ struct uffdio_continue {
struct uffdio_range range;
#define UFFDIO_CONTINUE_MODE_DONTWAKE ((__u64)1<<0)
#define UFFDIO_CONTINUE_MODE_ACCESS_LIKELY ((__u64)1<<1)
+#define UFFDIO_CONTINUE_MODE_WRITE_LIKELY ((__u64)1<<2)
__u64 mode;
/*
--
2.25.1
next prev parent reply other threads:[~2022-07-18 19:22 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-07-18 11:47 [PATCH v2 0/5] userfaultfd: support access/write hints Nadav Amit
2022-07-18 11:47 ` [PATCH v2 2/5] userfaultfd: introduce access-likely mode for common operations Nadav Amit
2022-07-18 20:05 ` Peter Xu
2022-07-18 20:59 ` Nadav Amit
2022-07-18 21:21 ` Peter Xu
2022-07-23 9:16 ` Mike Rapoport
2022-07-25 17:18 ` Nadav Amit
2022-07-26 16:02 ` Mike Rapoport
2022-07-18 11:47 ` Nadav Amit [this message]
2022-07-18 20:12 ` [PATCH v2 3/5] userfaultfd: introduce write-likely mode for uffd operations Peter Xu
2022-07-18 20:25 ` Nadav Amit
2022-07-18 21:27 ` Peter Xu
2022-07-18 11:47 ` [PATCH v2 4/5] userfaultfd: zero access/write hints Nadav Amit
2022-07-22 7:47 ` David Hildenbrand
2022-07-18 11:47 ` [PATCH v2 5/5] selftest/userfaultfd: test read/write hints Nadav Amit
[not found] ` <20220718114748.2623-2-namit@vmware.com>
2022-07-18 20:05 ` [PATCH v2 1/5] userfaultfd: introduce uffd_flags Peter Xu
2022-07-22 7:54 ` David Hildenbrand
2022-07-22 18:47 ` Nadav Amit
2022-07-23 9:12 ` Mike Rapoport
2022-07-25 17:23 ` Nadav Amit
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220718114748.2623-4-namit@vmware.com \
--to=nadav.amit@gmail.com \
--cc=akpm@linux-foundation.org \
--cc=axelrasmussen@google.com \
--cc=david@redhat.com \
--cc=hughd@google.com \
--cc=linux-mm@kvack.org \
--cc=mike.kravetz@oracle.com \
--cc=namit@vmware.com \
--cc=peterx@redhat.com \
--cc=rppt@linux.ibm.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox