From: James Houghton <jthoughton@google.com>
To: Mike Kravetz <mike.kravetz@oracle.com>,
Muchun Song <songmuchun@bytedance.com>,
Peter Xu <peterx@redhat.com>
Cc: David Hildenbrand <david@redhat.com>,
David Rientjes <rientjes@google.com>,
Axel Rasmussen <axelrasmussen@google.com>,
Mina Almasry <almasrymina@google.com>,
"Zach O'Keefe" <zokeefe@google.com>,
Manish Mishra <manish.mishra@nutanix.com>,
Naoya Horiguchi <naoya.horiguchi@nec.com>,
"Dr . David Alan Gilbert" <dgilbert@redhat.com>,
"Matthew Wilcox (Oracle)" <willy@infradead.org>,
Vlastimil Babka <vbabka@suse.cz>,
Baolin Wang <baolin.wang@linux.alibaba.com>,
Miaohe Lin <linmiaohe@huawei.com>,
Yang Shi <shy828301@gmail.com>,
Andrew Morton <akpm@linux-foundation.org>,
linux-mm@kvack.org, linux-kernel@vger.kernel.org,
James Houghton <jthoughton@google.com>
Subject: [PATCH 09/46] mm: add MADV_SPLIT to enable HugeTLB HGM
Date: Thu, 5 Jan 2023 10:18:07 +0000 [thread overview]
Message-ID: <20230105101844.1893104-10-jthoughton@google.com> (raw)
In-Reply-To: <20230105101844.1893104-1-jthoughton@google.com>
Issuing madvise(MADV_SPLIT) on a HugeTLB address range will enable
HugeTLB high-granularity mapping (HGM). The name MADV_SPLIT was chosen
so that this API can be applied to non-HugeTLB memory in the future,
should such a use case arise.
MADV_SPLIT changes the behavior of the following APIs on HugeTLB
address ranges:
1. UFFDIO_CONTINUE is allowed for MAP_SHARED VMAs at PAGE_SIZE
alignment.
2. read()ing a page fault event from a userfaultfd will yield a
PAGE_SIZE-rounded address, instead of a huge-page-size-rounded
address (unless UFFD_FEATURE_EXACT_ADDRESS is used).
Once MADV_SPLIT has been issued, there is no way to disable the API
changes that come with it. MADV_COLLAPSE can be used to collapse the
high-granularity page table mappings created through the extended
functionality that MADV_SPLIT enables.
For post-copy live migration, the expected use-case is:
1. mmap(MAP_SHARED, some_fd) primary mapping
2. mmap(MAP_SHARED, some_fd) alias mapping
3. MADV_SPLIT the primary mapping
4. UFFDIO_REGISTER/etc. the primary mapping
5. Copy memory contents into alias mapping and UFFDIO_CONTINUE the
corresponding PAGE_SIZE sections in the primary mapping.
More API changes may be added in the future.
Signed-off-by: James Houghton <jthoughton@google.com>
---
arch/alpha/include/uapi/asm/mman.h | 2 ++
arch/mips/include/uapi/asm/mman.h | 2 ++
arch/parisc/include/uapi/asm/mman.h | 2 ++
arch/xtensa/include/uapi/asm/mman.h | 2 ++
include/linux/hugetlb.h | 2 ++
include/uapi/asm-generic/mman-common.h | 2 ++
mm/hugetlb.c | 3 +--
mm/madvise.c | 26 ++++++++++++++++++++++++++
8 files changed, 39 insertions(+), 2 deletions(-)
diff --git a/arch/alpha/include/uapi/asm/mman.h b/arch/alpha/include/uapi/asm/mman.h
index 763929e814e9..7a26f3648b90 100644
--- a/arch/alpha/include/uapi/asm/mman.h
+++ b/arch/alpha/include/uapi/asm/mman.h
@@ -78,6 +78,8 @@
#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */
+#define MADV_SPLIT 26 /* Enable hugepage high-granularity APIs */
+
/* compatibility flags */
#define MAP_FILE 0
diff --git a/arch/mips/include/uapi/asm/mman.h b/arch/mips/include/uapi/asm/mman.h
index c6e1fc77c996..f8a74a3a0928 100644
--- a/arch/mips/include/uapi/asm/mman.h
+++ b/arch/mips/include/uapi/asm/mman.h
@@ -105,6 +105,8 @@
#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */
+#define MADV_SPLIT 26 /* Enable hugepage high-granularity APIs */
+
/* compatibility flags */
#define MAP_FILE 0
diff --git a/arch/parisc/include/uapi/asm/mman.h b/arch/parisc/include/uapi/asm/mman.h
index 68c44f99bc93..a6dc6a56c941 100644
--- a/arch/parisc/include/uapi/asm/mman.h
+++ b/arch/parisc/include/uapi/asm/mman.h
@@ -72,6 +72,8 @@
#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */
+#define MADV_SPLIT 74 /* Enable hugepage high-granularity APIs */
+
#define MADV_HWPOISON 100 /* poison a page for testing */
#define MADV_SOFT_OFFLINE 101 /* soft offline page for testing */
diff --git a/arch/xtensa/include/uapi/asm/mman.h b/arch/xtensa/include/uapi/asm/mman.h
index 1ff0c858544f..f98a77c430a9 100644
--- a/arch/xtensa/include/uapi/asm/mman.h
+++ b/arch/xtensa/include/uapi/asm/mman.h
@@ -113,6 +113,8 @@
#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */
+#define MADV_SPLIT 26 /* Enable hugepage high-granularity APIs */
+
/* compatibility flags */
#define MAP_FILE 0
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 8713d9c4f86c..16fc3e381801 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -109,6 +109,8 @@ struct hugetlb_vma_lock {
struct vm_area_struct *vma;
};
+void hugetlb_vma_lock_alloc(struct vm_area_struct *vma);
+
extern struct resv_map *resv_map_alloc(void);
void resv_map_release(struct kref *ref);
diff --git a/include/uapi/asm-generic/mman-common.h b/include/uapi/asm-generic/mman-common.h
index 6ce1f1ceb432..996e8ded092f 100644
--- a/include/uapi/asm-generic/mman-common.h
+++ b/include/uapi/asm-generic/mman-common.h
@@ -79,6 +79,8 @@
#define MADV_COLLAPSE 25 /* Synchronous hugepage collapse */
+#define MADV_SPLIT 26 /* Enable hugepage high-granularity APIs */
+
/* compatibility flags */
#define MAP_FILE 0
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index d27fe05d5ef6..5bd53ae8ca4b 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -92,7 +92,6 @@ struct mutex *hugetlb_fault_mutex_table ____cacheline_aligned_in_smp;
/* Forward declaration */
static int hugetlb_acct_memory(struct hstate *h, long delta);
static void hugetlb_vma_lock_free(struct vm_area_struct *vma);
-static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma);
static void __hugetlb_vma_unlock_write_free(struct vm_area_struct *vma);
static inline bool subpool_is_free(struct hugepage_subpool *spool)
@@ -361,7 +360,7 @@ static void hugetlb_vma_lock_free(struct vm_area_struct *vma)
}
}
-static void hugetlb_vma_lock_alloc(struct vm_area_struct *vma)
+void hugetlb_vma_lock_alloc(struct vm_area_struct *vma)
{
struct hugetlb_vma_lock *vma_lock;
diff --git a/mm/madvise.c b/mm/madvise.c
index 025be3517af1..04ee28992e52 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -1011,6 +1011,24 @@ static long madvise_remove(struct vm_area_struct *vma,
return error;
}
+static int madvise_split(struct vm_area_struct *vma,
+ unsigned long *new_flags)
+{
+ if (!is_vm_hugetlb_page(vma) || !hugetlb_hgm_eligible(vma))
+ return -EINVAL;
+ /*
+ * Attempt to allocate the VMA lock again. If it isn't allocated,
+ * MADV_COLLAPSE won't work.
+ */
+ hugetlb_vma_lock_alloc(vma);
+
+ /* PMD sharing doesn't work with HGM. */
+ hugetlb_unshare_all_pmds(vma);
+
+ *new_flags |= VM_HUGETLB_HGM;
+ return 0;
+}
+
/*
* Apply an madvise behavior to a region of a vma. madvise_update_vma
* will handle splitting a vm area into separate areas, each area with its own
@@ -1089,6 +1107,11 @@ static int madvise_vma_behavior(struct vm_area_struct *vma,
break;
case MADV_COLLAPSE:
return madvise_collapse(vma, prev, start, end);
+ case MADV_SPLIT:
+ error = madvise_split(vma, &new_flags);
+ if (error)
+ goto out;
+ break;
}
anon_name = anon_vma_name(vma);
@@ -1183,6 +1206,9 @@ madvise_behavior_valid(int behavior)
case MADV_HUGEPAGE:
case MADV_NOHUGEPAGE:
case MADV_COLLAPSE:
+#endif
+#ifdef CONFIG_HUGETLB_HIGH_GRANULARITY_MAPPING
+ case MADV_SPLIT:
#endif
case MADV_DONTDUMP:
case MADV_DODUMP:
--
2.39.0.314.g84b9a713c41-goog
next prev parent reply other threads:[~2023-01-05 10:19 UTC|newest]
Thread overview: 126+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-01-05 10:17 [PATCH 00/46] Based on latest mm-unstable (85b44c25cd1e) James Houghton
2023-01-05 10:17 ` [PATCH 01/46] hugetlb: don't set PageUptodate for UFFDIO_CONTINUE James Houghton
2023-01-05 10:18 ` [PATCH 02/46] hugetlb: remove mk_huge_pte; it is unused James Houghton
2023-01-05 10:18 ` [PATCH 03/46] hugetlb: remove redundant pte_mkhuge in migration path James Houghton
2023-01-05 10:18 ` [PATCH 04/46] hugetlb: only adjust address ranges when VMAs want PMD sharing James Houghton
2023-01-05 10:18 ` [PATCH 05/46] hugetlb: add CONFIG_HUGETLB_HIGH_GRANULARITY_MAPPING James Houghton
2023-01-05 10:18 ` [PATCH 06/46] mm: add VM_HUGETLB_HGM VMA flag James Houghton
2023-01-05 10:18 ` [PATCH 07/46] hugetlb: rename __vma_shareable_flags_pmd to __vma_has_hugetlb_vma_lock James Houghton
2023-01-05 10:18 ` [PATCH 08/46] hugetlb: add HugeTLB HGM enablement helpers James Houghton
2023-01-05 10:18 ` James Houghton [this message]
2023-01-05 15:05 ` [PATCH 09/46] mm: add MADV_SPLIT to enable HugeTLB HGM kernel test robot
2023-01-05 15:29 ` David Hildenbrand
2023-01-10 0:01 ` Zach O'Keefe
2023-01-05 10:18 ` [PATCH 10/46] hugetlb: make huge_pte_lockptr take an explicit shift argument James Houghton
2023-01-05 10:18 ` [PATCH 11/46] hugetlb: add hugetlb_pte to track HugeTLB page table entries James Houghton
2023-01-05 16:06 ` kernel test robot
2023-01-05 10:18 ` [PATCH 12/46] hugetlb: add hugetlb_alloc_pmd and hugetlb_alloc_pte James Houghton
2023-01-05 10:18 ` [PATCH 13/46] hugetlb: add hugetlb_hgm_walk and hugetlb_walk_step James Houghton
2023-01-05 16:57 ` kernel test robot
2023-01-05 18:58 ` kernel test robot
2023-01-11 21:51 ` Peter Xu
2023-01-12 13:38 ` James Houghton
2023-01-05 10:18 ` [PATCH 14/46] hugetlb: add make_huge_pte_with_shift James Houghton
2023-01-05 10:18 ` [PATCH 15/46] hugetlb: make default arch_make_huge_pte understand small mappings James Houghton
2023-01-05 10:18 ` [PATCH 16/46] hugetlbfs: do a full walk to check if vma maps a page James Houghton
2023-01-05 10:18 ` [PATCH 17/46] hugetlb: make unmapping compatible with high-granularity mappings James Houghton
2023-01-05 10:18 ` [PATCH 18/46] hugetlb: add HGM support for hugetlb_change_protection James Houghton
2023-01-05 10:18 ` [PATCH 19/46] hugetlb: add HGM support for follow_hugetlb_page James Houghton
2023-01-05 22:26 ` Peter Xu
2023-01-12 18:02 ` Peter Xu
2023-01-12 18:06 ` James Houghton
2023-01-05 10:18 ` [PATCH 20/46] hugetlb: add HGM support for hugetlb_follow_page_mask James Houghton
2023-01-05 10:18 ` [PATCH 21/46] hugetlb: use struct hugetlb_pte for walk_hugetlb_range James Houghton
2023-01-05 22:42 ` Peter Xu
2023-01-11 22:58 ` Peter Xu
2023-01-12 14:06 ` James Houghton
2023-01-12 15:29 ` Peter Xu
2023-01-12 16:45 ` James Houghton
2023-01-12 16:55 ` James Houghton
2023-01-12 20:27 ` Peter Xu
2023-01-12 21:17 ` James Houghton
2023-01-12 21:33 ` Peter Xu
2023-01-16 10:17 ` David Hildenbrand
2023-01-17 23:11 ` James Houghton
2023-01-18 9:43 ` David Hildenbrand
2023-01-18 15:35 ` Peter Xu
2023-01-18 16:39 ` James Houghton
2023-01-18 18:21 ` David Hildenbrand
2023-01-18 19:28 ` Mike Kravetz
2023-01-19 16:57 ` James Houghton
2023-01-19 17:31 ` Mike Kravetz
2023-01-19 19:42 ` James Houghton
2023-01-19 20:53 ` Peter Xu
2023-01-19 22:45 ` James Houghton
2023-01-19 22:00 ` Mike Kravetz
2023-01-19 22:23 ` Peter Xu
2023-01-19 22:35 ` James Houghton
2023-01-19 23:07 ` Peter Xu
2023-01-19 23:26 ` James Houghton
2023-01-20 17:23 ` Peter Xu
2023-01-19 23:44 ` Mike Kravetz
2023-01-23 15:19 ` Peter Xu
2023-01-23 17:49 ` Mike Kravetz
2023-01-26 16:58 ` James Houghton
2023-01-26 20:30 ` Peter Xu
2023-01-27 21:02 ` James Houghton
2023-01-30 17:29 ` Peter Xu
2023-01-30 18:38 ` James Houghton
2023-01-30 21:14 ` Peter Xu
2023-02-01 0:24 ` James Houghton
2023-02-01 1:24 ` Peter Xu
2023-02-01 15:45 ` James Houghton
2023-02-01 15:56 ` David Hildenbrand
2023-02-01 17:58 ` James Houghton
2023-02-01 18:01 ` David Hildenbrand
2023-02-01 16:22 ` Peter Xu
2023-02-01 21:32 ` James Houghton
2023-02-01 21:51 ` Peter Xu
2023-02-02 0:24 ` James Houghton
2023-02-07 16:30 ` James Houghton
2023-02-07 22:46 ` James Houghton
2023-02-07 23:13 ` Peter Xu
2023-02-08 0:26 ` James Houghton
2023-02-08 16:16 ` Peter Xu
2023-02-09 16:43 ` James Houghton
2023-02-09 19:10 ` Peter Xu
2023-02-09 19:49 ` James Houghton
2023-02-09 20:22 ` Peter Xu
2023-01-18 17:08 ` David Hildenbrand
2023-01-05 10:18 ` [PATCH 22/46] mm: rmap: provide pte_order in page_vma_mapped_walk James Houghton
2023-01-05 10:18 ` [PATCH 23/46] mm: rmap: make page_vma_mapped_walk callers use pte_order James Houghton
2023-01-05 10:18 ` [PATCH 24/46] rmap: update hugetlb lock comment for HGM James Houghton
2023-01-05 10:18 ` [PATCH 25/46] hugetlb: update page_vma_mapped to do high-granularity walks James Houghton
2023-01-05 10:18 ` [PATCH 26/46] hugetlb: add HGM support for copy_hugetlb_page_range James Houghton
2023-01-05 10:18 ` [PATCH 27/46] hugetlb: add HGM support for move_hugetlb_page_tables James Houghton
2023-01-05 10:18 ` [PATCH 28/46] hugetlb: add HGM support for hugetlb_fault and hugetlb_no_page James Houghton
2023-01-05 10:18 ` [PATCH 29/46] rmap: in try_to_{migrate,unmap}_one, check head page for page flags James Houghton
2023-01-05 10:18 ` [PATCH 30/46] hugetlb: add high-granularity migration support James Houghton
2023-01-05 10:18 ` [PATCH 31/46] hugetlb: sort hstates in hugetlb_init_hstates James Houghton
2023-01-05 10:18 ` [PATCH 32/46] hugetlb: add for_each_hgm_shift James Houghton
2023-01-05 10:18 ` [PATCH 33/46] hugetlb: userfaultfd: add support for high-granularity UFFDIO_CONTINUE James Houghton
2023-01-05 10:18 ` [PATCH 34/46] hugetlb: userfaultfd: when using MADV_SPLIT, round addresses to PAGE_SIZE James Houghton
2023-01-06 15:13 ` Peter Xu
2023-01-10 14:50 ` James Houghton
2023-01-05 10:18 ` [PATCH 35/46] hugetlb: add MADV_COLLAPSE for hugetlb James Houghton
2023-01-10 20:04 ` James Houghton
2023-01-17 21:06 ` Peter Xu
2023-01-17 21:38 ` James Houghton
2023-01-17 21:54 ` Peter Xu
2023-01-19 22:37 ` Peter Xu
2023-01-19 23:06 ` James Houghton
2023-01-05 10:18 ` [PATCH 36/46] hugetlb: remove huge_pte_lock and huge_pte_lockptr James Houghton
2023-01-05 10:18 ` [PATCH 37/46] hugetlb: replace make_huge_pte with make_huge_pte_with_shift James Houghton
2023-01-05 10:18 ` [PATCH 38/46] mm: smaps: add stats for HugeTLB mapping size James Houghton
2023-01-05 10:18 ` [PATCH 39/46] hugetlb: x86: enable high-granularity mapping James Houghton
2023-01-12 20:07 ` James Houghton
2023-01-05 10:18 ` [PATCH 40/46] docs: hugetlb: update hugetlb and userfaultfd admin-guides with HGM info James Houghton
2023-01-05 10:18 ` [PATCH 41/46] docs: proc: include information about HugeTLB HGM James Houghton
2023-01-05 10:18 ` [PATCH 42/46] selftests/vm: add HugeTLB HGM to userfaultfd selftest James Houghton
2023-01-05 10:18 ` [PATCH 43/46] selftests/kvm: add HugeTLB HGM to KVM demand paging selftest James Houghton
2023-01-05 10:18 ` [PATCH 44/46] selftests/vm: add anon and shared hugetlb to migration test James Houghton
2023-01-05 10:18 ` [PATCH 45/46] selftests/vm: add hugetlb HGM test to migration selftest James Houghton
2023-01-05 10:18 ` [PATCH 46/46] selftests/vm: add HGM UFFDIO_CONTINUE and hwpoison tests James Houghton
2023-01-05 10:47 ` [PATCH 00/46] Based on latest mm-unstable (85b44c25cd1e) David Hildenbrand
2023-01-09 19:53 ` Mike Kravetz
2023-01-10 15:47 ` David Hildenbrand
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230105101844.1893104-10-jthoughton@google.com \
--to=jthoughton@google.com \
--cc=akpm@linux-foundation.org \
--cc=almasrymina@google.com \
--cc=axelrasmussen@google.com \
--cc=baolin.wang@linux.alibaba.com \
--cc=david@redhat.com \
--cc=dgilbert@redhat.com \
--cc=linmiaohe@huawei.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=manish.mishra@nutanix.com \
--cc=mike.kravetz@oracle.com \
--cc=naoya.horiguchi@nec.com \
--cc=peterx@redhat.com \
--cc=rientjes@google.com \
--cc=shy828301@gmail.com \
--cc=songmuchun@bytedance.com \
--cc=vbabka@suse.cz \
--cc=willy@infradead.org \
--cc=zokeefe@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox