linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Elliot Berman <quic_eberman@quicinc.com>
To: Andrew Morton <akpm@linux-foundation.org>,
	Shuah Khan <shuah@kernel.org>,
	David Hildenbrand <david@redhat.com>,
	Matthew Wilcox <willy@infradead.org>, <maz@kernel.org>
Cc: <kvm@vger.kernel.org>, <linux-arm-msm@vger.kernel.org>,
	<linux-mm@kvack.org>, <linux-kernel@vger.kernel.org>,
	<linux-kselftest@vger.kernel.org>, <pbonzini@redhat.com>,
	Elliot Berman <quic_eberman@quicinc.com>,
	Fuad Tabba <tabba@google.com>
Subject: [PATCH RFC 3/5] mm/gup: Add support for re-pinning a normal pinned page as exclusive
Date: Tue, 18 Jun 2024 17:05:09 -0700	[thread overview]
Message-ID: <20240618-exclusive-gup-v1-3-30472a19c5d1@quicinc.com> (raw)
In-Reply-To: <20240618-exclusive-gup-v1-0-30472a19c5d1@quicinc.com>

From: Fuad Tabba <tabba@google.com>

When a page is shared, the exclusive pin is dropped, but one
normal pin is maintained. In order to be able to unshare a page,
add the ability to reacquire the exclusive pin, but only if there
is only one normal pin on the page, and only if the page is
marked as AnonExclusive.

Co-developed-by: Elliot Berman <quic_eberman@quicinc.com>
Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>
Signed-off-by: Fuad Tabba <tabba@google.com>
Signed-off-by: Elliot Berman <quic_eberman@quicinc.com>
---
 include/linux/mm.h       |  1 +
 include/linux/page_ref.h | 18 ++++++++++++------
 mm/gup.c                 | 48 +++++++++++++++++++++++++++++++++++++-----------
 3 files changed, 50 insertions(+), 17 deletions(-)

diff --git a/include/linux/mm.h b/include/linux/mm.h
index d03d62bceba0..628ab936dd2b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1590,6 +1590,7 @@ void unpin_user_page_range_dirty_lock(struct page *page, unsigned long npages,
 void unpin_user_pages(struct page **pages, unsigned long npages);
 void unpin_exc_pages(struct page **pages, unsigned long npages);
 void unexc_user_page(struct page *page);
+int reexc_user_page(struct page *page);
 
 static inline bool is_cow_mapping(vm_flags_t flags)
 {
diff --git a/include/linux/page_ref.h b/include/linux/page_ref.h
index 9d16e1f4db09..e66130fe995d 100644
--- a/include/linux/page_ref.h
+++ b/include/linux/page_ref.h
@@ -92,7 +92,8 @@ static inline void __page_ref_unfreeze(struct page *page, int v)
  * provides safe operation for get_user_pages(), page_mkclean() and
  * other calls that race to set up page table entries.
  */
-#define GUP_PIN_COUNTING_BIAS (1U << 10)
+#define GUP_PIN_COUNTING_SHIFT (10)
+#define GUP_PIN_COUNTING_BIAS (1U << GUP_PIN_COUNTING_SHIFT)
 
 /*
  * GUP_PIN_EXCLUSIVE_BIAS is used to grab an exclusive pin over a page.
@@ -100,7 +101,8 @@ static inline void __page_ref_unfreeze(struct page *page, int v)
  * exist for the page.
  * After it's taken, no other gup pins can be taken.
  */
-#define GUP_PIN_EXCLUSIVE_BIAS (1U << 30)
+#define GUP_PIN_EXCLUSIVE_SHIFT (30)
+#define GUP_PIN_EXCLUSIVE_BIAS (1U << GUP_PIN_EXCLUSIVE_SHIFT)
 
 static inline int page_ref_count(const struct page *page)
 {
@@ -155,7 +157,9 @@ static inline void init_page_count(struct page *page)
 	set_page_count(page, 1);
 }
 
-static __must_check inline bool page_ref_setexc(struct page *page, unsigned int refs)
+static __must_check inline bool page_ref_setexc(struct page *page,
+						unsigned int expected_pins,
+						unsigned int refs)
 {
 	unsigned int old_count, new_count;
 
@@ -165,7 +169,7 @@ static __must_check inline bool page_ref_setexc(struct page *page, unsigned int
 	do {
 		old_count = atomic_read(&page->_refcount);
 
-		if (old_count >= GUP_PIN_COUNTING_BIAS)
+		if ((old_count >> GUP_PIN_COUNTING_SHIFT) != expected_pins)
 			return false;
 
 		if (check_add_overflow(old_count, refs + GUP_PIN_EXCLUSIVE_BIAS, &new_count))
@@ -178,9 +182,11 @@ static __must_check inline bool page_ref_setexc(struct page *page, unsigned int
 	return true;
 }
 
-static __must_check inline bool folio_ref_setexc(struct folio *folio, unsigned int refs)
+static __must_check inline bool folio_ref_setexc(struct folio *folio,
+						 unsigned int expected_pins,
+						 unsigned int refs)
 {
-	return page_ref_setexc(&folio->page, refs);
+	return page_ref_setexc(&folio->page, expected_pins, refs);
 }
 
 static inline void page_ref_add(struct page *page, int nr)
diff --git a/mm/gup.c b/mm/gup.c
index 7f20de33221d..663030d03d95 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -97,7 +97,9 @@ static inline struct folio *try_get_folio(struct page *page, int refs)
 	return folio;
 }
 
-static bool large_folio_pin_setexc(struct folio *folio, unsigned int pins)
+static bool large_folio_pin_setexc(struct folio *folio,
+				   unsigned int expected_pins,
+				   unsigned int pins)
 {
 	unsigned int old_pincount, new_pincount;
 
@@ -107,7 +109,7 @@ static bool large_folio_pin_setexc(struct folio *folio, unsigned int pins)
 	do {
 		old_pincount = atomic_read(&folio->_pincount);
 
-		if (old_pincount > 0)
+		if (old_pincount != expected_pins)
 			return false;
 
 		if (check_add_overflow(old_pincount, pins + GUP_PIN_EXCLUSIVE_BIAS, &new_pincount))
@@ -117,15 +119,18 @@ static bool large_folio_pin_setexc(struct folio *folio, unsigned int pins)
 	return true;
 }
 
-static bool __try_grab_folio_excl(struct folio *folio, int pincount, int refcount)
+static bool __try_grab_folio_excl(struct folio *folio,
+				  unsigned int expected_pins,
+				  int pincount,
+				  int refcount)
 {
 	if (WARN_ON_ONCE(!IS_ENABLED(CONFIG_EXCLUSIVE_PIN)))
 		return false;
 
 	if (folio_test_large(folio)) {
-		if (!large_folio_pin_setexc(folio, pincount))
+		if (!large_folio_pin_setexc(folio, expected_pins, pincount))
 			return false;
-	} else if (!folio_ref_setexc(folio, refcount)) {
+	} else if (!folio_ref_setexc(folio, expected_pins, refcount)) {
 		return false;
 	}
 
@@ -135,7 +140,9 @@ static bool __try_grab_folio_excl(struct folio *folio, int pincount, int refcoun
 	return true;
 }
 
-static bool try_grab_folio_excl(struct folio *folio, int refs)
+static bool try_grab_folio_excl(struct folio *folio,
+				unsigned int expected_pins,
+				int refs)
 {
 	/*
 	 * When pinning a large folio, use an exact count to track it.
@@ -145,15 +152,17 @@ static bool try_grab_folio_excl(struct folio *folio, int refs)
 	 * is pinned.  That's why the refcount from the earlier
 	 * try_get_folio() is left intact.
 	 */
-	return __try_grab_folio_excl(folio, refs,
+	return __try_grab_folio_excl(folio, expected_pins, refs,
 				     refs * (GUP_PIN_COUNTING_BIAS - 1));
 }
 
-static bool try_grab_page_excl(struct page *page)
+static bool try_grab_page_excl(struct page *page,
+			       unsigned int expected_pins)
 {
 	struct folio *folio = page_folio(page);
 
-	return __try_grab_folio_excl(folio, 1, GUP_PIN_COUNTING_BIAS);
+	return __try_grab_folio_excl(folio, expected_pins, 1,
+				     GUP_PIN_COUNTING_BIAS);
 }
 
 /**
@@ -227,7 +236,7 @@ struct folio *try_grab_folio(struct page *page, int refs, unsigned int flags)
 	}
 
 	if (unlikely(flags & FOLL_EXCLUSIVE)) {
-		if (!try_grab_folio_excl(folio, refs))
+		if (!try_grab_folio_excl(folio, 0, refs))
 			return NULL;
 	} else {
 		/*
@@ -347,7 +356,7 @@ int __must_check try_grab_page(struct page *page, unsigned int flags)
 			return -EBUSY;
 
 		if (unlikely(flags & FOLL_EXCLUSIVE)) {
-			if (!try_grab_page_excl(page))
+			if (!try_grab_page_excl(page, 0))
 				return -EBUSY;
 		} else {
 			/*
@@ -661,6 +670,23 @@ void unexc_user_page(struct page *page)
 }
 EXPORT_SYMBOL(unexc_user_page);
 
+int reexc_user_page(struct page *page)
+{
+	if (WARN_ON_ONCE(!IS_ENABLED(CONFIG_EXCLUSIVE_PIN)))
+		return -EINVAL;
+
+	sanity_check_pinned_pages(&page, 1);
+
+	if (!PageAnonExclusive(page))
+		return -EINVAL;
+
+	if (!try_grab_page_excl(page, 1))
+		return -EBUSY;
+
+	return 0;
+}
+EXPORT_SYMBOL(reexc_user_page);
+
 /*
  * Set the MMF_HAS_PINNED if not set yet; after set it'll be there for the mm's
  * lifecycle.  Avoid setting the bit unless necessary, or it might cause write

-- 
2.34.1



  parent reply	other threads:[~2024-06-19  0:05 UTC|newest]

Thread overview: 70+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-06-19  0:05 [PATCH RFC 0/5] mm/gup: Introduce exclusive GUP pinning Elliot Berman
2024-06-19  0:05 ` [PATCH RFC 1/5] mm/gup: Move GUP_PIN_COUNTING_BIAS to page_ref.h Elliot Berman
2024-06-19  0:05 ` [PATCH RFC 2/5] mm/gup: Add an option for obtaining an exclusive pin Elliot Berman
2024-06-19  0:05 ` Elliot Berman [this message]
2024-06-19  0:05 ` [PATCH RFC 4/5] mm/gup-test: Verify exclusive pinned Elliot Berman
2024-06-19  0:05 ` [PATCH RFC 5/5] mm/gup_test: Verify GUP grabs same pages twice Elliot Berman
2024-06-19  0:11 ` [PATCH RFC 0/5] mm/gup: Introduce exclusive GUP pinning Elliot Berman
2024-06-19  2:44 ` John Hubbard
2024-06-19  7:37   ` David Hildenbrand
2024-06-19  9:11     ` Fuad Tabba
2024-06-19 11:51       ` Jason Gunthorpe
2024-06-19 12:01         ` Fuad Tabba
2024-06-19 12:42           ` Jason Gunthorpe
2024-06-20 15:37           ` Sean Christopherson
2024-06-21  8:23             ` Fuad Tabba
2024-06-21  8:43               ` David Hildenbrand
2024-06-21  8:54                 ` Fuad Tabba
2024-06-21  9:10                   ` David Hildenbrand
2024-06-21 10:16                     ` Fuad Tabba
2024-06-21 16:54                       ` Elliot Berman
2024-06-24 19:03                         ` Sean Christopherson
2024-06-24 21:50                           ` David Rientjes
2024-06-26  3:19                             ` Vishal Annapurve
2024-06-26  5:20                               ` Pankaj Gupta
2024-06-19 12:17         ` David Hildenbrand
2024-06-20  4:11         ` Christoph Hellwig
2024-06-20  8:32           ` Fuad Tabba
2024-06-20 13:55             ` Jason Gunthorpe
2024-06-20 14:01               ` David Hildenbrand
2024-06-20 14:29                 ` Jason Gunthorpe
2024-06-20 14:45                   ` David Hildenbrand
2024-06-20 16:04                     ` Sean Christopherson
2024-06-20 18:56                       ` David Hildenbrand
2024-06-20 16:36                     ` Jason Gunthorpe
2024-06-20 18:53                       ` David Hildenbrand
2024-06-20 20:30                         ` Sean Christopherson
2024-06-20 20:47                           ` David Hildenbrand
2024-06-20 22:32                             ` Sean Christopherson
2024-06-20 23:00                               ` Jason Gunthorpe
2024-06-20 23:11                           ` Jason Gunthorpe
2024-06-20 23:54                             ` Sean Christopherson
2024-06-21  7:43                               ` David Hildenbrand
2024-06-21 12:39                               ` Jason Gunthorpe
2024-06-20 23:08                         ` Jason Gunthorpe
2024-06-20 22:47                   ` Elliot Berman
2024-06-20 23:18                     ` Jason Gunthorpe
2024-06-21  7:32                       ` Quentin Perret
2024-06-21  8:02                         ` David Hildenbrand
2024-06-21  9:25                           ` Quentin Perret
2024-06-21  9:37                             ` David Hildenbrand
2024-06-21 16:48                             ` Elliot Berman
2024-06-21 12:26                         ` Jason Gunthorpe
2024-06-19 12:16       ` David Hildenbrand
2024-06-20  8:47         ` Fuad Tabba
2024-06-20  9:00           ` David Hildenbrand
2024-06-20 14:01             ` Jason Gunthorpe
2024-06-20 13:08     ` Mostafa Saleh
2024-06-20 14:14       ` David Hildenbrand
2024-06-20 14:34         ` Jason Gunthorpe
2024-08-02  8:26           ` Tian, Kevin
2024-08-02 11:22             ` Jason Gunthorpe
2024-08-05  2:24               ` Tian, Kevin
2024-08-05 23:22                 ` Jason Gunthorpe
2024-08-06  0:50                   ` Tian, Kevin
2024-06-20 16:33         ` Mostafa Saleh
2024-07-12 23:29 ` Ackerley Tng
2024-07-16 16:03   ` Sean Christopherson
2024-07-16 16:08     ` Jason Gunthorpe
2024-07-16 17:34       ` Sean Christopherson
2024-07-16 20:11         ` Jason Gunthorpe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240618-exclusive-gup-v1-3-30472a19c5d1@quicinc.com \
    --to=quic_eberman@quicinc.com \
    --cc=akpm@linux-foundation.org \
    --cc=david@redhat.com \
    --cc=kvm@vger.kernel.org \
    --cc=linux-arm-msm@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-kselftest@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=maz@kernel.org \
    --cc=pbonzini@redhat.com \
    --cc=shuah@kernel.org \
    --cc=tabba@google.com \
    --cc=willy@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox