linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Yunsheng Lin <linyunsheng@huawei.com>
To: <davem@davemloft.net>, <kuba@kernel.org>, <pabeni@redhat.com>
Cc: <netdev@vger.kernel.org>, <linux-kernel@vger.kernel.org>,
	Yunsheng Lin <linyunsheng@huawei.com>,
	Alexander Duyck <alexander.duyck@gmail.com>,
	Andrew Morton <akpm@linux-foundation.org>, <linux-mm@kvack.org>
Subject: [PATCH net-next v12 11/14] mm: page_frag: introduce prepare/probe/commit API
Date: Wed, 31 Jul 2024 20:45:01 +0800	[thread overview]
Message-ID: <20240731124505.2903877-12-linyunsheng@huawei.com> (raw)
In-Reply-To: <20240731124505.2903877-1-linyunsheng@huawei.com>

There are many use cases that need a minimum amount of memory
in order to make forward progress, but that are more performant
if more memory is available, or that need to probe the cache
info in order to use any available memory for frag coalescing.

Currently the skb_page_frag_refill() API is used to handle the
above use cases, but the caller needs to know about the internal
details and access the data fields of 'struct page_frag' to meet
the requirements of those use cases, and its implementation is
similar to the one in the mm subsystem.

To unify those two page_frag implementations, introduce a
prepare API that ensures the minimum memory is available and
returns how much memory is actually available to the caller,
and a probe API that reports the currently available memory to
the caller without refilling the cache. The caller then either
calls the commit API to report how much memory it actually
used, or does not call it if it decides not to use any memory.

CC: Alexander Duyck <alexander.duyck@gmail.com>
Signed-off-by: Yunsheng Lin <linyunsheng@huawei.com>
---
 include/linux/page_frag_cache.h |  75 ++++++++++++++++
 mm/page_frag_cache.c            | 152 ++++++++++++++++++++++++++++----
 2 files changed, 212 insertions(+), 15 deletions(-)

diff --git a/include/linux/page_frag_cache.h b/include/linux/page_frag_cache.h
index 0abffdd10a1c..ba5d7f8a03cd 100644
--- a/include/linux/page_frag_cache.h
+++ b/include/linux/page_frag_cache.h
@@ -7,6 +7,8 @@
 #include <linux/build_bug.h>
 #include <linux/log2.h>
 #include <linux/types.h>
+#include <linux/mm.h>
+#include <linux/mmdebug.h>
 #include <linux/mm_types_task.h>
 
 #if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
@@ -67,6 +69,9 @@ static inline unsigned int page_frag_cache_page_size(unsigned long encoded_va)
 
 void page_frag_cache_drain(struct page_frag_cache *nc);
 void __page_frag_cache_drain(struct page *page, unsigned int count);
+struct page *page_frag_alloc_pg(struct page_frag_cache *nc,
+				unsigned int *offset, unsigned int fragsz,
+				gfp_t gfp);
 void *__page_frag_alloc_va_align(struct page_frag_cache *nc,
 				 unsigned int fragsz, gfp_t gfp_mask,
 				 unsigned int align_mask);
@@ -79,12 +84,82 @@ static inline void *page_frag_alloc_va_align(struct page_frag_cache *nc,
 	return __page_frag_alloc_va_align(nc, fragsz, gfp_mask, -align);
 }
 
+/* Byte offset into the current cache page at which the unused space begins:
+ * the page size encoded in 'nc->encoded_va' minus 'nc->remaining'.
+ */
+static inline unsigned int page_frag_cache_page_offset(const struct page_frag_cache *nc)
+{
+	unsigned int size = page_frag_cache_page_size(nc->encoded_va);
+
+	return size - nc->remaining;
+}
+
 static inline void *page_frag_alloc_va(struct page_frag_cache *nc,
 				       unsigned int fragsz, gfp_t gfp_mask)
 {
 	return __page_frag_alloc_va_align(nc, fragsz, gfp_mask, ~0u);
 }
 
+void *page_frag_alloc_va_prepare(struct page_frag_cache *nc, unsigned int *fragsz,
+				 gfp_t gfp);
+
+/* Aligned variant of page_frag_alloc_va_prepare().
+ *
+ * Masks 'nc->remaining' with -align before preparing, which rounds it down
+ * to a multiple of 'align' when 'align' is a power of two. Warns once if
+ * 'align' is not a power of two or is larger than PAGE_SIZE, but carries on
+ * regardless. The masked value is stored back into the cache whether or not
+ * the following prepare hands out any memory.
+ */
+static inline void *page_frag_alloc_va_prepare_align(struct page_frag_cache *nc,
+						     unsigned int *fragsz,
+						     gfp_t gfp,
+						     unsigned int align)
+{
+	WARN_ON_ONCE(align > PAGE_SIZE || !is_power_of_2(align));
+	nc->remaining &= -align;
+
+	return page_frag_alloc_va_prepare(nc, fragsz, gfp);
+}
+
+struct page *page_frag_alloc_pg_prepare(struct page_frag_cache *nc,
+					unsigned int *offset,
+					unsigned int *fragsz, gfp_t gfp);
+
+struct page *page_frag_alloc_prepare(struct page_frag_cache *nc,
+				     unsigned int *offset,
+				     unsigned int *fragsz,
+				     void **va, gfp_t gfp);
+
+/* Report the memory currently available in 'nc' without reloading the cache
+ * and without consuming anything ('nc' itself is not modified).
+ *
+ * On entry '*fragsz' is the minimum number of bytes wanted and must be
+ * non-zero. If fewer bytes remain, NULL is returned and the output
+ * parameters are left untouched. Otherwise '*fragsz' is set to all the
+ * remaining bytes, '*offset' to where they start within the page, '*va' to
+ * their address, and the page backing them is returned.
+ */
+static inline struct page *page_frag_alloc_probe(struct page_frag_cache *nc,
+						 unsigned int *offset,
+						 unsigned int *fragsz,
+						 void **va)
+{
+	unsigned int remaining = nc->remaining;
+	unsigned long encoded_va;
+	void *base;
+
+	VM_BUG_ON(!*fragsz);
+	if (unlikely(remaining < *fragsz))
+		return NULL;
+
+	encoded_va = nc->encoded_va;
+	base = encoded_page_address(encoded_va);
+
+	*fragsz = remaining;
+	*offset = page_frag_cache_page_size(encoded_va) - remaining;
+	*va = base + *offset;
+
+	return virt_to_page(base);
+}
+
+/* Commit 'fragsz' bytes of the cache: shrink 'nc->remaining' by 'fragsz' and
+ * drop one count from 'nc->pagecnt_bias'. VM_BUG_ON() catches a commit that
+ * is larger than what remains or that finds the bias already at zero.
+ */
+static inline void page_frag_alloc_commit(struct page_frag_cache *nc,
+					  unsigned int fragsz)
+{
+	VM_BUG_ON(!nc->pagecnt_bias || fragsz > nc->remaining);
+	nc->remaining -= fragsz;
+	nc->pagecnt_bias--;
+}
+
+/* Commit 'fragsz' bytes of the cache without touching 'nc->pagecnt_bias':
+ * only 'nc->remaining' shrinks. VM_BUG_ON() catches a commit that is larger
+ * than what remains.
+ */
+static inline void page_frag_alloc_commit_noref(struct page_frag_cache *nc,
+						unsigned int fragsz)
+{
+	VM_BUG_ON(nc->remaining < fragsz);
+	nc->remaining = nc->remaining - fragsz;
+}
+
+/* Undo a page_frag_alloc_commit() of 'fragsz' bytes: give the bytes back to
+ * 'nc->remaining' and restore one count of 'nc->pagecnt_bias'. No sanity
+ * checks are performed on either value.
+ */
+static inline void page_frag_alloc_abort(struct page_frag_cache *nc,
+					 unsigned int fragsz)
+{
+	nc->remaining += fragsz;
+	nc->pagecnt_bias++;
+}
+
 void page_frag_free_va(void *addr);
 
 #endif
diff --git a/mm/page_frag_cache.c b/mm/page_frag_cache.c
index a24d6d5278d1..6a21d710c0e2 100644
--- a/mm/page_frag_cache.c
+++ b/mm/page_frag_cache.c
@@ -19,27 +19,27 @@
 #include <linux/page_frag_cache.h>
 #include "internal.h"
 
-static bool __page_frag_cache_reuse(unsigned long encoded_va,
-				    unsigned int pagecnt_bias)
+static struct page *__page_frag_cache_reuse(unsigned long encoded_va,
+					    unsigned int pagecnt_bias)
 {
 	struct page *page;
 
 	page = virt_to_page((void *)encoded_va);
 	if (!page_ref_sub_and_test(page, pagecnt_bias))
-		return false;
+		return NULL;
 
 	if (unlikely(encoded_page_pfmemalloc(encoded_va))) {
 		free_unref_page(page, encoded_page_order(encoded_va));
-		return false;
+		return NULL;
 	}
 
 	/* OK, page count is 0, we can safely set it */
 	set_page_count(page, PAGE_FRAG_CACHE_MAX_SIZE + 1);
-	return true;
+	return page;
 }
 
-static bool __page_frag_cache_refill(struct page_frag_cache *nc,
-				     gfp_t gfp_mask)
+static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
+					     gfp_t gfp_mask)
 {
 	unsigned long order = PAGE_FRAG_CACHE_MAX_ORDER;
 	struct page *page = NULL;
@@ -55,7 +55,7 @@ static bool __page_frag_cache_refill(struct page_frag_cache *nc,
 		page = __alloc_pages(gfp, 0, numa_mem_id(), NULL);
 		if (unlikely(!page)) {
 			memset(nc, 0, sizeof(*nc));
-			return false;
+			return NULL;
 		}
 
 		order = 0;
@@ -69,29 +69,151 @@ static bool __page_frag_cache_refill(struct page_frag_cache *nc,
 	 */
 	page_ref_add(page, PAGE_FRAG_CACHE_MAX_SIZE);
 
-	return true;
+	return page;
 }
 
 /* Reload cache by reusing the old cache if it is possible, or
  * refilling from the page allocator.
  */
-static bool __page_frag_cache_reload(struct page_frag_cache *nc,
-				     gfp_t gfp_mask)
+static struct page *__page_frag_cache_reload(struct page_frag_cache *nc,
+					     gfp_t gfp_mask)
 {
+	struct page *page;
+
 	if (likely(nc->encoded_va)) {
-		if (__page_frag_cache_reuse(nc->encoded_va, nc->pagecnt_bias))
+		page = __page_frag_cache_reuse(nc->encoded_va, nc->pagecnt_bias);
+		if (page)
 			goto out;
 	}
 
-	if (unlikely(!__page_frag_cache_refill(nc, gfp_mask)))
-		return false;
+	page = __page_frag_cache_refill(nc, gfp_mask);
+	if (unlikely(!page))
+		return NULL;
 
 out:
 	/* reset page count bias and remaining to start of new frag */
 	nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1;
 	nc->remaining = page_frag_cache_page_size(nc->encoded_va);
-	return true;
+	return page;
+}
+
+/* Make sure at least '*fragsz' bytes are available in 'nc', reloading the
+ * cache if needed, and report them without consuming anything.
+ *
+ * '*fragsz' must be non-zero on entry. When the cache already holds enough,
+ * '*fragsz' is set to all the remaining bytes and their address is returned.
+ * Otherwise NULL is returned for requests larger than PAGE_SIZE (leaving
+ * '*fragsz' untouched), else the cache is reloaded and '*fragsz' and the
+ * returned address describe the reloaded cache.
+ */
+void *page_frag_alloc_va_prepare(struct page_frag_cache *nc,
+				 unsigned int *fragsz, gfp_t gfp)
+{
+	unsigned long encoded_va = nc->encoded_va;
+	unsigned int avail = nc->remaining;
+
+	VM_BUG_ON(!*fragsz);
+	if (likely(avail >= *fragsz)) {
+		unsigned int used;
+
+		used = page_frag_cache_page_size(encoded_va) - avail;
+		*fragsz = avail;
+		return encoded_page_address(encoded_va) + used;
+	}
+
+	if (unlikely(*fragsz > PAGE_SIZE))
+		return NULL;
+
+	/* The reload result is deliberately ignored: a failed reload zeroes
+	 * both nc->encoded_va and nc->remaining, so '*fragsz' becomes zero
+	 * and the address is derived from a zero encoded_va (presumably
+	 * NULL - confirm against encoded_page_address()).
+	 */
+	__page_frag_cache_reload(nc, gfp);
+	*fragsz = nc->remaining;
+
+	return encoded_page_address(nc->encoded_va);
+}
+EXPORT_SYMBOL(page_frag_alloc_va_prepare);
+
+/* Page/offset flavour of the prepare step: make sure at least '*fragsz'
+ * bytes are available in 'nc', reloading the cache if needed, and report
+ * them without consuming anything.
+ *
+ * '*fragsz' must be non-zero on entry. On return '*fragsz' holds the number
+ * of bytes available and '*offset' where they start within the returned
+ * page. Returns NULL, with the outputs untouched, for requests larger than
+ * PAGE_SIZE that the current cache cannot satisfy; returns NULL with
+ * '*offset' and '*fragsz' both zero when the reload fails.
+ */
+struct page *page_frag_alloc_pg_prepare(struct page_frag_cache *nc,
+					unsigned int *offset,
+					unsigned int *fragsz, gfp_t gfp)
+{
+	unsigned long encoded_va = nc->encoded_va;
+	unsigned int avail = nc->remaining;
+	struct page *page;
+
+	VM_BUG_ON(!*fragsz);
+	if (unlikely(avail < *fragsz)) {
+		if (unlikely(*fragsz > PAGE_SIZE))
+			return NULL;
+
+		/* A failed reload returns NULL and leaves nc->remaining at
+		 * zero, so the outputs are filled in unconditionally.
+		 */
+		page = __page_frag_cache_reload(nc, gfp);
+		*offset = 0;
+		*fragsz = nc->remaining;
+		return page;
+	}
+
+	*fragsz = avail;
+	*offset = page_frag_cache_page_size(encoded_va) - avail;
+
+	return virt_to_page((void *)encoded_va);
+}
+EXPORT_SYMBOL(page_frag_alloc_pg_prepare);
+
+/* Prepare step reporting both the page/offset and the virtual address: make
+ * sure at least '*fragsz' bytes are available in 'nc', reloading the cache
+ * if needed, without consuming anything.
+ *
+ * '*fragsz' must be non-zero on entry. On return '*fragsz' holds the number
+ * of bytes available, '*offset' where they start within the returned page
+ * and '*va' their address. Returns NULL, with the outputs untouched, for
+ * requests larger than PAGE_SIZE that the current cache cannot satisfy.
+ * When the reload fails NULL is returned with '*offset' and '*fragsz' zero
+ * and '*va' derived from a zeroed encoded_va (presumably NULL - confirm
+ * against encoded_page_address()).
+ */
+struct page *page_frag_alloc_prepare(struct page_frag_cache *nc,
+				     unsigned int *offset,
+				     unsigned int *fragsz,
+				     void **va, gfp_t gfp)
+{
+	unsigned long encoded_va = nc->encoded_va;
+	unsigned int avail = nc->remaining;
+	struct page *page;
+
+	VM_BUG_ON(!*fragsz);
+	if (unlikely(avail < *fragsz)) {
+		if (unlikely(*fragsz > PAGE_SIZE))
+			return NULL;
+
+		/* A failed reload returns NULL and zeroes the cache, so the
+		 * outputs are filled in unconditionally.
+		 */
+		page = __page_frag_cache_reload(nc, gfp);
+		*va = encoded_page_address(nc->encoded_va);
+		*fragsz = nc->remaining;
+		*offset = 0;
+		return page;
+	}
+
+	*fragsz = avail;
+	*offset = page_frag_cache_page_size(encoded_va) - avail;
+	*va = encoded_page_address(encoded_va) + *offset;
+
+	return virt_to_page((void *)encoded_va);
+}
+EXPORT_SYMBOL(page_frag_alloc_prepare);
+
+/* Allocate and commit 'fragsz' bytes: consumes them from 'nc', drops one
+ * pagecnt_bias count and sets '*offset' to the frag's offset in the returned
+ * page. Returns NULL if 'fragsz' > PAGE_SIZE or reloading the cache fails.
+ */
+struct page *page_frag_alloc_pg(struct page_frag_cache *nc,
+				unsigned int *offset, unsigned int fragsz,
+				gfp_t gfp)
+{
+	unsigned int remaining = nc->remaining;
+	struct page *page;
+
+	VM_BUG_ON(!fragsz);
+	if (likely(remaining >= fragsz)) {
+		page = virt_to_page((void *)nc->encoded_va);
+	} else {
+		if (unlikely(fragsz > PAGE_SIZE))
+			return NULL;
+
+		page = __page_frag_cache_reload(nc, gfp);
+		if (unlikely(!page))
+			return NULL;
+		/* reload reset nc->remaining; the snapshot above is stale */
+		remaining = nc->remaining;
+	}
+
+	*offset = page_frag_cache_page_size(nc->encoded_va) - remaining;
+	nc->remaining = remaining - fragsz;
+	nc->pagecnt_bias--;
+	return page;
+}
+EXPORT_SYMBOL(page_frag_alloc_pg);
 
 void page_frag_cache_drain(struct page_frag_cache *nc)
 {
-- 
2.33.0



  parent reply	other threads:[~2024-07-31 12:51 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <20240731124505.2903877-1-linyunsheng@huawei.com>
2024-07-31 12:44 ` [PATCH net-next v12 01/14] mm: page_frag: add a test module for page_frag Yunsheng Lin
2024-07-31 18:29   ` Alexander Duyck
2024-08-01 12:58     ` Yunsheng Lin
2024-08-01 14:50       ` Alexander Duyck
2024-08-02 10:02         ` Yunsheng Lin
2024-08-02 16:42           ` Alexander Duyck
2024-07-31 12:44 ` [PATCH net-next v12 02/14] mm: move the page fragment allocator from page_alloc into its own file Yunsheng Lin
2024-07-31 12:44 ` [PATCH net-next v12 03/14] mm: page_frag: use initial zero offset for page_frag_alloc_align() Yunsheng Lin
2024-07-31 12:44 ` [PATCH net-next v12 04/14] mm: page_frag: add '_va' suffix to page_frag API Yunsheng Lin
2024-07-31 13:36   ` Chuck Lever
2024-07-31 18:13   ` Alexander Duyck
2024-08-01 13:01     ` Yunsheng Lin
2024-08-01 15:21       ` Alexander Duyck
2024-08-02 10:05         ` Yunsheng Lin
2024-08-02 17:00           ` Alexander Duyck
     [not found]             ` <2a29ce61-7136-4b9b-9940-504228b10cba@gmail.com>
2024-08-06  0:52               ` Alexander Duyck
2024-08-06 11:37                 ` Yunsheng Lin
2024-08-04  6:44   ` Sagi Grimberg
2024-07-31 12:44 ` [PATCH net-next v12 05/14] mm: page_frag: avoid caller accessing 'page_frag_cache' directly Yunsheng Lin
2024-07-31 13:36   ` Chuck Lever
2024-07-31 12:44 ` [PATCH net-next v12 07/14] mm: page_frag: reuse existing space for 'size' and 'pfmemalloc' Yunsheng Lin
2024-07-31 12:44 ` [PATCH net-next v12 08/14] mm: page_frag: some minor refactoring before adding new API Yunsheng Lin
2024-07-31 12:44 ` [PATCH net-next v12 09/14] mm: page_frag: use __alloc_pages() to replace alloc_pages_node() Yunsheng Lin
2024-07-31 12:45 ` Yunsheng Lin [this message]
2024-07-31 12:45 ` [PATCH net-next v12 13/14] mm: page_frag: update documentation for page_frag Yunsheng Lin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240731124505.2903877-12-linyunsheng@huawei.com \
    --to=linyunsheng@huawei.com \
    --cc=akpm@linux-foundation.org \
    --cc=alexander.duyck@gmail.com \
    --cc=davem@davemloft.net \
    --cc=kuba@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=netdev@vger.kernel.org \
    --cc=pabeni@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox