linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: "Pankaj Raghav (Samsung)" <kernel@pankajraghav.com>
To: david@fromorbit.com, djwong@kernel.org, chandan.babu@oracle.com,
	brauner@kernel.org, akpm@linux-foundation.org,
	willy@infradead.org
Cc: mcgrof@kernel.org, linux-mm@kvack.org, hare@suse.de,
	linux-kernel@vger.kernel.org, yang@os.amperecomputing.com,
	Zi Yan <zi.yan@sent.com>,
	linux-xfs@vger.kernel.org, p.raghav@samsung.com,
	linux-fsdevel@vger.kernel.org, kernel@pankajraghav.com,
	hch@lst.de, gost.dev@samsung.com, cl@os.amperecomputing.com,
	john.g.garry@oracle.com
Subject: [PATCH v7 05/11] mm: split a folio in minimum folio order chunks
Date: Fri,  7 Jun 2024 14:58:56 +0000	[thread overview]
Message-ID: <20240607145902.1137853-6-kernel@pankajraghav.com> (raw)
In-Reply-To: <20240607145902.1137853-1-kernel@pankajraghav.com>

From: Luis Chamberlain <mcgrof@kernel.org>

split_folio() and split_folio_to_list() assume order 0, to support
minorder for non-anonymous folios, we must expand these to check the
folio mapping order and use that.

Set new_order to be at least minimum folio order if it is set in
split_huge_page_to_list() so that we can maintain minimum folio order
requirement in the page cache.

Update the debugfs write files used for testing to ensure the order
is respected as well. We simply enforce the min order when a file
mapping is used.

Signed-off-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
---
 include/linux/huge_mm.h | 14 ++++++++---
 mm/huge_memory.c        | 55 ++++++++++++++++++++++++++++++++++++++---
 2 files changed, 61 insertions(+), 8 deletions(-)

diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 020e2344eb86..15caa4e7b00e 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -88,6 +88,8 @@ extern struct kobj_attribute shmem_enabled_attr;
 #define thp_vma_allowable_order(vma, vm_flags, tva_flags, order) \
 	(!!thp_vma_allowable_orders(vma, vm_flags, tva_flags, BIT(order)))
 
+#define split_folio(f) split_folio_to_list(f, NULL)
+
 #ifdef CONFIG_PGTABLE_HAS_HUGE_LEAVES
 #define HPAGE_PMD_SHIFT PMD_SHIFT
 #define HPAGE_PUD_SHIFT PUD_SHIFT
@@ -307,9 +309,10 @@ unsigned long thp_get_unmapped_area_vmflags(struct file *filp, unsigned long add
 bool can_split_folio(struct folio *folio, int *pextra_pins);
 int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
 		unsigned int new_order);
+int split_folio_to_list(struct folio *folio, struct list_head *list);
 static inline int split_huge_page(struct page *page)
 {
-	return split_huge_page_to_list_to_order(page, NULL, 0);
+	return split_folio(page_folio(page));
 }
 void deferred_split_folio(struct folio *folio);
 
@@ -474,6 +477,12 @@ static inline int split_huge_page(struct page *page)
 {
 	return 0;
 }
+
+static inline int split_folio_to_list(struct folio *folio, struct list_head *list)
+{
+	return 0;
+}
+
 static inline void deferred_split_folio(struct folio *folio) {}
 #define split_huge_pmd(__vma, __pmd, __address)	\
 	do { } while (0)
@@ -578,7 +587,4 @@ static inline int split_folio_to_order(struct folio *folio, int new_order)
 	return split_folio_to_list_to_order(folio, NULL, new_order);
 }
 
-#define split_folio_to_list(f, l) split_folio_to_list_to_order(f, l, 0)
-#define split_folio(f) split_folio_to_order(f, 0)
-
 #endif /* _LINUX_HUGE_MM_H */
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 8e49f402d7c7..399a4f5125c7 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3068,6 +3068,9 @@ bool can_split_folio(struct folio *folio, int *pextra_pins)
  * released, or if some unexpected race happened (e.g., anon VMA disappeared,
  * truncation).
  *
+ * Callers should ensure that the order respects the address space mapping
+ * min-order if one is set for non-anonymous folios.
+ *
  * Returns -EINVAL when trying to split to an order that is incompatible
  * with the folio. Splitting to order 0 is compatible with all folios.
  */
@@ -3143,6 +3146,7 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
 		mapping = NULL;
 		anon_vma_lock_write(anon_vma);
 	} else {
+		unsigned int min_order;
 		gfp_t gfp;
 
 		mapping = folio->mapping;
@@ -3153,6 +3157,14 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
 			goto out;
 		}
 
+		min_order = mapping_min_folio_order(folio->mapping);
+		if (new_order < min_order) {
+			VM_WARN_ONCE(1, "Cannot split mapped folio below min-order: %u",
+				     min_order);
+			ret = -EINVAL;
+			goto out;
+		}
+
 		gfp = current_gfp_context(mapping_gfp_mask(mapping) &
 							GFP_RECLAIM_MASK);
 
@@ -3264,6 +3276,21 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
 	return ret;
 }
 
+int split_folio_to_list(struct folio *folio, struct list_head *list)
+{
+	unsigned int min_order = 0;
+
+	if (!folio_test_anon(folio)) {
+		if (!folio->mapping) {
+			count_vm_event(THP_SPLIT_PAGE_FAILED);
+			return -EBUSY;
+		}
+		min_order = mapping_min_folio_order(folio->mapping);
+	}
+
+	return split_huge_page_to_list_to_order(&folio->page, list, min_order);
+}
+
 void __folio_undo_large_rmappable(struct folio *folio)
 {
 	struct deferred_split *ds_queue;
@@ -3493,6 +3520,8 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
 		struct vm_area_struct *vma = vma_lookup(mm, addr);
 		struct page *page;
 		struct folio *folio;
+		struct address_space *mapping;
+		unsigned int target_order = new_order;
 
 		if (!vma)
 			break;
@@ -3513,7 +3542,13 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
 		if (!is_transparent_hugepage(folio))
 			goto next;
 
-		if (new_order >= folio_order(folio))
+		if (!folio_test_anon(folio)) {
+			mapping = folio->mapping;
+			target_order = max(new_order,
+					   mapping_min_folio_order(mapping));
+		}
+
+		if (target_order >= folio_order(folio))
 			goto next;
 
 		total++;
@@ -3529,9 +3564,13 @@ static int split_huge_pages_pid(int pid, unsigned long vaddr_start,
 		if (!folio_trylock(folio))
 			goto next;
 
-		if (!split_folio_to_order(folio, new_order))
+		if (!folio_test_anon(folio) && folio->mapping != mapping)
+			goto unlock;
+
+		if (!split_folio_to_order(folio, target_order))
 			split++;
 
+unlock:
 		folio_unlock(folio);
 next:
 		folio_put(folio);
@@ -3556,6 +3595,7 @@ static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start,
 	pgoff_t index;
 	int nr_pages = 1;
 	unsigned long total = 0, split = 0;
+	unsigned int min_order;
 
 	file = getname_kernel(file_path);
 	if (IS_ERR(file))
@@ -3569,9 +3609,11 @@ static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start,
 		 file_path, off_start, off_end);
 
 	mapping = candidate->f_mapping;
+	min_order = mapping_min_folio_order(mapping);
 
 	for (index = off_start; index < off_end; index += nr_pages) {
 		struct folio *folio = filemap_get_folio(mapping, index);
+		unsigned int target_order = new_order;
 
 		nr_pages = 1;
 		if (IS_ERR(folio))
@@ -3580,18 +3622,23 @@ static int split_huge_pages_in_file(const char *file_path, pgoff_t off_start,
 		if (!folio_test_large(folio))
 			goto next;
 
+		target_order = max(new_order, min_order);
 		total++;
 		nr_pages = folio_nr_pages(folio);
 
-		if (new_order >= folio_order(folio))
+		if (target_order >= folio_order(folio))
 			goto next;
 
 		if (!folio_trylock(folio))
 			goto next;
 
-		if (!split_folio_to_order(folio, new_order))
+		if (folio->mapping != mapping)
+			goto unlock;
+
+		if (!split_folio_to_order(folio, target_order))
 			split++;
 
+unlock:
 		folio_unlock(folio);
 next:
 		folio_put(folio);
-- 
2.44.1



  parent reply	other threads:[~2024-06-07 14:59 UTC|newest]

Thread overview: 58+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-06-07 14:58 [PATCH v7 00/11] enable bs > ps in XFS Pankaj Raghav (Samsung)
2024-06-07 14:58 ` [PATCH v7 01/11] readahead: rework loop in page_cache_ra_unbounded() Pankaj Raghav (Samsung)
2024-06-07 14:58 ` [PATCH v7 02/11] fs: Allow fine-grained control of folio sizes Pankaj Raghav (Samsung)
2024-06-12 15:38   ` Darrick J. Wong
2024-06-07 14:58 ` [PATCH v7 03/11] filemap: allocate mapping_min_order folios in the page cache Pankaj Raghav (Samsung)
2024-06-12  9:01   ` Hannes Reinecke
2024-06-12 15:40   ` Darrick J. Wong
2024-06-12 17:24   ` Matthew Wilcox
2024-06-13  8:44   ` Christoph Hellwig
2024-06-17  9:58     ` Pankaj Raghav (Samsung)
2024-06-17 12:34       ` Matthew Wilcox
2024-06-07 14:58 ` [PATCH v7 04/11] readahead: allocate folios with mapping_min_order in readahead Pankaj Raghav (Samsung)
2024-06-12 18:50   ` Matthew Wilcox
2024-06-14  9:26     ` Pankaj Raghav (Samsung)
2024-06-17 12:32       ` Matthew Wilcox
2024-06-17 16:04         ` Pankaj Raghav (Samsung)
2024-06-17 16:10           ` Matthew Wilcox
2024-06-17 16:39             ` Pankaj Raghav (Samsung)
2024-06-18  6:56               ` Hannes Reinecke
2024-06-21 12:19                 ` Pankaj Raghav (Samsung)
2024-06-21 13:28                   ` Hannes Reinecke
2024-06-18  6:52             ` Hannes Reinecke
2024-06-07 14:58 ` Pankaj Raghav (Samsung) [this message]
2024-06-07 16:58   ` [PATCH v7 05/11] mm: split a folio in minimum folio order chunks Zi Yan
2024-06-07 17:01     ` Matthew Wilcox
2024-06-07 20:45       ` Pankaj Raghav (Samsung)
2024-06-07 20:30     ` Pankaj Raghav (Samsung)
2024-06-07 20:51       ` Zi Yan
2024-06-10  7:26         ` Pankaj Raghav (Samsung)
2024-06-12  9:02   ` Hannes Reinecke
2024-06-07 14:58 ` [PATCH v7 06/11] filemap: cap PTE range to be created to allowed zero fill in folio_map_range() Pankaj Raghav (Samsung)
2024-06-12 19:08   ` Matthew Wilcox
2024-06-13  7:57     ` Luis Chamberlain
2024-06-13  8:07       ` David Hildenbrand
2024-06-13  8:13         ` Luis Chamberlain
2024-06-13  8:16           ` David Hildenbrand
2024-06-13 15:27             ` Luis Chamberlain
2024-06-13 15:32               ` Matthew Wilcox
2024-06-13 15:38                 ` Luis Chamberlain
2024-06-13 15:40                   ` Matthew Wilcox
2024-06-13 19:39                     ` Luis Chamberlain
2024-06-07 14:58 ` [PATCH v7 07/11] iomap: fix iomap_dio_zero() for fs bs > system page size Pankaj Raghav (Samsung)
2024-06-11  7:38   ` John Garry
2024-06-11  9:41     ` Pankaj Raghav (Samsung)
2024-06-11 10:00       ` John Garry
2024-06-12 20:40   ` Darrick J. Wong
2024-06-17 15:08     ` Pankaj Raghav (Samsung)
2024-06-07 14:58 ` [PATCH v7 08/11] xfs: use kvmalloc for xattr buffers Pankaj Raghav (Samsung)
2024-06-07 14:59 ` [PATCH v7 09/11] xfs: expose block size in stat Pankaj Raghav (Samsung)
2024-06-07 14:59 ` [PATCH v7 10/11] xfs: make the calculation generic in xfs_sb_validate_fsb_count() Pankaj Raghav (Samsung)
2024-06-13  8:45   ` Christoph Hellwig
2024-06-17 16:09     ` Pankaj Raghav (Samsung)
2024-06-07 14:59 ` [PATCH v7 11/11] xfs: enable block size larger than page size support Pankaj Raghav (Samsung)
2024-06-13  8:47   ` Christoph Hellwig
2024-06-17  1:29     ` Dave Chinner
2024-06-17  6:51       ` Christoph Hellwig
2024-06-17 16:31         ` Pankaj Raghav (Samsung)
2024-06-17 23:18         ` Dave Chinner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240607145902.1137853-6-kernel@pankajraghav.com \
    --to=kernel@pankajraghav.com \
    --cc=akpm@linux-foundation.org \
    --cc=brauner@kernel.org \
    --cc=chandan.babu@oracle.com \
    --cc=cl@os.amperecomputing.com \
    --cc=david@fromorbit.com \
    --cc=djwong@kernel.org \
    --cc=gost.dev@samsung.com \
    --cc=hare@suse.de \
    --cc=hch@lst.de \
    --cc=john.g.garry@oracle.com \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=linux-xfs@vger.kernel.org \
    --cc=mcgrof@kernel.org \
    --cc=p.raghav@samsung.com \
    --cc=willy@infradead.org \
    --cc=yang@os.amperecomputing.com \
    --cc=zi.yan@sent.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox