From: Yin Fengwei <fengwei.yin@intel.com>
To: linux-mm@kvack.org, akpm@linux-foundation.org,
willy@infradead.org, kirill@shutemov.name, yuzhao@google.com,
ryan.roberts@arm.com, ying.huang@intel.com
Cc: fengwei.yin@intel.com
Subject: [PATCH v3 2/2] lru: allow large batched add large folio to lru list
Date: Sat, 29 Apr 2023 16:27:59 +0800 [thread overview]
Message-ID: <20230429082759.1600796-3-fengwei.yin@intel.com> (raw)
In-Reply-To: <20230429082759.1600796-1-fengwei.yin@intel.com>
Currently, large folios are not added to the lru list in batches,
which causes high lru lock contention after enabling large folios
for anonymous mappings.
When running page_fault1 of will-it-scale with order-2 folios and 96
processes on an Ice Lake 48C/96T system, the lru lock contention can
be around 64%:
- 64.31% 0.23% page_fault1_pro [kernel.kallsyms] [k] folio_lruvec_lock_irqsave
- 64.07% folio_lruvec_lock_irqsave
+ 64.01% _raw_spin_lock_irqsave
With this patch, the lru lock contention drops to 43% in the same
test:
- 42.67% 0.19% page_fault1_pro [kernel.kallsyms] [k] folio_lruvec_lock_irqsave
- 42.48% folio_lruvec_lock_irqsave
+ 42.42% _raw_spin_lock_irqsave
Reported-by: "Huang, Ying" <ying.huang@intel.com>
Signed-off-by: Yin Fengwei <fengwei.yin@intel.com>
---
include/linux/pagevec.h | 46 ++++++++++++++++++++++++++++++++++++++---
mm/mlock.c | 7 +++----
mm/swap.c | 3 +--
3 files changed, 47 insertions(+), 9 deletions(-)
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index f582f7213ea5..9479b7b50bc6 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -10,6 +10,7 @@
#define _LINUX_PAGEVEC_H
#include <linux/xarray.h>
+#include <linux/mm.h>
/* 15 pointers + header align the pagevec structure to a power of two */
#define PAGEVEC_SIZE 15
@@ -22,6 +23,7 @@ struct address_space;
struct pagevec {
unsigned char nr;
bool percpu_pvec_drained;
+ unsigned short nr_pages;
struct page *pages[PAGEVEC_SIZE];
};
@@ -30,12 +32,14 @@ void __pagevec_release(struct pagevec *pvec);
static inline void pagevec_init(struct pagevec *pvec)
{
pvec->nr = 0;
+ pvec->nr_pages = 0;
pvec->percpu_pvec_drained = false;
}
static inline void pagevec_reinit(struct pagevec *pvec)
{
pvec->nr = 0;
+ pvec->nr_pages = 0;
}
static inline unsigned pagevec_count(struct pagevec *pvec)
@@ -54,7 +58,12 @@ static inline unsigned pagevec_space(struct pagevec *pvec)
static inline unsigned pagevec_add(struct pagevec *pvec, struct page *page)
{
pvec->pages[pvec->nr++] = page;
- return pagevec_space(pvec);
+ pvec->nr_pages += compound_nr(page);
+
+ if (pvec->nr_pages > PAGEVEC_SIZE)
+ return 0;
+ else
+ return pagevec_space(pvec);
}
static inline void pagevec_release(struct pagevec *pvec)
@@ -75,6 +84,7 @@ static inline void pagevec_release(struct pagevec *pvec)
struct folio_batch {
unsigned char nr;
bool percpu_pvec_drained;
+ unsigned short nr_pages;
struct folio *folios[PAGEVEC_SIZE];
};
@@ -92,12 +102,14 @@ static_assert(offsetof(struct pagevec, pages) ==
static inline void folio_batch_init(struct folio_batch *fbatch)
{
fbatch->nr = 0;
+ fbatch->nr_pages = 0;
fbatch->percpu_pvec_drained = false;
}
static inline void folio_batch_reinit(struct folio_batch *fbatch)
{
fbatch->nr = 0;
+ fbatch->nr_pages = 0;
}
static inline unsigned int folio_batch_count(struct folio_batch *fbatch)
@@ -110,6 +122,32 @@ static inline unsigned int fbatch_space(struct folio_batch *fbatch)
return PAGEVEC_SIZE - fbatch->nr;
}
+/**
+ * folio_batch_add_nr_pages() - Add a folio to a batch.
+ * @fbatch: The folio batch.
+ * @folio: The folio to add.
+ * @nr_pages: The number of pages added to batch.
+ *
+ * The folio is added to the end of the batch.
+ * The batch must have previously been initialised using folio_batch_init().
+ *
+ * Return: The number of slots still available.
+ * Note: @folio may not be a direct reference to a folio, so
+ * folio_nr_pages(folio) can't be used here; the caller passes @nr_pages.
+ * Currently, this function is only called from mlock.c.
+ */
+static inline unsigned folio_batch_add_nr_pages(struct folio_batch *fbatch,
+ struct folio *folio, unsigned int nr_pages)
+{
+ fbatch->folios[fbatch->nr++] = folio;
+ fbatch->nr_pages += nr_pages;
+
+ if (fbatch->nr_pages > PAGEVEC_SIZE)
+ return 0;
+ else
+ return fbatch_space(fbatch);
+}
+
/**
* folio_batch_add() - Add a folio to a batch.
* @fbatch: The folio batch.
@@ -123,8 +161,10 @@ static inline unsigned int fbatch_space(struct folio_batch *fbatch)
static inline unsigned folio_batch_add(struct folio_batch *fbatch,
struct folio *folio)
{
- fbatch->folios[fbatch->nr++] = folio;
- return fbatch_space(fbatch);
+ unsigned int nr_pages;
+
+ nr_pages = xa_is_value(folio) ? 1 : folio_nr_pages(folio);
+ return folio_batch_add_nr_pages(fbatch, folio, nr_pages);
}
static inline void folio_batch_release(struct folio_batch *fbatch)
diff --git a/mm/mlock.c b/mm/mlock.c
index 617469fce96d..6de3e6d4639f 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -243,19 +243,18 @@ bool need_mlock_drain(int cpu)
void mlock_folio(struct folio *folio)
{
struct folio_batch *fbatch;
+ unsigned int nr_pages = folio_nr_pages(folio);
local_lock(&mlock_fbatch.lock);
fbatch = this_cpu_ptr(&mlock_fbatch.fbatch);
if (!folio_test_set_mlocked(folio)) {
- int nr_pages = folio_nr_pages(folio);
-
zone_stat_mod_folio(folio, NR_MLOCK, nr_pages);
__count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
}
folio_get(folio);
- if (!folio_batch_add(fbatch, mlock_lru(folio)) ||
+ if (!folio_batch_add_nr_pages(fbatch, mlock_lru(folio), nr_pages) ||
folio_test_large(folio) || lru_cache_disabled())
mlock_folio_batch(fbatch);
local_unlock(&mlock_fbatch.lock);
@@ -278,7 +277,7 @@ void mlock_new_folio(struct folio *folio)
__count_vm_events(UNEVICTABLE_PGMLOCKED, nr_pages);
folio_get(folio);
- if (!folio_batch_add(fbatch, mlock_new(folio)) ||
+ if (!folio_batch_add_nr_pages(fbatch, mlock_new(folio), nr_pages) ||
folio_test_large(folio) || lru_cache_disabled())
mlock_folio_batch(fbatch);
local_unlock(&mlock_fbatch.lock);
diff --git a/mm/swap.c b/mm/swap.c
index 57cb01b042f6..0f8554aeb338 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -228,8 +228,7 @@ static void folio_batch_move_lru(struct folio_batch *fbatch, move_fn_t move_fn)
static void folio_batch_add_and_move(struct folio_batch *fbatch,
struct folio *folio, move_fn_t move_fn)
{
- if (folio_batch_add(fbatch, folio) && !folio_test_large(folio) &&
- !lru_cache_disabled())
+ if (folio_batch_add(fbatch, folio) && !lru_cache_disabled())
return;
folio_batch_move_lru(fbatch, move_fn);
}
--
2.34.1
next prev parent reply other threads:[~2023-04-29 8:28 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-04-29 8:27 [PATCH v3 0/2] Reduce lock contention related with large folio Yin Fengwei
2023-04-29 8:27 ` [PATCH v3 1/2] THP: avoid lock when check whether THP is in deferred list Yin Fengwei
2023-05-04 11:48 ` kirill
2023-05-05 1:09 ` Yin, Fengwei
2023-05-29 2:58 ` Yin Fengwei
2023-05-05 0:52 ` Huang, Ying
2023-05-05 1:09 ` Yin, Fengwei
2023-04-29 8:27 ` Yin Fengwei [this message]
2023-04-29 22:35 ` [PATCH v3 2/2] lru: allow large batched add large folio to lru list Matthew Wilcox
2023-05-01 5:52 ` Yin, Fengwei
2023-05-05 5:51 ` Yin, Fengwei
2023-05-15 2:14 ` Yin, Fengwei
2023-06-20 3:22 ` Matthew Wilcox
2023-06-20 4:39 ` Yin Fengwei
2023-06-20 8:01 ` Yin Fengwei
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20230429082759.1600796-3-fengwei.yin@intel.com \
--to=fengwei.yin@intel.com \
--cc=akpm@linux-foundation.org \
--cc=kirill@shutemov.name \
--cc=linux-mm@kvack.org \
--cc=ryan.roberts@arm.com \
--cc=willy@infradead.org \
--cc=ying.huang@intel.com \
--cc=yuzhao@google.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox