From: Raghavendra K T <raghavendra.kt@amd.com>
To: <linux-mm@kvack.org>, <linux-kernel@vger.kernel.org>,
<gourry@gourry.net>, <nehagholkar@meta.com>, <abhishekd@meta.com>,
<david@redhat.com>, <ying.huang@intel.com>, <nphamcs@gmail.com>,
<akpm@linux-foundation.org>, <hannes@cmpxchg.org>,
<feng.tang@intel.com>, <kbusch@meta.com>, <bharata@amd.com>,
<Hasan.Maruf@amd.com>, <sj@kernel.org>
Cc: <willy@infradead.org>, <kirill.shutemov@linux.intel.com>,
<mgorman@techsingularity.net>, <vbabka@suse.cz>,
<hughd@google.com>, <rientjes@google.com>, <shy828301@gmail.com>,
<Liam.Howlett@Oracle.com>, <peterz@infradead.org>,
<mingo@redhat.com>, Raghavendra K T <raghavendra.kt@amd.com>
Subject: [RFC PATCH V0 04/10] mm/migration: Migrate accessed folios to toptier node
Date: Sun, 1 Dec 2024 15:38:12 +0000
Message-ID: <20241201153818.2633616-5-raghavendra.kt@amd.com>
In-Reply-To: <20241201153818.2633616-1-raghavendra.kt@amd.com>

For each recently accessed slowtier folio in the migration list:
- Isolate the folio from the LRU.
- Migrate it to a regular (toptier) node.

The rationale behind the migration is to speed up access to
recently accessed pages.
Limitation:
The PTE A bit scanning approach lacks information about the exact
destination node to migrate to.

Reason:
PROT_NONE hint fault based scanning is done in process context: when
the fault occurs, the source CPU (and hence a natural target node) of
the faulting task is known, and the time of the page access is also
accurate. A bit scanning runs asynchronously in a kernel thread and
has neither.

Lacking the above information, migration is done to node 0 by default.
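
As an illustration only (not part of this patch): a default target
could instead be derived from the memory-tiers topology, e.g. the
first toptier node with memory. A minimal sketch, assuming the
existing node_is_toptier() helper from <linux/memory-tiers.h>; the
function name kmmscand_pick_default_target_node() is hypothetical:

#include <linux/memory-tiers.h>
#include <linux/nodemask.h>

/* Sketch: pick the first online toptier node with memory. */
static int kmmscand_pick_default_target_node(void)
{
	int nid;

	for_each_node_state(nid, N_MEMORY) {
		if (node_is_toptier(nid))
			return nid;
	}

	/* Fall back to the current hardcoded default. */
	return KMMSCAND_DEFAULT_TARGET_NODE;
}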
Signed-off-by: Raghavendra K T <raghavendra.kt@amd.com>
---
mm/kmmscand.c | 178 ++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 178 insertions(+)
TBD: migrate_misplaced_folio() expects its caller to have performed
the isolation checks under the PTE lock (PTL). Since we are not
coming from the fault path, re-taking the PTL here is tricky. This
needs to be fixed before the final patch; one possible shape is
sketched below.
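
A sketch of how the PTL could be re-taken, mirroring the NUMA
hint-fault path. It assumes a hypothetical info->address field
recorded at scan time; THP and pmd-validity handling are omitted, and
the exact migrate_misplaced_folio() signature may differ by tree:

static int kmmscand_promote_folio_locked(struct kmmscand_migrate_info *info,
					 int destnid)
{
	unsigned long addr = info->address;	/* hypothetical field */
	struct folio *folio = info->folio;
	spinlock_t *ptl;
	pte_t *pte, ptent;
	int ret;

	/* Re-walk to the PTE; pmd_off() assumes page tables are populated. */
	pte = pte_offset_map_lock(info->mm, pmd_off(info->mm, addr), addr, &ptl);
	if (!pte)
		return -EAGAIN;

	/* Re-check that the PTE still maps this folio before isolating. */
	ptent = ptep_get(pte);
	if (!pte_present(ptent) || pte_pfn(ptent) != folio_pfn(folio)) {
		pte_unmap_unlock(pte, ptl);
		return -EAGAIN;
	}

	folio_get(folio);
	ret = kmmscand_migrate_misplaced_folio_prepare(folio, info->vma, destnid);
	pte_unmap_unlock(pte, ptl);
	if (ret) {
		folio_put(folio);
		return ret;
	}
	folio_put(folio);

	return migrate_misplaced_folio(folio, info->vma, destnid);
}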
diff --git a/mm/kmmscand.c b/mm/kmmscand.c
index 0496359d07f5..3b4453b053f4 100644
--- a/mm/kmmscand.c
+++ b/mm/kmmscand.c
@@ -4,6 +4,7 @@
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/mmu_notifier.h>
+#include <linux/migrate.h>
#include <linux/rmap.h>
#include <linux/pagewalk.h>
#include <linux/page_ext.h>
@@ -36,7 +37,15 @@ static unsigned long kmmscand_mms_to_scan __read_mostly = KMMSCAND_MMS_TO_SCAN;
volatile bool kmmscand_scan_enabled = true;
static bool need_wakeup;
+/* mm of the folio entry currently being migrated */
+static struct mm_struct *kmmscand_cur_migrate_mm;
+
+/* Migration list can be modified underneath us on mm_exit */
+static bool kmmscand_migration_list_dirty;
+
static unsigned long kmmscand_sleep_expire;
+#define KMMSCAND_DEFAULT_TARGET_NODE (0)
+static int kmmscand_target_node = KMMSCAND_DEFAULT_TARGET_NODE;
static DEFINE_SPINLOCK(kmmscand_mm_lock);
static DEFINE_SPINLOCK(kmmscand_migrate_lock);
@@ -115,6 +124,107 @@ static bool kmmscand_eligible_srcnid(int nid)
return false;
}
+/*
+ * It is not yet clear what info will be needed to decide the
+ * target node, so keep the argument a void * for now.
+ */
+static int kmmscand_get_target_node(void *data)
+{
+ return kmmscand_target_node;
+}
+
+static int kmmscand_migrate_misplaced_folio_prepare(struct folio *folio,
+ struct vm_area_struct *vma, int node)
+{
+ if (folio_is_file_lru(folio)) {
+ /*
+ * Do not migrate file folios that are mapped in multiple
+ * processes with execute permissions as they are probably
+ * shared libraries.
+ *
+ * See folio_likely_mapped_shared() on possible imprecision
+ * when we cannot easily detect if a folio is shared.
+ */
+ if (vma && (vma->vm_flags & VM_EXEC) &&
+ folio_likely_mapped_shared(folio))
+ return -EACCES;
+ /*
+ * Do not migrate dirty folios as not all filesystems can move
+ * dirty folios in MIGRATE_ASYNC mode which is a waste of
+ * cycles.
+ */
+ if (folio_test_dirty(folio))
+ return -EAGAIN;
+ }
+
+ if (!folio_isolate_lru(folio))
+ return -EAGAIN;
+
+ return 0;
+}
+
+enum kmmscand_migration_err {
+ KMMSCAND_NULL_MM = 1,
+ KMMSCAND_INVALID_FOLIO,
+ KMMSCAND_INVALID_VMA,
+ KMMSCAND_INELIGIBLE_SRC_NODE,
+ KMMSCAND_SAME_SRC_DEST_NODE,
+ KMMSCAND_LRU_ISOLATION_ERR,
+};
+
+static int kmmscand_promote_folio(struct kmmscand_migrate_info *info)
+{
+ unsigned long pfn;
+ struct page *page = NULL;
+ struct folio *folio;
+ struct vm_area_struct *vma;
+ int ret;
+
+ int srcnid, destnid;
+
+ if (info->mm == NULL)
+ return KMMSCAND_NULL_MM;
+
+ folio = info->folio;
+
+ /* Re-validate: the folio may have been freed since it was scanned */
+ if (folio) {
+ pfn = folio_pfn(folio);
+ page = pfn_to_online_page(pfn);
+ }
+
+ if (!page || !folio || !folio_test_lru(folio) ||
+ folio_is_zone_device(folio) || !folio_mapped(folio))
+ return KMMSCAND_INVALID_FOLIO;
+
+ vma = info->vma;
+
+ /* XXX: Need to validate the vma here? vma_lookup() causes a 2x regression. */
+ if (!vma)
+ return KMMSCAND_INVALID_VMA;
+
+ srcnid = folio_nid(folio);
+
+ /* Do not try to promote pages from regular nodes */
+ if (!kmmscand_eligible_srcnid(srcnid))
+ return KMMSCAND_INELIGIBLE_SRC_NODE;
+
+ destnid = kmmscand_get_target_node(NULL);
+
+ if (srcnid == destnid)
+ return KMMSCAND_SAME_SRC_DEST_NODE;
+
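+ /*
+ * Hold a temporary ref across the isolation checks. On successful
+ * prepare, LRU isolation holds its own reference, which
+ * migrate_misplaced_folio() consumes, so the temporary ref can be
+ * dropped in both the success and failure cases.
+ */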
+ folio_get(folio);
+ ret = kmmscand_migrate_misplaced_folio_prepare(folio, vma, destnid);
+ if (ret) {
+ folio_put(folio);
+ return KMMSCAND_LRU_ISOLATION_ERR;
+ }
+ folio_put(folio);
+
+ return migrate_misplaced_folio(folio, vma, destnid);
+}
+
static bool folio_idle_clear_pte_refs_one(struct folio *folio,
struct vm_area_struct *vma,
unsigned long addr,
@@ -266,8 +376,74 @@ static void kmmscand_collect_mm_slot(struct kmmscand_mm_slot *mm_slot)
}
}
+static void kmmscand_cleanup_migration_list(struct mm_struct *mm)
+{
+ struct kmmscand_migrate_info *info, *tmp;
+
+start_again:
+ spin_lock(&kmmscand_migrate_lock);
+ if (!list_empty(&kmmscand_migrate_list.migrate_head)) {
+
+ if (mm == READ_ONCE(kmmscand_cur_migrate_mm)) {
+ /* A folio in this mm is being migrated; wait and retry. */
+ WRITE_ONCE(kmmscand_migration_list_dirty, true);
+ spin_unlock(&kmmscand_migrate_lock);
+ goto start_again;
+ }
+
+ list_for_each_entry_safe(info, tmp, &kmmscand_migrate_list.migrate_head,
+ migrate_node) {
+ if (info->mm == mm) {
+ info->mm = NULL;
+ WRITE_ONCE(kmmscand_migration_list_dirty, true);
+ }
+ }
+ }
+ spin_unlock(&kmmscand_migrate_lock);
+}
+
static void kmmscand_migrate_folio(void)
{
+ int ret = 0;
+ struct kmmscand_migrate_info *info, *tmp;
+
+ spin_lock(&kmmscand_migrate_lock);
+
+ if (!list_empty(&kmmscand_migrate_list.migrate_head)) {
+ list_for_each_entry_safe(info, tmp, &kmmscand_migrate_list.migrate_head,
+ migrate_node) {
+ if (READ_ONCE(kmmscand_migration_list_dirty)) {
+ kmmscand_migration_list_dirty = false;
+ list_del(&info->migrate_node);
+ /*
+ * Do not try to migrate this entry because its mm may
+ * have vanished underneath us.
+ */
+ kfree(info);
+ spin_unlock(&kmmscand_migrate_lock);
+ goto dirty_list_handled;
+ }
+
+ list_del(&info->migrate_node);
+ /* Note the mm of the folio entry being migrated */
+ WRITE_ONCE(kmmscand_cur_migrate_mm, info->mm);
+ spin_unlock(&kmmscand_migrate_lock);
+
+ if (info->mm)
+ ret = kmmscand_promote_folio(info);
+
+ kfree(info);
+
+ spin_lock(&kmmscand_migrate_lock);
+ /* Migration done; reset the noted mm */
+ WRITE_ONCE(kmmscand_cur_migrate_mm, NULL);
+ spin_unlock(&kmmscand_migrate_lock);
+dirty_list_handled:
+ /* TBD: cond_resched(); */
+ spin_lock(&kmmscand_migrate_lock);
+ }
+ }
+ spin_unlock(&kmmscand_migrate_lock);
}
static unsigned long kmmscand_scan_mm_slot(void)
@@ -450,6 +626,8 @@ void __kmmscand_exit(struct mm_struct *mm)
spin_unlock(&kmmscand_mm_lock);
+ kmmscand_cleanup_migration_list(mm);
+
if (free) {
mm_slot_free(kmmscand_slot_cache, mm_slot);
mmdrop(mm);
--
2.39.3
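
For completeness, a minimal userspace sketch to exercise the
promotion path (illustrative only: it assumes libnuma is installed,
node 1 is a CPU-less slowtier node, and the scanner is enabled):

/* Build: gcc -o promote_test promote_test.c -lnuma */
#include <numa.h>
#include <numaif.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define NPAGES		1024
#define SLOW_NODE	1	/* assumed slowtier node */

int main(void)
{
	long psz = sysconf(_SC_PAGESIZE);
	void *pages[NPAGES];
	int status[NPAGES], i, iter;
	char *buf;

	if (numa_available() < 0)
		return 1;

	/* Allocate anonymous memory backed by the slowtier node. */
	buf = numa_alloc_onnode(NPAGES * psz, SLOW_NODE);
	if (!buf)
		return 1;

	for (i = 0; i < NPAGES; i++)
		pages[i] = buf + (long)i * psz;

	for (iter = 0; iter < 60; iter++) {
		/* Touch every page so the PTE A bits get set. */
		memset(buf, iter, NPAGES * psz);
		sleep(1);

		/* With nodes == NULL, move_pages() reports placement. */
		move_pages(0, NPAGES, pages, NULL, status, 0);
		printf("iter %d: page 0 on node %d\n", iter, status[0]);
	}

	numa_free(buf, NPAGES * psz);
	return 0;
}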