From: Mel Gorman <mgorman@techsingularity.net>
To: Linux-MM <linux-mm@kvack.org>
Cc: David Rientjes <rientjes@google.com>,
Andrea Arcangeli <aarcange@redhat.com>,
Linus Torvalds <torvalds@linux-foundation.org>,
Michal Hocko <mhocko@kernel.org>,
ying.huang@intel.com, kirill@shutemov.name,
Andrew Morton <akpm@linux-foundation.org>,
Linux List Kernel Mailing <linux-kernel@vger.kernel.org>
Subject: [PATCH 13/14] mm, compaction: Capture a page under direct compaction
Date: Fri, 14 Dec 2018 23:05:45 +0000
Message-ID: <20181214230545.GD29005@techsingularity.net>
In-Reply-To: <20181214230310.572-1-mgorman@techsingularity.net>

Compaction is inherently race-prone: a suitable page freed during
compaction can be allocated by any parallel task. This patch uses a
capture_control structure to isolate a page immediately when it is freed
by a direct compactor in the slow path of the page allocator, so that the
task that did the compaction work gets first use of the page instead of
losing it to a parallel allocation.
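
In outline, the flow is as follows (a condensed sketch of the diff below;
the capture guard, locking, freepage accounting and the !CONFIG_COMPACTION
stubs are omitted):

        /* Direct compactor, compact_zone_order() */
        struct capture_control capc = {
                .cc = &cc,
                .page = NULL,
        };

        current->capture_control = &capc;
        ret = compact_zone(&cc, &capc); /* breaks out once capc.page is set */
        *capture = capc.page;
        current->capture_control = NULL;

        /* Free path, __free_one_page() */
        if (compaction_capture(task_capc(zone), page, order))
                return;         /* page goes to the compactor, no buddy merge */

        /* Allocator slow path, __alloc_pages_direct_compact() */
        if (page)
                prep_new_page(page, order, gfp_mask, alloc_flags);

task_capc() only hands back the capture_control when the freeing context
is a direct compactor (not a kernel thread) working on that zone that has
not already captured a page, and compaction_capture() only accepts a page
of exactly the requested order.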

                                    4.20.0-rc6             4.20.0-rc6
                                 findfree-v1r8           capture-v1r8
Amean     fault-both-3      2911.07 (   0.00%)     2898.64 (   0.43%)
Amean     fault-both-5      4692.96 (   0.00%)     4296.58 (   8.45%)
Amean     fault-both-7      6449.17 (   0.00%)     6203.55 (   3.81%)
Amean     fault-both-12     9778.40 (   0.00%)     9309.13 (   4.80%)
Amean     fault-both-18    11756.92 (   0.00%)     6245.27 *  46.88%*
Amean     fault-both-24    13675.93 (   0.00%)    15083.42 ( -10.29%)
Amean     fault-both-30    17195.41 (   0.00%)    11498.60 *  33.13%*
Amean     fault-both-32    18150.08 (   0.00%)     9684.82 *  46.64%*

As expected, the biggest reductions in latency come when multiple
compaction instances would previously have competed for the same
pageblocks. THP allocation success rates are also slightly higher:

                                4.20.0-rc6             4.20.0-rc6
                             findfree-v1r8           capture-v1r8
Percentage huge-1         0.00 (   0.00%)        0.00 (   0.00%)
Percentage huge-3        97.63 (   0.00%)       98.12 (   0.49%)
Percentage huge-5        96.11 (   0.00%)       98.83 (   2.84%)
Percentage huge-7        95.44 (   0.00%)       97.99 (   2.68%)
Percentage huge-12       95.36 (   0.00%)       99.00 (   3.82%)
Percentage huge-18       95.32 (   0.00%)       98.92 (   3.78%)
Percentage huge-24       95.13 (   0.00%)       99.08 (   4.15%)
Percentage huge-30       95.53 (   0.00%)       99.22 (   3.86%)
Percentage huge-32       94.94 (   0.00%)       98.97 (   4.25%)

Compaction scan rates are also reduced:

                              findfree-v1r8   capture-v1r8
Compaction migrate scanned         27634284       19002941
Compaction free scanned            55279519       46395714
Signed-off-by: Mel Gorman <mgorman@techsingularity.net>
---
include/linux/compaction.h | 3 ++-
include/linux/sched.h | 4 ++++
kernel/sched/core.c | 3 +++
mm/compaction.c | 31 +++++++++++++++++++------
mm/internal.h | 9 +++++++
mm/page_alloc.c | 58 ++++++++++++++++++++++++++++++++++++++++++----
6 files changed, 96 insertions(+), 12 deletions(-)
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 68250a57aace..b0d530cf46d1 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -95,7 +95,8 @@ extern int sysctl_compact_unevictable_allowed;
extern int fragmentation_index(struct zone *zone, unsigned int order);
extern enum compact_result try_to_compact_pages(gfp_t gfp_mask,
unsigned int order, unsigned int alloc_flags,
- const struct alloc_context *ac, enum compact_priority prio);
+ const struct alloc_context *ac, enum compact_priority prio,
+ struct page **page);
extern void reset_isolation_suitable(pg_data_t *pgdat);
extern enum compact_result compaction_suitable(struct zone *zone, int order,
unsigned int alloc_flags, int classzone_idx);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 8681905589f0..f1758ef4d1e2 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -47,6 +47,7 @@ struct pid_namespace;
struct pipe_inode_info;
struct rcu_node;
struct reclaim_state;
+struct capture_control;
struct robust_list_head;
struct sched_attr;
struct sched_param;
@@ -964,6 +965,9 @@ struct task_struct {
struct io_context *io_context;
+#ifdef CONFIG_COMPACTION
+ struct capture_control *capture_control;
+#endif
/* Ptrace state: */
unsigned long ptrace_message;
kernel_siginfo_t *last_siginfo;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5f41fd2e0b6b..cd6d816aa40b 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2177,6 +2177,9 @@ static void __sched_fork(unsigned long clone_flags, struct task_struct *p)
INIT_HLIST_HEAD(&p->preempt_notifiers);
#endif
+#ifdef CONFIG_COMPACTION
+ p->capture_control = NULL;
+#endif
init_numa_balancing(clone_flags, p);
}
diff --git a/mm/compaction.c b/mm/compaction.c
index ba3035dcc548..39d33b6d1172 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1949,7 +1949,8 @@ bool compaction_zonelist_suitable(struct alloc_context *ac, int order,
return false;
}
-static enum compact_result compact_zone(struct compact_control *cc)
+static enum compact_result
+compact_zone(struct compact_control *cc, struct capture_control *capc)
{
enum compact_result ret;
unsigned long start_pfn = cc->zone->zone_start_pfn;
@@ -2086,6 +2087,11 @@ static enum compact_result compact_zone(struct compact_control *cc)
}
}
+ /* Stop if a page has been captured */
+ if (capc && capc->page) {
+ ret = COMPACT_SUCCESS;
+ break;
+ }
}
out:
@@ -2119,7 +2125,8 @@ static enum compact_result compact_zone(struct compact_control *cc)
static enum compact_result compact_zone_order(struct zone *zone, int order,
gfp_t gfp_mask, enum compact_priority prio,
- unsigned int alloc_flags, int classzone_idx)
+ unsigned int alloc_flags, int classzone_idx,
+ struct page **capture)
{
enum compact_result ret;
struct compact_control cc = {
@@ -2139,14 +2146,24 @@ static enum compact_result compact_zone_order(struct zone *zone, int order,
.ignore_skip_hint = (prio == MIN_COMPACT_PRIORITY),
.ignore_block_suitable = (prio == MIN_COMPACT_PRIORITY)
};
+ struct capture_control capc = {
+ .cc = &cc,
+ .page = NULL,
+ };
+
+ if (capture)
+ current->capture_control = &capc;
INIT_LIST_HEAD(&cc.freepages);
INIT_LIST_HEAD(&cc.migratepages);
- ret = compact_zone(&cc);
+ ret = compact_zone(&cc, &capc);
VM_BUG_ON(!list_empty(&cc.freepages));
VM_BUG_ON(!list_empty(&cc.migratepages));
+ *capture = capc.page;
+ current->capture_control = NULL;
+
return ret;
}
@@ -2164,7 +2181,7 @@ int sysctl_extfrag_threshold = 500;
*/
enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
unsigned int alloc_flags, const struct alloc_context *ac,
- enum compact_priority prio)
+ enum compact_priority prio, struct page **capture)
{
int may_perform_io = gfp_mask & __GFP_IO;
struct zoneref *z;
@@ -2192,7 +2209,7 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order,
}
status = compact_zone_order(zone, order, gfp_mask, prio,
- alloc_flags, ac_classzone_idx(ac));
+ alloc_flags, ac_classzone_idx(ac), capture);
rc = max(status, rc);
/* The allocation should succeed, stop compacting */
@@ -2260,7 +2277,7 @@ static void compact_node(int nid)
INIT_LIST_HEAD(&cc.freepages);
INIT_LIST_HEAD(&cc.migratepages);
- compact_zone(&cc);
+ compact_zone(&cc, NULL);
VM_BUG_ON(!list_empty(&cc.freepages));
VM_BUG_ON(!list_empty(&cc.migratepages));
@@ -2402,7 +2419,7 @@ static void kcompactd_do_work(pg_data_t *pgdat)
if (kthread_should_stop())
return;
- status = compact_zone(&cc);
+ status = compact_zone(&cc, NULL);
if (status == COMPACT_SUCCESS) {
compaction_defer_reset(zone, cc.order, false);
diff --git a/mm/internal.h b/mm/internal.h
index 983cb975545f..08fbb9d157c0 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -207,6 +207,15 @@ struct compact_control {
bool contended; /* Signal lock or sched contention */
};
+/*
+ * Used in direct compaction when a page should be taken from the freelists
+ * immediately when one is created during the free path.
+ */
+struct capture_control {
+ struct compact_control *cc;
+ struct page *page;
+};
+
unsigned long
isolate_freepages_range(struct compact_control *cc,
unsigned long start_pfn, unsigned long end_pfn);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c7b80e62bfd9..4e0cf4dbda5b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -753,6 +753,41 @@ static inline int page_is_buddy(struct page *page, struct page *buddy,
return 0;
}
+#ifdef CONFIG_COMPACTION
+static inline struct capture_control *task_capc(struct zone *zone)
+{
+ struct capture_control *capc = current->capture_control;
+
+ return capc &&
+ !(current->flags & PF_KTHREAD) &&
+ !capc->page &&
+ capc->cc->zone == zone &&
+ capc->cc->direct_compaction ? capc : NULL;
+}
+
+static inline bool
+compaction_capture(struct capture_control *capc, struct page *page, int order)
+{
+ if (!capc || order != capc->cc->order)
+ return false;
+
+ capc->page = page;
+ return true;
+}
+
+#else
+static inline struct capture_control *task_capc(struct zone *zone)
+{
+ return NULL;
+}
+
+static inline bool
+compaction_capture(struct capture_control *capc, struct page *page, int order)
+{
+ return false;
+}
+#endif /* CONFIG_COMPACTION */
+
/*
* Freeing function for a buddy system allocator.
*
@@ -786,6 +821,7 @@ static inline void __free_one_page(struct page *page,
unsigned long uninitialized_var(buddy_pfn);
struct page *buddy;
unsigned int max_order;
+ struct capture_control *capc = task_capc(zone);
max_order = min_t(unsigned int, MAX_ORDER, pageblock_order + 1);
@@ -801,6 +837,12 @@ static inline void __free_one_page(struct page *page,
continue_merging:
while (order < max_order - 1) {
+ if (compaction_capture(capc, page, order)) {
+ if (likely(!is_migrate_isolate(migratetype)))
+ __mod_zone_freepage_state(zone, -(1 << order),
+ migratetype);
+ return;
+ }
buddy_pfn = __find_buddy_pfn(pfn, order);
buddy = page + (buddy_pfn - pfn);
@@ -3779,7 +3821,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
unsigned int alloc_flags, const struct alloc_context *ac,
enum compact_priority prio, enum compact_result *compact_result)
{
- struct page *page;
+ struct page *page = NULL;
unsigned long pflags;
unsigned int noreclaim_flag;
@@ -3790,13 +3832,15 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
noreclaim_flag = memalloc_noreclaim_save();
*compact_result = try_to_compact_pages(gfp_mask, order, alloc_flags, ac,
- prio);
+ prio, &page);
memalloc_noreclaim_restore(noreclaim_flag);
psi_memstall_leave(&pflags);
- if (*compact_result <= COMPACT_INACTIVE)
+ if (*compact_result <= COMPACT_INACTIVE) {
+ WARN_ON_ONCE(page);
return NULL;
+ }
/*
* At least in one zone compaction wasn't deferred or skipped, so let's
@@ -3804,7 +3848,13 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
*/
count_vm_event(COMPACTSTALL);
- page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
+ /* Prep a captured page if available */
+ if (page)
+ prep_new_page(page, order, gfp_mask, alloc_flags);
+
+ /* Try get a page from the freelist if available */
+ if (!page)
+ page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
if (page) {
struct zone *zone = page_zone(page);
--
2.16.4
--
Mel Gorman
SUSE Labs