From: Vlastimil Babka <vbabka@suse.cz>
To: linux-mm@kvack.org
Cc: linux-kernel@vger.kernel.org,
	Andrew Morton <akpm@linux-foundation.org>,
	Hugh Dickins <hughd@google.com>,
	Andrea Arcangeli <aarcange@redhat.com>,
	"Kirill A. Shutemov" <kirill.shutemov@linux.intel.com>,
	Rik van Riel <riel@redhat.com>, Mel Gorman <mgorman@suse.de>,
	Michal Hocko <mhocko@suse.cz>,
	Ebru Akagunduz <ebru.akagunduz@gmail.com>,
	Alex Thorlton <athorlton@sgi.com>,
	David Rientjes <rientjes@google.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Ingo Molnar <mingo@kernel.org>, Vlastimil Babka <vbabka@suse.cz>
Subject: [RFC 5/6] mm, thp: wakeup khugepaged when THP allocation fails
Date: Mon, 23 Feb 2015 13:58:41 +0100
Message-ID: <1424696322-21952-6-git-send-email-vbabka@suse.cz>
In-Reply-To: <1424696322-21952-1-git-send-email-vbabka@suse.cz>

The previous patch took THP collapse scanning away from khugepaged,
leaving it only to maintain the thp_avail_nodes nodemask through
heavyweight attempts to make a hugepage available on nodes where one
could not be allocated from process context, whether through page fault
or collapse scanning.
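
For illustration, a minimal sketch of the tracking scheme (simplified,
not a verbatim excerpt of the hunks below; thp_try_alloc() and
thp_recheck_node() are made-up names, and alloc_pages_node() stands in
for the real allocation helpers):

  #include <linux/gfp.h>
  #include <linux/huge_mm.h>
  #include <linux/mm.h>
  #include <linux/nodemask.h>

  /* A set bit means a THP allocation is expected to succeed there. */
  static nodemask_t thp_avail_nodes = NODE_MASK_ALL;

  /* Cheap attempt from process context; clear the bit on failure. */
  static struct page *thp_try_alloc(gfp_t gfp, int nid)
  {
          struct page *hpage;

          hpage = alloc_pages_node(nid, gfp, HPAGE_PMD_ORDER);
          if (!hpage)
                  node_clear(nid, thp_avail_nodes);
          return hpage;
  }

  /* Heavyweight retry in khugepaged; set the bit back on success. */
  static void thp_recheck_node(gfp_t gfp, int nid)
  {
          struct page *hpage = alloc_pages_node(nid, gfp,
                                                HPAGE_PMD_ORDER);

          if (hpage) {
                  node_set(nid, thp_avail_nodes);
                  put_page(hpage);
          }
  }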

This patch improves coordination between failed THP allocations and
khugepaged by repurposing the khugepaged_wait infrastructure for
wakeups. Instead of sleeping periodically and checking for work,
khugepaged now sleeps at least alloc_sleep_millisecs after its last
allocation attempt, to avoid excessive activity, and then responds
immediately to a failed THP allocation via a wakeup on khugepaged_wait.
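
Condensed, the resulting control flow looks roughly like this (a
sketch using the names from the hunks below, not a verbatim excerpt;
khugepaged_check_nodes() and khugepaged_wait_event() are defined in
the patch, and details like khugepaged_enabled() are omitted):

  #include <linux/freezer.h>
  #include <linux/jiffies.h>
  #include <linux/kthread.h>
  #include <linux/wait.h>

  static DECLARE_WAIT_QUEUE_HEAD(khugepaged_wait);
  static unsigned int khugepaged_alloc_sleep_millisecs = 60000;

  static bool khugepaged_check_nodes(void);  /* defined in the patch */
  static bool khugepaged_wait_event(void);   /* defined in the patch */

  /* Failed THP allocations do:
   *         node_clear(nid, thp_avail_nodes);
   *         wake_up_interruptible(&khugepaged_wait);
   */

  static int khugepaged(void *none)
  {
          while (!kthread_should_stop()) {
                  /* True if an allocation was actually attempted. */
                  bool did_alloc = khugepaged_check_nodes();

                  /* Rate limit: sleep at least alloc_sleep_millisecs
                   * after the last allocation attempt. */
                  if (did_alloc)
                          wait_event_freezable_timeout(khugepaged_wait,
                                  kthread_should_stop(),
                                  msecs_to_jiffies(
                                      khugepaged_alloc_sleep_millisecs));

                  /* Then sleep until a failed allocation wakes us. */
                  wait_event_freezable(khugepaged_wait,
                                       khugepaged_wait_event());
          }
          return 0;
  }

The immediate wakeup replaces the fixed scan_sleep_millisecs polling,
while the timeout still bounds how often the heavyweight attempts run.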

Signed-off-by: Vlastimil Babka <vbabka@suse.cz>
---
 mm/huge_memory.c | 77 ++++++++++++++++++++++++++++++++------------------------
 1 file changed, 44 insertions(+), 33 deletions(-)

diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 1c92edc..9172c7f 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -158,9 +158,6 @@ static int start_khugepaged(void)
 			khugepaged_thread = NULL;
 		}
 
-		if (!list_empty(&khugepaged_scan.mm_head))
-			wake_up_interruptible(&khugepaged_wait);
-
 		set_recommended_min_free_kbytes();
 	} else if (khugepaged_thread) {
 		kthread_stop(khugepaged_thread);
@@ -430,7 +427,6 @@ static ssize_t scan_sleep_millisecs_store(struct kobject *kobj,
 		return -EINVAL;
 
 	khugepaged_scan_sleep_millisecs = msecs;
-	wake_up_interruptible(&khugepaged_wait);
 
 	return count;
 }
@@ -781,8 +777,10 @@ fault_alloc_hugepage(struct vm_area_struct *vma, unsigned long haddr)
 	gfp = alloc_hugepage_gfpmask(transparent_hugepage_defrag(vma));
 	hpage = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
 
-	if (!hpage)
+	if (!hpage) {
 		node_clear(nid, thp_avail_nodes);
+		wake_up_interruptible(&khugepaged_wait);
+	}
 
 	return hpage;
 }
@@ -2054,8 +2052,6 @@ int __khugepaged_enter(struct mm_struct *mm)
 	spin_unlock(&khugepaged_mm_lock);
 
 	atomic_inc(&mm->mm_count);
-	if (wakeup)
-		wake_up_interruptible(&khugepaged_wait);
 
 	return 0;
 }
@@ -2252,12 +2248,6 @@ static void __collapse_huge_page_copy(pte_t *pte, struct page *page,
 	}
 }
 
-static void khugepaged_alloc_sleep(void)
-{
-	wait_event_freezable_timeout(khugepaged_wait, false,
-			msecs_to_jiffies(khugepaged_alloc_sleep_millisecs));
-}
-
 static bool khugepaged_scan_abort(int nid, int *node_load)
 {
 	int i;
@@ -2358,6 +2348,7 @@ static struct page
 		count_vm_event(THP_COLLAPSE_ALLOC_FAILED);
 		*hpage = ERR_PTR(-ENOMEM);
 		node_clear(node, thp_avail_nodes);
+		wake_up_interruptible(&khugepaged_wait);
 		return NULL;
 	}
 
@@ -2365,7 +2356,7 @@ static struct page
 	return *hpage;
 }
 
-/* Return true, if THP should be allocatable on at least one node */
+/* Return true if we tried to allocate on at least one node */
 static bool khugepaged_check_nodes(void)
 {
 	bool ret = false;
@@ -2375,15 +2366,14 @@ static bool khugepaged_check_nodes(void)
 
 	for_each_online_node(nid) {
 		if (node_isset(nid, thp_avail_nodes)) {
-			ret = true;
 			continue;
 		}
 
 		newpage = alloc_hugepage_node(gfp, nid);
+		ret = true;
 
 		if (newpage) {
 			node_set(nid, thp_avail_nodes);
-			ret = true;
 			put_page(newpage);
 		}
 		if (unlikely(kthread_should_stop() || freezing(current)))
@@ -2393,6 +2383,19 @@ static bool khugepaged_check_nodes(void)
 	return ret;
 }
 
+/* Return true if hugepages are available on at least one node */
+static bool check_thp_avail(void)
+{
+	int nid;
+
+	for_each_online_node(nid) {
+		if (node_isset(nid, thp_avail_nodes))
+			return true;
+	}
+
+	return false;
+}
+
 static bool hugepage_vma_check(struct vm_area_struct *vma)
 {
 	if ((!(vma->vm_flags & VM_HUGEPAGE) && !khugepaged_always()) ||
@@ -2656,6 +2659,9 @@ bool khugepaged_scan_mm(struct mm_struct *mm, unsigned long *start, long pages)
 	int ret;
 	int *node_load;
 
+	if (!check_thp_avail())
+		return false;
+
 	//TODO: #ifdef this for NUMA only
 	node_load = kmalloc(sizeof(int) * MAX_NUMNODES,
 						GFP_KERNEL | GFP_NOWAIT);
@@ -2706,30 +2712,36 @@ out:
 	return true;
 }
 
-static int khugepaged_has_work(void)
+static bool khugepaged_has_work(void)
 {
-	return !list_empty(&khugepaged_scan.mm_head) &&
-		khugepaged_enabled();
+	int nid;
+
+	for_each_online_node(nid) {
+		if (!node_isset(nid, thp_avail_nodes))
+			return true;
+	}
+
+	return false;
 }
 
-static int khugepaged_wait_event(void)
+static bool khugepaged_wait_event(void)
 {
-	return !list_empty(&khugepaged_scan.mm_head) ||
-		kthread_should_stop();
+	return khugepaged_has_work() || kthread_should_stop();
 }
 
-static void khugepaged_wait_work(void)
+static void khugepaged_wait_work(bool did_alloc)
 {
+	unsigned int msec_sleep;
+
 	try_to_freeze();
 
-	if (khugepaged_has_work()) {
-		if (!khugepaged_scan_sleep_millisecs)
-			return;
+	if (did_alloc) {
+		msec_sleep = READ_ONCE(khugepaged_alloc_sleep_millisecs);
 
-		wait_event_freezable_timeout(khugepaged_wait,
+		if (msec_sleep)
+			wait_event_freezable_timeout(khugepaged_wait,
 					     kthread_should_stop(),
-			msecs_to_jiffies(khugepaged_scan_sleep_millisecs));
-		return;
+						msecs_to_jiffies(msec_sleep));
 	}
 
 	if (khugepaged_enabled())
@@ -2739,15 +2751,14 @@ static void khugepaged_wait_work(void)
 static int khugepaged(void *none)
 {
 	struct mm_slot *mm_slot;
+	bool did_alloc;
 
 	set_freezable();
 	set_user_nice(current, MAX_NICE);
 
 	while (!kthread_should_stop()) {
-		if (khugepaged_check_nodes())
-			khugepaged_wait_work();
-		else
-			khugepaged_alloc_sleep();
+		did_alloc = khugepaged_check_nodes();
+		khugepaged_wait_work(did_alloc);
 	}
 
 	spin_lock(&khugepaged_mm_lock);
-- 
2.1.4


Thread overview: 23+ messages
2015-02-23 12:58 [RFC 0/6] the big khugepaged redesign Vlastimil Babka
2015-02-23 12:58 ` [RFC 1/6] mm, thp: stop preallocating hugepages in khugepaged Vlastimil Babka
2015-02-23 12:58 ` [RFC 2/6] mm, thp: make khugepaged check for THP allocability before scanning Vlastimil Babka
2015-02-23 12:58 ` [RFC 3/6] mm, thp: try fault allocations only if we expect them to succeed Vlastimil Babka
2015-02-23 12:58 ` [RFC 4/6] mm, thp: move collapsing from khugepaged to task_work context Vlastimil Babka
2015-02-23 14:25   ` Peter Zijlstra
2015-02-23 12:58 ` [RFC 5/6] mm, thp: wakeup khugepaged when THP allocation fails Vlastimil Babka [this message]
2015-02-23 12:58 ` [RFC 6/6] mm, thp: remove no longer needed khugepaged code Vlastimil Babka
2015-02-23 21:03 ` [RFC 0/6] the big khugepaged redesign Andi Kleen
2015-02-23 22:46 ` Davidlohr Bueso
2015-02-23 22:56   ` Andrew Morton
2015-02-23 22:58     ` Sasha Levin
2015-02-24 10:32     ` Vlastimil Babka
2015-02-24 11:24       ` Andrea Arcangeli
2015-02-24 11:45         ` Andrea Arcangeli
2015-02-25 12:42         ` Vlastimil Babka
2015-03-05 16:30       ` Vlastimil Babka
2015-03-05 16:52         ` Andres Freund
2015-03-05 17:01           ` Vlastimil Babka
2015-03-05 17:07             ` Andres Freund
2015-03-06  0:21         ` Andres Freund
2015-03-06  7:50           ` Vlastimil Babka
2015-03-09  3:17   ` Vlastimil Babka
