[PATCH 1/3] mm,oom: Move last second allocation to inside the OOM killer.

linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed

From: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
To: akpm@linux-foundation.org
Cc: linux-mm@kvack.org,
	Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>,
	Andrea Arcangeli <aarcange@redhat.com>,
	Johannes Weiner <hannes@cmpxchg.org>,
	Michal Hocko <mhocko@suse.com>
Subject: [PATCH 1/3] mm,oom: Move last second allocation to inside the OOM killer.
Date: Sat, 25 Nov 2017 19:52:47 +0900	[thread overview]
Message-ID: <1511607169-5084-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp> (raw)

Since selecting an OOM victim can take quite some time and the OOM
situation might be resolved meanwhile, sometimes doing last second
allocation attempt after selecting an OOM victim can succeed.

Therefore, this patch moves last second allocation attempt to after
selecting an OOM victim. This patch is expected to reduce the time
window for potentially pre-mature OOM killing considerably.

Signed-off-by: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>
Suggested-by: Michal Hocko <mhocko@suse.com>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Johannes Weiner <hannes@cmpxchg.org>
---
 include/linux/oom.h | 13 +++++++++++++
 mm/oom_kill.c       | 14 ++++++++++++++
 mm/page_alloc.c     | 44 ++++++++++++++++++++++++++------------------
 3 files changed, 53 insertions(+), 18 deletions(-)

diff --git a/include/linux/oom.h b/include/linux/oom.h
index 01c91d8..27cd36b 100644
--- a/include/linux/oom.h
+++ b/include/linux/oom.h
@@ -14,6 +14,8 @@
 struct notifier_block;
 struct mem_cgroup;
 struct task_struct;
+struct alloc_context;
+struct page;
 
 /*
  * Details of the page allocation that triggered the oom killer that are used to
@@ -38,6 +40,15 @@ struct oom_control {
 	 */
 	const int order;
 
+	/* Context for really last second allocation attempt. */
+	const struct alloc_context *ac;
+	/*
+	 * Set by the OOM killer if ac != NULL and last second allocation
+	 * attempt succeeded. If ac != NULL, the caller must check for
+	 * page != NULL.
+	 */
+	struct page *page;
+
 	/* Used by oom implementation, do not set */
 	unsigned long totalpages;
 	struct task_struct *chosen;
@@ -102,6 +113,8 @@ extern unsigned long oom_badness(struct task_struct *p,
 
 extern struct task_struct *find_lock_task_mm(struct task_struct *p);
 
+extern struct page *alloc_pages_before_oomkill(const struct oom_control *oc);
+
 /* sysctls */
 extern int sysctl_oom_dump_tasks;
 extern int sysctl_oom_kill_allocating_task;
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index c957be3..348ec5a 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -1061,6 +1061,9 @@ bool out_of_memory(struct oom_control *oc)
 	if (!is_memcg_oom(oc) && sysctl_oom_kill_allocating_task &&
 	    current->mm && !oom_unkillable_task(current, NULL, oc->nodemask) &&
 	    current->signal->oom_score_adj != OOM_SCORE_ADJ_MIN) {
+		oc->page = alloc_pages_before_oomkill(oc);
+		if (oc->page)
+			return true;
 		get_task_struct(current);
 		oc->chosen = current;
 		oom_kill_process(oc, "Out of memory (oom_kill_allocating_task)");
@@ -1068,6 +1071,17 @@ bool out_of_memory(struct oom_control *oc)
 	}
 
 	select_bad_process(oc);
+	/*
+	 * Try really last second allocation attempt after we selected an OOM
+	 * victim, for somebody might have managed to free memory while we were
+	 * selecting an OOM victim which can take quite some time.
+	 */
+	oc->page = alloc_pages_before_oomkill(oc);
+	if (oc->page) {
+		if (oc->chosen && oc->chosen != (void *)-1UL)
+			put_task_struct(oc->chosen);
+		return true;
+	}
 	/* Found nothing?!?! Either we hang forever, or we panic. */
 	if (!oc->chosen && !is_sysrq_oom(oc) && !is_memcg_oom(oc)) {
 		dump_header(oc, NULL);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 48b5b01..7fa95ea 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -3325,8 +3325,9 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...)
 		.memcg = NULL,
 		.gfp_mask = gfp_mask,
 		.order = order,
+		.ac = ac,
 	};
-	struct page *page;
+	struct page *page = NULL;
 
 	*did_some_progress = 0;
 
@@ -3340,19 +3341,6 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...)
 		return NULL;
 	}
 
-	/*
-	 * Go through the zonelist yet one more time, keep very high watermark
-	 * here, this is only to catch a parallel oom killing, we must fail if
-	 * we're still under heavy pressure. But make sure that this reclaim
-	 * attempt shall not depend on __GFP_DIRECT_RECLAIM && !__GFP_NORETRY
-	 * allocation which will never fail due to oom_lock already held.
-	 */
-	page = get_page_from_freelist((gfp_mask | __GFP_HARDWALL) &
-				      ~__GFP_DIRECT_RECLAIM, order,
-				      ALLOC_WMARK_HIGH|ALLOC_CPUSET, ac);
-	if (page)
-		goto out;
-
 	/* Coredumps can quickly deplete all memory reserves */
 	if (current->flags & PF_DUMPCORE)
 		goto out;
@@ -3387,16 +3375,18 @@ void warn_alloc(gfp_t gfp_mask, nodemask_t *nodemask, const char *fmt, ...)
 		goto out;
 
 	/* Exhausted what can be done so it's blamo time */
-	if (out_of_memory(&oc) || WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) {
+	if (out_of_memory(&oc)) {
+		*did_some_progress = 1;
+		page = oc.page;
+	} else if (WARN_ON_ONCE(gfp_mask & __GFP_NOFAIL)) {
 		*did_some_progress = 1;
 
 		/*
 		 * Help non-failing allocations by giving them access to memory
 		 * reserves
 		 */
-		if (gfp_mask & __GFP_NOFAIL)
-			page = __alloc_pages_cpuset_fallback(gfp_mask, order,
-					ALLOC_NO_WATERMARKS, ac);
+		page = __alloc_pages_cpuset_fallback(gfp_mask, order,
+						     ALLOC_NO_WATERMARKS, ac);
 	}
 out:
 	mutex_unlock(&oom_lock);
@@ -4156,6 +4146,24 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask)
 	return page;
 }
 
+struct page *alloc_pages_before_oomkill(const struct oom_control *oc)
+{
+	/*
+	 * Go through the zonelist yet one more time, keep very high watermark
+	 * here, this is only to catch a parallel oom killing, we must fail if
+	 * we're still under heavy pressure. But make sure that this reclaim
+	 * attempt shall not depend on __GFP_DIRECT_RECLAIM && !__GFP_NORETRY
+	 * allocation which will never fail due to oom_lock already held.
+	 */
+	int alloc_flags = ALLOC_CPUSET | ALLOC_WMARK_HIGH;
+	gfp_t gfp_mask = oc->gfp_mask | __GFP_HARDWALL;
+
+	if (!oc->ac)
+		return NULL;
+	gfp_mask &= ~__GFP_DIRECT_RECLAIM;
+	return get_page_from_freelist(gfp_mask, oc->order, alloc_flags, oc->ac);
+}
+
 static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order,
 		int preferred_nid, nodemask_t *nodemask,
 		struct alloc_context *ac, gfp_t *alloc_mask,
-- 
1.8.3.1

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

next             reply	other threads:[~2017-11-25 10:53 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-11-25 10:52 Tetsuo Handa [this message]
2017-11-25 10:52 ` [PATCH 2/3] mm,oom: Use ALLOC_OOM for OOM victim's last second allocation Tetsuo Handa
2017-11-25 10:52 ` [PATCH 3/3] mm,oom: Remove oom_lock serialization from the OOM reaper Tetsuo Handa
2017-11-28 13:04 ` [PATCH 1/3] mm,oom: Move last second allocation to inside the OOM killer Michal Hocko
2017-12-01 14:33 ` Johannes Weiner
2017-12-01 14:46   ` Michal Hocko
2017-12-01 14:56     ` Johannes Weiner
2017-12-01 15:17       ` Michal Hocko
2017-12-01 15:57         ` Johannes Weiner
2017-12-01 16:38           ` Michal Hocko
2017-12-05 10:46             ` Johannes Weiner
2017-12-05 13:02               ` Michal Hocko
2017-12-05 13:17                 ` Tetsuo Handa
2017-12-05 13:42                   ` Michal Hocko
2017-12-05 14:07                     ` Tetsuo Handa
2017-12-05 14:30                       ` Michal Hocko
2017-12-01 16:52           ` Tetsuo Handa

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1511607169-5084-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp \
    --to=penguin-kernel@i-love.sakura.ne.jp \
    --cc=aarcange@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=hannes@cmpxchg.org \
    --cc=linux-mm@kvack.org \
    --cc=mhocko@suse.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox