linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Johannes Weiner <hannes@cmpxchg.org>
To: Dave Chinner <david@fromorbit.com>
Cc: Tetsuo Handa <penguin-kernel@I-love.SAKURA.ne.jp>,
	mhocko@suse.cz, dchinner@redhat.com, linux-mm@kvack.org,
	rientjes@google.com, oleg@redhat.com, akpm@linux-foundation.org,
	mgorman@suse.de, torvalds@linux-foundation.org, xfs@oss.sgi.com
Subject: Re: How to handle TIF_MEMDIE stalls?
Date: Sat, 21 Feb 2015 18:52:27 -0500	[thread overview]
Message-ID: <20150221235227.GA25079@phnom.home.cmpxchg.org> (raw)
In-Reply-To: <20150219225217.GY12722@dastard>

On Fri, Feb 20, 2015 at 09:52:17AM +1100, Dave Chinner wrote:
> I will actively work around anything that causes filesystem memory
> pressure to increase the chance of oom killer invocations. The OOM
> killer is not a solution - it is, by definition, a loose cannon and
> so we should be reducing dependencies on it.

Once we have a better-working alternative, sure.

> I really don't care about the OOM Killer corner cases - it's
> completely the wrong line of development to be spending time on
> and you aren't going to convince me otherwise. The OOM killer is a
> crutch used to justify having a memory allocation subsystem that
> can't provide forward progress guarantee mechanisms to callers that
> need it.

We can provide this.  Are all these callers able to preallocate?

---

diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index 51bd1e72a917..af81b8a67651 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -380,6 +380,10 @@ extern void free_kmem_pages(unsigned long addr, unsigned int order);
 #define __free_page(page) __free_pages((page), 0)
 #define free_page(addr) free_pages((addr), 0)
 
+void register_private_page(struct page *page, unsigned int order);
+int alloc_private_pages(gfp_t gfp_mask, unsigned int order, unsigned int nr);
+void free_private_pages(void);
+
 void page_alloc_init(void);
 void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp);
 void drain_all_pages(struct zone *zone);
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6d77432e14ff..1fe390779f23 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1545,6 +1545,8 @@ struct task_struct {
 #endif
 
 /* VM state */
+	struct list_head private_pages;
+
 	struct reclaim_state *reclaim_state;
 
 	struct backing_dev_info *backing_dev_info;
diff --git a/kernel/fork.c b/kernel/fork.c
index cf65139615a0..b6349b0e5da2 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1308,6 +1308,8 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	memset(&p->rss_stat, 0, sizeof(p->rss_stat));
 #endif
 
+	INIT_LIST_HEAD(&p->private_pages);
+
 	p->default_timer_slack_ns = current->timer_slack_ns;
 
 	task_io_accounting_init(&p->ioac);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a47f0b229a1a..546db4e0da75 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -490,12 +490,10 @@ static inline void clear_page_guard(struct zone *zone, struct page *page,
 static inline void set_page_order(struct page *page, unsigned int order)
 {
 	set_page_private(page, order);
-	__SetPageBuddy(page);
 }
 
 static inline void rmv_page_order(struct page *page)
 {
-	__ClearPageBuddy(page);
 	set_page_private(page, 0);
 }
 
@@ -617,6 +615,7 @@ static inline void __free_one_page(struct page *page,
 			list_del(&buddy->lru);
 			zone->free_area[order].nr_free--;
 			rmv_page_order(buddy);
+			__ClearPageBuddy(buddy);
 		}
 		combined_idx = buddy_idx & page_idx;
 		page = page + (combined_idx - page_idx);
@@ -624,6 +623,7 @@ static inline void __free_one_page(struct page *page,
 		order++;
 	}
 	set_page_order(page, order);
+	__SetPageBuddy(page);
 
 	/*
 	 * If this is not the largest possible page, check if the buddy
@@ -924,6 +924,7 @@ static inline void expand(struct zone *zone, struct page *page,
 		list_add(&page[size].lru, &area->free_list[migratetype]);
 		area->nr_free++;
 		set_page_order(&page[size], high);
+		__SetPageBuddy(&page[size]); /* the freed half, not the head */
 	}
 }
 
@@ -1015,6 +1016,7 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order,
 							struct page, lru);
 		list_del(&page->lru);
 		rmv_page_order(page);
+		__ClearPageBuddy(page);
 		area->nr_free--;
 		expand(zone, page, order, current_order, area, migratetype);
 		set_freepage_migratetype(page, migratetype);
@@ -1212,6 +1214,7 @@ __rmqueue_fallback(struct zone *zone, unsigned int order, int start_migratetype)
 			/* Remove the page from the freelists */
 			list_del(&page->lru);
 			rmv_page_order(page);
+			__ClearPageBuddy(page);
 
 			expand(zone, page, order, current_order, area,
 					buddy_type);
@@ -1598,6 +1601,7 @@ int __isolate_free_page(struct page *page, unsigned int order)
 	list_del(&page->lru);
 	zone->free_area[order].nr_free--;
 	rmv_page_order(page);
+	__ClearPageBuddy(page);
 
 	/* Set the pageblock if the isolated page is at least a pageblock */
 	if (order >= pageblock_order - 1) {
@@ -2504,6 +2508,40 @@ retry:
 	return page;
 }
 
+/* Take the smallest block of at least @order from current's reserves */
+static inline struct page *
+__alloc_pages_private(gfp_t gfp_mask, unsigned int order,
+		      const struct alloc_context *ac)
+{
+	unsigned int uninitialized_var(alloc_order);
+	struct page *page = NULL;
+	struct page *p;
+
+	/* Dopey linear scan, but this is a slowpath right before OOM */
+	list_for_each_entry(p, &current->private_pages, lru) {
+		int o = page_order(p);
+
+		if (o >= order && (!page || o < alloc_order)) {
+			page = p;
+			alloc_order = o;
+		}
+	}
+	if (!page)
+		return NULL;
+
+	list_del(&page->lru);
+	rmv_page_order(page);
+
+	/* Halve down to @order, putting each upper half back on the list */
+	while (alloc_order > order) {
+		alloc_order--;
+		set_page_order(&page[1 << alloc_order], alloc_order);
+		list_add(&page[1 << alloc_order].lru, &current->private_pages);
+	}
+
+	return page;
+}
+
 /*
  * This is called in the allocator slow-path if the allocation request is of
  * sufficient urgency to ignore watermarks and take other desperate measures
@@ -2753,9 +2791,13 @@ retry:
 		/*
 		 * If we fail to make progress by freeing individual
 		 * pages, but the allocation wants us to keep going,
-		 * start OOM killing tasks.
+		 * dip into private reserves, or start OOM killing.
 		 */
 		if (!did_some_progress) {
+			page = __alloc_pages_private(gfp_mask, order, ac);
+			if (page)
+				goto got_pg;
+
 			page = __alloc_pages_may_oom(gfp_mask, order, ac,
 							&did_some_progress);
 			if (page)
@@ -3046,6 +3088,82 @@ void free_pages_exact(void *virt, size_t size)
 EXPORT_SYMBOL(free_pages_exact);
 
 /**
+ * alloc_private_pages - allocate private memory reserve pages
+ * @gfp_mask: gfp flags for the allocations
+ * @order: order of pages to allocate
+ * @nr: number of pages to allocate
+ *
+ * Allocates @nr pages of order @order and adds them to the calling
+ * task's private_pages list, which the allocator slowpath tries before
+ * OOM killing.  All-or-nothing: returns 0 on success, or -ENOMEM after
+ * freeing whatever this call had already allocated.
+ *
+ * Call free_private_pages() once the reserves are no longer required.
+ */
+int alloc_private_pages(gfp_t gfp_mask, unsigned int order, unsigned int nr)
+{
+	struct page *page, *page2;
+	LIST_HEAD(pages);
+	unsigned int i;
+
+	for (i = 0; i < nr; i++) {
+		page = alloc_pages(gfp_mask, order);
+		if (!page)
+			goto error;
+		set_page_order(page, order);
+		list_add(&page->lru, &pages);
+	}
+
+	list_splice(&pages, &current->private_pages);
+	return 0;
+
+error:
+	list_for_each_entry_safe(page, page2, &pages, lru) {
+		list_del(&page->lru);
+		rmv_page_order(page);
+		__free_pages(page, order);
+	}
+	return -ENOMEM;
+}
+
+/**
+ * register_private_page - register a private memory reserve page
+ * @page: pre-allocated page
+ * @order: @page's order
+ *
+ * Records @order on @page and adds it to the calling task's
+ * private_pages list, making it an emergency reserve the page
+ * allocator can use if an allocation would otherwise fail.
+ *
+ * The caller is responsible for calling free_private_pages() once the
+ * reserves are no longer required.
+ */
+void register_private_page(struct page *page, unsigned int order)
+{
+	set_page_order(page, order);
+	list_add(&page->lru, &current->private_pages);
+}
+
+/**
+ * free_private_pages - free all private memory reserve pages
+ *
+ * Frees every page still on the calling task's reserve list (filled by
+ * alloc_private_pages()/register_private_page()) at its recorded order.
+ */
+void free_private_pages(void)
+{
+	struct page *page, *page2;
+
+	list_for_each_entry_safe(page, page2, &current->private_pages, lru) {
+		int order = page_order(page);
+
+		list_del(&page->lru);
+		rmv_page_order(page);
+		__free_pages(page, order);
+	}
+}
+
+/**
  * nr_free_zone_pages - count number of pages beyond high watermark
  * @offset: The zone index of the highest zone
  *
@@ -6551,6 +6669,7 @@ __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn)
 #endif
 		list_del(&page->lru);
 		rmv_page_order(page);
+		__ClearPageBuddy(page);
 		zone->free_area[order].nr_free--;
 		for (i = 0; i < (1 << order); i++)
 			SetPageReserved((page+i));

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2015-02-21 23:53 UTC|newest]

Thread overview: 177+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-12-12 13:54 [RFC PATCH] oom: Don't count on mm-less current process Tetsuo Handa
2014-12-16 12:47 ` Michal Hocko
2014-12-17 11:54   ` Tetsuo Handa
2014-12-17 13:08     ` Michal Hocko
2014-12-18 12:11       ` Tetsuo Handa
2014-12-18 15:33         ` Michal Hocko
2014-12-19 12:07           ` Tetsuo Handa
2014-12-19 12:49             ` Michal Hocko
2014-12-20  9:13               ` Tetsuo Handa
2014-12-20 11:42                 ` Tetsuo Handa
2014-12-22 20:25                   ` Michal Hocko
2014-12-23  1:00                     ` Tetsuo Handa
2014-12-23  9:51                       ` Michal Hocko
2014-12-23 11:46                         ` Tetsuo Handa
2014-12-23 11:57                           ` Tetsuo Handa
2014-12-23 12:12                             ` Tetsuo Handa
2014-12-23 12:27                             ` Michal Hocko
2014-12-23 12:24                           ` Michal Hocko
2014-12-23 13:00                             ` Tetsuo Handa
2014-12-23 13:09                               ` Michal Hocko
2014-12-23 13:20                                 ` Tetsuo Handa
2014-12-23 13:43                                   ` Michal Hocko
2014-12-23 14:11                                     ` Tetsuo Handa
2014-12-23 14:57                                       ` Michal Hocko
2014-12-19 12:22           ` How to handle TIF_MEMDIE stalls? Tetsuo Handa
2014-12-20  2:03             ` Dave Chinner
2014-12-20 12:41               ` Tetsuo Handa
2014-12-20 22:35                 ` Dave Chinner
2014-12-21  8:45                   ` Tetsuo Handa
2014-12-21 20:42                     ` Dave Chinner
2014-12-22 16:57                       ` Michal Hocko
2014-12-22 21:30                         ` Dave Chinner
2014-12-23  9:41                           ` Johannes Weiner
2014-12-24  1:06                             ` Dave Chinner
2014-12-24  2:40                               ` Linus Torvalds
2014-12-29 18:19                     ` Michal Hocko
2014-12-30  6:42                       ` Tetsuo Handa
2014-12-30 11:21                         ` Michal Hocko
2014-12-30 13:33                           ` Tetsuo Handa
2014-12-31 10:24                             ` Tetsuo Handa
2015-02-09 11:44                           ` Tetsuo Handa
2015-02-10 13:58                             ` Tetsuo Handa
2015-02-10 15:19                               ` Johannes Weiner
2015-02-11  2:23                                 ` Tetsuo Handa
2015-02-11 13:37                                   ` Tetsuo Handa
2015-02-11 18:50                                     ` Oleg Nesterov
2015-02-11 18:59                                       ` Oleg Nesterov
2015-03-14 13:03                                         ` Tetsuo Handa
2015-02-17 12:23                                   ` Tetsuo Handa
2015-02-17 12:53                                     ` Johannes Weiner
2015-02-17 15:38                                       ` Michal Hocko
2015-02-17 22:54                                       ` Dave Chinner
2015-02-17 23:32                                         ` Dave Chinner
2015-02-18  8:25                                         ` Michal Hocko
2015-02-18 10:48                                           ` Dave Chinner
2015-02-18 12:16                                             ` Michal Hocko
2015-02-18 21:31                                               ` Dave Chinner
2015-02-19  9:40                                                 ` Michal Hocko
2015-02-19 22:03                                                   ` Dave Chinner
2015-02-20  9:27                                                     ` Michal Hocko
2015-02-19 11:01                                               ` Johannes Weiner
2015-02-19 12:29                                                 ` Michal Hocko
2015-02-19 12:58                                                   ` Michal Hocko
2015-02-19 15:29                                                     ` Tetsuo Handa
2015-02-19 21:53                                                       ` Tetsuo Handa
2015-02-20  9:13                                                       ` Michal Hocko
2015-02-20 13:37                                                         ` Stefan Ring
2015-02-19 13:29                                                   ` Tetsuo Handa
2015-02-20  9:10                                                     ` Michal Hocko
2015-02-20 12:20                                                       ` Tetsuo Handa
2015-02-20 12:38                                                         ` Michal Hocko
2015-02-19 21:43                                                   ` Dave Chinner
2015-02-20 12:48                                                     ` Michal Hocko
2015-02-20 23:09                                                       ` Dave Chinner
2015-02-19 10:24                                         ` Johannes Weiner
2015-02-19 22:52                                           ` Dave Chinner
2015-02-20 10:36                                             ` Tetsuo Handa
2015-02-20 23:15                                               ` Dave Chinner
2015-02-21  3:20                                                 ` Theodore Ts'o
2015-02-21  9:19                                                   ` Andrew Morton
2015-02-21 13:48                                                     ` Tetsuo Handa
2015-02-21 21:38                                                     ` Dave Chinner
2015-02-22  0:20                                                     ` Johannes Weiner
2015-02-23 10:48                                                       ` Michal Hocko
2015-02-23 11:23                                                         ` Tetsuo Handa
2015-02-23 21:33                                                       ` David Rientjes
2015-02-22 14:48                                                     ` __GFP_NOFAIL and oom_killer_disabled? Tetsuo Handa
2015-02-23 10:21                                                       ` Michal Hocko
2015-02-23 13:03                                                         ` Tetsuo Handa
2015-02-24 18:14                                                           ` Michal Hocko
2015-02-25 11:22                                                             ` Tetsuo Handa
2015-02-25 16:02                                                               ` Michal Hocko
2015-02-25 21:48                                                                 ` Tetsuo Handa
2015-02-25 21:51                                                                   ` Andrew Morton
2015-02-21 12:00                                                   ` How to handle TIF_MEMDIE stalls? Tetsuo Handa
2015-02-23 10:26                                                   ` Michal Hocko
2015-02-21 11:12                                                 ` Tetsuo Handa
2015-02-21 21:48                                                   ` Dave Chinner
2015-02-21 23:52                                             ` Johannes Weiner [this message]
2015-02-23  0:45                                               ` Dave Chinner
2015-02-23  1:29                                                 ` Andrew Morton
2015-02-23  7:32                                                   ` Dave Chinner
2015-02-27 18:24                                                     ` Vlastimil Babka
2015-02-28  0:03                                                       ` Dave Chinner
2015-02-28 15:17                                                         ` Theodore Ts'o
2015-03-02  9:39                                                     ` Vlastimil Babka
2015-03-02 22:31                                                       ` Dave Chinner
2015-03-03  9:13                                                         ` Vlastimil Babka
2015-03-04  1:33                                                           ` Dave Chinner
2015-03-04  8:50                                                             ` Vlastimil Babka
2015-03-04 11:03                                                               ` Dave Chinner
2015-03-07  0:20                                                         ` Johannes Weiner
2015-03-07  3:43                                                           ` Dave Chinner
2015-03-07 15:08                                                             ` Johannes Weiner
2015-03-02 20:22                                                     ` Johannes Weiner
2015-03-02 23:12                                                       ` Dave Chinner
2015-03-03  2:50                                                         ` Johannes Weiner
2015-03-04  6:52                                                           ` Dave Chinner
2015-03-04 15:04                                                             ` Johannes Weiner
2015-03-04 17:38                                                               ` Theodore Ts'o
2015-03-04 23:17                                                                 ` Dave Chinner
2015-02-28 16:29                                                 ` Johannes Weiner
2015-02-28 16:41                                                   ` Theodore Ts'o
2015-02-28 22:15                                                     ` Johannes Weiner
2015-03-01 11:17                                                       ` Tetsuo Handa
2015-03-06 11:53                                                         ` Tetsuo Handa
2015-03-01 13:43                                                       ` Theodore Ts'o
2015-03-01 16:15                                                         ` Johannes Weiner
2015-03-01 19:36                                                           ` Theodore Ts'o
2015-03-01 20:44                                                             ` Johannes Weiner
2015-03-01 20:17                                                         ` Johannes Weiner
2015-03-01 21:48                                                       ` Dave Chinner
2015-03-02  0:17                                                         ` Dave Chinner
2015-03-02 12:46                                                           ` Brian Foster
2015-02-28 18:36                                                 ` Vlastimil Babka
2015-03-02 15:18                                                 ` Michal Hocko
2015-03-02 16:05                                                   ` Johannes Weiner
2015-03-02 17:10                                                     ` Michal Hocko
2015-03-02 17:27                                                       ` Johannes Weiner
2015-03-02 16:39                                                   ` Theodore Ts'o
2015-03-02 16:58                                                     ` Michal Hocko
2015-03-04 12:52                                                       ` Dave Chinner
2015-02-17 14:59                                     ` Michal Hocko
2015-02-17 14:50                                 ` Michal Hocko
2015-02-17 14:37                             ` Michal Hocko
2015-02-17 14:44                               ` Michal Hocko
2015-02-16 11:23                           ` Tetsuo Handa
2015-02-16 15:42                             ` Johannes Weiner
2015-02-17 11:57                               ` Tetsuo Handa
2015-02-17 13:16                                 ` Johannes Weiner
2015-02-17 16:50                                   ` Michal Hocko
2015-02-17 23:25                                     ` Dave Chinner
2015-02-18  8:48                                       ` Michal Hocko
2015-02-18 11:23                                         ` Tetsuo Handa
2015-02-18 12:29                                           ` Michal Hocko
2015-02-18 14:06                                             ` Tetsuo Handa
2015-02-18 14:25                                               ` Michal Hocko
2015-02-19 10:48                                                 ` Tetsuo Handa
2015-02-20  8:26                                                   ` Michal Hocko
2015-02-23 22:08                                 ` David Rientjes
2015-02-24 11:20                                   ` Tetsuo Handa
2015-02-24 15:20                                     ` Theodore Ts'o
2015-02-24 21:02                                       ` Dave Chinner
2015-02-25 14:31                                         ` Tetsuo Handa
2015-02-27  7:39                                           ` Dave Chinner
2015-02-27 12:42                                             ` Tetsuo Handa
2015-02-27 13:12                                               ` Dave Chinner
2015-03-04 12:41                                                 ` Tetsuo Handa
2015-03-04 13:25                                                   ` Dave Chinner
2015-03-04 14:11                                                     ` Tetsuo Handa
2015-03-05  1:36                                                       ` Dave Chinner
2015-02-17 16:33                             ` Michal Hocko
2014-12-29 17:40                   ` [PATCH] mm: get rid of radix tree gfp mask for pagecache_get_page (was: Re: How to handle TIF_MEMDIE stalls?) Michal Hocko
2014-12-29 18:45                     ` Linus Torvalds
2014-12-29 19:33                       ` Michal Hocko
2014-12-30 13:42                         ` Michal Hocko
2014-12-30 21:45                           ` Linus Torvalds

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20150221235227.GA25079@phnom.home.cmpxchg.org \
    --to=hannes@cmpxchg.org \
    --cc=akpm@linux-foundation.org \
    --cc=david@fromorbit.com \
    --cc=dchinner@redhat.com \
    --cc=linux-mm@kvack.org \
    --cc=mgorman@suse.de \
    --cc=mhocko@suse.cz \
    --cc=oleg@redhat.com \
    --cc=penguin-kernel@I-love.SAKURA.ne.jp \
    --cc=rientjes@google.com \
    --cc=torvalds@linux-foundation.org \
    --cc=xfs@oss.sgi.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox