From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
To: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: "linux-kernel@vger.kernel.org" <linux-kernel@vger.kernel.org>,
	"linux-mm@kvack.org" <linux-mm@kvack.org>,
	"kosaki.motohiro@jp.fujitsu.com" <kosaki.motohiro@jp.fujitsu.com>,
	aarcange@redhat.com, akpm@linux-foundation.org,
	minchan.kim@gmail.com, rientjes@google.com,
	vedran.furac@gmail.com,
	"hugh.dickins@tiscali.co.uk" <hugh.dickins@tiscali.co.uk>
Subject: [RFC][-mm][PATCH 3/6] oom-killer: count lowmem rss
Date: Mon, 2 Nov 2009 16:26:17 +0900
Message-ID: <20091102162617.9d07e05f.kamezawa.hiroyu@jp.fujitsu.com>
In-Reply-To: <20091102162244.9425e49b.kamezawa.hiroyu@jp.fujitsu.com>

From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>

Count lowmem rss per mm_struct. "Lowmem" here means (see the consolidated
sketch just after this list):

   for NUMA, pages in a zone below policy_zone.
   for HIGHMEM x86, pages outside ZONE_HIGHMEM (i.e. DMA and NORMAL).
   for everything else, all pages are lowmem.
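
For reference, the three cases above collapse into one helper. This is just a
condensed sketch of the is_lowmem_page() variants added by the patch below,
folded together for readability; the patch itself keeps them under the existing
CONFIG_NUMA/CONFIG_HIGHMEM structure:

	/* sketch only: the real patch splits this across #ifdef branches */
	static inline int is_lowmem_page(struct page *page)
	{
	#ifdef CONFIG_NUMA
		/* NUMA: zones below policy_zone are lowmem */
		return page_zonenum(page) < policy_zone;
	#elif defined(CONFIG_HIGHMEM)
		/* HIGHMEM: everything except ZONE_HIGHMEM is lowmem */
		return page_zonenum(page) != ZONE_HIGHMEM;
	#else
		/* no highmem: all pages are lowmem */
		return 1;
	#endif
	}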

Today, lower_zone_protection[] works very well for protecting lowmem, but the
possibility of a lowmem OOM is not zero even when the kernel protects it well.
(In fact, the protection level can be configured via sysctl. If we keep it
high, a large amount of memory is set aside and never used, but the system is
protected against the rare event of a lowmem OOM.)
Consider an x86 system with 2GB of memory: because the directly mapped region
tops out below 896MB, NORMAL is about 856MB while HIGHMEM is about 1.1GB, so
we cannot keep lower_zone_protection very high.

This patch counts the number of lowmem pages used by a user process (its rss
on lowmem). A later patch will use this value in the OOM calculation.
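
As a rough illustration only (this is not the actual logic of the later patch,
which is posted separately as 6/6), the new counter can be read with the
existing per-mm counter helpers:

	/* illustrative sketch, not part of this series: read the lowmem rss */
	static unsigned long mm_lowmem_rss(struct mm_struct *mm)
	{
		return (unsigned long)get_mm_counter(mm, low_rss);
	}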

Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
---
 include/linux/mempolicy.h |   21 +++++++++++++++++++++
 include/linux/mm_types.h  |    1 +
 mm/memory.c               |   32 ++++++++++++++++++++++++++------
 mm/rmap.c                 |    2 ++
 mm/swapfile.c             |    2 ++
 5 files changed, 52 insertions(+), 6 deletions(-)

Index: mmotm-2.6.32-Nov2/include/linux/mempolicy.h
===================================================================
--- mmotm-2.6.32-Nov2.orig/include/linux/mempolicy.h
+++ mmotm-2.6.32-Nov2/include/linux/mempolicy.h
@@ -240,6 +240,13 @@ static inline int vma_migratable(struct 
 	return 1;
 }
 
+static inline int is_lowmem_page(struct page *page)
+{
+	if (unlikely(page_zonenum(page) < policy_zone))
+		return 1;
+	return 0;
+}
+
 #else
 
 struct mempolicy {};
@@ -356,6 +363,20 @@ static inline int mpol_to_str(char *buff
 }
 #endif
 
+#ifdef CONFIG_HIGHMEM
+static inline int is_lowmem_page(struct page *page)
+{
+	if (page_zonenum(page) == ZONE_HIGHMEM)
+		return 0;
+	return 1;
+}
+#else
+static inline int is_lowmem_page(struct page *page)
+{
+	return 1;
+}
+#endif
+
 #endif /* CONFIG_NUMA */
 #endif /* __KERNEL__ */
 
Index: mmotm-2.6.32-Nov2/include/linux/mm_types.h
===================================================================
--- mmotm-2.6.32-Nov2.orig/include/linux/mm_types.h
+++ mmotm-2.6.32-Nov2/include/linux/mm_types.h
@@ -229,6 +229,7 @@ struct mm_struct {
 	mm_counter_t _file_rss;
 	mm_counter_t _anon_rss;
 	mm_counter_t _swap_usage;
+	mm_counter_t _low_rss;
 
 	unsigned long hiwater_rss;	/* High-watermark of RSS usage */
 	unsigned long hiwater_vm;	/* High-water virtual memory usage */
Index: mmotm-2.6.32-Nov2/mm/memory.c
===================================================================
--- mmotm-2.6.32-Nov2.orig/mm/memory.c
+++ mmotm-2.6.32-Nov2/mm/memory.c
@@ -376,8 +376,9 @@ int __pte_alloc_kernel(pmd_t *pmd, unsig
 	return 0;
 }
 
-static inline void
-add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss, int swap_usage)
+
+static inline void add_mm_rss(struct mm_struct *mm,
+	int file_rss, int anon_rss, int swap_usage, int low_rss)
 {
 	if (file_rss)
 		add_mm_counter(mm, file_rss, file_rss);
@@ -385,6 +386,8 @@ add_mm_rss(struct mm_struct *mm, int fil
 		add_mm_counter(mm, anon_rss, anon_rss);
 	if (swap_usage)
 		add_mm_counter(mm, swap_usage, swap_usage);
+	if (low_rss)
+		add_mm_counter(mm, low_rss, low_rss);
 }
 
 /*
@@ -638,6 +641,8 @@ copy_one_pte(struct mm_struct *dst_mm, s
 		get_page(page);
 		page_dup_rmap(page);
 		rss[PageAnon(page)]++;
+		if (is_lowmem_page(page))
+			rss[3]++;
 	}
 
 out_set_pte:
@@ -653,11 +658,11 @@ static int copy_pte_range(struct mm_stru
 	pte_t *src_pte, *dst_pte;
 	spinlock_t *src_ptl, *dst_ptl;
 	int progress = 0;
-	int rss[3];
+	int rss[4];
 	swp_entry_t entry = (swp_entry_t){0};
 
 again:
-	rss[2] = rss[1] = rss[0] = 0;
+	rss[3] = rss[2] = rss[1] = rss[0] = 0;
 	dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
 	if (!dst_pte)
 		return -ENOMEM;
@@ -693,7 +698,7 @@ again:
 	arch_leave_lazy_mmu_mode();
 	spin_unlock(src_ptl);
 	pte_unmap_nested(orig_src_pte);
-	add_mm_rss(dst_mm, rss[0], rss[1], rss[2]);
+	add_mm_rss(dst_mm, rss[0], rss[1], rss[2], rss[3]);
 	pte_unmap_unlock(orig_dst_pte, dst_ptl);
 	cond_resched();
 
@@ -824,6 +829,7 @@ static unsigned long zap_pte_range(struc
 	int file_rss = 0;
 	int anon_rss = 0;
 	int swap_usage = 0;
+	int low_rss = 0;
 
 	pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
 	arch_enter_lazy_mmu_mode();
@@ -878,6 +884,8 @@ static unsigned long zap_pte_range(struc
 					mark_page_accessed(page);
 				file_rss--;
 			}
+			if (is_lowmem_page(page))
+				low_rss--;
 			page_remove_rmap(page);
 			if (unlikely(page_mapcount(page) < 0))
 				print_bad_pte(vma, addr, ptent, page);
@@ -904,7 +912,7 @@ static unsigned long zap_pte_range(struc
 		pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
 	} while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
 
-	add_mm_rss(mm, file_rss, anon_rss, swap_usage);
+	add_mm_rss(mm, file_rss, anon_rss, swap_usage, low_rss);
 	arch_leave_lazy_mmu_mode();
 	pte_unmap_unlock(pte - 1, ptl);
 
@@ -1539,6 +1547,8 @@ static int insert_page(struct vm_area_st
 	/* Ok, finally just insert the thing.. */
 	get_page(page);
 	inc_mm_counter(mm, file_rss);
+	if (is_lowmem_page(page))
+		inc_mm_counter(mm, low_rss);
 	page_add_file_rmap(page);
 	set_pte_at(mm, addr, pte, mk_pte(page, prot));
 
@@ -2179,6 +2189,10 @@ gotten:
 			}
 		} else
 			inc_mm_counter(mm, anon_rss);
+		if (old_page && is_lowmem_page(old_page))
+			dec_mm_counter(mm, low_rss);
+		if (is_lowmem_page(new_page))
+			inc_mm_counter(mm, low_rss);
 		flush_cache_page(vma, address, pte_pfn(orig_pte));
 		entry = mk_pte(new_page, vma->vm_page_prot);
 		entry = maybe_mkwrite(pte_mkdirty(entry), vma);
@@ -2607,6 +2621,8 @@ static int do_swap_page(struct mm_struct
 
 	inc_mm_counter(mm, anon_rss);
 	dec_mm_counter(mm, swap_usage);
+	if (is_lowmem_page(page))
+		inc_mm_counter(mm, low_rss);
 	pte = mk_pte(page, vma->vm_page_prot);
 	if ((flags & FAULT_FLAG_WRITE) && reuse_swap_page(page)) {
 		pte = maybe_mkwrite(pte_mkdirty(pte), vma);
@@ -2691,6 +2707,8 @@ static int do_anonymous_page(struct mm_s
 		goto release;
 
 	inc_mm_counter(mm, anon_rss);
+	if (is_lowmem_page(page))
+		inc_mm_counter(mm, low_rss);
 	page_add_new_anon_rmap(page, vma, address);
 setpte:
 	set_pte_at(mm, address, page_table, entry);
@@ -2854,6 +2872,8 @@ static int __do_fault(struct mm_struct *
 				get_page(dirty_page);
 			}
 		}
+		if (is_lowmem_page(page))
+			inc_mm_counter(mm, low_rss);
 		set_pte_at(mm, address, page_table, entry);
 
 		/* no need to invalidate: a not-present page won't be cached */
Index: mmotm-2.6.32-Nov2/mm/rmap.c
===================================================================
--- mmotm-2.6.32-Nov2.orig/mm/rmap.c
+++ mmotm-2.6.32-Nov2/mm/rmap.c
@@ -854,6 +854,8 @@ static int try_to_unmap_one(struct page 
 	} else
 		dec_mm_counter(mm, file_rss);
 
+	if (is_lowmem_page(page))
+		dec_mm_counter(mm, low_rss);
 
 	page_remove_rmap(page);
 	page_cache_release(page);
Index: mmotm-2.6.32-Nov2/mm/swapfile.c
===================================================================
--- mmotm-2.6.32-Nov2.orig/mm/swapfile.c
+++ mmotm-2.6.32-Nov2/mm/swapfile.c
@@ -838,6 +838,8 @@ static int unuse_pte(struct vm_area_stru
 
 	inc_mm_counter(vma->vm_mm, anon_rss);
 	dec_mm_counter(vma->vm_mm, swap_usage);
+	if (is_lowmem_page(page))
+		inc_mm_counter(vma->vm_mm, low_rss);
 	get_page(page);
 	set_pte_at(vma->vm_mm, addr, pte,
 		   pte_mkold(mk_pte(page, vma->vm_page_prot)));



Thread overview: 26+ messages
2009-11-02  7:22 [RFC][-mm][PATCH 0/6] oom-killer: total renewal KAMEZAWA Hiroyuki
2009-11-02  7:24 ` [RFC][-mm][PATCH 1/6] oom-killer: updates for classification of OOM KAMEZAWA Hiroyuki
2009-11-02 17:05   ` Christoph Lameter
2009-11-02 23:02     ` KAMEZAWA Hiroyuki
2009-11-03 20:18   ` David Rientjes
2009-11-04  0:01     ` KAMEZAWA Hiroyuki
2009-11-02  7:25 ` [RFC][-mm][PATCH 2/6] oom-killer: count swap usage per process KAMEZAWA Hiroyuki
2009-11-02 17:07   ` Christoph Lameter
2009-11-02 23:03     ` KAMEZAWA Hiroyuki
2009-11-03 19:47   ` David Rientjes
2009-11-04  0:02     ` KAMEZAWA Hiroyuki
2009-11-02  7:26 ` KAMEZAWA Hiroyuki [this message]
2009-11-02 17:09   ` [RFC][-mm][PATCH 3/6] oom-killer: count lowmem rss Christoph Lameter
2009-11-02 23:11     ` KAMEZAWA Hiroyuki
2009-11-03 20:24   ` David Rientjes
2009-11-04  0:22     ` KAMEZAWA Hiroyuki
2009-11-02  7:27 ` [RFC][-mm][PATCH 4/6] oom-killer: fork bomb detector KAMEZAWA Hiroyuki
2009-11-02  8:39   ` KAMEZAWA Hiroyuki
2009-11-02  7:28 ` [RFC][-mm][PATCH 5/6] oom-killer: check last total_vm expansion KAMEZAWA Hiroyuki
2009-11-03 20:29   ` David Rientjes
2009-11-04  0:25     ` KAMEZAWA Hiroyuki
2009-11-02  7:30 ` [RFC][-mm][PATCH 6/6] oom-killer: rewrite badness KAMEZAWA Hiroyuki
2009-11-02 15:04 ` [RFC][-mm][PATCH 0/6] oom-killer: total renewal Minchan Kim
2009-11-02 15:44   ` KAMEZAWA Hiroyuki
2009-11-03 20:34 ` David Rientjes
2009-11-03 23:56   ` KAMEZAWA Hiroyuki
