From: Balbir Singh <balbir@in.ibm.com>
To: hugh@veritas.com, akpm@osdl.org, andyw@uk.ibm.com
Cc: linux-mm@kvack.org, Balbir Singh <balbir@in.ibm.com>
Subject: [RFC][PATCH 1/3] Add back rmap lock
Date: Fri, 29 Dec 2006 15:38:50 +0530
Message-ID: <20061229100850.13860.69089.sendpatchset@balbir.in.ibm.com>
In-Reply-To: <20061229100839.13860.15525.sendpatchset@balbir.in.ibm.com>
This patch adds back the rmap lock that was removed by the patch posted
to lkml at http://lkml.org/lkml/2004/7/12/241. The rmap lock is needed to
ensure that a page's reverse-mapping (rmap) information does not change
while the page is being shared or unshared.
Signed-off-by: Balbir Singh <balbir@in.ibm.com>
---
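Not part of the patch: the sketch below is a simplified userspace model of
the locking pattern being reintroduced, i.e. a PG_maplock bit spinlock in
page->flags taken around every _mapcount update so that, under
CONFIG_SHARED_PAGE_ACCOUNTING, _mapcount can be a plain long rather than
an atomic_t. The struct page, bit position and helper names here are
illustrative only; they mirror, but do not reproduce, the kernel code.

	#include <stdatomic.h>
	#include <stdio.h>

	#define PG_maplock 20

	struct page {
		atomic_ulong flags;	/* bit PG_maplock acts as the rmap lock */
		long _mapcount;		/* plain counter, protected by PG_maplock */
	};

	static void page_map_lock(struct page *page)
	{
		/* spin until the lock bit was observed clear and is now set by us */
		while (atomic_fetch_or(&page->flags, 1UL << PG_maplock) &
		       (1UL << PG_maplock))
			;	/* the kernel's bit_spin_lock() relaxes the cpu here */
	}

	static void page_map_unlock(struct page *page)
	{
		atomic_fetch_and(&page->flags, ~(1UL << PG_maplock));
	}

	/* mirrors page_dup_rmap() after this patch */
	static void page_dup_rmap(struct page *page)
	{
		page_map_lock(page);
		page->_mapcount++;
		page_map_unlock(page);
	}

	int main(void)
	{
		struct page page = { .flags = 0, ._mapcount = -1 };

		page_dup_rmap(&page);
		printf("mapcount = %ld\n", page._mapcount + 1);	/* prints 1 */
		return 0;
	}

The trylock argument added to page_check_address() fits the same picture:
the reclaim paths in this patch already hold PG_maplock when they look up
the pte lock, while fault paths take the pte lock first and then PG_maplock
via page_add_*_rmap(), so the reclaim side try-locks the pte lock
(presumably to avoid an ABBA deadlock) rather than spinning on it.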
include/linux/mm.h | 67 +++++++++++++++++++++++++++++++++++++++++++++
include/linux/mm_types.h | 8 ++++-
include/linux/page-flags.h | 1
include/linux/rmap.h | 24 ++++++++++++++--
init/Kconfig | 11 +++++++
mm/filemap_xip.c | 2 -
mm/page_alloc.c | 9 ++++--
mm/rmap.c | 44 ++++++++++++++++++++---------
mm/vmscan.c | 28 +++++++++++++++---
9 files changed, 169 insertions(+), 25 deletions(-)
diff -puN include/linux/page-flags.h~add-page-map-lock include/linux/page-flags.h
--- linux-2.6.20-rc2/include/linux/page-flags.h~add-page-map-lock 2006-12-29 14:48:07.000000000 +0530
+++ linux-2.6.20-rc2-balbir/include/linux/page-flags.h 2006-12-29 14:48:07.000000000 +0530
@@ -90,6 +90,7 @@
#define PG_reclaim 17 /* To be reclaimed asap */
#define PG_nosave_free 18 /* Used for system suspend/resume */
#define PG_buddy 19 /* Page is free, on buddy lists */
+#define PG_maplock 20 /* Lock rmap operations */
#if (BITS_PER_LONG > 32)
diff -puN include/linux/rmap.h~add-page-map-lock include/linux/rmap.h
--- linux-2.6.20-rc2/include/linux/rmap.h~add-page-map-lock 2006-12-29 14:48:07.000000000 +0530
+++ linux-2.6.20-rc2-balbir/include/linux/rmap.h 2006-12-29 14:48:07.000000000 +0530
@@ -8,6 +8,24 @@
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
+#include <linux/bit_spinlock.h>
+
+#ifdef CONFIG_SHARED_PAGE_ACCOUNTING
+#define page_map_lock(page) \
+ bit_spin_lock(PG_maplock, (unsigned long *)&(page)->flags)
+#define page_map_unlock(page) \
+ bit_spin_unlock(PG_maplock, (unsigned long *)&(page)->flags)
+#define page_check_address_pte_trylock(ptl) \
+ spin_trylock(ptl)
+#else
+#define page_map_lock(page) do {} while(0)
+#define page_map_unlock(page) do {} while(0)
+#define page_check_address_pte_trylock(ptl) \
+({ \
+ spin_lock(ptl); \
+ 1; \
+})
+#endif /* CONFIG_SHARED_PAGE_ACCOUNTING */
/*
* The anon_vma heads a list of private "related" vmas, to scan if
@@ -83,7 +101,9 @@ void page_remove_rmap(struct page *, str
*/
static inline void page_dup_rmap(struct page *page)
{
- atomic_inc(&page->_mapcount);
+ page_map_lock(page);
+ page_mapcount_inc(page);
+ page_map_unlock(page);
}
/*
@@ -96,7 +116,7 @@ int try_to_unmap(struct page *, int igno
* Called from mm/filemap_xip.c to unmap empty zero page
*/
pte_t *page_check_address(struct page *, struct mm_struct *,
- unsigned long, spinlock_t **);
+ unsigned long, spinlock_t **, bool);
/*
* Used by swapoff to help locate where page is expected in vma.
diff -puN mm/rmap.c~add-page-map-lock mm/rmap.c
--- linux-2.6.20-rc2/mm/rmap.c~add-page-map-lock 2006-12-29 14:48:07.000000000 +0530
+++ linux-2.6.20-rc2-balbir/mm/rmap.c 2006-12-29 14:48:07.000000000 +0530
@@ -243,7 +243,8 @@ unsigned long page_address_in_vma(struct
* On success returns with pte mapped and locked.
*/
pte_t *page_check_address(struct page *page, struct mm_struct *mm,
- unsigned long address, spinlock_t **ptlp)
+ unsigned long address, spinlock_t **ptlp,
+ bool trylock)
{
pgd_t *pgd;
pud_t *pud;
@@ -271,12 +272,20 @@ pte_t *page_check_address(struct page *p
}
ptl = pte_lockptr(mm, pmd);
- spin_lock(ptl);
+ if (trylock) {
+ if (!page_check_address_pte_trylock(ptl))
+ goto out;
+ } else {
+ spin_lock(ptl);
+ }
+
if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) {
*ptlp = ptl;
return pte;
}
- pte_unmap_unlock(pte, ptl);
+ spin_unlock(ptl);
+out:
+ pte_unmap(pte);
return NULL;
}
@@ -297,7 +306,7 @@ static int page_referenced_one(struct pa
if (address == -EFAULT)
goto out;
- pte = page_check_address(page, mm, address, &ptl);
+ pte = page_check_address(page, mm, address, &ptl, true);
if (!pte)
goto out;
@@ -441,7 +450,7 @@ static int page_mkclean_one(struct page
if (address == -EFAULT)
goto out;
- pte = page_check_address(page, mm, address, &ptl);
+ pte = page_check_address(page, mm, address, &ptl, false);
if (!pte)
goto out;
@@ -532,9 +541,10 @@ static void __page_set_anon_rmap(struct
void page_add_anon_rmap(struct page *page,
struct vm_area_struct *vma, unsigned long address)
{
- if (atomic_inc_and_test(&page->_mapcount))
+ page_map_lock(page);
+ if (page_mapcount_inc_and_test(page))
__page_set_anon_rmap(page, vma, address);
- /* else checking page index and mapping is racy */
+ page_map_unlock(page);
}
/*
@@ -549,8 +559,10 @@ void page_add_anon_rmap(struct page *pag
void page_add_new_anon_rmap(struct page *page,
struct vm_area_struct *vma, unsigned long address)
{
- atomic_set(&page->_mapcount, 0); /* elevate count by 1 (starts at -1) */
+ page_map_lock(page);
+ page_mapcount_set(page, 0); /* elevate count by 1 (starts at -1) */
__page_set_anon_rmap(page, vma, address);
+ page_map_unlock(page);
}
/**
@@ -561,8 +573,10 @@ void page_add_new_anon_rmap(struct page
*/
void page_add_file_rmap(struct page *page)
{
- if (atomic_inc_and_test(&page->_mapcount))
+ page_map_lock(page);
+ if (page_mapcount_inc_and_test(page))
__inc_zone_page_state(page, NR_FILE_MAPPED);
+ page_map_unlock(page);
}
/**
@@ -573,7 +587,8 @@ void page_add_file_rmap(struct page *pag
*/
void page_remove_rmap(struct page *page, struct vm_area_struct *vma)
{
- if (atomic_add_negative(-1, &page->_mapcount)) {
+ page_map_lock(page);
+ if (page_mapcount_add_negative(-1, page)) {
if (unlikely(page_mapcount(page) < 0)) {
printk (KERN_EMERG "Eeek! page_mapcount(page) went negative! (%d)\n", page_mapcount(page));
printk (KERN_EMERG " page pfn = %lx\n", page_to_pfn(page));
@@ -602,6 +617,7 @@ void page_remove_rmap(struct page *page,
__dec_zone_page_state(page,
PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED);
}
+ page_map_unlock(page);
}
/*
@@ -622,7 +638,7 @@ static int try_to_unmap_one(struct page
if (address == -EFAULT)
goto out;
- pte = page_check_address(page, mm, address, &ptl);
+ pte = page_check_address(page, mm, address, &ptl, true);
if (!pte)
goto out;
@@ -861,6 +877,7 @@ static int try_to_unmap_file(struct page
* The mapcount of the page we came in with is irrelevant,
* but even so use it as a guide to how hard we should try?
*/
+ page_map_unlock(page);
mapcount = page_mapcount(page);
if (!mapcount)
goto out;
@@ -882,7 +899,7 @@ static int try_to_unmap_file(struct page
cursor += CLUSTER_SIZE;
vma->vm_private_data = (void *) cursor;
if ((int)mapcount <= 0)
- goto out;
+ goto relock;
}
vma->vm_private_data = (void *) max_nl_cursor;
}
@@ -897,6 +914,8 @@ static int try_to_unmap_file(struct page
*/
list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
vma->vm_private_data = NULL;
+relock:
+ page_map_lock(page);
out:
spin_unlock(&mapping->i_mmap_lock);
return ret;
@@ -929,4 +948,3 @@ int try_to_unmap(struct page *page, int
ret = SWAP_SUCCESS;
return ret;
}
-
diff -puN mm/vmscan.c~add-page-map-lock mm/vmscan.c
--- linux-2.6.20-rc2/mm/vmscan.c~add-page-map-lock 2006-12-29 14:48:07.000000000 +0530
+++ linux-2.6.20-rc2-balbir/mm/vmscan.c 2006-12-29 14:48:07.000000000 +0530
@@ -472,6 +472,7 @@ static unsigned long shrink_page_list(st
VM_BUG_ON(PageActive(page));
sc->nr_scanned++;
+ page_map_lock(page);
if (!sc->may_swap && page_mapped(page))
goto keep_locked;
@@ -485,17 +486,22 @@ static unsigned long shrink_page_list(st
referenced = page_referenced(page, 1);
/* In active use or really unfreeable? Activate it. */
- if (referenced && page_mapping_inuse(page))
+ if (referenced && page_mapping_inuse(page)) {
+ page_map_unlock(page);
goto activate_locked;
+ }
#ifdef CONFIG_SWAP
/*
* Anonymous process memory has backing store?
* Try to allocate it some swap space here.
*/
- if (PageAnon(page) && !PageSwapCache(page))
+ if (PageAnon(page) && !PageSwapCache(page)) {
+ page_map_unlock(page);
if (!add_to_swap(page, GFP_ATOMIC))
goto activate_locked;
+ page_map_lock(page);
+ }
#endif /* CONFIG_SWAP */
mapping = page_mapping(page);
@@ -509,13 +515,16 @@ static unsigned long shrink_page_list(st
if (page_mapped(page) && mapping) {
switch (try_to_unmap(page, 0)) {
case SWAP_FAIL:
+ page_map_unlock(page);
goto activate_locked;
case SWAP_AGAIN:
+ page_map_unlock(page);
goto keep_locked;
case SWAP_SUCCESS:
; /* try to free the page below */
}
}
+ page_map_unlock(page);
if (PageDirty(page)) {
if (referenced)
@@ -833,12 +842,21 @@ force_reclaim_mapped:
page = lru_to_page(&l_hold);
list_del(&page->lru);
if (page_mapped(page)) {
- if (!reclaim_mapped ||
- (total_swap_pages == 0 && PageAnon(page)) ||
- page_referenced(page, 0)) {
+ if (!reclaim_mapped) {
list_add(&page->lru, &l_active);
continue;
}
+ page_map_lock(page);
+ if (page_referenced(page, 0)) {
+ page_map_unlock(page);
+ list_add(&page->lru, &l_active);
+ continue;
+ }
+ page_map_unlock(page);
+ }
+ if (total_swap_pages == 0 && PageAnon(page)) {
+ list_add(&page->lru, &l_active);
+ continue;
}
list_add(&page->lru, &l_inactive);
}
diff -puN include/linux/mm.h~add-page-map-lock include/linux/mm.h
--- linux-2.6.20-rc2/include/linux/mm.h~add-page-map-lock 2006-12-29 14:48:07.000000000 +0530
+++ linux-2.6.20-rc2-balbir/include/linux/mm.h 2006-12-29 14:48:07.000000000 +0530
@@ -602,6 +602,52 @@ static inline pgoff_t page_index(struct
return page->index;
}
+#ifdef CONFIG_SHARED_PAGE_ACCOUNTING
+/*
+ * Under SHARED_PAGE_ACCOUNTING, all these operations take place under
+ * the rmap page lock (page_map_*lock)
+ */
+static inline void reset_page_mapcount(struct page *page)
+{
+ (page)->_mapcount = -1;
+}
+
+static inline int page_mapcount(struct page *page)
+{
+ return (page)->_mapcount + 1;
+}
+
+/*
+ * Return true if this page is mapped into pagetables.
+ */
+static inline int page_mapped(struct page *page)
+{
+ return (page)->_mapcount >= 0;
+}
+
+static inline int page_mapcount_inc_and_test(struct page *page)
+{
+ page->_mapcount++;
+ return (page->_mapcount == 0);
+}
+
+static inline void page_mapcount_inc(struct page *page)
+{
+ page->_mapcount++;
+}
+
+static inline int page_mapcount_add_negative(int val, struct page *page)
+{
+ page->_mapcount += val;
+ return (page->_mapcount < 0);
+}
+
+static inline void page_mapcount_set(struct page *page, int val)
+{
+ page->_mapcount = val;
+}
+
+#else
/*
* The atomic page->_mapcount, like _count, starts from -1:
* so that transitions both from it and to it can be tracked,
@@ -625,6 +671,27 @@ static inline int page_mapped(struct pag
return atomic_read(&(page)->_mapcount) >= 0;
}
+static inline int page_mapcount_inc_and_test(struct page *page)
+{
+ return atomic_inc_and_test(&(page)->_mapcount);
+}
+
+static inline void page_mapcount_inc(struct page *page)
+{
+ atomic_inc(&(page)->_mapcount);
+}
+
+static inline int page_mapcount_add_negative(int val, struct page *page)
+{
+ return atomic_add_negative(val, &(page)->_mapcount);
+}
+
+static inline void page_mapcount_set(struct page *page, int val)
+{
+ atomic_set(&(page)->_mapcount, val);
+}
+#endif
+
/*
* Error return values for the *_nopage functions
*/
diff -puN mm/page_alloc.c~add-page-map-lock mm/page_alloc.c
--- linux-2.6.20-rc2/mm/page_alloc.c~add-page-map-lock 2006-12-29 14:48:07.000000000 +0530
+++ linux-2.6.20-rc2-balbir/mm/page_alloc.c 2006-12-29 14:48:07.000000000 +0530
@@ -199,7 +199,8 @@ static void bad_page(struct page *page)
1 << PG_slab |
1 << PG_swapcache |
1 << PG_writeback |
- 1 << PG_buddy );
+ 1 << PG_buddy |
+ 1 << PG_maplock);
set_page_count(page, 0);
reset_page_mapcount(page);
page->mapping = NULL;
@@ -434,7 +435,8 @@ static inline int free_pages_check(struc
1 << PG_swapcache |
1 << PG_writeback |
1 << PG_reserved |
- 1 << PG_buddy ))))
+ 1 << PG_buddy |
+ 1 << PG_maplock))))
bad_page(page);
if (PageDirty(page))
__ClearPageDirty(page);
@@ -584,7 +586,8 @@ static int prep_new_page(struct page *pa
1 << PG_swapcache |
1 << PG_writeback |
1 << PG_reserved |
- 1 << PG_buddy ))))
+ 1 << PG_buddy |
+ 1 << PG_maplock))))
bad_page(page);
/*
diff -puN include/linux/mm_types.h~add-page-map-lock include/linux/mm_types.h
--- linux-2.6.20-rc2/include/linux/mm_types.h~add-page-map-lock 2006-12-29 14:48:07.000000000 +0530
+++ linux-2.6.20-rc2-balbir/include/linux/mm_types.h 2006-12-29 14:48:07.000000000 +0530
@@ -8,6 +8,12 @@
struct address_space;
+#ifdef CONFIG_SHARED_PAGE_ACCOUNTING
+typedef long mapcount_t;
+#else
+typedef atomic_t mapcount_t;
+#endif
+
/*
* Each physical page in the system has a struct page associated with
* it to keep track of whatever it is we are using the page for at the
@@ -19,7 +25,7 @@ struct page {
unsigned long flags; /* Atomic flags, some possibly
* updated asynchronously */
atomic_t _count; /* Usage count, see below. */
- atomic_t _mapcount; /* Count of ptes mapped in mms,
+ mapcount_t _mapcount; /* Count of ptes mapped in mms,
* to show when page is mapped
* & limit reverse map searches.
*/
diff -puN init/Kconfig~add-page-map-lock init/Kconfig
--- linux-2.6.20-rc2/init/Kconfig~add-page-map-lock 2006-12-29 14:48:07.000000000 +0530
+++ linux-2.6.20-rc2-balbir/init/Kconfig 2006-12-29 14:48:07.000000000 +0530
@@ -280,6 +280,17 @@ config RELAY
If unsure, say N.
+config SHARED_PAGE_ACCOUNTING
+ bool "Enable support for accounting shared pages in RSS"
+ help
+ This option enables accounting of pages shared among several
+ processes in the system.
+ The RSS (Resident Set Size) of a process is tracked for shared
+ pages, to enable finer accounting of pages used by a process.
+ The accounting is more accurate but adds per-page locking overhead.
+
+ If unsure, say N.
+
source "usr/Kconfig"
config CC_OPTIMIZE_FOR_SIZE
diff -puN mm/page-writeback.c~add-page-map-lock mm/page-writeback.c
diff -puN mm/filemap_xip.c~add-page-map-lock mm/filemap_xip.c
--- linux-2.6.20-rc2/mm/filemap_xip.c~add-page-map-lock 2006-12-29 14:48:07.000000000 +0530
+++ linux-2.6.20-rc2-balbir/mm/filemap_xip.c 2006-12-29 14:48:07.000000000 +0530
@@ -184,7 +184,7 @@ __xip_unmap (struct address_space * mapp
((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
BUG_ON(address < vma->vm_start || address >= vma->vm_end);
page = ZERO_PAGE(address);
- pte = page_check_address(page, mm, address, &ptl);
+ pte = page_check_address(page, mm, address, &ptl, false);
if (pte) {
/* Nuke the page table entry. */
flush_cache_page(vma, address, pte_pfn(*pte));
_
--
Balbir Singh,
Linux Technology Center,
IBM Software Labs