* [RFC][PATCH 1/3] Add back rmap lock
2006-12-29 10:08 [RFC][PATCH 0/3] Add shared RSS accounting Balbir Singh
@ 2006-12-29 10:08 ` Balbir Singh
2006-12-29 10:09 ` [RFC][PATCH 2/3] Move RSS accounting to page_xxxx_rmap() functions Balbir Singh
2006-12-29 10:09 ` [RFC][PATCH 3/3] Add shared page accounting Balbir Singh
2 siblings, 0 replies; 4+ messages in thread
From: Balbir Singh @ 2006-12-29 10:08 UTC (permalink / raw)
To: hugh, akpm, andyw; +Cc: linux-mm, Balbir Singh
This patch adds back the rmap lock that was removed by a patch posted
to LKML at http://lkml.org/lkml/2004/7/12/241. The rmap lock is needed
to ensure that rmap information does not change while a page is being
shared or unshared.
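
For reference, a stand-alone C sketch of the locking pattern being
reintroduced is below. struct rmap_page, the bit number and the helper
bodies are simplified stand-ins, not the kernel implementation; the real
helpers are bit_spin_lock()/bit_spin_unlock() on PG_maplock in
page->flags, as in the diff that follows.

#include <stdatomic.h>
#include <stdio.h>

#define PG_MAPLOCK 20	/* illustrative bit number, mirrors PG_maplock */

struct rmap_page {
	atomic_ulong flags;	/* models page->flags */
	long mapcount;		/* models page->_mapcount, starts at -1 */
};

static void page_map_lock(struct rmap_page *page)
{
	/* spin until the lock bit was previously clear, like bit_spin_lock() */
	while (atomic_fetch_or(&page->flags, 1UL << PG_MAPLOCK) &
	       (1UL << PG_MAPLOCK))
		;
}

static void page_map_unlock(struct rmap_page *page)
{
	atomic_fetch_and(&page->flags, ~(1UL << PG_MAPLOCK));
}

/* With the lock held around every transition, rmap walks see a stable
 * sharing state and _mapcount no longer needs to be an atomic_t. */
static void page_dup_rmap(struct rmap_page *page)
{
	page_map_lock(page);
	page->mapcount++;
	page_map_unlock(page);
}

int main(void)
{
	struct rmap_page page = { .flags = 0, .mapcount = -1 };

	page_dup_rmap(&page);	/* first mapping:  _mapcount -1 -> 0 */
	page_dup_rmap(&page);	/* second mapping: _mapcount  0 -> 1, shared */
	printf("mapcount = %ld\n", page.mapcount);	/* prints 1 */
	return 0;
}
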
Signed-off-by: Balbir Singh <balbir@in.ibm.com>
---
include/linux/mm.h | 67 +++++++++++++++++++++++++++++++++++++++++++++
include/linux/mm_types.h | 8 ++++-
include/linux/page-flags.h | 1
include/linux/rmap.h | 24 ++++++++++++++--
init/Kconfig | 11 +++++++
mm/filemap_xip.c | 2 -
mm/page_alloc.c | 9 ++++--
mm/rmap.c | 44 ++++++++++++++++++++---------
mm/vmscan.c | 28 +++++++++++++++---
9 files changed, 169 insertions(+), 25 deletions(-)
diff -puN include/linux/page-flags.h~add-page-map-lock include/linux/page-flags.h
--- linux-2.6.20-rc2/include/linux/page-flags.h~add-page-map-lock 2006-12-29 14:48:07.000000000 +0530
+++ linux-2.6.20-rc2-balbir/include/linux/page-flags.h 2006-12-29 14:48:07.000000000 +0530
@@ -90,6 +90,7 @@
#define PG_reclaim 17 /* To be reclaimed asap */
#define PG_nosave_free 18 /* Used for system suspend/resume */
#define PG_buddy 19 /* Page is free, on buddy lists */
+#define PG_maplock 20 /* Lock rmap operations */
#if (BITS_PER_LONG > 32)
diff -puN include/linux/rmap.h~add-page-map-lock include/linux/rmap.h
--- linux-2.6.20-rc2/include/linux/rmap.h~add-page-map-lock 2006-12-29 14:48:07.000000000 +0530
+++ linux-2.6.20-rc2-balbir/include/linux/rmap.h 2006-12-29 14:48:07.000000000 +0530
@@ -8,6 +8,24 @@
#include <linux/slab.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
+#include <linux/bit_spinlock.h>
+
+#ifdef CONFIG_SHARED_PAGE_ACCOUNTING
+#define page_map_lock(page) \
+ bit_spin_lock(PG_maplock, (unsigned long *)&(page)->flags)
+#define page_map_unlock(page) \
+ bit_spin_unlock(PG_maplock, (unsigned long *)&(page)->flags)
+#define page_check_address_pte_trylock(ptl) \
+ spin_trylock(ptl)
+#else
+#define page_map_lock(page) do {} while(0)
+#define page_map_unlock(page) do {} while(0)
+#define page_check_address_pte_trylock(ptl) \
+({ \
+ spin_lock(ptl); \
+ 1; \
+})
+#endif /* CONFIG_SHARED_PAGE_ACCOUNTING */
/*
* The anon_vma heads a list of private "related" vmas, to scan if
@@ -83,7 +101,9 @@ void page_remove_rmap(struct page *, str
*/
static inline void page_dup_rmap(struct page *page)
{
- atomic_inc(&page->_mapcount);
+ page_map_lock(page);
+ page_mapcount_inc(page);
+ page_map_unlock(page);
}
/*
@@ -96,7 +116,7 @@ int try_to_unmap(struct page *, int igno
* Called from mm/filemap_xip.c to unmap empty zero page
*/
pte_t *page_check_address(struct page *, struct mm_struct *,
- unsigned long, spinlock_t **);
+ unsigned long, spinlock_t **, bool);
/*
* Used by swapoff to help locate where page is expected in vma.
diff -puN mm/rmap.c~add-page-map-lock mm/rmap.c
--- linux-2.6.20-rc2/mm/rmap.c~add-page-map-lock 2006-12-29 14:48:07.000000000 +0530
+++ linux-2.6.20-rc2-balbir/mm/rmap.c 2006-12-29 14:48:07.000000000 +0530
@@ -243,7 +243,8 @@ unsigned long page_address_in_vma(struct
* On success returns with pte mapped and locked.
*/
pte_t *page_check_address(struct page *page, struct mm_struct *mm,
- unsigned long address, spinlock_t **ptlp)
+ unsigned long address, spinlock_t **ptlp,
+ bool trylock)
{
pgd_t *pgd;
pud_t *pud;
@@ -271,12 +272,20 @@ pte_t *page_check_address(struct page *p
}
ptl = pte_lockptr(mm, pmd);
- spin_lock(ptl);
+ if (trylock) {
+ if (!page_check_address_pte_trylock(ptl))
+ goto out;
+ } else {
+ spin_lock(ptl);
+ }
+
if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) {
*ptlp = ptl;
return pte;
}
- pte_unmap_unlock(pte, ptl);
+ spin_unlock(ptl);
+out:
+ pte_unmap(pte);
return NULL;
}
@@ -297,7 +306,7 @@ static int page_referenced_one(struct pa
if (address == -EFAULT)
goto out;
- pte = page_check_address(page, mm, address, &ptl);
+ pte = page_check_address(page, mm, address, &ptl, true);
if (!pte)
goto out;
@@ -441,7 +450,7 @@ static int page_mkclean_one(struct page
if (address == -EFAULT)
goto out;
- pte = page_check_address(page, mm, address, &ptl);
+ pte = page_check_address(page, mm, address, &ptl, false);
if (!pte)
goto out;
@@ -532,9 +541,10 @@ static void __page_set_anon_rmap(struct
void page_add_anon_rmap(struct page *page,
struct vm_area_struct *vma, unsigned long address)
{
- if (atomic_inc_and_test(&page->_mapcount))
+ page_map_lock(page);
+ if (page_mapcount_inc_and_test(page))
__page_set_anon_rmap(page, vma, address);
- /* else checking page index and mapping is racy */
+ page_map_unlock(page);
}
/*
@@ -549,8 +559,10 @@ void page_add_anon_rmap(struct page *pag
void page_add_new_anon_rmap(struct page *page,
struct vm_area_struct *vma, unsigned long address)
{
- atomic_set(&page->_mapcount, 0); /* elevate count by 1 (starts at -1) */
+ page_map_lock(page);
+ page_mapcount_set(page, 0); /* elevate count by 1 (starts at -1) */
__page_set_anon_rmap(page, vma, address);
+ page_map_unlock(page);
}
/**
@@ -561,8 +573,10 @@ void page_add_new_anon_rmap(struct page
*/
void page_add_file_rmap(struct page *page)
{
- if (atomic_inc_and_test(&page->_mapcount))
+ page_map_lock(page);
+ if (page_mapcount_inc_and_test(page))
__inc_zone_page_state(page, NR_FILE_MAPPED);
+ page_map_unlock(page);
}
/**
@@ -573,7 +587,8 @@ void page_add_file_rmap(struct page *pag
*/
void page_remove_rmap(struct page *page, struct vm_area_struct *vma)
{
- if (atomic_add_negative(-1, &page->_mapcount)) {
+ page_map_lock(page);
+ if (page_mapcount_add_negative(-1, page)) {
if (unlikely(page_mapcount(page) < 0)) {
printk (KERN_EMERG "Eeek! page_mapcount(page) went negative! (%d)\n", page_mapcount(page));
printk (KERN_EMERG " page pfn = %lx\n", page_to_pfn(page));
@@ -602,6 +617,7 @@ void page_remove_rmap(struct page *page,
__dec_zone_page_state(page,
PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED);
}
+ page_map_unlock(page);
}
/*
@@ -622,7 +638,7 @@ static int try_to_unmap_one(struct page
if (address == -EFAULT)
goto out;
- pte = page_check_address(page, mm, address, &ptl);
+ pte = page_check_address(page, mm, address, &ptl, true);
if (!pte)
goto out;
@@ -861,6 +877,7 @@ static int try_to_unmap_file(struct page
* The mapcount of the page we came in with is irrelevant,
* but even so use it as a guide to how hard we should try?
*/
+ page_map_unlock(page);
mapcount = page_mapcount(page);
if (!mapcount)
goto out;
@@ -882,7 +899,7 @@ static int try_to_unmap_file(struct page
cursor += CLUSTER_SIZE;
vma->vm_private_data = (void *) cursor;
if ((int)mapcount <= 0)
- goto out;
+ goto relock;
}
vma->vm_private_data = (void *) max_nl_cursor;
}
@@ -897,6 +914,8 @@ static int try_to_unmap_file(struct page
*/
list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
vma->vm_private_data = NULL;
+relock:
+ page_map_lock(page);
out:
spin_unlock(&mapping->i_mmap_lock);
return ret;
@@ -929,4 +948,3 @@ int try_to_unmap(struct page *page, int
ret = SWAP_SUCCESS;
return ret;
}
-
diff -puN mm/vmscan.c~add-page-map-lock mm/vmscan.c
--- linux-2.6.20-rc2/mm/vmscan.c~add-page-map-lock 2006-12-29 14:48:07.000000000 +0530
+++ linux-2.6.20-rc2-balbir/mm/vmscan.c 2006-12-29 14:48:07.000000000 +0530
@@ -472,6 +472,7 @@ static unsigned long shrink_page_list(st
VM_BUG_ON(PageActive(page));
sc->nr_scanned++;
+ page_map_lock(page);
if (!sc->may_swap && page_mapped(page))
goto keep_locked;
@@ -485,17 +486,22 @@ static unsigned long shrink_page_list(st
referenced = page_referenced(page, 1);
/* In active use or really unfreeable? Activate it. */
- if (referenced && page_mapping_inuse(page))
+ if (referenced && page_mapping_inuse(page)) {
+ page_map_unlock(page);
goto activate_locked;
+ }
#ifdef CONFIG_SWAP
/*
* Anonymous process memory has backing store?
* Try to allocate it some swap space here.
*/
- if (PageAnon(page) && !PageSwapCache(page))
+ if (PageAnon(page) && !PageSwapCache(page)) {
+ page_map_unlock(page);
if (!add_to_swap(page, GFP_ATOMIC))
goto activate_locked;
+ page_map_lock(page);
+ }
#endif /* CONFIG_SWAP */
mapping = page_mapping(page);
@@ -509,13 +515,16 @@ static unsigned long shrink_page_list(st
if (page_mapped(page) && mapping) {
switch (try_to_unmap(page, 0)) {
case SWAP_FAIL:
+ page_map_unlock(page);
goto activate_locked;
case SWAP_AGAIN:
+ page_map_unlock(page);
goto keep_locked;
case SWAP_SUCCESS:
; /* try to free the page below */
}
}
+ page_map_unlock(page);
if (PageDirty(page)) {
if (referenced)
@@ -833,12 +842,21 @@ force_reclaim_mapped:
page = lru_to_page(&l_hold);
list_del(&page->lru);
if (page_mapped(page)) {
- if (!reclaim_mapped ||
- (total_swap_pages == 0 && PageAnon(page)) ||
- page_referenced(page, 0)) {
+ if (!reclaim_mapped) {
list_add(&page->lru, &l_active);
continue;
}
+ page_map_lock(page);
+ if (page_referenced(page, 0)) {
+ page_map_unlock(page);
+ list_add(&page->lru, &l_active);
+ continue;
+ }
+ page_map_unlock(page);
+ }
+ if (total_swap_pages == 0 && PageAnon(page)) {
+ list_add(&page->lru, &l_active);
+ continue;
}
list_add(&page->lru, &l_inactive);
}
diff -puN include/linux/mm.h~add-page-map-lock include/linux/mm.h
--- linux-2.6.20-rc2/include/linux/mm.h~add-page-map-lock 2006-12-29 14:48:07.000000000 +0530
+++ linux-2.6.20-rc2-balbir/include/linux/mm.h 2006-12-29 14:48:07.000000000 +0530
@@ -602,6 +602,52 @@ static inline pgoff_t page_index(struct
return page->index;
}
+#ifdef CONFIG_SHARED_PAGE_ACCOUNTING
+/*
+ * Under SHARED_PAGE_ACCOUNTING, all these operations take place under
+ * the rmap page lock (page_map_*lock)
+ */
+static inline void reset_page_mapcount(struct page *page)
+{
+ (page)->_mapcount = -1;
+}
+
+static inline int page_mapcount(struct page *page)
+{
+ return (page)->_mapcount + 1;
+}
+
+/*
+ * Return true if this page is mapped into pagetables.
+ */
+static inline int page_mapped(struct page *page)
+{
+ return (page)->_mapcount >= 0;
+}
+
+static inline int page_mapcount_inc_and_test(struct page *page)
+{
+ page->_mapcount++;
+ return (page->_mapcount == 0);
+}
+
+static inline void page_mapcount_inc(struct page *page)
+{
+ page->_mapcount++;
+}
+
+static inline int page_mapcount_add_negative(int val, struct page *page)
+{
+ page->_mapcount += val;
+ return (page->_mapcount < 0);
+}
+
+static inline void page_mapcount_set(struct page *page, int val)
+{
+ page->_mapcount = val;
+}
+
+#else
/*
* The atomic page->_mapcount, like _count, starts from -1:
* so that transitions both from it and to it can be tracked,
@@ -625,6 +671,27 @@ static inline int page_mapped(struct pag
return atomic_read(&(page)->_mapcount) >= 0;
}
+static inline int page_mapcount_inc_and_test(struct page *page)
+{
+ return atomic_inc_and_test(&(page)->_mapcount);
+}
+
+static inline void page_mapcount_inc(struct page *page)
+{
+ atomic_inc(&(page)->_mapcount);
+}
+
+static inline int page_mapcount_add_negative(int val, struct page *page)
+{
+ return atomic_add_negative(val, &(page)->_mapcount);
+}
+
+static inline int page_mapcount_set(struct page *page, int val)
+{
+ atomic_set(&(page)->_mapcount, val);
+}
+#endif
+
/*
* Error return values for the *_nopage functions
*/
diff -puN mm/page_alloc.c~add-page-map-lock mm/page_alloc.c
--- linux-2.6.20-rc2/mm/page_alloc.c~add-page-map-lock 2006-12-29 14:48:07.000000000 +0530
+++ linux-2.6.20-rc2-balbir/mm/page_alloc.c 2006-12-29 14:48:07.000000000 +0530
@@ -199,7 +199,8 @@ static void bad_page(struct page *page)
1 << PG_slab |
1 << PG_swapcache |
1 << PG_writeback |
- 1 << PG_buddy );
+ 1 << PG_buddy |
+ 1 << PG_maplock);
set_page_count(page, 0);
reset_page_mapcount(page);
page->mapping = NULL;
@@ -434,7 +435,8 @@ static inline int free_pages_check(struc
1 << PG_swapcache |
1 << PG_writeback |
1 << PG_reserved |
- 1 << PG_buddy ))))
+ 1 << PG_buddy |
+ 1 << PG_maplock))))
bad_page(page);
if (PageDirty(page))
__ClearPageDirty(page);
@@ -584,7 +586,8 @@ static int prep_new_page(struct page *pa
1 << PG_swapcache |
1 << PG_writeback |
1 << PG_reserved |
- 1 << PG_buddy ))))
+ 1 << PG_buddy |
+ 1 << PG_maplock))))
bad_page(page);
/*
diff -puN include/linux/mm_types.h~add-page-map-lock include/linux/mm_types.h
--- linux-2.6.20-rc2/include/linux/mm_types.h~add-page-map-lock 2006-12-29 14:48:07.000000000 +0530
+++ linux-2.6.20-rc2-balbir/include/linux/mm_types.h 2006-12-29 14:48:07.000000000 +0530
@@ -8,6 +8,12 @@
struct address_space;
+#ifdef CONFIG_SHARED_PAGE_ACCOUNTING
+typedef long mapcount_t;
+#else
+typedef atomic_t mapcount_t;
+#endif
+
/*
* Each physical page in the system has a struct page associated with
* it to keep track of whatever it is we are using the page for at the
@@ -19,7 +25,7 @@ struct page {
unsigned long flags; /* Atomic flags, some possibly
* updated asynchronously */
atomic_t _count; /* Usage count, see below. */
- atomic_t _mapcount; /* Count of ptes mapped in mms,
+ mapcount_t _mapcount; /* Count of ptes mapped in mms,
* to show when page is mapped
* & limit reverse map searches.
*/
diff -puN init/Kconfig~add-page-map-lock init/Kconfig
--- linux-2.6.20-rc2/init/Kconfig~add-page-map-lock 2006-12-29 14:48:07.000000000 +0530
+++ linux-2.6.20-rc2-balbir/init/Kconfig 2006-12-29 14:48:07.000000000 +0530
@@ -280,6 +280,17 @@ config RELAY
If unsure, say N.
+config SHARED_PAGE_ACCOUNTING
+ bool "Enable support for accounting shared pages in RSS"
+ help
+ This option enables accounting of pages shared among several
+ processes in the system.
+ The RSS (Resident Set Size) of a process is tracked for shared
+ pages, to enable finer accounting of pages used by a process.
+ The accounting is more accurate and comes with a certain overhead
+
+ If unsure, say N
+
source "usr/Kconfig"
config CC_OPTIMIZE_FOR_SIZE
diff -puN mm/page-writeback.c~add-page-map-lock mm/page-writeback.c
diff -puN mm/filemap_xip.c~add-page-map-lock mm/filemap_xip.c
--- linux-2.6.20-rc2/mm/filemap_xip.c~add-page-map-lock 2006-12-29 14:48:07.000000000 +0530
+++ linux-2.6.20-rc2-balbir/mm/filemap_xip.c 2006-12-29 14:48:07.000000000 +0530
@@ -184,7 +184,7 @@ __xip_unmap (struct address_space * mapp
((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
BUG_ON(address < vma->vm_start || address >= vma->vm_end);
page = ZERO_PAGE(address);
- pte = page_check_address(page, mm, address, &ptl);
+ pte = page_check_address(page, mm, address, &ptl, false);
if (pte) {
/* Nuke the page table entry. */
flush_cache_page(vma, address, pte_pfn(*pte));
_
--
Balbir Singh,
Linux Technology Center,
IBM Software Labs
* [RFC][PATCH 2/3] Move RSS accounting to page_xxxx_rmap() functions
2006-12-29 10:08 [RFC][PATCH 0/3] Add shared RSS accounting Balbir Singh
2006-12-29 10:08 ` [RFC][PATCH 1/3] Add back rmap lock Balbir Singh
@ 2006-12-29 10:09 ` Balbir Singh
2006-12-29 10:09 ` [RFC][PATCH 3/3] Add shared page accounting Balbir Singh
2 siblings, 0 replies; 4+ messages in thread
From: Balbir Singh @ 2006-12-29 10:09 UTC (permalink / raw)
To: hugh, akpm, andyw; +Cc: linux-mm, Balbir Singh
RSS accounting is moved from several call sites into the rmap functions
page_add_anon_rmap(), page_add_new_anon_rmap(), page_add_file_rmap()
and page_remove_rmap().
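
A rough stand-alone sketch of the new call shape is below; struct mm and
struct page are trimmed stand-ins for mm_struct and struct page, and
page_remove_rmap() is shown taking the mm directly (the real function
takes a vma and uses vma->vm_mm).

#include <stdbool.h>
#include <stdio.h>

struct mm { long anon_rss, file_rss; };		/* stand-in for mm_struct counters */
struct page { long mapcount; bool anon; };	/* _mapcount starts at -1 */

/* After this patch the rmap functions take the mm and adjust RSS
 * themselves, keeping _mapcount and the counters in step. */
static void page_add_file_rmap(struct page *page, struct mm *mm)
{
	page->mapcount++;
	mm->file_rss++;
}

static void page_remove_rmap(struct page *page, struct mm *mm)
{
	page->mapcount--;
	if (page->anon)
		mm->anon_rss--;
	else
		mm->file_rss--;
}

int main(void)
{
	struct mm mm = { 0, 0 };
	struct page page = { .mapcount = -1, .anon = false };

	/* callers no longer pair inc_mm_counter(mm, file_rss) with the
	 * rmap call; the accounting happens inside it */
	page_add_file_rmap(&page, &mm);
	printf("file_rss = %ld\n", mm.file_rss);	/* 1 */

	page_remove_rmap(&page, &mm);
	printf("file_rss = %ld\n", mm.file_rss);	/* 0 */
	return 0;
}
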
Signed-off-by: Balbir Singh <balbir@in.ibm.com>
---
fs/exec.c | 1 -
include/linux/rmap.h | 8 ++++++--
mm/filemap_xip.c | 1 -
mm/fremap.c | 10 ++++------
mm/memory.c | 50 ++++++++++----------------------------------------
mm/migrate.c | 2 +-
mm/rmap.c | 17 ++++++++++-------
mm/swapfile.c | 1 -
8 files changed, 31 insertions(+), 59 deletions(-)
diff -puN include/linux/rmap.h~move-accounting-to-rmap include/linux/rmap.h
--- linux-2.6.20-rc2/include/linux/rmap.h~move-accounting-to-rmap 2006-12-29 14:48:28.000000000 +0530
+++ linux-2.6.20-rc2-balbir/include/linux/rmap.h 2006-12-29 14:48:28.000000000 +0530
@@ -89,7 +89,7 @@ void __anon_vma_link(struct vm_area_stru
*/
void page_add_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
void page_add_new_anon_rmap(struct page *, struct vm_area_struct *, unsigned long);
-void page_add_file_rmap(struct page *);
+void page_add_file_rmap(struct page *, struct mm_struct *);
void page_remove_rmap(struct page *, struct vm_area_struct *);
/**
@@ -99,10 +99,14 @@ void page_remove_rmap(struct page *, str
* For copy_page_range only: minimal extract from page_add_rmap,
* avoiding unnecessary tests (already checked) so it's quicker.
*/
-static inline void page_dup_rmap(struct page *page)
+static inline void page_dup_rmap(struct page *page, struct mm_struct *mm)
{
page_map_lock(page);
page_mapcount_inc(page);
+ if (PageAnon(page))
+ inc_mm_counter(mm, anon_rss);
+ else
+ inc_mm_counter(mm, file_rss);
page_map_unlock(page);
}
diff -puN mm/rmap.c~move-accounting-to-rmap mm/rmap.c
--- linux-2.6.20-rc2/mm/rmap.c~move-accounting-to-rmap 2006-12-29 14:48:28.000000000 +0530
+++ linux-2.6.20-rc2-balbir/mm/rmap.c 2006-12-29 14:48:28.000000000 +0530
@@ -544,6 +544,7 @@ void page_add_anon_rmap(struct page *pag
page_map_lock(page);
if (page_mapcount_inc_and_test(page))
__page_set_anon_rmap(page, vma, address);
+ inc_mm_counter(vma->vm_mm, anon_rss);
page_map_unlock(page);
}
@@ -562,6 +563,7 @@ void page_add_new_anon_rmap(struct page
page_map_lock(page);
page_mapcount_set(page, 0); /* elevate count by 1 (starts at -1) */
__page_set_anon_rmap(page, vma, address);
+ inc_mm_counter(vma->vm_mm, anon_rss);
page_map_unlock(page);
}
@@ -571,11 +573,12 @@ void page_add_new_anon_rmap(struct page
*
* The caller needs to hold the pte lock.
*/
-void page_add_file_rmap(struct page *page)
+void page_add_file_rmap(struct page *page, struct mm_struct *mm)
{
page_map_lock(page);
if (page_mapcount_inc_and_test(page))
__inc_zone_page_state(page, NR_FILE_MAPPED);
+ inc_mm_counter(mm, file_rss);
page_map_unlock(page);
}
@@ -587,6 +590,7 @@ void page_add_file_rmap(struct page *pag
*/
void page_remove_rmap(struct page *page, struct vm_area_struct *vma)
{
+ int anon = PageAnon(page);
page_map_lock(page);
if (page_mapcount_add_negative(-1, page)) {
if (unlikely(page_mapcount(page) < 0)) {
@@ -615,8 +619,12 @@ void page_remove_rmap(struct page *page,
if (page_test_and_clear_dirty(page))
set_page_dirty(page);
__dec_zone_page_state(page,
- PageAnon(page) ? NR_ANON_PAGES : NR_FILE_MAPPED);
+ anon ? NR_ANON_PAGES : NR_FILE_MAPPED);
}
+ if (anon)
+ dec_mm_counter(vma->vm_mm, anon_rss);
+ else
+ dec_mm_counter(vma->vm_mm, file_rss);
page_map_unlock(page);
}
@@ -679,7 +687,6 @@ static int try_to_unmap_one(struct page
list_add(&mm->mmlist, &init_mm.mmlist);
spin_unlock(&mmlist_lock);
}
- dec_mm_counter(mm, anon_rss);
#ifdef CONFIG_MIGRATION
} else {
/*
@@ -700,10 +707,7 @@ static int try_to_unmap_one(struct page
swp_entry_t entry;
entry = make_migration_entry(page, pte_write(pteval));
set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
- } else
#endif
- dec_mm_counter(mm, file_rss);
-
page_remove_rmap(page, vma);
page_cache_release(page);
@@ -797,7 +801,6 @@ static void try_to_unmap_cluster(unsigne
page_remove_rmap(page, vma);
page_cache_release(page);
- dec_mm_counter(mm, file_rss);
(*mapcount)--;
}
pte_unmap_unlock(pte - 1, ptl);
diff -puN fs/exec.c~move-accounting-to-rmap fs/exec.c
--- linux-2.6.20-rc2/fs/exec.c~move-accounting-to-rmap 2006-12-29 14:48:28.000000000 +0530
+++ linux-2.6.20-rc2-balbir/fs/exec.c 2006-12-29 14:48:28.000000000 +0530
@@ -321,7 +321,6 @@ void install_arg_page(struct vm_area_str
pte_unmap_unlock(pte, ptl);
goto out;
}
- inc_mm_counter(mm, anon_rss);
lru_cache_add_active(page);
set_pte_at(mm, address, pte, pte_mkdirty(pte_mkwrite(mk_pte(
page, vma->vm_page_prot))));
diff -puN mm/mremap.c~move-accounting-to-rmap mm/mremap.c
diff -puN mm/memory.c~move-accounting-to-rmap mm/memory.c
--- linux-2.6.20-rc2/mm/memory.c~move-accounting-to-rmap 2006-12-29 14:48:28.000000000 +0530
+++ linux-2.6.20-rc2-balbir/mm/memory.c 2006-12-29 14:48:28.000000000 +0530
@@ -335,14 +335,6 @@ int __pte_alloc_kernel(pmd_t *pmd, unsig
return 0;
}
-static inline void add_mm_rss(struct mm_struct *mm, int file_rss, int anon_rss)
-{
- if (file_rss)
- add_mm_counter(mm, file_rss, file_rss);
- if (anon_rss)
- add_mm_counter(mm, anon_rss, anon_rss);
-}
-
/*
* This function is called to print an error when a bad pte
* is found. For example, we might have a PFN-mapped pte in
@@ -427,7 +419,7 @@ struct page *vm_normal_page(struct vm_ar
static inline void
copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pte_t *dst_pte, pte_t *src_pte, struct vm_area_struct *vma,
- unsigned long addr, int *rss)
+ unsigned long addr)
{
unsigned long vm_flags = vma->vm_flags;
pte_t pte = *src_pte;
@@ -481,8 +473,7 @@ copy_one_pte(struct mm_struct *dst_mm, s
page = vm_normal_page(vma, addr, pte);
if (page) {
get_page(page);
- page_dup_rmap(page);
- rss[!!PageAnon(page)]++;
+ page_dup_rmap(page, dst_mm);
}
out_set_pte:
@@ -496,10 +487,8 @@ static int copy_pte_range(struct mm_stru
pte_t *src_pte, *dst_pte;
spinlock_t *src_ptl, *dst_ptl;
int progress = 0;
- int rss[2];
again:
- rss[1] = rss[0] = 0;
dst_pte = pte_alloc_map_lock(dst_mm, dst_pmd, addr, &dst_ptl);
if (!dst_pte)
return -ENOMEM;
@@ -524,14 +513,13 @@ again:
progress++;
continue;
}
- copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr, rss);
+ copy_one_pte(dst_mm, src_mm, dst_pte, src_pte, vma, addr);
progress += 8;
} while (dst_pte++, src_pte++, addr += PAGE_SIZE, addr != end);
arch_leave_lazy_mmu_mode();
spin_unlock(src_ptl);
pte_unmap_nested(src_pte - 1);
- add_mm_rss(dst_mm, rss[0], rss[1]);
pte_unmap_unlock(dst_pte - 1, dst_ptl);
cond_resched();
if (addr != end)
@@ -626,8 +614,6 @@ static unsigned long zap_pte_range(struc
struct mm_struct *mm = tlb->mm;
pte_t *pte;
spinlock_t *ptl;
- int file_rss = 0;
- int anon_rss = 0;
pte = pte_offset_map_lock(mm, pmd, addr, &ptl);
arch_enter_lazy_mmu_mode();
@@ -672,14 +658,11 @@ static unsigned long zap_pte_range(struc
addr) != page->index)
set_pte_at(mm, addr, pte,
pgoff_to_pte(page->index));
- if (PageAnon(page))
- anon_rss--;
- else {
+ if (!PageAnon(page)) {
if (pte_dirty(ptent))
set_page_dirty(page);
if (pte_young(ptent))
mark_page_accessed(page);
- file_rss--;
}
page_remove_rmap(page, vma);
tlb_remove_page(tlb, page);
@@ -696,7 +679,6 @@ static unsigned long zap_pte_range(struc
pte_clear_not_present_full(mm, addr, pte, tlb->fullmm);
} while (pte++, addr += PAGE_SIZE, (addr != end && *zap_work > 0));
- add_mm_rss(mm, file_rss, anon_rss);
arch_leave_lazy_mmu_mode();
pte_unmap_unlock(pte - 1, ptl);
@@ -1126,8 +1108,7 @@ static int zeromap_pte_range(struct mm_s
break;
}
page_cache_get(page);
- page_add_file_rmap(page);
- inc_mm_counter(mm, file_rss);
+ page_add_file_rmap(page, mm);
set_pte_at(mm, addr, pte, zero_pte);
} while (pte++, addr += PAGE_SIZE, addr != end);
arch_leave_lazy_mmu_mode();
@@ -1233,8 +1214,7 @@ static int insert_page(struct mm_struct
/* Ok, finally just insert the thing.. */
get_page(page);
- inc_mm_counter(mm, file_rss);
- page_add_file_rmap(page);
+ page_add_file_rmap(page, mm);
set_pte_at(mm, addr, pte, mk_pte(page, prot));
retval = 0;
@@ -1585,14 +1565,9 @@ gotten:
*/
page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
if (likely(pte_same(*page_table, orig_pte))) {
- if (old_page) {
+ if (old_page)
page_remove_rmap(old_page, vma);
- if (!PageAnon(old_page)) {
- dec_mm_counter(mm, file_rss);
- inc_mm_counter(mm, anon_rss);
- }
- } else
- inc_mm_counter(mm, anon_rss);
+
flush_cache_page(vma, address, pte_pfn(orig_pte));
entry = mk_pte(new_page, vma->vm_page_prot);
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
@@ -2038,7 +2013,6 @@ static int do_swap_page(struct mm_struct
/* The page isn't present yet, go ahead with the fault. */
- inc_mm_counter(mm, anon_rss);
pte = mk_pte(page, vma->vm_page_prot);
if (write_access && can_share_swap_page(page)) {
pte = maybe_mkwrite(pte_mkdirty(pte), vma);
@@ -2104,7 +2078,6 @@ static int do_anonymous_page(struct mm_s
page_table = pte_offset_map_lock(mm, pmd, address, &ptl);
if (!pte_none(*page_table))
goto release;
- inc_mm_counter(mm, anon_rss);
lru_cache_add_active(page);
page_add_new_anon_rmap(page, vma, address);
} else {
@@ -2117,8 +2090,7 @@ static int do_anonymous_page(struct mm_s
spin_lock(ptl);
if (!pte_none(*page_table))
goto release;
- inc_mm_counter(mm, file_rss);
- page_add_file_rmap(page);
+ page_add_file_rmap(page, mm);
}
set_pte_at(mm, address, page_table, entry);
@@ -2251,12 +2223,10 @@ retry:
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
set_pte_at(mm, address, page_table, entry);
if (anon) {
- inc_mm_counter(mm, anon_rss);
lru_cache_add_active(new_page);
page_add_new_anon_rmap(new_page, vma, address);
} else {
- inc_mm_counter(mm, file_rss);
- page_add_file_rmap(new_page);
+ page_add_file_rmap(new_page, mm);
if (write_access) {
dirty_page = new_page;
get_page(dirty_page);
diff -puN mm/swapfile.c~move-accounting-to-rmap mm/swapfile.c
--- linux-2.6.20-rc2/mm/swapfile.c~move-accounting-to-rmap 2006-12-29 14:48:28.000000000 +0530
+++ linux-2.6.20-rc2-balbir/mm/swapfile.c 2006-12-29 14:48:28.000000000 +0530
@@ -503,7 +503,6 @@ unsigned int count_swap_pages(int type,
static void unuse_pte(struct vm_area_struct *vma, pte_t *pte,
unsigned long addr, swp_entry_t entry, struct page *page)
{
- inc_mm_counter(vma->vm_mm, anon_rss);
get_page(page);
set_pte_at(vma->vm_mm, addr, pte,
pte_mkold(mk_pte(page, vma->vm_page_prot)));
diff -puN mm/filemap_xip.c~move-accounting-to-rmap mm/filemap_xip.c
--- linux-2.6.20-rc2/mm/filemap_xip.c~move-accounting-to-rmap 2006-12-29 14:48:28.000000000 +0530
+++ linux-2.6.20-rc2-balbir/mm/filemap_xip.c 2006-12-29 14:48:28.000000000 +0530
@@ -190,7 +190,6 @@ __xip_unmap (struct address_space * mapp
flush_cache_page(vma, address, pte_pfn(*pte));
pteval = ptep_clear_flush(vma, address, pte);
page_remove_rmap(page, vma);
- dec_mm_counter(mm, file_rss);
BUG_ON(pte_dirty(pteval));
pte_unmap_unlock(pte, ptl);
page_cache_release(page);
diff -puN mm/fremap.c~move-accounting-to-rmap mm/fremap.c
--- linux-2.6.20-rc2/mm/fremap.c~move-accounting-to-rmap 2006-12-29 14:48:28.000000000 +0530
+++ linux-2.6.20-rc2-balbir/mm/fremap.c 2006-12-29 14:48:28.000000000 +0530
@@ -75,13 +75,13 @@ int install_page(struct mm_struct *mm, s
if (page_mapcount(page) > INT_MAX/2)
goto unlock;
- if (pte_none(*pte) || !zap_pte(mm, vma, addr, pte))
- inc_mm_counter(mm, file_rss);
+ if (!pte_none(*pte))
+ zap_pte(mm, vma, addr, pte);
flush_icache_page(vma, page);
pte_val = mk_pte(page, prot);
set_pte_at(mm, addr, pte, pte_val);
- page_add_file_rmap(page);
+ page_add_file_rmap(page, mm);
update_mmu_cache(vma, addr, pte_val);
lazy_mmu_prot_update(pte_val);
err = 0;
@@ -107,10 +107,8 @@ int install_file_pte(struct mm_struct *m
if (!pte)
goto out;
- if (!pte_none(*pte) && zap_pte(mm, vma, addr, pte)) {
+ if (!pte_none(*pte) && zap_pte(mm, vma, addr, pte))
update_hiwater_rss(mm);
- dec_mm_counter(mm, file_rss);
- }
set_pte_at(mm, addr, pte, pgoff_to_pte(pgoff));
/*
diff -puN mm/migrate.c~move-accounting-to-rmap mm/migrate.c
--- linux-2.6.20-rc2/mm/migrate.c~move-accounting-to-rmap 2006-12-29 14:48:28.000000000 +0530
+++ linux-2.6.20-rc2-balbir/mm/migrate.c 2006-12-29 14:48:28.000000000 +0530
@@ -177,7 +177,7 @@ static void remove_migration_pte(struct
if (PageAnon(new))
page_add_anon_rmap(new, vma, addr);
else
- page_add_file_rmap(new);
+ page_add_file_rmap(new, mm);
/* No need to invalidate - it was non-present before */
update_mmu_cache(vma, addr, pte);
_
--
Balbir Singh,
Linux Technology Center,
IBM Software Labs
* [RFC][PATCH 3/3] Add shared page accounting
2006-12-29 10:08 [RFC][PATCH 0/3] Add shared RSS accounting Balbir Singh
2006-12-29 10:08 ` [RFC][PATCH 1/3] Add back rmap lock Balbir Singh
2006-12-29 10:09 ` [RFC][PATCH 2/3] Move RSS accounting to page_xxxx_rmap() functions Balbir Singh
@ 2006-12-29 10:09 ` Balbir Singh
2 siblings, 0 replies; 4+ messages in thread
From: Balbir Singh @ 2006-12-29 10:09 UTC (permalink / raw)
To: hugh, akpm, andyw; +Cc: linux-mm, Balbir Singh
This patch adds shared page accounting: a page that is mapped by two or
more mm_structs is accounted as shared. When the _mapcount of a page
reaches 1 (while adding rmap information), the page has just become
shared. Using rmap, the other mm_struct sharing the page is found and
the accounting of both mm_structs is adjusted. From then on, any other
mm_struct mapping this page only increments its shared RSS. Similarly,
when a page is unshared (_mapcount drops back to 0 in
page_remove_rmap()), accounting is adjusted by searching for the
mm_struct that still shares the page.
To account for shared pages, two new counters, anon_rss_shared and
file_rss_shared, have been added to mm_struct.
The patch depends on page_map_lock to ensure that rmap information does
not change while searching for the sharing mm_struct. The PTE set and
clear operations have been moved to after the calls to
page_add_anon/file_rmap() and page_remove_rmap(). This ensures that the
sharing mm_struct is found (the page is still seen as mapped in that
mm_struct) when we search for it using rmap.
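
The transition can be sketched in isolation as below; struct
mm_counters, account_anon_map() and the "other" argument are simplified
stand-ins for mm_struct, inc_mm_counter_anon_shared() and the mm_struct
returned by find_shared_anon_mm(), and count is the value of _mapcount
after the increment.

#include <stdio.h>

struct mm_counters { long anon_rss, anon_rss_shared; };

/*
 * count == 0: first mapping, the page is private to @mm
 * count == 1: the page has just become shared; @other (found via rmap)
 *             moves its copy from anon_rss to anon_rss_shared
 * count >  1: already shared, only @mm's shared counter grows
 */
static void account_anon_map(struct mm_counters *mm,
			     struct mm_counters *other, int count)
{
	if (count == 0) {
		mm->anon_rss++;
	} else if (count == 1) {
		mm->anon_rss_shared++;
		if (other) {
			other->anon_rss_shared++;
			other->anon_rss--;
		}
	} else {
		mm->anon_rss_shared++;
	}
}

int main(void)
{
	struct mm_counters a = { 0, 0 }, b = { 0, 0 };

	account_anon_map(&a, NULL, 0);	/* a maps the page: private to a */
	account_anon_map(&b, &a, 1);	/* b maps it too: both now shared */

	printf("a: anon_rss=%ld anon_rss_shared=%ld\n",
	       a.anon_rss, a.anon_rss_shared);	/* 0 and 1 */
	printf("b: anon_rss=%ld anon_rss_shared=%ld\n",
	       b.anon_rss, b.anon_rss_shared);	/* 0 and 1 */
	return 0;
}

Unsharing in page_remove_rmap() is the symmetric operation: when count
drops back to 0, the remaining mm_struct's page is moved from
anon_rss_shared back to anon_rss.
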
Signed-off-by: Balbir Singh <balbir@in.ibm.com>
---
fs/exec.c | 2
fs/proc/task_mmu.c | 4 -
include/linux/mm.h | 25 ++-----
include/linux/rmap.h | 37 ++++++++--
include/linux/sched.h | 118 +++++++++++++++++++++++++++++++++-
kernel/fork.c | 2
mm/filemap_xip.c | 2
mm/fremap.c | 8 +-
mm/memory.c | 11 +--
mm/migrate.c | 2
mm/rmap.c | 173 ++++++++++++++++++++++++++++++++++++++++++++++----
mm/swapfile.c | 2
12 files changed, 335 insertions(+), 51 deletions(-)
diff -puN mm/rmap.c~add-shared-accounting mm/rmap.c
--- linux-2.6.20-rc2/mm/rmap.c~add-shared-accounting 2006-12-29 14:49:31.000000000 +0530
+++ linux-2.6.20-rc2-balbir/mm/rmap.c 2006-12-29 14:49:31.000000000 +0530
@@ -541,10 +541,15 @@ static void __page_set_anon_rmap(struct
void page_add_anon_rmap(struct page *page,
struct vm_area_struct *vma, unsigned long address)
{
+ int count;
+ struct mm_struct *shared_mm;
page_map_lock(page);
- if (page_mapcount_inc_and_test(page))
+ count = page_mapcount_add_and_return(1, page);
+ if (count == 0)
__page_set_anon_rmap(page, vma, address);
- inc_mm_counter(vma->vm_mm, anon_rss);
+ if (count == 1)
+ shared_mm = find_shared_anon_mm(page, vma->vm_mm);
+ inc_mm_counter_anon_shared(vma->vm_mm, shared_mm, count);
page_map_unlock(page);
}
@@ -575,10 +580,23 @@ void page_add_new_anon_rmap(struct page
*/
void page_add_file_rmap(struct page *page, struct mm_struct *mm)
{
+ int count;
+ struct mm_struct *shared_mm;
page_map_lock(page);
- if (page_mapcount_inc_and_test(page))
+ count = page_mapcount_add_and_return(1, page);
+ if (count == 0)
__inc_zone_page_state(page, NR_FILE_MAPPED);
- inc_mm_counter(mm, file_rss);
+ /*
+ * ZERO_PAGE(vaddr), does not really use the vaddr
+ * parameter
+ */
+ if (page == ZERO_PAGE(0))
+ inc_mm_counter(mm, file_rss_shared);
+ else {
+ if (count == 1)
+ shared_mm = find_shared_file_mm(page, mm);
+ inc_mm_counter_file_shared(mm, shared_mm, count);
+ }
page_map_unlock(page);
}
@@ -591,8 +609,12 @@ void page_add_file_rmap(struct page *pag
void page_remove_rmap(struct page *page, struct vm_area_struct *vma)
{
int anon = PageAnon(page);
+ int count;
+ struct mm_struct *shared_mm;
+
page_map_lock(page);
- if (page_mapcount_add_negative(-1, page)) {
+ count = page_mapcount_add_and_return(-1, page);
+ if (count < 0) {
if (unlikely(page_mapcount(page) < 0)) {
printk (KERN_EMERG "Eeek! page_mapcount(page) went negative! (%d)\n", page_mapcount(page));
printk (KERN_EMERG " page pfn = %lx\n", page_to_pfn(page));
@@ -621,10 +643,20 @@ void page_remove_rmap(struct page *page,
__dec_zone_page_state(page,
anon ? NR_ANON_PAGES : NR_FILE_MAPPED);
}
- if (anon)
- dec_mm_counter(vma->vm_mm, anon_rss);
- else
- dec_mm_counter(vma->vm_mm, file_rss);
+ if (anon) {
+ if (count == 0)
+ shared_mm = find_shared_anon_mm(page, vma->vm_mm);
+ dec_mm_counter_anon_shared(vma->vm_mm, shared_mm, count);
+ } else {
+ if (page == ZERO_PAGE(0))
+ dec_mm_counter(vma->vm_mm, file_rss_shared);
+ else {
+ if (count == 0)
+ shared_mm = find_shared_file_mm(page,
+ vma->vm_mm);
+ dec_mm_counter_file_shared(vma->vm_mm, shared_mm, count);
+ }
+ }
page_map_unlock(page);
}
@@ -671,6 +703,7 @@ static int try_to_unmap_one(struct page
/* Update high watermark before we lower rss */
update_hiwater_rss(mm);
+ page_remove_rmap(page, vma);
if (PageAnon(page)) {
swp_entry_t entry = { .val = page_private(page) };
@@ -709,7 +742,6 @@ static int try_to_unmap_one(struct page
set_pte_at(mm, address, pte, swp_entry_to_pte(entry));
#endif
- page_remove_rmap(page, vma);
page_cache_release(page);
out_unmap:
@@ -784,6 +816,7 @@ static void try_to_unmap_cluster(unsigne
page = vm_normal_page(vma, address, *pte);
BUG_ON(!page || PageAnon(page));
+ page_remove_rmap(page, vma);
if (ptep_clear_flush_young(vma, address, pte))
continue;
@@ -799,7 +832,6 @@ static void try_to_unmap_cluster(unsigne
if (pte_dirty(pteval))
set_page_dirty(page);
- page_remove_rmap(page, vma);
page_cache_release(page);
(*mapcount)--;
}
@@ -951,3 +983,122 @@ int try_to_unmap(struct page *page, int
ret = SWAP_SUCCESS;
return ret;
}
+
+#ifdef CONFIG_SHARED_PAGE_ACCOUNTING
+/*
+ * This routine should be called with the pte lock held
+ */
+static int page_in_vma(struct vm_area_struct *vma, struct page *page,
+ int linear)
+{
+ int ret = 0;
+ pgd_t *pgd;
+ pud_t *pud;
+ pmd_t *pmd;
+ pte_t *pte;
+ unsigned long address;
+
+ if (linear) {
+ address = vma_address(page, vma);
+ if (address == -EFAULT)
+ return 0;
+ } else {
+ address = vma->vm_start;
+ }
+
+ pgd = pgd_offset(vma->vm_mm, address);
+ if (!pgd_present(*pgd))
+ return 0;
+ pud = pud_offset(pgd, address);
+ if (!pud_present(*pud))
+ return 0;
+ pmd = pmd_offset(pud, address);
+ if (!pmd_present(*pmd))
+ return 0;
+
+ pte = pte_offset_map(pmd, address);
+ if (linear) {
+ if (pte_present(*pte) && page_to_pfn(page) == pte_pfn(*pte)) {
+ ret = 1;
+ }
+ } else {
+ unsigned long end = vma->vm_end;
+
+ for (; address < end; address += PAGE_SIZE) {
+ if (page_to_pfn(page) == pte_pfn(*pte)) {
+ ret = 1;
+ break;
+ }
+ }
+ }
+ pte_unmap(pte);
+
+ return ret;
+}
+
+/*
+ * This routine should be called with the page_map_lock() held
+ */
+struct mm_struct *find_shared_anon_mm(struct page *page, struct mm_struct *mm)
+{
+ struct mm_struct *oth_mm = NULL;
+
+ struct anon_vma *anon_vma;
+ struct vm_area_struct *vma;
+
+ anon_vma = page_lock_anon_vma(page);
+ if (!anon_vma)
+ return NULL;
+ /*
+ * Search through anon_vma's
+ */
+ list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+ if ((vma->vm_mm != mm) && page_in_vma(vma, page, 1)) {
+ oth_mm = vma->vm_mm;
+ break;
+ }
+ }
+ spin_unlock(&anon_vma->lock);
+
+ return oth_mm;
+}
+
+/*
+ * This routine should be called with the page_map_lock() held
+ */
+struct mm_struct *find_shared_file_mm(struct page *page, struct mm_struct *mm)
+{
+ struct mm_struct *oth_mm = NULL;
+
+ /*
+ * TODO: Can we hold i_mmap_lock and is it safe to use
+ * page_mapping() here?
+ */
+ struct address_space *mapping = page_mapping(page);
+ pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+ struct vm_area_struct *vma;
+ struct prio_tree_iter iter;
+
+ if (!mapping)
+ return NULL;
+
+ spin_lock(&mapping->i_mmap_lock);
+ vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff)
+ if ((vma->vm_mm != mm) && page_in_vma(vma, page, 1)) {
+ oth_mm = vma->vm_mm;
+ break;
+ }
+
+ if (mm || list_empty(&mapping->i_mmap_nonlinear))
+ goto done;
+
+ list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list)
+ if ((vma->vm_mm != mm) && page_in_vma(vma, page, 1)) {
+ oth_mm = vma->vm_mm;
+ break;
+ }
+done:
+ spin_unlock(&mapping->i_mmap_lock);
+ return oth_mm;
+}
+#endif /* CONFIG_SHARED_PAGE_ACCOUNTING */
diff -puN include/linux/sched.h~add-shared-accounting include/linux/sched.h
--- linux-2.6.20-rc2/include/linux/sched.h~add-shared-accounting 2006-12-29 14:49:31.000000000 +0530
+++ linux-2.6.20-rc2-balbir/include/linux/sched.h 2006-12-29 14:49:31.000000000 +0530
@@ -295,8 +295,22 @@ typedef unsigned long mm_counter_t;
#endif /* NR_CPUS < CONFIG_SPLIT_PTLOCK_CPUS */
-#define get_mm_rss(mm) \
- (get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss))
+#ifdef CONFIG_SHARED_PAGE_ACCOUNTING
+#define get_mm_rss_shared(mm) \
+ (get_mm_counter(mm, file_rss_shared) + \
+ get_mm_counter(mm, anon_rss_shared))
+#define get_mm_rss_unshared(mm) \
+ (get_mm_counter(mm, file_rss) + \
+ get_mm_counter(mm, anon_rss))
+#else
+#define get_mm_rss_shared(mm) get_mm_counter(mm, file_rss)
+#define get_mm_rss_unshared(mm) get_mm_counter(mm, anon_rss)
+#endif /* CONFIG_SHARED_PAGE_ACCOUNTING */
+
+#define get_mm_rss(mm) \
+ (get_mm_counter(mm, file_rss) + get_mm_counter(mm, anon_rss) + \
+ get_mm_counter(mm, file_rss_shared) + \
+ get_mm_counter(mm, anon_rss_shared))
#define update_hiwater_rss(mm) do { \
unsigned long _rss = get_mm_rss(mm); \
if ((mm)->hiwater_rss < _rss) \
@@ -336,6 +350,8 @@ struct mm_struct {
*/
mm_counter_t _file_rss;
mm_counter_t _anon_rss;
+ mm_counter_t _file_rss_shared;
+ mm_counter_t _anon_rss_shared;
unsigned long hiwater_rss; /* High-watermark of RSS usage */
unsigned long hiwater_vm; /* High-water virtual memory usage */
@@ -375,6 +391,104 @@ struct mm_struct {
struct kioctx *ioctx_list;
};
+#ifdef CONFIG_SHARED_PAGE_ACCOUNTING
+static inline void inc_mm_counter_anon_shared(struct mm_struct *mm,
+ struct mm_struct *shared_mm,
+ int count)
+{
+ if (count == 1) { /* This page is now being shared */
+ if (shared_mm) {
+ inc_mm_counter(mm, anon_rss_shared);
+ inc_mm_counter(shared_mm, anon_rss_shared);
+ dec_mm_counter(shared_mm, anon_rss);
+ } else /* this page cannot be shared via rmap */
+ inc_mm_counter(mm, anon_rss_shared);
+ } else if (count > 1)
+ inc_mm_counter(mm, anon_rss_shared);
+ else
+ inc_mm_counter(mm, anon_rss);
+}
+
+static inline void inc_mm_counter_file_shared(struct mm_struct *mm,
+ struct mm_struct *shared_mm,
+ int count)
+{
+ if (count == 1) { /* This page is now being shared */
+ if (shared_mm) {
+ inc_mm_counter(mm, file_rss_shared);
+ inc_mm_counter(shared_mm, file_rss_shared);
+ dec_mm_counter(shared_mm, file_rss);
+ } else /* cannot be shared with rmap, bump shared count */
+ inc_mm_counter(mm, file_rss_shared);
+ } else if (count > 1)
+ inc_mm_counter(mm, file_rss_shared);
+ else
+ inc_mm_counter(mm, file_rss);
+}
+
+static inline void dec_mm_counter_anon_shared(struct mm_struct *mm,
+ struct mm_struct *shared_mm,
+ int count)
+{
+ if (count == 0) { /* This page is now being unshared */
+ if (shared_mm) {
+ dec_mm_counter(mm, anon_rss_shared);
+ dec_mm_counter(shared_mm, anon_rss_shared);
+ inc_mm_counter(shared_mm, anon_rss);
+ } else
+ dec_mm_counter(mm, anon_rss_shared);
+ } else if (count > 0)
+ dec_mm_counter(mm, anon_rss_shared);
+ else
+ dec_mm_counter(mm, anon_rss);
+}
+
+static inline void dec_mm_counter_file_shared(struct mm_struct *mm,
+ struct mm_struct *shared_mm,
+ int count)
+{
+ if (count == 0) { /* This page is now being shared */
+ if (shared_mm) {
+ dec_mm_counter(mm, file_rss_shared);
+ dec_mm_counter(shared_mm, file_rss_shared);
+ inc_mm_counter(shared_mm, file_rss);
+ } else
+ dec_mm_counter(mm, file_rss_shared);
+ } else if (count > 0)
+ dec_mm_counter(mm, file_rss_shared);
+ else
+ dec_mm_counter(mm, file_rss);
+}
+#else
+static inline void inc_mm_counter_anon_shared(struct mm_struct *mm,
+ struct mm_struct *shared_mm,
+ int count)
+{
+ inc_mm_counter(mm, anon_rss);
+}
+
+static inline void inc_mm_counter_file_shared(struct mm_struct *mm,
+ struct mm_struct *shared_mm,
+ int count)
+{
+ inc_mm_counter(mm, file_rss);
+}
+
+static inline void dec_mm_counter_anon_shared(struct mm_struct *mm,
+ struct mm_struct *shared_mm,
+ int count)
+{
+ dec_mm_counter(mm, anon_rss);
+}
+
+static inline void dec_mm_counter_file_shared(struct mm_struct *mm,
+ struct mm_struct *shared_mm,
+ int count)
+{
+ dec_mm_counter(mm, file_rss);
+}
+#endif /* CONFIG_SHARED_PAGE_ACCOUNTING */
+
struct sighand_struct {
atomic_t count;
struct k_sigaction action[_NSIG];
diff -puN include/linux/rmap.h~add-shared-accounting include/linux/rmap.h
--- linux-2.6.20-rc2/include/linux/rmap.h~add-shared-accounting 2006-12-29 14:49:31.000000000 +0530
+++ linux-2.6.20-rc2-balbir/include/linux/rmap.h 2006-12-29 14:49:31.000000000 +0530
@@ -9,6 +9,7 @@
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/bit_spinlock.h>
+#include <linux/sched.h>
#ifdef CONFIG_SHARED_PAGE_ACCOUNTING
#define page_map_lock(page) \
@@ -99,14 +100,21 @@ void page_remove_rmap(struct page *, str
* For copy_page_range only: minimal extract from page_add_rmap,
* avoiding unnecessary tests (already checked) so it's quicker.
*/
-static inline void page_dup_rmap(struct page *page, struct mm_struct *mm)
+static inline void page_dup_rmap(struct page *page, struct mm_struct *src_mm,
+ struct mm_struct *dst_mm)
{
+ int count;
+ int anon = PageAnon(page);
page_map_lock(page);
- page_mapcount_inc(page);
- if (PageAnon(page))
- inc_mm_counter(mm, anon_rss);
- else
- inc_mm_counter(mm, file_rss);
+ count = page_mapcount_add_and_return(1, page);
+ if (anon)
+ inc_mm_counter_anon_shared(dst_mm, src_mm, count);
+ else {
+ if (page == ZERO_PAGE(0))
+ inc_mm_counter(dst_mm, file_rss_shared);
+ else
+ inc_mm_counter_file_shared(dst_mm, src_mm, count);
+ }
page_map_unlock(page);
}
@@ -135,6 +143,23 @@ unsigned long page_address_in_vma(struct
*/
int page_mkclean(struct page *);
+#ifdef CONFIG_SHARED_PAGE_ACCOUNTING
+struct mm_struct *find_shared_anon_mm(struct page *page, struct mm_struct *mm);
+struct mm_struct *find_shared_file_mm(struct page *page, struct mm_struct *mm);
+#else
+static inline struct mm_struct *find_shared_anon_mm(struct page *page,
+ struct mm_struct *mm)
+{
+ return NULL;
+}
+
+static inline struct mm_struct *find_shared_file_mm(struct page *page,
+ struct mm_struct *mm)
+{
+ return NULL;
+}
+#endif /* CONFIG_SHARED_PAGE_ACCOUNTING */
+
#else /* !CONFIG_MMU */
#define anon_vma_init() do {} while (0)
diff -puN kernel/fork.c~add-shared-accounting kernel/fork.c
--- linux-2.6.20-rc2/kernel/fork.c~add-shared-accounting 2006-12-29 14:49:31.000000000 +0530
+++ linux-2.6.20-rc2-balbir/kernel/fork.c 2006-12-29 14:49:31.000000000 +0530
@@ -335,6 +335,8 @@ static struct mm_struct * mm_init(struct
mm->nr_ptes = 0;
set_mm_counter(mm, file_rss, 0);
set_mm_counter(mm, anon_rss, 0);
+ set_mm_counter(mm, file_rss_shared, 0);
+ set_mm_counter(mm, anon_rss_shared, 0);
spin_lock_init(&mm->page_table_lock);
rwlock_init(&mm->ioctx_list_lock);
mm->ioctx_list = NULL;
diff -puN include/linux/mm.h~add-shared-accounting include/linux/mm.h
--- linux-2.6.20-rc2/include/linux/mm.h~add-shared-accounting 2006-12-29 14:49:31.000000000 +0530
+++ linux-2.6.20-rc2-balbir/include/linux/mm.h 2006-12-29 14:49:31.000000000 +0530
@@ -625,10 +625,10 @@ static inline int page_mapped(struct pag
return (page)->_mapcount >= 0;
}
-static inline int page_mapcount_inc_and_test(struct page *page)
+static inline int page_mapcount_add_and_return(int val, struct page *page)
{
- page->_mapcount++;
- return (page->_mapcount == 0);
+ page->_mapcount += val;
+ return page->_mapcount;
}
static inline void page_mapcount_inc(struct page *page)
@@ -636,12 +636,6 @@ static inline void page_mapcount_inc(str
page->_mapcount++;
}
-static inline int page_mapcount_add_negative(int val, struct page *page)
-{
- page->_mapcount += val;
- return (page->_mapcount < 0);
-}
-
static inline void page_mapcount_set(struct page *page, int val)
{
page->_mapcount = val;
@@ -651,7 +645,7 @@ static inline void page_mapcount_set(str
/*
* The atomic page->_mapcount, like _count, starts from -1:
* so that transitions both from it and to it can be tracked,
- * using atomic_inc_and_test and atomic_add_negative(-1).
+ * using atomic_inc_and_return and atomic_add_negative(-1).
*/
static inline void reset_page_mapcount(struct page *page)
{
@@ -671,9 +665,9 @@ static inline int page_mapped(struct pag
return atomic_read(&(page)->_mapcount) >= 0;
}
-static inline int page_mapcount_inc_and_test(struct page *page)
+static inline int page_mapcount_add_and_return(int val, struct page *page)
{
- return atomic_inc_and_test(&(page)->_mapcount);
+ return atomic_add_return(val, &(page)->_mapcount);
}
static inline void page_mapcount_inc(struct page *page)
@@ -681,12 +675,7 @@ static inline void page_mapcount_inc(str
atomic_inc(&(page)->_mapcount);
}
-static inline int page_mapcount_add_negative(int val, struct page *page)
-{
- return atomic_add_negative(val, &(page)->_mapcount);
-}
-
-static inline int page_mapcount_set(struct page *page, int val)
+static inline void page_mapcount_set(struct page *page, int val)
{
atomic_set(&(page)->_mapcount, val);
}
diff -puN fs/exec.c~add-shared-accounting fs/exec.c
--- linux-2.6.20-rc2/fs/exec.c~add-shared-accounting 2006-12-29 14:49:31.000000000 +0530
+++ linux-2.6.20-rc2-balbir/fs/exec.c 2006-12-29 14:49:31.000000000 +0530
@@ -322,9 +322,9 @@ void install_arg_page(struct vm_area_str
goto out;
}
lru_cache_add_active(page);
+ page_add_new_anon_rmap(page, vma, address);
set_pte_at(mm, address, pte, pte_mkdirty(pte_mkwrite(mk_pte(
page, vma->vm_page_prot))));
- page_add_new_anon_rmap(page, vma, address);
pte_unmap_unlock(pte, ptl);
/* no need for flush_tlb */
diff -puN mm/fremap.c~add-shared-accounting mm/fremap.c
--- linux-2.6.20-rc2/mm/fremap.c~add-shared-accounting 2006-12-29 14:49:31.000000000 +0530
+++ linux-2.6.20-rc2-balbir/mm/fremap.c 2006-12-29 14:49:31.000000000 +0530
@@ -27,13 +27,15 @@ static int zap_pte(struct mm_struct *mm,
struct page *page = NULL;
if (pte_present(pte)) {
+ page = vm_normal_page(vma, addr, pte);
+ if (page)
+ page_remove_rmap(page, vma);
+
flush_cache_page(vma, addr, pte_pfn(pte));
pte = ptep_clear_flush(vma, addr, ptep);
- page = vm_normal_page(vma, addr, pte);
if (page) {
if (pte_dirty(pte))
set_page_dirty(page);
- page_remove_rmap(page, vma);
page_cache_release(page);
}
} else {
@@ -79,9 +81,9 @@ int install_page(struct mm_struct *mm, s
zap_pte(mm, vma, addr, pte);
flush_icache_page(vma, page);
+ page_add_file_rmap(page, mm);
pte_val = mk_pte(page, prot);
set_pte_at(mm, addr, pte, pte_val);
- page_add_file_rmap(page, mm);
update_mmu_cache(vma, addr, pte_val);
lazy_mmu_prot_update(pte_val);
err = 0;
diff -puN mm/memory.c~add-shared-accounting mm/memory.c
--- linux-2.6.20-rc2/mm/memory.c~add-shared-accounting 2006-12-29 14:49:31.000000000 +0530
+++ linux-2.6.20-rc2-balbir/mm/memory.c 2006-12-29 14:49:31.000000000 +0530
@@ -473,7 +473,7 @@ copy_one_pte(struct mm_struct *dst_mm, s
page = vm_normal_page(vma, addr, pte);
if (page) {
get_page(page);
- page_dup_rmap(page, dst_mm);
+ page_dup_rmap(page, src_mm, dst_mm);
}
out_set_pte:
@@ -648,6 +648,8 @@ static unsigned long zap_pte_range(struc
page->index > details->last_index))
continue;
}
+ if (page)
+ page_remove_rmap(page, vma);
ptent = ptep_get_and_clear_full(mm, addr, pte,
tlb->fullmm);
tlb_remove_tlb_entry(tlb, pte, addr);
@@ -664,7 +666,6 @@ static unsigned long zap_pte_range(struc
if (pte_young(ptent))
mark_page_accessed(page);
}
- page_remove_rmap(page, vma);
tlb_remove_page(tlb, page);
continue;
}
@@ -1579,10 +1580,10 @@ gotten:
* thread doing COW.
*/
ptep_clear_flush(vma, address, page_table);
+ page_add_new_anon_rmap(new_page, vma, address);
set_pte_at(mm, address, page_table, entry);
update_mmu_cache(vma, address, entry);
lru_cache_add_active(new_page);
- page_add_new_anon_rmap(new_page, vma, address);
/* Free the old page.. */
new_page = old_page;
@@ -2020,8 +2021,8 @@ static int do_swap_page(struct mm_struct
}
flush_icache_page(vma, page);
- set_pte_at(mm, address, page_table, pte);
page_add_anon_rmap(page, vma, address);
+ set_pte_at(mm, address, page_table, pte);
swap_free(entry);
if (vm_swap_full())
@@ -2221,7 +2222,6 @@ retry:
entry = mk_pte(new_page, vma->vm_page_prot);
if (write_access)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
- set_pte_at(mm, address, page_table, entry);
if (anon) {
lru_cache_add_active(new_page);
page_add_new_anon_rmap(new_page, vma, address);
@@ -2232,6 +2232,7 @@ retry:
get_page(dirty_page);
}
}
+ set_pte_at(mm, address, page_table, entry);
} else {
/* One of our sibling threads was faster, back out. */
page_cache_release(new_page);
diff -puN mm/migrate.c~add-shared-accounting mm/migrate.c
--- linux-2.6.20-rc2/mm/migrate.c~add-shared-accounting 2006-12-29 14:49:31.000000000 +0530
+++ linux-2.6.20-rc2-balbir/mm/migrate.c 2006-12-29 14:49:31.000000000 +0530
@@ -172,13 +172,13 @@ static void remove_migration_pte(struct
pte = pte_mkold(mk_pte(new, vma->vm_page_prot));
if (is_write_migration_entry(entry))
pte = pte_mkwrite(pte);
- set_pte_at(mm, addr, ptep, pte);
if (PageAnon(new))
page_add_anon_rmap(new, vma, addr);
else
page_add_file_rmap(new, mm);
+ set_pte_at(mm, addr, ptep, pte);
/* No need to invalidate - it was non-present before */
update_mmu_cache(vma, addr, pte);
lazy_mmu_prot_update(pte);
diff -puN mm/swapfile.c~add-shared-accounting mm/swapfile.c
--- linux-2.6.20-rc2/mm/swapfile.c~add-shared-accounting 2006-12-29 14:49:31.000000000 +0530
+++ linux-2.6.20-rc2-balbir/mm/swapfile.c 2006-12-29 14:49:31.000000000 +0530
@@ -504,9 +504,9 @@ static void unuse_pte(struct vm_area_str
unsigned long addr, swp_entry_t entry, struct page *page)
{
get_page(page);
+ page_add_anon_rmap(page, vma, addr);
set_pte_at(vma->vm_mm, addr, pte,
pte_mkold(mk_pte(page, vma->vm_page_prot)));
- page_add_anon_rmap(page, vma, addr);
swap_free(entry);
/*
* Move the page to the active list so it is not
diff -puN mm/filemap_xip.c~add-shared-accounting mm/filemap_xip.c
--- linux-2.6.20-rc2/mm/filemap_xip.c~add-shared-accounting 2006-12-29 14:49:31.000000000 +0530
+++ linux-2.6.20-rc2-balbir/mm/filemap_xip.c 2006-12-29 14:49:31.000000000 +0530
@@ -186,10 +186,10 @@ __xip_unmap (struct address_space * mapp
page = ZERO_PAGE(address);
pte = page_check_address(page, mm, address, &ptl, false);
if (pte) {
+ page_remove_rmap(page, vma);
/* Nuke the page table entry. */
flush_cache_page(vma, address, pte_pfn(*pte));
pteval = ptep_clear_flush(vma, address, pte);
- page_remove_rmap(page, vma);
BUG_ON(pte_dirty(pteval));
pte_unmap_unlock(pte, ptl);
page_cache_release(page);
diff -puN fs/proc/task_mmu.c~add-shared-accounting fs/proc/task_mmu.c
--- linux-2.6.20-rc2/fs/proc/task_mmu.c~add-shared-accounting 2006-12-29 14:49:31.000000000 +0530
+++ linux-2.6.20-rc2-balbir/fs/proc/task_mmu.c 2006-12-29 14:49:31.000000000 +0530
@@ -63,11 +63,11 @@ unsigned long task_vsize(struct mm_struc
int task_statm(struct mm_struct *mm, int *shared, int *text,
int *data, int *resident)
{
- *shared = get_mm_counter(mm, file_rss);
+ *shared = get_mm_rss_shared(mm);
*text = (PAGE_ALIGN(mm->end_code) - (mm->start_code & PAGE_MASK))
>> PAGE_SHIFT;
*data = mm->total_vm - mm->shared_vm;
- *resident = *shared + get_mm_counter(mm, anon_rss);
+ *resident = *shared + get_mm_rss_unshared(mm);
return mm->total_vm;
}
_
--
Balbir Singh,
Linux Technology Center,
IBM Software Labs