[PATCH 2.5.62] Partial object-based rmap implementation
From: Dave McCracken @ 2003-02-20 16:13 UTC
  To: Linux Memory Management

There's been a fair amount of discussion about the advantages of doing
object-based rmap.  I've been looking into it, and we have the pieces to do
it for file-backed objects, i.e. the ones that have a real address_space
object pointed to from struct page.  The stumbling block has always been
anonymous pages.
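
The pieces in question: a file-backed page already carries its file
offset in page->index, and its address_space carries the i_mmap and
i_mmap_shared lists of vmas mapping the file, so the virtual address a
page maps to in any given vma can be computed directly, with no
pte_chain needed.  Distilled from page_referenced_obj_one() in the
patch below (a sketch only: the helper name is made up, and the real
code also takes the page_table_lock and verifies the pte):

/* Hypothetical helper; the patch open-codes this in both obj paths. */
static unsigned long
vma_address_of_page(struct vm_area_struct *vma, struct page *page)
{
	unsigned long loffset, address;

	/* File offset of the page, in PAGE_SIZE units */
	loffset = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
	if (loffset < vma->vm_pgoff)
		return 0;	/* page lies before this mapping */

	address = vma->vm_start + ((loffset - vma->vm_pgoff) << PAGE_SHIFT);
	if (address >= vma->vm_end)
		return 0;	/* page lies past the end of this mapping */

	return address;	/* virtual address of the page in this vma */
}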

At Martin Bligh's suggestion, I coded up an object-based implementation for
non-anon pages while leaving the pte_chain code intact for anon pages.  My
fork/exit microbenchmark shows roughly 50% improvement for tasks whose
memory is composed of file-backed and/or shared pages.  This is the code that Martin
included in 2.5.62-mjb2 and reported his performance results on.
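
The split itself is just a PageAnon() test at each rmap entry point.
Roughly, condensed from the page_referenced() change in the patch
(page_referenced_ptechain() is a stand-in name for the existing chain
walk, which the patch leaves in place for anon pages):

int page_referenced(struct page *page)
{
	int referenced = 0;

	if (TestClearPageReferenced(page))
		referenced++;

	if (!PageAnon(page))	/* file-backed: walk the mapping's vmas */
		return referenced + page_referenced_obj(page);

	/* anonymous: the old PageDirect/pte_chain code, unchanged */
	return referenced + page_referenced_ptechain(page);
}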

Anyway, here's the patch if anyone wants to check it out.
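
It should apply with the usual 'patch -p1' from the top of a clean
2.5.62 tree.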

Dave McCracken

======================================================================
Dave McCracken          IBM Linux Base Kernel Team      1-512-838-3059
dmccr@us.ibm.com                                        T/L   678-3059

[-- Attachment #2: objrmap-2.5.62-3.diff --]
[-- Type: text/plain, Size: 9271 bytes --]

--- 2.5.62/./include/linux/mm.h	2003-02-17 16:55:50.000000000 -0600
+++ 2.5.62-objrmap/./include/linux/mm.h	2003-02-19 12:00:47.000000000 -0600
@@ -171,6 +171,7 @@
 		struct pte_chain *chain;/* Reverse pte mapping pointer.
 					 * protected by PG_chainlock */
 		pte_addr_t direct;
+		atomic_t mapcount;
 	} pte;
 	unsigned long private;		/* mapping-private opaque data */
 
--- 2.5.62/./include/linux/page-flags.h	2003-02-17 16:56:25.000000000 -0600
+++ 2.5.62-objrmap/./include/linux/page-flags.h	2003-02-18 10:22:26.000000000 -0600
@@ -74,6 +74,7 @@
 #define PG_mappedtodisk		17	/* Has blocks allocated on-disk */
 #define PG_reclaim		18	/* To be reclaimed asap */
 #define PG_compound		19	/* Part of a compound page */
+#define PG_anon			20	/* Anonymous page */
 
 /*
  * Global page accounting.  One instance per CPU.  Only unsigned longs are
@@ -256,6 +257,10 @@
 #define SetPageCompound(page)	set_bit(PG_compound, &(page)->flags)
 #define ClearPageCompound(page)	clear_bit(PG_compound, &(page)->flags)
 
+#define PageAnon(page)		test_bit(PG_anon, &(page)->flags)
+#define SetPageAnon(page)	set_bit(PG_anon, &(page)->flags)
+#define ClearPageAnon(page)	clear_bit(PG_anon, &(page)->flags)
+
 /*
  * The PageSwapCache predicate doesn't use a PG_flag at this time,
  * but it may again do so one day.
--- 2.5.62/./fs/exec.c	2003-02-17 16:56:12.000000000 -0600
+++ 2.5.62-objrmap/./fs/exec.c	2003-02-18 11:46:33.000000000 -0600
@@ -316,6 +316,7 @@
 	lru_cache_add_active(page);
 	flush_dcache_page(page);
 	flush_page_to_ram(page);
+	SetPageAnon(page);
 	set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(page, PAGE_COPY))));
 	pte_chain = page_add_rmap(page, pte, pte_chain);
 	pte_unmap(pte);
--- 2.5.62/./mm/page_alloc.c	2003-02-17 16:55:51.000000000 -0600
+++ 2.5.62-objrmap/./mm/page_alloc.c	2003-02-18 10:22:26.000000000 -0600
@@ -220,6 +220,8 @@
 		bad_page(function, page);
 	if (PageDirty(page))
 		ClearPageDirty(page);
+	if (PageAnon(page))
+		ClearPageAnon(page);
 }
 
 /*
--- 2.5.62/./mm/swapfile.c	2003-02-17 16:56:01.000000000 -0600
+++ 2.5.62-objrmap/./mm/swapfile.c	2003-02-19 16:39:24.000000000 -0600
@@ -390,6 +390,7 @@
 		return;
 	get_page(page);
 	set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot)));
+	SetPageAnon(page);
 	*pte_chainp = page_add_rmap(page, dir, *pte_chainp);
 	swap_free(entry);
 	++vma->vm_mm->rss;
--- 2.5.62/./mm/memory.c	2003-02-17 16:56:14.000000000 -0600
+++ 2.5.62-objrmap/./mm/memory.c	2003-02-18 10:22:26.000000000 -0600
@@ -988,6 +988,7 @@
 			++mm->rss;
 		page_remove_rmap(old_page, page_table);
 		break_cow(vma, new_page, address, page_table);
+		SetPageAnon(new_page);
 		pte_chain = page_add_rmap(new_page, page_table, pte_chain);
 		lru_cache_add_active(new_page);
 
@@ -1197,6 +1198,7 @@
 	flush_page_to_ram(page);
 	flush_icache_page(vma, page);
 	set_pte(page_table, pte);
+	SetPageAnon(page);
 	pte_chain = page_add_rmap(page, page_table, pte_chain);
 
 	/* No need to invalidate - it was non-present before */
@@ -1263,6 +1265,7 @@
 		entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
 		lru_cache_add_active(page);
 		mark_page_accessed(page);
+		SetPageAnon(page);
 	}
 
 	set_pte(page_table, entry);
@@ -1334,6 +1337,7 @@
 		copy_user_highpage(page, new_page, address);
 		page_cache_release(new_page);
 		lru_cache_add_active(page);
+		SetPageAnon(page);
 		new_page = page;
 	}
 
--- 2.5.62/./mm/rmap.c	2003-02-17 16:56:58.000000000 -0600
+++ 2.5.62-objrmap/./mm/rmap.c	2003-02-19 12:05:48.000000000 -0600
@@ -86,6 +86,89 @@
  * If the page has a single-entry pte_chain, collapse that back to a PageDirect
  * representation.  This way, it's only done under memory pressure.
  */
+static inline int
+page_referenced_obj_one(struct vm_area_struct *vma, struct page *page)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	pgd_t *pgd;
+	pmd_t *pmd;
+	pte_t *pte;
+	unsigned long loffset;
+	unsigned long address;
+	int referenced = 0;
+
+	loffset = (page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT));
+	if (loffset < vma->vm_pgoff)
+		goto out;
+
+	address = vma->vm_start + ((loffset - vma->vm_pgoff) << PAGE_SHIFT);
+
+	if (address >= vma->vm_end)
+		goto out;
+
+	if (!spin_trylock(&mm->page_table_lock)) {
+		referenced = 1;
+		goto out;
+	}
+	pgd = pgd_offset(mm, address);
+	if (!pgd_present(*pgd)) {
+		goto out_unlock;
+	}
+	pmd = pmd_offset(pgd, address);
+	if (!pmd_present(*pmd)) {
+		goto out_unlock;
+	}
+	pte = pte_offset_map(pmd, address);
+	if (!pte_present(*pte)) {
+		goto out_unmap;
+	}
+	if (page_to_pfn(page) != pte_pfn(*pte)) {
+		goto out_unmap;
+	}
+	if (ptep_test_and_clear_young(pte))
+		referenced++;
+out_unmap:
+	pte_unmap(pte);
+
+out_unlock:
+	spin_unlock(&mm->page_table_lock);
+
+out:
+	return referenced;
+}
+
+static int
+page_referenced_obj(struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+	struct vm_area_struct *vma;
+	int referenced = 0;
+
+	if (atomic_read(&page->pte.mapcount) == 0)
+		return 0;
+
+	if (!mapping)
+		BUG();
+
+	if (PageSwapCache(page))
+		BUG();
+
+	if (down_trylock(&mapping->i_shared_sem))
+		return 1;
+
+	list_for_each_entry(vma, &mapping->i_mmap, shared) {
+		referenced += page_referenced_obj_one(vma, page);
+	}
+
+	list_for_each_entry(vma, &mapping->i_mmap_shared, shared) {
+		referenced += page_referenced_obj_one(vma, page);
+	}
+
+	up(&mapping->i_shared_sem);
+
+	return referenced;
+}
+
 int page_referenced(struct page * page)
 {
 	struct pte_chain * pc;
@@ -94,6 +177,10 @@
 	if (TestClearPageReferenced(page))
 		referenced++;
 
+	if (!PageAnon(page)) {
+		referenced += page_referenced_obj(page);
+		goto out;
+	}
 	if (PageDirect(page)) {
 		pte_t *pte = rmap_ptep_map(page->pte.direct);
 		if (ptep_test_and_clear_young(pte))
@@ -127,6 +214,7 @@
 			__pte_chain_free(pc);
 		}
 	}
+out:
 	return referenced;
 }
 
@@ -157,6 +245,15 @@
 	if (!pfn_valid(page_to_pfn(page)) || PageReserved(page))
 		return pte_chain;
 
+	if (!PageAnon(page)) {
+		if (!page->mapping)
+			BUG();
+		if (PageSwapCache(page))
+			BUG();
+		atomic_inc(&page->pte.mapcount);
+		return pte_chain;
+	}
+
 	pte_chain_lock(page);
 
 #ifdef DEBUG_RMAP
@@ -245,6 +342,17 @@
 	if (!page_mapped(page))
 		return;		/* remap_page_range() from a driver? */
 
+	if (!PageAnon(page)) {
+		if (!page->mapping)
+			BUG();
+		if (PageSwapCache(page))
+			BUG();
+		if (atomic_read(&page->pte.mapcount) == 0)
+			BUG();
+		atomic_dec(&page->pte.mapcount);
+		return;
+	}
+
 	pte_chain_lock(page);
 
 	if (PageDirect(page)) {
@@ -310,6 +418,112 @@
 	return;
 }
 
+static inline int
+try_to_unmap_obj_one(struct vm_area_struct *vma, struct page *page)
+{
+	struct mm_struct *mm = vma->vm_mm;
+	pgd_t *pgd;
+	pmd_t *pmd;
+	pte_t *pte;
+	pte_t pteval;
+	unsigned long loffset;
+	unsigned long address;
+	int ret = SWAP_SUCCESS;
+
+	loffset = (page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT));
+	if (loffset < vma->vm_pgoff)
+		goto out;
+
+	address = vma->vm_start + ((loffset - vma->vm_pgoff) << PAGE_SHIFT);
+
+	if (address >= vma->vm_end)
+		goto out;
+
+	if (!spin_trylock(&mm->page_table_lock)) {
+		ret = SWAP_AGAIN;
+		goto out;
+	}
+	pgd = pgd_offset(mm, address);
+	if (!pgd_present(*pgd)) {
+		goto out_unlock;
+	}
+	pmd = pmd_offset(pgd, address);
+	if (!pmd_present(*pmd)) {
+		goto out_unlock;
+	}
+	pte = pte_offset_map(pmd, address);
+	if (!pte_present(*pte)) {
+		goto out_unmap;
+	}
+	if (page_to_pfn(page) != pte_pfn(*pte)) {
+		goto out_unmap;
+	}
+
+	if (vma->vm_flags & VM_LOCKED) {
+	ret = SWAP_FAIL;
+		goto out_unmap;
+	}
+
+	flush_cache_page(vma, address);
+	pteval = ptep_get_and_clear(pte);
+	flush_tlb_page(vma, address);
+
+	if (pte_dirty(pteval))
+		set_page_dirty(page);
+
+	if (atomic_read(&page->pte.mapcount) == 0)
+		BUG();
+
+	mm->rss--;
+	atomic_dec(&page->pte.mapcount);
+	page_cache_release(page);
+
+out_unmap:
+	pte_unmap(pte);
+
+out_unlock:
+	spin_unlock(&mm->page_table_lock);
+
+out:
+	return ret;
+}
+
+static int
+try_to_unmap_obj(struct page *page)
+{
+	struct address_space *mapping = page->mapping;
+	struct vm_area_struct *vma;
+	int ret = SWAP_SUCCESS;
+
+	if (!mapping)
+		BUG();
+
+	if (PageSwapCache(page))
+		BUG();
+
+	if (down_trylock(&mapping->i_shared_sem))
+		return SWAP_AGAIN;
+
+	list_for_each_entry(vma, &mapping->i_mmap, shared) {
+		ret = try_to_unmap_obj_one(vma, page);
+		if (ret != SWAP_SUCCESS)
+			goto out;
+	}
+
+	list_for_each_entry(vma, &mapping->i_mmap_shared, shared) {
+		ret = try_to_unmap_obj_one(vma, page);
+		if (ret != SWAP_SUCCESS)
+			goto out;
+	}
+
+	if (atomic_read(&page->pte.mapcount) != 0)
+		BUG();
+
+out:
+	up(&mapping->i_shared_sem);
+	return ret;
+}
+
 /**
  * try_to_unmap_one - worker function for try_to_unmap
  * @page: page to unmap
@@ -414,6 +628,11 @@
 	if (!page->mapping)
 		BUG();
 
+	if (!PageAnon(page)) {
+		ret = try_to_unmap_obj(page);
+		goto out;
+	}
+
 	if (PageDirect(page)) {
 		ret = try_to_unmap_one(page, page->pte.direct);
 		if (ret == SWAP_SUCCESS) {
