linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* PG_clean for shared mapping smart syncing
@ 1998-12-19 16:04 Andrea Arcangeli
  1998-12-19 16:17 ` Stephen C. Tweedie
  1998-12-19 16:23 ` Andrea Arcangeli
  0 siblings, 2 replies; 8+ messages in thread
From: Andrea Arcangeli @ 1998-12-19 16:04 UTC (permalink / raw)
  To: linux-kernel, linux-mm, Stephen C. Tweedie, Linus Torvalds

I've done a preliminary/experimental/unstable patch that should allow us
to account when a shared page in a shared mmap has to be synced. Here the
preliminary patch:

Index: mm//filemap.c
===================================================================
RCS file: /var/cvs/linux/mm/filemap.c,v
retrieving revision 1.1.1.1.2.14
diff -u -r1.1.1.1.2.14 filemap.c
--- filemap.c	1998/12/17 16:34:07	1.1.1.1.2.14
+++ linux/mm/filemap.c	1998/12/19 15:37:30
@@ -1210,8 +1210,9 @@
 	unsigned long address, unsigned int flags)
 {
 	pte_t pte = *ptep;
-	unsigned long page;
+	unsigned long page = pte_page(pte);
 	int error;
+	struct page * map = mem_map + MAP_NR(page);
 
 	if (!(flags & MS_INVALIDATE)) {
 		if (!pte_present(pte))
@@ -1220,10 +1221,9 @@
 			return 0;
 		flush_page_to_ram(pte_page(pte));
 		flush_cache_page(vma, address);
-		set_pte(ptep, pte_mkclean(pte));
+		set_pte(ptep, pte_wrprotect(pte_mkclean(pte)));
 		flush_tlb_page(vma, address);
-		page = pte_page(pte);
-		atomic_inc(&mem_map[MAP_NR(page)].count);
+		atomic_inc(&map->count);
 	} else {
 		if (pte_none(pte))
 			return 0;
@@ -1234,14 +1234,15 @@
 			swap_free(pte_val(pte));
 			return 0;
 		}
-		page = pte_page(pte);
 		if (!pte_dirty(pte) || flags == MS_INVALIDATE) {
-			free_page(page);
+			__free_page(map);
 			return 0;
 		}
 	}
-	error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, page);
-	free_page(page);
+	if (!PageTestAndSetClean(map))
+		error = filemap_write_page(vma, address - vma->vm_start +
+					   vma->vm_offset, page);
+	__free_page(map);
 	return error;
 }
 
Index: mm//memory.c
===================================================================
RCS file: /var/cvs/linux/mm/memory.c,v
retrieving revision 1.1.1.1.2.1
diff -u -r1.1.1.1.2.1 memory.c
--- memory.c	1998/11/27 10:41:41	1.1.1.1.2.1
+++ linux/mm/memory.c	1998/12/19 15:32:48
@@ -623,11 +623,10 @@
 	unsigned long address, pte_t *page_table)
 {
 	pte_t pte;
-	unsigned long old_page, new_page;
+	unsigned long old_page;
 	struct page * page_map;
 	
 	pte = *page_table;
-	new_page = __get_free_page(GFP_USER);
 	/* Did someone else copy this page for us while we slept? */
 	if (pte_val(*page_table) != pte_val(pte))
 		goto end_wp_page;
@@ -640,16 +639,25 @@
 		goto bad_wp_page;
 	tsk->min_flt++;
 	page_map = mem_map + MAP_NR(old_page);
-	
+
+	/* If the page is clean we just know why it was write protect -arca */
+	if (PageTestAndClearClean(page_map))
+	{
+		set_pte(page_table, pte_mkdirty(pte_mkwrite(pte)));
+		goto end_wp_page;
+	}
+
 	/*
 	 * Do we need to copy?
 	 */
 	if (is_page_shared(page_map)) {
+		unsigned long new_page;
 		unlock_kernel();
+		new_page = __get_free_page(GFP_USER);
 		if (!new_page)
 			return 0;
 
-		if (PageReserved(mem_map + MAP_NR(old_page)))
+		if (PageReserved(page_map))
 			++vma->vm_mm->rss;
 		copy_cow_page(old_page,new_page);
 		flush_page_to_ram(old_page);
@@ -670,16 +678,15 @@
 	flush_cache_page(vma, address);
 	set_pte(page_table, pte_mkdirty(pte_mkwrite(pte)));
 	flush_tlb_page(vma, address);
+	return 1;
 end_wp_page:
-	if (new_page)
-		free_page(new_page);
+	unlock_kernel();
 	return 1;
 
 bad_wp_page:
+	unlock_kernel();
 	printk("do_wp_page: bogus page at address %08lx (%08lx)\n",address,old_page);
 	send_sig(SIGKILL, tsk, 1);
-	if (new_page)
-		free_page(new_page);
 	return 0;
 }
 
Index: mm//page_alloc.c
===================================================================
RCS file: /var/cvs/linux/mm/page_alloc.c,v
retrieving revision 1.1.1.1.2.6
diff -u -r1.1.1.1.2.6 page_alloc.c
--- page_alloc.c	1998/12/17 14:44:43	1.1.1.1.2.6
+++ linux/mm/page_alloc.c	1998/12/19 15:52:43
@@ -151,6 +151,7 @@
 	if (!PageReserved(page) && atomic_dec_and_test(&page->count)) {
 		if (PageSwapCache(page))
 			panic ("Freeing swap cache page");
+		PageClearClean(page);
 		free_pages_ok(page->map_nr, 0);
 		return;
 	}
@@ -172,6 +173,7 @@
 		if (atomic_dec_and_test(&map->count)) {
 			if (PageSwapCache(map))
 				panic ("Freeing swap cache pages");
+			PageClearClean(map);
 			free_pages_ok(map_nr, order);
 			return;
 		}
Index: include/linux//mm.h
===================================================================
RCS file: /var/cvs/linux/include/linux/mm.h,v
retrieving revision 1.1.1.1.2.5
diff -u -r1.1.1.1.2.5 mm.h
--- mm.h	1998/12/17 14:44:37	1.1.1.1.2.5
+++ linux/include/linux/mm.h	1998/12/19 15:29:31
@@ -136,6 +136,7 @@
 #define PG_Slab			 8
 #define PG_swap_cache		 9
 #define PG_skip			10
+#define PG_clean		11
 #define PG_reserved		31
 
 /* Make it prettier to test the above... */
@@ -149,16 +150,24 @@
 #define PageDMA(page)		(test_bit(PG_DMA, &(page)->flags))
 #define PageSlab(page)		(test_bit(PG_Slab, &(page)->flags))
 #define PageSwapCache(page)	(test_bit(PG_swap_cache, &(page)->flags))
+#define PageClean(page)		(test_bit(PG_clean, &(page)->flags))
 #define PageReserved(page)	(test_bit(PG_reserved, &(page)->flags))
 
 #define PageSetSlab(page)	(set_bit(PG_Slab, &(page)->flags))
+#define PageSetClean(page)	(set_bit(PG_clean, &(page)->flags))
 #define PageSetSwapCache(page)	(set_bit(PG_swap_cache, &(page)->flags))
+
+#define PageTestAndSetClean(page)	\
+			(test_and_set_bit(PG_clean, &(page)->flags))
 #define PageTestandSetSwapCache(page)	\
 			(test_and_set_bit(PG_swap_cache, &(page)->flags))
 
 #define PageClearSlab(page)	(clear_bit(PG_Slab, &(page)->flags))
+#define PageClearClean(page)	(clear_bit(PG_clean, &(page)->flags))
 #define PageClearSwapCache(page)(clear_bit(PG_swap_cache, &(page)->flags))
 
+#define PageTestAndClearClean(page)	\
+			(test_and_clear_bit(PG_clean, &(page)->flags))
 #define PageTestandClearSwapCache(page)	\
 			(test_and_clear_bit(PG_swap_cache, &(page)->flags))
 


Seems to work here but I've not tested it very well yet. At first I
forgot to clean the PG_clean flag in *free_pages and you can guess that I
had to reboot very soon after the first msync ;). It was not msyncing
anymore!! Luckily I understood the culprit after a msec.

NOTE, the do_wp_page() patch I think it's needed also in the stock
kernel, there are some unbalanced unlock_kernel() otherwise and we would
run a __get_free_pages()  even if not needed and while the kernel lock was
locked (maybe it was intentional? do we need the big kernel lock while we
run __get_free_pages() in do_wp_page?). You only need to reverse:

-
+
+       /* If the page is clean we just know why it was write protect -arca */
+       if (PageTestAndClearClean(page_map))
+       {
+               set_pte(page_table, pte_mkdirty(pte_mkwrite(pte)));
+               goto end_wp_page;
+       }
+

Andrea Arcangeli

--
This is a majordomo managed list.  To unsubscribe, send a message with
the body 'unsubscribe linux-mm me@address' to: majordomo@kvack.org

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: PG_clean for shared mapping smart syncing
  1998-12-19 16:04 PG_clean for shared mapping smart syncing Andrea Arcangeli
@ 1998-12-19 16:17 ` Stephen C. Tweedie
  1998-12-19 16:27   ` Andrea Arcangeli
  1998-12-19 16:23 ` Andrea Arcangeli
  1 sibling, 1 reply; 8+ messages in thread
From: Stephen C. Tweedie @ 1998-12-19 16:17 UTC (permalink / raw)
  To: Andrea Arcangeli
  Cc: linux-kernel, linux-mm, Stephen C. Tweedie, Linus Torvalds

Hi,

On Sat, 19 Dec 1998 17:04:26 +0100 (CET), Andrea Arcangeli
<andrea@e-mind.com> said:

Just a couple of comments: I can't see any mechanism in this patch for
clearing other process's pte dirty bits when we sync a shared page (I
know you had a patch for this before), and conceptually I much prefer
to have a Dirty bit than a Clean bit, simply because that's what we
use absolutely everywhere else in the kernel.

--Stephen
--
This is a majordomo managed list.  To unsubscribe, send a message with
the body 'unsubscribe linux-mm me@address' to: majordomo@kvack.org

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: PG_clean for shared mapping smart syncing
  1998-12-19 16:04 PG_clean for shared mapping smart syncing Andrea Arcangeli
  1998-12-19 16:17 ` Stephen C. Tweedie
@ 1998-12-19 16:23 ` Andrea Arcangeli
  1998-12-19 16:36   ` Stephen C. Tweedie
  1 sibling, 1 reply; 8+ messages in thread
From: Andrea Arcangeli @ 1998-12-19 16:23 UTC (permalink / raw)
  To: linux-kernel, linux-mm, Stephen C. Tweedie, Linus Torvalds

I've put a new patch with as difference only a bit of credits added ;) at:

ftp://e-mind.com/pub/linux/kernel-patches/pgclean-0-2.1.132-2.diff.gz

All tests I done here are been succesfully (and I am using huge size of
memory just to be sure to notice any kind of mm corruption). Does somebody
has some test suite for shared mappings or could suggest me a proggy that
uses heavly shared mappings?

Andrea Arcangeli

--
This is a majordomo managed list.  To unsubscribe, send a message with
the body 'unsubscribe linux-mm me@address' to: majordomo@kvack.org

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: PG_clean for shared mapping smart syncing
  1998-12-19 16:17 ` Stephen C. Tweedie
@ 1998-12-19 16:27   ` Andrea Arcangeli
  1998-12-19 16:37     ` Andrea Arcangeli
  0 siblings, 1 reply; 8+ messages in thread
From: Andrea Arcangeli @ 1998-12-19 16:27 UTC (permalink / raw)
  To: Stephen C. Tweedie; +Cc: linux-kernel, linux-mm, Linus Torvalds

On Sat, 19 Dec 1998, Stephen C. Tweedie wrote:

>Hi,
>
>On Sat, 19 Dec 1998 17:04:26 +0100 (CET), Andrea Arcangeli
><andrea@e-mind.com> said:
>
>Just a couple of comments: I can't see any mechanism in this patch for
>clearing other process's pte dirty bits when we sync a shared page (I

The only reason to add a bitflag in the page->flags field is to avoid
us to play with the pte. Now the pte is used only to set the page readonly
to allow us to remove the clean flag at the first page fault.

Andrea Arcangeli

--
This is a majordomo managed list.  To unsubscribe, send a message with
the body 'unsubscribe linux-mm me@address' to: majordomo@kvack.org

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: PG_clean for shared mapping smart syncing
  1998-12-19 16:23 ` Andrea Arcangeli
@ 1998-12-19 16:36   ` Stephen C. Tweedie
  0 siblings, 0 replies; 8+ messages in thread
From: Stephen C. Tweedie @ 1998-12-19 16:36 UTC (permalink / raw)
  To: Andrea Arcangeli
  Cc: linux-kernel, linux-mm, Stephen C. Tweedie, Linus Torvalds

Hi,

On Sat, 19 Dec 1998 17:23:13 +0100 (CET), Andrea Arcangeli
<andrea@e-mind.com> said:

> I've put a new patch with as difference only a bit of credits added ;) at:
> ftp://e-mind.com/pub/linux/kernel-patches/pgclean-0-2.1.132-2.diff.gz

> All tests I done here are been succesfully (and I am using huge size of
> memory just to be sure to notice any kind of mm corruption). Does somebody
> has some test suite for shared mappings or could suggest me a proggy that
> uses heavly shared mappings?

ftp.uk.linux.org:/pub/linux/sct/vm/shm-stress.tar.gz

It currently uses only sysV shared maps, but it would be trivial to
extend it to use shared map files.  Let me know if you make the change
and I'll merge the patch in.

The test stuff uses separate shared-write regions for testing: one
smaller region is used as a bitmap, and the code keeps primitive
spinlocks in this bit to synchronise access to the rest of the shared
memory.  The rest of the shared memory is used as a test heap --- just a
collection of separate test pages --- and a pattern array.  We store
random patterns in the first word of each test page in the heap, and
(under spinlock) assign the same pattern atomically to both the test
page and the appropriate entry in the pattern array.  Use a sufficiently
large test heap and you can test shared-page swapping quite effectively.

--Stephen

--
This is a majordomo managed list.  To unsubscribe, send a message with
the body 'unsubscribe linux-mm me@address' to: majordomo@kvack.org

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: PG_clean for shared mapping smart syncing
  1998-12-19 16:27   ` Andrea Arcangeli
@ 1998-12-19 16:37     ` Andrea Arcangeli
  1998-12-19 16:46       ` Andrea Arcangeli
  1998-12-19 17:10       ` Stephen C. Tweedie
  0 siblings, 2 replies; 8+ messages in thread
From: Andrea Arcangeli @ 1998-12-19 16:37 UTC (permalink / raw)
  To: Stephen C. Tweedie; +Cc: linux-kernel, linux-mm, Linus Torvalds

On Sat, 19 Dec 1998, Andrea Arcangeli wrote:

>The only reason to add a bitflag in the page->flags field is to avoid
>us to play with the pte. Now the pte is used only to set the page readonly
>to allow us to remove the clean flag at the first page fault.

Ah but I just found a problem... When we set the PG_clean flag on the page
we should set the pte readonly for that page in all process vm and not
only in the process running. But if we must play with the page table it's
easier to directly set the page as clean as I was used to do with my
previous update_shared_mappings() patch. So I think we could drop
completly my last patch and return to my old code and solve the problem to
handle the mmap_sem locking right...

To get the locking right I think we could do something like:

if (sharedmapping)
{

	for_each_process_that_shares_the_mmap(p)
		down(&p->mm->mmap_sem);
} else {
	down(&current->mm->mmap_sem);
}

for_each_process_that_shares_the_mmap() will return processes always in
the same order relative to how the mappings are ordered in the the inode.

comments?

Andrea Arcangeli

--
This is a majordomo managed list.  To unsubscribe, send a message with
the body 'unsubscribe linux-mm me@address' to: majordomo@kvack.org

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: PG_clean for shared mapping smart syncing
  1998-12-19 16:37     ` Andrea Arcangeli
@ 1998-12-19 16:46       ` Andrea Arcangeli
  1998-12-19 17:10       ` Stephen C. Tweedie
  1 sibling, 0 replies; 8+ messages in thread
From: Andrea Arcangeli @ 1998-12-19 16:46 UTC (permalink / raw)
  To: Stephen C. Tweedie; +Cc: linux-kernel, linux-mm, Linus Torvalds

On Sat, 19 Dec 1998, Andrea Arcangeli wrote:

>previous update_shared_mappings() patch. So I think we could drop
>completly my last patch and return to my old code and solve the problem to
>handle the mmap_sem locking right...

Here the remainings cleanup/fixes...

Index: mm//filemap.c
===================================================================
RCS file: /var/cvs/linux/mm/filemap.c,v
retrieving revision 1.1.1.1.2.16
diff -u -r1.1.1.1.2.16 filemap.c
--- filemap.c	1998/12/19 16:38:04	1.1.1.1.2.16
+++ linux/mm/filemap.c	1998/12/19 16:43:44
@@ -30,6 +30,10 @@
  * Shared mappings now work. 15.8.1995  Bruno.
  */
 
+/*
+ * Some cleanup of filemap_sync_pte(). 19 Dec 1998, Andrea Arcangeli
+ */
+
 unsigned long page_cache_size = 0;
 struct page * page_hash_table[PAGE_HASH_SIZE];
 
@@ -1210,8 +1214,9 @@
 	unsigned long address, unsigned int flags)
 {
 	pte_t pte = *ptep;
-	unsigned long page;
+	unsigned long page = pte_page(pte);
 	int error;
+	struct page * map = mem_map + MAP_NR(page);
 
 	if (!(flags & MS_INVALIDATE)) {
 		if (!pte_present(pte))
@@ -1222,8 +1227,7 @@
 		flush_cache_page(vma, address);
 		set_pte(ptep, pte_mkclean(pte));
 		flush_tlb_page(vma, address);
-		page = pte_page(pte);
-		atomic_inc(&mem_map[MAP_NR(page)].count);
+		atomic_inc(&map->count);
 	} else {
 		if (pte_none(pte))
 			return 0;
@@ -1234,14 +1238,13 @@
 			swap_free(pte_val(pte));
 			return 0;
 		}
-		page = pte_page(pte);
 		if (!pte_dirty(pte) || flags == MS_INVALIDATE) {
-			free_page(page);
+			__free_page(map);
 			return 0;
 		}
 	}
 	error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, page);
-	free_page(page);
+	__free_page(map);
 	return error;
 }
 
Index: mm//memory.c
===================================================================
RCS file: /var/cvs/linux/mm/memory.c,v
retrieving revision 1.1.1.1.2.3
diff -u -r1.1.1.1.2.3 memory.c
--- memory.c	1998/12/19 16:38:04	1.1.1.1.2.3
+++ linux/mm/memory.c	1998/12/19 16:42:25
@@ -33,6 +33,10 @@
  * 		Idea by Alex Bligh (alex@cconcepts.co.uk)
  */
 
+/*
+ * Some fix and cleanup to of do_wp_page(). 19 Dec 1998, Andrea Arcangeli
+ */
+
 #include <linux/mm.h>
 #include <linux/mman.h>
 #include <linux/swap.h>
@@ -623,11 +627,10 @@
 	unsigned long address, pte_t *page_table)
 {
 	pte_t pte;
-	unsigned long old_page, new_page;
+	unsigned long old_page;
 	struct page * page_map;
 	
 	pte = *page_table;
-	new_page = __get_free_page(GFP_USER);
 	/* Did someone else copy this page for us while we slept? */
 	if (pte_val(*page_table) != pte_val(pte))
 		goto end_wp_page;
@@ -645,11 +648,13 @@
 	 * Do we need to copy?
 	 */
 	if (is_page_shared(page_map)) {
+		unsigned long new_page;
 		unlock_kernel();
+		new_page = __get_free_page(GFP_USER);
 		if (!new_page)
 			return 0;
 
-		if (PageReserved(mem_map + MAP_NR(old_page)))
+		if (PageReserved(page_map))
 			++vma->vm_mm->rss;
 		copy_cow_page(old_page,new_page);
 		flush_page_to_ram(old_page);
@@ -670,16 +675,15 @@
 	flush_cache_page(vma, address);
 	set_pte(page_table, pte_mkdirty(pte_mkwrite(pte)));
 	flush_tlb_page(vma, address);
+	return 1;
 end_wp_page:
-	if (new_page)
-		free_page(new_page);
+	unlock_kernel();
 	return 1;
 
 bad_wp_page:
+	unlock_kernel();
 	printk("do_wp_page: bogus page at address %08lx (%08lx)\n",address,old_page);
 	send_sig(SIGKILL, tsk, 1);
-	if (new_page)
-		free_page(new_page);
 	return 0;
 }
 


Andrea Arcangeli

--
This is a majordomo managed list.  To unsubscribe, send a message with
the body 'unsubscribe linux-mm me@address' to: majordomo@kvack.org

^ permalink raw reply	[flat|nested] 8+ messages in thread

* Re: PG_clean for shared mapping smart syncing
  1998-12-19 16:37     ` Andrea Arcangeli
  1998-12-19 16:46       ` Andrea Arcangeli
@ 1998-12-19 17:10       ` Stephen C. Tweedie
  1 sibling, 0 replies; 8+ messages in thread
From: Stephen C. Tweedie @ 1998-12-19 17:10 UTC (permalink / raw)
  To: Andrea Arcangeli
  Cc: Stephen C. Tweedie, linux-kernel, linux-mm, Linus Torvalds

Hi,

On Sat, 19 Dec 1998 17:37:03 +0100 (CET), Andrea Arcangeli
<andrea@e-mind.com> said:

> Ah but I just found a problem... When we set the PG_clean flag on the page
> we should set the pte readonly for that page in all process vm and not
> only in the process running. But if we must play with the page table it's
> easier to directly set the page as clean as I was used to do with my
> previous update_shared_mappings() patch. So I think we could drop
> completly my last patch and return to my old code and solve the problem to
> handle the mmap_sem locking right...

Agreed, I much prefer the concept of being able to reliably keep the pte
dirty bits consistent between processes.

--Stephen
--
This is a majordomo managed list.  To unsubscribe, send a message with
the body 'unsubscribe linux-mm me@address' to: majordomo@kvack.org

^ permalink raw reply	[flat|nested] 8+ messages in thread

end of thread, other threads:[~1998-12-19 17:11 UTC | newest]

Thread overview: 8+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
1998-12-19 16:04 PG_clean for shared mapping smart syncing Andrea Arcangeli
1998-12-19 16:17 ` Stephen C. Tweedie
1998-12-19 16:27   ` Andrea Arcangeli
1998-12-19 16:37     ` Andrea Arcangeli
1998-12-19 16:46       ` Andrea Arcangeli
1998-12-19 17:10       ` Stephen C. Tweedie
1998-12-19 16:23 ` Andrea Arcangeli
1998-12-19 16:36   ` Stephen C. Tweedie

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox