linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Andrew Morton <andrewm@uow.edu.au>
To: Linus Torvalds <torvalds@transmeta.com>
Cc: linux-mm@kvack.org
Subject: Re: 3rd version of R/W mmap_sem patch available
Date: Thu, 22 Mar 2001 21:24:56 +1100	[thread overview]
Message-ID: <3AB9D2F8.86D8E359@uow.edu.au> (raw)
In-Reply-To: <Pine.LNX.4.31.0103200801480.1503-100000@penguin.transmeta.com>

Linus Torvalds wrote:
> 
> On Wed, 21 Mar 2001, Andrew Morton wrote:
> >
> >       The handling of mm->rss is racy.  But I think
> >       it always has been?
> 
> It always has been. Right now I think we hold the page_table_lock over
> most of them, that the old patch to fix this might end up being just that
> one place. Somebody interested in checking?
> 

There were two places which needed fixing, and it was
pretty trivial.  With this patch, mm_struct.rss handling
is racefree on x86.  Some other archs (notably ia64/ia32)
are still a little racy on the exec() path.

I was sorely tempted to make put_dirty_page() require
that tsk->mm->page_table_lock be held by the caller,
which would save a bunch of locking.  But put_dirty_page()
is used by architectures which I don't understand.

The patch also includes a feeble attempt to document
some locking rules.



--- linux-2.4.3-pre6/include/linux/sched.h	Thu Mar 22 18:52:52 2001
+++ lk/include/linux/sched.h	Thu Mar 22 19:41:06 2001
@@ -209,9 +209,12 @@
 	atomic_t mm_count;			/* How many references to "struct mm_struct" (users count as 1) */
 	int map_count;				/* number of VMAs */
 	struct rw_semaphore mmap_sem;
-	spinlock_t page_table_lock;
+	spinlock_t page_table_lock;		/* Protects task page tables and mm->rss */
 
-	struct list_head mmlist;		/* List of all active mm's */
+	struct list_head mmlist;		/* List of all active mm's.  These are globally strung
+						 * together off init_mm.mmlist, and are protected
+						 * by mmlist_lock
+						 */
 
 	unsigned long start_code, end_code, start_data, end_data;
 	unsigned long start_brk, brk, start_stack;
--- linux-2.4.3-pre6/fs/exec.c	Thu Mar 22 18:52:52 2001
+++ lk/fs/exec.c	Thu Mar 22 19:51:35 2001
@@ -252,6 +252,8 @@
 /*
  * This routine is used to map in a page into an address space: needed by
  * execve() for the initial stack and environment pages.
+ *
+ * tsk->mmap_sem is held for writing.
  */
 void put_dirty_page(struct task_struct * tsk, struct page *page, unsigned long address)
 {
@@ -291,6 +293,7 @@
 	unsigned long stack_base;
 	struct vm_area_struct *mpnt;
 	int i;
+	unsigned long rss_increment = 0;
 
 	stack_base = STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE;
 
@@ -322,11 +325,14 @@
 		struct page *page = bprm->page[i];
 		if (page) {
 			bprm->page[i] = NULL;
-			current->mm->rss++;
+			rss_increment++;
 			put_dirty_page(current,page,stack_base);
 		}
 		stack_base += PAGE_SIZE;
 	}
+	spin_lock(&current->mm->page_table_lock);
+	current->mm->rss += rss_increment;
+	spin_unlock(&current->mm->page_table_lock);
 	up_write(&current->mm->mmap_sem);
 	
 	return 0;
--- linux-2.4.3-pre6/mm/memory.c	Thu Mar 22 18:52:52 2001
+++ lk/mm/memory.c	Thu Mar 22 21:13:29 2001
@@ -374,7 +374,6 @@
 		address = (address + PGDIR_SIZE) & PGDIR_MASK;
 		dir++;
 	} while (address && (address < end));
-	spin_unlock(&mm->page_table_lock);
 	/*
 	 * Update rss for the mm_struct (not necessarily current->mm)
 	 * Notice that rss is an unsigned long.
@@ -383,6 +382,7 @@
 		mm->rss -= freed;
 	else
 		mm->rss = 0;
+	spin_unlock(&mm->page_table_lock);
 }
 
 
@@ -792,6 +792,8 @@
  *  - flush the old one
  *  - update the page tables
  *  - inform the TLB about the new one
+ *
+ * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock
  */
 static inline void establish_pte(struct vm_area_struct * vma, unsigned long address, pte_t *page_table, pte_t entry)
 {
@@ -800,6 +802,9 @@
 	update_mmu_cache(vma, address, entry);
 }
 
+/*
+ * We hold the mm semaphore for reading and vma->vm_mm->page_table_lock
+ */
 static inline void break_cow(struct vm_area_struct * vma, struct page *	old_page, struct page * new_page, unsigned long address, 
 		pte_t *page_table)
 {
@@ -1024,8 +1029,7 @@
 }
 
 /*
- * We hold the mm semaphore and the page_table_lock on entry
- * and exit.
+ * We hold the mm semaphore and the page_table_lock on entry and exit.
  */
 static int do_swap_page(struct mm_struct * mm,
 	struct vm_area_struct * vma, unsigned long address,
--- linux-2.4.3-pre6/mm/mmap.c	Thu Mar 22 18:52:52 2001
+++ lk/mm/mmap.c	Thu Mar 22 19:19:08 2001
@@ -889,8 +889,8 @@
 	spin_lock(&mm->page_table_lock);
 	mpnt = mm->mmap;
 	mm->mmap = mm->mmap_avl = mm->mmap_cache = NULL;
-	spin_unlock(&mm->page_table_lock);
 	mm->rss = 0;
+	spin_unlock(&mm->page_table_lock);
 	mm->total_vm = 0;
 	mm->locked_vm = 0;
 
--- linux-2.4.3-pre6/mm/vmscan.c	Tue Jan 16 07:36:49 2001
+++ lk/mm/vmscan.c	Thu Mar 22 19:32:11 2001
@@ -25,16 +25,15 @@
 #include <asm/pgalloc.h>
 
 /*
- * The swap-out functions return 1 if they successfully
- * threw something out, and we got a free page. It returns
- * zero if it couldn't do anything, and any other value
- * indicates it decreased rss, but the page was shared.
+ * The swap-out function returns 1 if it successfully
+ * scanned all the pages it was asked to (`count').
+ * It returns zero if it couldn't do anything,
  *
- * NOTE! If it sleeps, it *must* return 1 to make sure we
- * don't continue with the swap-out. Otherwise we may be
- * using a process that no longer actually exists (it might
- * have died while we slept).
+ * rss may decrease because pages are shared, but this
+ * doesn't count as having freed a page.
  */
+
+/* mm->page_table_lock is held. mmap_sem is not held */
 static void try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, struct page *page)
 {
 	pte_t pte;
@@ -129,6 +128,7 @@
 	return;
 }
 
+/* mm->page_table_lock is held. mmap_sem is not held */
 static int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long end, int count)
 {
 	pte_t * pte;
@@ -165,6 +165,7 @@
 	return count;
 }
 
+/* mm->page_table_lock is held. mmap_sem is not held */
 static inline int swap_out_pgd(struct mm_struct * mm, struct vm_area_struct * vma, pgd_t *dir, unsigned long address, unsigned long end, int count)
 {
 	pmd_t * pmd;
@@ -194,6 +195,7 @@
 	return count;
 }
 
+/* mm->page_table_lock is held. mmap_sem is not held */
 static int swap_out_vma(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long address, int count)
 {
 	pgd_t *pgdir;
@@ -218,6 +220,9 @@
 	return count;
 }
 
+/*
+ * Returns non-zero if we scanned all `count' pages
+ */
 static int swap_out_mm(struct mm_struct * mm, int count)
 {
 	unsigned long address;
--- linux-2.4.3-pre6/mm/swapfile.c	Sun Feb 25 17:37:14 2001
+++ lk/mm/swapfile.c	Thu Mar 22 19:56:36 2001
@@ -209,6 +209,7 @@
  * share this swap entry, so be cautious and let do_wp_page work out
  * what to do if a write is requested later.
  */
+/* tasklist_lock and vma->vm_mm->page_table_lock are held */
 static inline void unuse_pte(struct vm_area_struct * vma, unsigned long address,
 	pte_t *dir, swp_entry_t entry, struct page* page)
 {
@@ -234,6 +235,7 @@
 	++vma->vm_mm->rss;
 }
 
+/* tasklist_lock and vma->vm_mm->page_table_lock are held */
 static inline void unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
 	unsigned long address, unsigned long size, unsigned long offset,
 	swp_entry_t entry, struct page* page)
@@ -261,6 +263,7 @@
 	} while (address && (address < end));
 }
 
+/* tasklist_lock and vma->vm_mm->page_table_lock are held */
 static inline void unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
 	unsigned long address, unsigned long size,
 	swp_entry_t entry, struct page* page)
@@ -291,6 +294,7 @@
 	} while (address && (address < end));
 }
 
+/* tasklist_lock and vma->vm_mm->page_table_lock are held */
 static void unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir,
 			swp_entry_t entry, struct page* page)
 {
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux.eu.org/Linux-MM/

  parent reply	other threads:[~2001-03-22 10:24 UTC|newest]

Thread overview: 26+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <Pine.LNX.4.33.0103191802330.2076-100000@mikeg.weiden.de>
2001-03-20  1:56 ` Rik van Riel
2001-03-19 22:46   ` Linus Torvalds
2001-03-20  2:46   ` Linus Torvalds
2001-03-20  4:15     ` Marcelo Tosatti
2001-03-20  6:07       ` Linus Torvalds
2001-03-20  4:29         ` Marcelo Tosatti
2001-03-20  6:36           ` Linus Torvalds
2001-03-20  7:03             ` Linus Torvalds
2001-03-20  8:19               ` Eric W. Biederman
2001-03-20 15:11     ` Andrew Morton
2001-03-20 15:15       ` Jeff Garzik
2001-03-20 15:16       ` Jeff Garzik
2001-03-20 15:31         ` Andrew Morton
2001-03-21  1:59           ` Eric W. Biederman
2001-03-20 16:08       ` Linus Torvalds
2001-03-20 16:33         ` Andi Kleen
2001-03-20 17:13           ` Linus Torvalds
2001-03-20 19:33           ` Rik van Riel
2001-03-20 22:51             ` Andrew Morton
2001-03-22 10:24         ` Andrew Morton [this message]
2001-03-25 14:53     ` [patch] pae-2.4.3-A4 Ingo Molnar
2001-03-25 16:33       ` Russell King
2001-03-25 18:07       ` Linus Torvalds
2001-03-25 18:51         ` Ingo Molnar
     [not found] <3AB6C7C2.D1A49FEF@uow.edu.au>
     [not found] ` <Pine.LNX.4.31.0103191932240.7210-100000@penguin.transmeta.com>
2001-03-20 12:38   ` 3rd version of R/W mmap_sem patch available Stephen C. Tweedie
2001-03-20 14:02     ` H. Peter Anvin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3AB9D2F8.86D8E359@uow.edu.au \
    --to=andrewm@uow.edu.au \
    --cc=linux-mm@kvack.org \
    --cc=torvalds@transmeta.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox