linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Andrew Morton <akpm@zip.com.au>
To: "Martin J. Bligh" <Martin.Bligh@us.ibm.com>
Cc: Andrea Arcangeli <andrea@suse.de>,
	Linus Torvalds <torvalds@transmeta.com>,
	Rik van Riel <riel@conectiva.com.br>,
	"linux-mm@kvack.org" <linux-mm@kvack.org>
Subject: Re: scalable kmap (was Re: vm lock contention reduction)
Date: Mon, 08 Jul 2002 22:28:06 -0700	[thread overview]
Message-ID: <3D2A7466.AD867DA7@zip.com.au> (raw)
In-Reply-To: <1214790647.1026163711@[10.10.2.3]>

"Martin J. Bligh" wrote:
> 
> > It's a bit weird that copy_strings is the heaviest user of kmap().
> 
> I can dig out the whole acg, and see what was calling copy_strings,
> that might help give us a clue.

Well, it'll be exec().

> > I bet it's kmapping the same page over and over.  A little cache
> > there will help.  I'll fix that up.
> 
> Sounds like a plan ...

Updated patch below.  We don't need an atomic kmap in copy_strings
at all.  kmap is the right thing to do, but just be smarter about it.
Hanging onto the existing kmap in there reduces the number of kmap()
calls by a factor of 32 across a kernel compile.

But still no aggregate speedup.

> > So here's the patch.  Seems to work.  Across a `make -j6 bzImage'
> > the number of calls to kmap_high() went from 490,429 down to 41,174.
> >
> > And guess what?   Zero change in wallclock time.
> 
> Well, I was going to try to bench this tonight, but am having a
> problem with 2.5.25 right now (we've been on 2.4 for a while,
> but are shifting). Hopefully get you some numbers tommorow, and
> will get some other benchmarks done by people here on various
> machines.

Don't tell me those NUMAQ's are using IDE ;)

But seriously, what's the problem?  We really do need the big
boxes to be able to test 2.5 right now, and any blockage needs
to be cleared away.

> > Any theories?
> 
> Maybe the cost of the atomic kmap counters the gain? Having to do a
> single line tlbflush every time ... a per cpu pool might help if that
> is the problem, but would have to make it a reasonable size to counter
> the cost. I'll do some more measurements first, and get some profile
> data to see if the number of ticks changes down in one function and
> up in the other?

Well, certainly it's dumb to perform an atomic_kunmap() when we're
just about to atomic_kmap() the same page again.  But no change.



 arch/i386/lib/usercopy.c        |   10 +++++
 arch/i386/mm/fault.c            |   40 ++++++++++++++++++++
 fs/exec.c                       |   72 ++++++++++++++++++++++++++++--------
 include/asm-i386/kmap_types.h   |    3 +
 include/asm-i386/processor.h    |    2 +
 include/asm-ppc/kmap_types.h    |    1 
 include/asm-sparc/kmap_types.h  |    1 
 include/asm-x86_64/kmap_types.h |    1 
 include/linux/highmem.h         |   78 ++++++++++++++++++++++++++++++++++++++++
 include/linux/sched.h           |    5 ++
 mm/filemap.c                    |   11 +++--
 11 files changed, 202 insertions(+), 22 deletions(-)

--- 2.5.25/arch/i386/mm/fault.c~linus-copy_user-hack	Mon Jul  8 21:44:21 2002
+++ 2.5.25-akpm/arch/i386/mm/fault.c	Mon Jul  8 21:44:21 2002
@@ -18,6 +18,7 @@
 #include <linux/ptrace.h>
 #include <linux/mman.h>
 #include <linux/mm.h>
+#include <linux/highmem.h>
 #include <linux/smp.h>
 #include <linux/smp_lock.h>
 #include <linux/interrupt.h>
@@ -138,6 +139,39 @@ void bust_spinlocks(int yes)
 	}
 }
 
+#ifdef CONFIG_HIGHMEM
+static inline void highmem_remove_atomic_kmap(struct pt_regs *regs)
+{
+	struct copy_user_state *cus = current->copy_user_state;
+
+	if (cus)
+		kunmap_atomic(cus->kaddr, cus->type);
+}
+
+static inline void highmem_restore_atomic_kmap(struct pt_regs *regs)
+{
+	struct copy_user_state *cus = current->copy_user_state;
+
+	if (cus) {
+		long *reg;
+		unsigned offset;
+
+		cus->kaddr = kmap_atomic(cus->page, cus->type);
+		if (cus->src)
+			reg = &regs->esi;
+		else
+			reg = &regs->edi;
+		offset = *reg & (PAGE_SIZE - 1);
+		*reg = ((long)cus->kaddr) | offset;
+	}
+}
+#else
+static inline void highmem_remove_atomic_kmap(struct pt_regs *regs)
+{}
+static inline void highmem_restore_atomic_kmap(struct pt_regs *regs)
+{}
+#endif
+
 asmlinkage void do_invalid_op(struct pt_regs *, unsigned long);
 extern unsigned long idt;
 
@@ -206,6 +240,8 @@ asmlinkage void do_page_fault(struct pt_
 	}
 #endif
 
+	highmem_remove_atomic_kmap(regs);
+
 	down_read(&mm->mmap_sem);
 
 	vma = find_vma(mm, address);
@@ -267,8 +303,10 @@ good_area:
 			tsk->maj_flt++;
 			break;
 		case VM_FAULT_SIGBUS:
+			highmem_restore_atomic_kmap(regs);
 			goto do_sigbus;
 		case VM_FAULT_OOM:
+			highmem_restore_atomic_kmap(regs);
 			goto out_of_memory;
 		default:
 			BUG();
@@ -283,6 +321,7 @@ good_area:
 			tsk->thread.screen_bitmap |= 1 << bit;
 	}
 	up_read(&mm->mmap_sem);
+	highmem_restore_atomic_kmap(regs);
 	return;
 
 /*
@@ -291,6 +330,7 @@ good_area:
  */
 bad_area:
 	up_read(&mm->mmap_sem);
+	highmem_restore_atomic_kmap(regs);
 
 	/* User mode accesses just cause a SIGSEGV */
 	if (error_code & 4) {
--- 2.5.25/include/asm-i386/kmap_types.h~linus-copy_user-hack	Mon Jul  8 21:44:21 2002
+++ 2.5.25-akpm/include/asm-i386/kmap_types.h	Mon Jul  8 21:44:21 2002
@@ -19,7 +19,8 @@ D(5)	KM_BIO_SRC_IRQ,
 D(6)	KM_BIO_DST_IRQ,
 D(7)	KM_PTE0,
 D(8)	KM_PTE1,
-D(9)	KM_TYPE_NR
+D(9)	KM_FILEMAP,
+D(10)	KM_TYPE_NR
 };
 
 #undef D
--- 2.5.25/include/asm-ppc/kmap_types.h~linus-copy_user-hack	Mon Jul  8 21:44:21 2002
+++ 2.5.25-akpm/include/asm-ppc/kmap_types.h	Mon Jul  8 21:44:21 2002
@@ -15,6 +15,7 @@ enum km_type {
 	KM_BIO_DST_IRQ,
 	KM_PTE0,
 	KM_PTE1,
+	KM_FILEMAP,
 	KM_TYPE_NR
 };
 
--- 2.5.25/include/asm-sparc/kmap_types.h~linus-copy_user-hack	Mon Jul  8 21:44:21 2002
+++ 2.5.25-akpm/include/asm-sparc/kmap_types.h	Mon Jul  8 21:44:21 2002
@@ -9,6 +9,7 @@ enum km_type {
 	KM_USER1,
 	KM_BIO_SRC_IRQ,
 	KM_BIO_DST_IRQ,
+	KM_FILEMAP,
 	KM_TYPE_NR
 };
 
--- 2.5.25/include/asm-x86_64/kmap_types.h~linus-copy_user-hack	Mon Jul  8 21:44:21 2002
+++ 2.5.25-akpm/include/asm-x86_64/kmap_types.h	Mon Jul  8 21:44:21 2002
@@ -9,6 +9,7 @@ enum km_type {
 	KM_USER1,
 	KM_BIO_SRC_IRQ,
 	KM_BIO_DST_IRQ,
+	KM_FILEMAP,
 	KM_TYPE_NR
 };
 
--- 2.5.25/include/linux/highmem.h~linus-copy_user-hack	Mon Jul  8 21:44:21 2002
+++ 2.5.25-akpm/include/linux/highmem.h	Mon Jul  8 21:44:21 2002
@@ -3,6 +3,7 @@
 
 #include <linux/config.h>
 #include <linux/fs.h>
+#include <asm/processor.h>
 #include <asm/cacheflush.h>
 
 #ifdef CONFIG_HIGHMEM
@@ -10,6 +11,7 @@
 extern struct page *highmem_start_page;
 
 #include <asm/highmem.h>
+#include <asm/kmap_types.h>
 
 /* declarations for linux/mm/highmem.c */
 unsigned int nr_free_highpages(void);
@@ -72,4 +74,80 @@ static inline void copy_user_highpage(st
 	kunmap_atomic(vto, KM_USER1);
 }
 
+#if defined(CONFIG_HIGHMEM) && defined(ARCH_HAS_KMAP_FIXUP)
+/*
+ * Used when performing a copy_*_user while holding an atomic kmap
+ */
+struct copy_user_state {
+	struct page *page;		/* The page which is kmap_atomiced */
+	void *kaddr;			/* Its mapping */
+	enum km_type type;		/* Its offset */
+	int src;			/* 1: fixup ESI.  0: Fixup EDI */
+};
+
+/*
+ * `src' is true if the kmap_atomic virtual address is the source of the copy.
+ */
+static inline void *
+kmap_copy_user(struct copy_user_state *cus, struct page *page,
+			enum km_type type, int src)
+{
+	cus->page = page;
+	cus->kaddr = kmap_atomic(page, type);
+	if (PageHighMem(page)) {
+		cus->type = type;
+		cus->src = src;
+		BUG_ON(current->copy_user_state != NULL);
+		current->copy_user_state = cus;
+	}
+	return cus->kaddr;
+}
+
+static inline void kunmap_copy_user(struct copy_user_state *cus)
+{
+	if (PageHighMem(cus->page)) {
+		BUG_ON(current->copy_user_state != cus);
+		kunmap_atomic(cus->kaddr, cus->type);
+		current->copy_user_state = NULL;
+		cus->page = NULL;	/* debug */
+	}
+}
+
+/*
+ * After a copy_*_user, the kernel virtual address may be different.  So
+ * use kmap_copy_user_addr() to get the new value.
+ */
+static inline void *kmap_copy_user_addr(struct copy_user_state *cus)
+{
+	return cus->kaddr;
+}
+
+#else
+
+struct copy_user_state {
+	struct page *page;
+};
+
+/*
+ * This must be a macro because `type' may be undefined
+ */
+
+#define kmap_copy_user(cus, page, type, src)	\
+	({					\
+		(cus)->page = (page);		\
+		kmap(page);			\
+	})
+
+static inline void kunmap_copy_user(struct copy_user_state *cus)
+{
+	kunmap(cus->page);
+}
+
+static inline void *kmap_copy_user_addr(struct copy_user_state *cus)
+{
+	return page_address(cus->page);
+}
+
+#endif
+
 #endif /* _LINUX_HIGHMEM_H */
--- 2.5.25/include/linux/sched.h~linus-copy_user-hack	Mon Jul  8 21:44:21 2002
+++ 2.5.25-akpm/include/linux/sched.h	Mon Jul  8 21:44:21 2002
@@ -248,6 +248,8 @@ extern struct user_struct root_user;
 
 typedef struct prio_array prio_array_t;
 
+struct copy_user_state;
+
 struct task_struct {
 	volatile long state;	/* -1 unrunnable, 0 runnable, >0 stopped */
 	struct thread_info *thread_info;
@@ -365,6 +367,9 @@ struct task_struct {
 /* journalling filesystem info */
 	void *journal_info;
 	struct dentry *proc_dentry;
+#ifdef CONFIG_HIGHMEM
+	struct copy_user_state *copy_user_state;
+#endif
 };
 
 extern void __put_task_struct(struct task_struct *tsk);
--- 2.5.25/mm/filemap.c~linus-copy_user-hack	Mon Jul  8 21:44:21 2002
+++ 2.5.25-akpm/mm/filemap.c	Mon Jul  8 21:44:21 2002
@@ -15,6 +15,7 @@
 #include <linux/fs.h>
 #include <linux/mm.h>
 #include <linux/mman.h>
+#include <linux/highmem.h>
 #include <linux/pagemap.h>
 #include <linux/file.h>
 #include <linux/iobuf.h>
@@ -1183,18 +1184,20 @@ static ssize_t generic_file_direct_IO(in
 	return retval;
 }
 
-int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
+int file_read_actor(read_descriptor_t *desc, struct page *page,
+			unsigned long offset, unsigned long size)
 {
 	char *kaddr;
+	struct copy_user_state copy_user_state;
 	unsigned long left, count = desc->count;
 
 	if (size > count)
 		size = count;
 
-	kaddr = kmap(page);
+	kaddr = kmap_copy_user(&copy_user_state, page, KM_FILEMAP, 1);
 	left = __copy_to_user(desc->buf, kaddr + offset, size);
-	kunmap(page);
-	
+	kunmap_copy_user(&copy_user_state);
+
 	if (left) {
 		size -= left;
 		desc->error = -EFAULT;
--- 2.5.25/include/asm-i386/processor.h~linus-copy_user-hack	Mon Jul  8 21:44:21 2002
+++ 2.5.25-akpm/include/asm-i386/processor.h	Mon Jul  8 21:44:21 2002
@@ -485,4 +485,6 @@ extern inline void prefetchw(const void 
 
 #endif
 
+#define ARCH_HAS_KMAP_FIXUP
+
 #endif /* __ASM_I386_PROCESSOR_H */
--- 2.5.25/arch/i386/lib/usercopy.c~linus-copy_user-hack	Mon Jul  8 21:44:21 2002
+++ 2.5.25-akpm/arch/i386/lib/usercopy.c	Mon Jul  8 21:44:21 2002
@@ -11,6 +11,16 @@
 
 #ifdef CONFIG_X86_USE_3DNOW_AND_WORKS
 
+/*
+ * We cannot use the mmx functions here with the kmap_atomic fixup
+ * code.
+ *
+ * But CONFIG_X86_USE_3DNOW_AND_WORKS never gets defined anywhere.
+ * Maybe kill this code?
+ */
+
+#error this will not work
+
 unsigned long
 __generic_copy_to_user(void *to, const void *from, unsigned long n)
 {
--- 2.5.25/fs/exec.c~linus-copy_user-hack	Mon Jul  8 21:44:21 2002
+++ 2.5.25-akpm/fs/exec.c	Mon Jul  8 22:18:12 2002
@@ -182,27 +182,45 @@ static int count(char ** argv, int max)
  * memory to free pages in kernel mem. These are in a format ready
  * to be put directly into the top of new user memory.
  */
+int akpm_hits;
+int akpm_misses;
+int akpm_kmaps;
+
 int copy_strings(int argc,char ** argv, struct linux_binprm *bprm) 
 {
+	struct page *kmapped_page = NULL;
+	char *kaddr = NULL;
+	int ret;
+
 	while (argc-- > 0) {
 		char *str;
 		int len;
 		unsigned long pos;
 
-		if (get_user(str, argv+argc) || !(len = strnlen_user(str, bprm->p)))
-			return -EFAULT;
-		if (bprm->p < len) 
-			return -E2BIG; 
+		if (get_user(str, argv+argc) ||
+				!(len = strnlen_user(str, bprm->p))) {
+			ret = -EFAULT;
+			goto out;
+		}
+
+		if (bprm->p < len)  {
+			ret = -E2BIG;
+			goto out;
+		}
 
 		bprm->p -= len;
 		/* XXX: add architecture specific overflow check here. */ 
-
 		pos = bprm->p;
+
+		/*
+		 * The only sleeping function which we are allowed to call in
+		 * this loop is copy_from_user().  Otherwise, copy_user_state
+		 * could get trashed.
+		 */
 		while (len > 0) {
-			char *kaddr;
 			int i, new, err;
-			struct page *page;
 			int offset, bytes_to_copy;
+			struct page *page;
 
 			offset = pos % PAGE_SIZE;
 			i = pos/PAGE_SIZE;
@@ -211,32 +229,52 @@ int copy_strings(int argc,char ** argv, 
 			if (!page) {
 				page = alloc_page(GFP_HIGHUSER);
 				bprm->page[i] = page;
-				if (!page)
-					return -ENOMEM;
+				if (!page) {
+					ret = -ENOMEM;
+					goto out;
+				}
 				new = 1;
 			}
-			kaddr = kmap(page);
 
+			if (kmapped_page) {
+				if (page == kmapped_page)
+					akpm_hits++;
+				else
+					akpm_misses++;
+			}
+			
+			if (page != kmapped_page) {
+				if (kmapped_page)
+					kunmap(kmapped_page);
+				kmapped_page = page;
+				kaddr = kmap(kmapped_page);
+				akpm_kmaps++;
+			}
 			if (new && offset)
 				memset(kaddr, 0, offset);
 			bytes_to_copy = PAGE_SIZE - offset;
 			if (bytes_to_copy > len) {
 				bytes_to_copy = len;
 				if (new)
-					memset(kaddr+offset+len, 0, PAGE_SIZE-offset-len);
+					memset(kaddr+offset+len, 0,
+						PAGE_SIZE-offset-len);
+			}
+			err = copy_from_user(kaddr+offset, str, bytes_to_copy);
+			if (err) {
+				ret = -EFAULT;
+				goto out;
 			}
-			err = copy_from_user(kaddr + offset, str, bytes_to_copy);
-			kunmap(page);
-
-			if (err)
-				return -EFAULT; 
 
 			pos += bytes_to_copy;
 			str += bytes_to_copy;
 			len -= bytes_to_copy;
 		}
 	}
-	return 0;
+	ret = 0;
+out:
+	if (kmapped_page)
+		kunmap(kmapped_page);
+	return ret;
 }
 
 /*

-
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/

  reply	other threads:[~2002-07-09  5:28 UTC|newest]

Thread overview: 77+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2002-07-04 23:05 vm lock contention reduction Andrew Morton
2002-07-04 23:26 ` Rik van Riel
2002-07-04 23:27 ` Rik van Riel
2002-07-05  1:37   ` Andrew Morton
2002-07-05  1:49     ` Rik van Riel
2002-07-05  2:18       ` Andrew Morton
2002-07-05  2:16         ` Rik van Riel
2002-07-05  2:53           ` Andrew Morton
2002-07-05  3:52             ` Benjamin LaHaise
2002-07-05  4:47           ` Linus Torvalds
2002-07-05  5:38             ` Andrew Morton
2002-07-05  5:51               ` Linus Torvalds
2002-07-05  6:08                 ` Linus Torvalds
2002-07-05  6:27                   ` Alexander Viro
2002-07-05  6:33                   ` Andrew Morton
2002-07-05  7:33                     ` Andrea Arcangeli
2002-07-07  2:50                       ` Andrew Morton
2002-07-07  3:05                         ` Linus Torvalds
2002-07-07  3:47                           ` Andrew Morton
2002-07-08 11:39                             ` Enhanced profiling support (was Re: vm lock contention reduction) John Levon
2002-07-08 17:52                               ` Linus Torvalds
2002-07-08 18:41                                 ` Karim Yaghmour
2002-07-10  2:22                                   ` John Levon
2002-07-10  4:16                                     ` Karim Yaghmour
2002-07-10  4:38                                       ` John Levon
2002-07-10  5:46                                         ` Karim Yaghmour
2002-07-10 13:10                                         ` bob
2002-07-07  5:16                           ` vm lock contention reduction Martin J. Bligh
2002-07-07  6:13                         ` scalable kmap (was Re: vm lock contention reduction) Martin J. Bligh
2002-07-07  6:37                           ` Andrew Morton
2002-07-07  7:53                           ` Linus Torvalds
2002-07-07  9:04                             ` Andrew Morton
2002-07-07 16:13                               ` Martin J. Bligh
2002-07-07 18:31                               ` Linus Torvalds
2002-07-07 18:55                                 ` Linus Torvalds
2002-07-07 19:02                                   ` Linus Torvalds
2002-07-08  7:24                                 ` Andrew Morton
2002-07-08  8:09                                   ` Andrea Arcangeli
2002-07-08 14:50                                     ` William Lee Irwin III
2002-07-08 20:39                                     ` Andrew Morton
2002-07-08 21:08                                       ` Benjamin LaHaise
2002-07-08 21:45                                         ` Andrew Morton
2002-07-08 22:24                                           ` Benjamin LaHaise
2002-07-07 16:00                             ` Martin J. Bligh
2002-07-07 18:28                               ` Linus Torvalds
2002-07-08  7:11                                 ` Andrea Arcangeli
2002-07-08 10:15                                 ` Eric W. Biederman
2002-07-08  7:00                               ` Andrea Arcangeli
2002-07-08 17:29                           ` Martin J. Bligh
2002-07-08 22:14                             ` Linus Torvalds
2002-07-09  0:16                               ` Andrew Morton
2002-07-09  3:17                             ` Andrew Morton
2002-07-09  4:28                               ` Martin J. Bligh
2002-07-09  5:28                                 ` Andrew Morton [this message]
2002-07-09  6:15                                   ` Martin J. Bligh
2002-07-09  6:30                                     ` William Lee Irwin III
2002-07-09  6:32                                     ` William Lee Irwin III
2002-07-09 16:08                                   ` Martin J. Bligh
2002-07-09 17:32                                   ` Andrea Arcangeli
2002-07-10  5:32                                     ` Andrew Morton
2002-07-10 22:43                                       ` Martin J. Bligh
2002-07-10 23:08                                         ` Andrew Morton
2002-07-10 23:26                                           ` Martin J. Bligh
2002-07-11  0:19                                             ` Andrew Morton
2002-07-12 17:48                                           ` Martin J. Bligh
2002-07-13 11:18                                             ` Andrea Arcangeli
2002-07-09 13:59                               ` Benjamin LaHaise
2002-07-08  0:38                         ` vm lock contention reduction William Lee Irwin III
2002-07-05  6:46                 ` Andrew Morton
2002-07-05 14:25                   ` Rik van Riel
2002-07-05 23:11         ` William Lee Irwin III
2002-07-05 23:48           ` Andrew Morton
2002-07-06  0:11             ` Rik van Riel
2002-07-06  0:31               ` Linus Torvalds
2002-07-06  0:45                 ` Rik van Riel
2002-07-06  0:48               ` Andrew Morton
2002-07-08  0:59                 ` William Lee Irwin III

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=3D2A7466.AD867DA7@zip.com.au \
    --to=akpm@zip.com.au \
    --cc=Martin.Bligh@us.ibm.com \
    --cc=andrea@suse.de \
    --cc=linux-mm@kvack.org \
    --cc=riel@conectiva.com.br \
    --cc=torvalds@transmeta.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox