VM tuning through fault trace gathering [with actual code]

linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed

* VM tuning through fault trace gathering [with actual code]
@ 2001-06-25 15:26 John Fremlin
  2001-06-25 17:57 ` Rik van Riel
  2001-06-26  0:53 ` Marcelo Tosatti
  0 siblings, 2 replies; 13+ messages in thread
From: John Fremlin @ 2001-06-25 15:26 UTC (permalink / raw)
  To: linux-mm; +Cc: linux-kernel

[-- Attachment #1: Type: text/plain, Size: 881 bytes --]


Last year I had the idea of tracing the memory accesses of the system
to improve the VM - the traces could be used to test algorithms in
userspace. The difficulty is of course making all memory accesses
fault without destroying system performance.

The following patch (i386 only) will dump all page faults to
/dev/biglog (you need devfs for this node to appear). If you echo 1 >
/proc/sys/vm/trace then *almost all* userspace memory accesses will
take a soft fault. Note that this is a bit suicidal at the moment
because of the staggeringly inefficient way it's implemented; on my box
(K6-2 300MHz) only processes which do very little (e.g. /usr/bin/yes)
running at highest priority are able to print anything to the console.

I think the best way would be to have only one valid l2 pte per
process. I'll have a go at doing that in a day or two unless someone
has a better idea?


[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #2: linux-2.4.4-i386-pagetrace-2.patch --]
[-- Type: text/x-patch, Size: 12080 bytes --]

diff --exclude *~ --new-file -u -r linux-2.4.4-orig/drivers/char/Makefile linux-2.4.4-i386-pagetrace/drivers/char/Makefile
--- linux-2.4.4-orig/drivers/char/Makefile	Tue May  1 14:33:51 2001
+++ linux-2.4.4-i386-pagetrace/drivers/char/Makefile	Sat Jun 23 22:21:34 2001
@@ -16,7 +16,7 @@
 
 O_TARGET := char.o
 
-obj-y	 += tty_io.o n_tty.o tty_ioctl.o mem.o raw.o pty.o misc.o random.o
+obj-y	 += tty_io.o n_tty.o tty_ioctl.o mem.o raw.o pty.o misc.o random.o biglog.o
 
 # All of the (potential) objects that export symbols.
 # This list comes from 'grep -l EXPORT_SYMBOL *.[hc]'.
diff --exclude *~ --new-file -u -r linux-2.4.4-orig/drivers/char/biglog.c linux-2.4.4-i386-pagetrace/drivers/char/biglog.c
--- linux-2.4.4-orig/drivers/char/biglog.c	Thu Jan  1 01:00:00 1970
+++ linux-2.4.4-i386-pagetrace/drivers/char/biglog.c	Sun Jun 24 14:55:55 2001
@@ -0,0 +1,204 @@
+/* Implements a misc device that can output large amounts of data from
+ * the kernel to userspace
+ *
+ * (c) 2001 John Fremlin released under GPL
+ */
+
+#include <linux/slab.h>
+#include <linux/init.h>
+#include <linux/miscdevice.h>
+#include <linux/spinlock.h>
+#include <linux/wait.h>
+#include <asm/uaccess.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+
+#define BUFFER_SIZE (1024*1024*1)
+
+char buffer[BUFFER_SIZE];
+
+static DECLARE_WAIT_QUEUE_HEAD(waiters);
+static spinlock_t write_head_lock = SPIN_LOCK_UNLOCKED;
+unsigned long write_head;
+
+struct fop_priv
+{
+	unsigned long read_head;
+}
+;
+
+void biglog_log(const char*str)
+{
+	const char*i = str;
+	unsigned long head;
+	unsigned long flags;
+
+	spin_lock_irqsave(&write_head_lock,flags);
+	head = write_head;
+	while(*i) {
+		buffer[head++]= *i++;
+		if(head>=BUFFER_SIZE)
+			head = 0;
+	}
+	write_head = head;
+	spin_unlock_irqrestore(&write_head_lock,flags);
+	wake_up_all(&waiters);
+}
+
+void biglog_logfault(struct mm_struct *mm, struct vm_area_struct * vma,
+		     unsigned long address, int write_access) 
+{
+	static unsigned long no;
+	static char faultbuf[1024];
+
+	char* process = current ? current->comm : "unknown";
+	pid_t pid = current ? current->pid : 0;
+	
+	unsigned long offset = address - vma->vm_start;
+	struct file* file = vma->vm_file;
+	struct dentry* dentry = file ? file->f_dentry : 0;
+	struct inode* inode = dentry ? dentry->d_inode : 0;
+
+	unsigned long ino = inode ? inode->i_ino : 0;
+	kdev_t device = inode ? inode->i_dev : 0;
+	struct qstr* d_name = dentry ? &dentry->d_name : 0;
+               
+	char name[100];
+	unsigned len = sizeof(name)-1;
+               
+	if(d_name && (d_name->len < len))
+		len = d_name->len;
+               
+	strncpy(name, d_name ? (const char*)d_name->name : (const char*)
+		"anon", len);
+	name[len] = 0;
+
+	sprintf(faultbuf,"%lu: %p%c (%s) %lu (%s) %p %lu:%lu+%lu\n",
+		no++,
+		(void*)address,
+		write_access?'W':'r',
+		process,
+		(unsigned long)pid,
+		(char*)name,
+		vma,
+		(unsigned long)device,
+		ino,
+		offset
+		);
+
+	biglog_log(faultbuf);
+}
+
+static int fop_open(struct inode * inode, struct file * file)
+{
+	struct fop_priv*priv;
+	priv = kmalloc(sizeof *priv,GFP_KERNEL);
+	if(!priv)
+		return -ENOMEM;
+
+	memset(priv,0,sizeof *priv);
+
+	priv->read_head = write_head;
+	file->private_data = priv;
+
+	return 0;
+}
+
+static ssize_t fop_read(struct file * file, char * buf,
+			size_t count, loff_t *ppos)
+{
+	ssize_t ret = 0;
+	unsigned long head;
+	unsigned long flags;
+	struct fop_priv *priv = (struct fop_priv *)file->private_data;
+
+	if (ppos != &file->f_pos)
+		return -ESPIPE;
+
+	spin_lock_irqsave(&write_head_lock,flags);
+	head = write_head;
+	if(head == priv->read_head) {
+		spin_unlock_irqrestore(&write_head_lock,flags);
+		if(file->f_flags&O_NONBLOCK)
+			return -EAGAIN;
+		
+		if (wait_event_interruptible(waiters,
+		    head != write_head)
+		    == -ERESTARTSYS) {
+			return -ERESTARTSYS;
+		}
+		spin_lock_irqsave(&write_head_lock,flags);
+		head = write_head;
+	}
+	if(!count) 
+		goto out;
+	
+	if(head >= priv->read_head)
+		if(count > head - priv->read_head)
+			count = head - priv->read_head;
+		
+	if(count+priv->read_head >  BUFFER_SIZE)
+		count = BUFFER_SIZE - priv->read_head;
+	
+	if (copy_to_user(buf, buffer + priv->read_head, count))
+		ret = -EFAULT;
+	else {
+		ret = count;
+		priv->read_head += count;
+		if(priv->read_head >= BUFFER_SIZE)
+			priv->read_head = 0;
+	}
+ out:
+	spin_unlock_irqrestore(&write_head_lock,flags);
+	return ret;
+}
+
+static int fop_release(struct inode * inode, struct file * file)
+{
+	struct fop_priv *priv = (struct fop_priv *)file->private_data;
+	kfree(priv);
+	return 0;
+}
+
+static struct file_operations fops = {
+	owner:		THIS_MODULE,
+	read:		fop_read,
+	open:		fop_open,
+	release:		fop_release,
+};
+
+static struct miscdevice dev=
+{
+	MISC_DYNAMIC_MINOR,
+	"biglog",
+	&fops
+};
+
+
+static int __init mod_init(void)
+{
+	if(misc_register(&dev)){
+		printk(KERN_DEBUG "biglog: could not register device node\n");
+		return -EBUSY;
+	}
+	
+	printk(KERN_INFO "biglog: ready to rock\n");
+	return 0;
+}
+
+static void __exit mod_exit(void)
+{
+	if(misc_deregister(&dev))
+		printk(KERN_DEBUG "biglog: could not deregister device node\n");
+
+	/* FIXME: remove if this gets into main tree */
+	printk(KERN_INFO "biglog: biglog has left the building\n");
+}
+
+module_init(mod_init);
+module_exit(mod_exit);
+
+MODULE_DESCRIPTION("Interface for loggin large amounts of data from the kernel");
+MODULE_AUTHOR("John Fremlin");
+EXPORT_SYMBOL(biglog_logfault);
+EXPORT_SYMBOL(biglog_log);
diff --exclude *~ --new-file -u -r linux-2.4.4-orig/include/asm-i386/mmu_context.h linux-2.4.4-i386-pagetrace/include/asm-i386/mmu_context.h
--- linux-2.4.4-orig/include/asm-i386/mmu_context.h	Tue May  1 20:35:24 2001
+++ linux-2.4.4-i386-pagetrace/include/asm-i386/mmu_context.h	Sun Jun 24 15:34:31 2001
@@ -27,6 +27,8 @@
 
 static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk, unsigned cpu)
 {
+	extern void trace_mm(struct mm_struct*mm);
+	trace_mm(next);
 	if (prev != next) {
 		/* stop flush ipis for the previous mm */
 		clear_bit(cpu, &prev->cpu_vm_mask);
diff --exclude *~ --new-file -u -r linux-2.4.4-orig/include/asm-i386/pgtable.h linux-2.4.4-i386-pagetrace/include/asm-i386/pgtable.h
--- linux-2.4.4-orig/include/asm-i386/pgtable.h	Tue May  1 20:35:24 2001
+++ linux-2.4.4-i386-pagetrace/include/asm-i386/pgtable.h	Sun Jun 24 14:57:11 2001
@@ -174,6 +174,7 @@
 #define _PAGE_GLOBAL	0x100	/* Global TLB entry PPro+ */
 
 #define _PAGE_PROTNONE	0x080	/* If not present */
+#define _PAGE_TRACE     0x200
 
 #define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
 #define _KERNPG_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY)
@@ -243,7 +244,8 @@
 /* page table for 0-4MB for everybody */
 extern unsigned long pg0[1024];
 
-#define pte_present(x)	((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE))
+#define pte_present(x)	((x).pte_low & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_TRACE))
+#define pte_traced(x)   ((x).pte_low & _PAGE_TRACE)
 #define pte_clear(xp)	do { set_pte(xp, __pte(0)); } while (0)
 
 #define pmd_none(x)	(!pmd_val(x))
@@ -278,6 +280,8 @@
 static inline pte_t pte_mkdirty(pte_t pte)	{ (pte).pte_low |= _PAGE_DIRTY; return pte; }
 static inline pte_t pte_mkyoung(pte_t pte)	{ (pte).pte_low |= _PAGE_ACCESSED; return pte; }
 static inline pte_t pte_mkwrite(pte_t pte)	{ (pte).pte_low |= _PAGE_RW; return pte; }
+static inline pte_t pte_mktrace(pte_t pte)      { (pte).pte_low |= _PAGE_TRACE; (pte).pte_low &= ~_PAGE_PRESENT; return pte; }
+static inline pte_t pte_untrace(pte_t pte)      { if(!pte_traced(pte))return pte; (pte).pte_low &= ~_PAGE_TRACE; (pte).pte_low |= _PAGE_PRESENT; return pte; }
 
 static inline  int ptep_test_and_clear_dirty(pte_t *ptep)	{ return test_and_clear_bit(_PAGE_BIT_DIRTY, ptep); }
 static inline  int ptep_test_and_clear_young(pte_t *ptep)	{ return test_and_clear_bit(_PAGE_BIT_ACCESSED, ptep); }
diff --exclude *~ --new-file -u -r linux-2.4.4-orig/include/linux/sysctl.h linux-2.4.4-i386-pagetrace/include/linux/sysctl.h
--- linux-2.4.4-orig/include/linux/sysctl.h	Tue May  1 20:35:46 2001
+++ linux-2.4.4-i386-pagetrace/include/linux/sysctl.h	Sun Jun 24 04:08:01 2001
@@ -134,7 +134,8 @@
 	VM_PAGECACHE=7,		/* struct: Set cache memory thresholds */
 	VM_PAGERDAEMON=8,	/* struct: Control kswapd behaviour */
 	VM_PGT_CACHE=9,		/* struct: Set page table cache parameters */
-	VM_PAGE_CLUSTER=10	/* int: set number of pages to swap together */
+	VM_PAGE_CLUSTER=10,	/* int: set number of pages to swap together */
+	VM_TRACE=11,            /* Turn on page access tracing */
 };
 
 
diff --exclude *~ --new-file -u -r linux-2.4.4-orig/kernel/sysctl.c linux-2.4.4-i386-pagetrace/kernel/sysctl.c
--- linux-2.4.4-orig/kernel/sysctl.c	Tue May  1 14:34:43 2001
+++ linux-2.4.4-i386-pagetrace/kernel/sysctl.c	Sun Jun 24 04:16:27 2001
@@ -40,6 +40,7 @@
 #if defined(CONFIG_SYSCTL)
 
 /* External variables not in a header file. */
+extern int page_trace;
 extern int panic_timeout;
 extern int C_A_D;
 extern int bdf_prm[], bdflush_min[], bdflush_max[];
@@ -270,6 +271,8 @@
 	 &pgt_cache_water, 2*sizeof(int), 0644, NULL, &proc_dointvec},
 	{VM_PAGE_CLUSTER, "page-cluster", 
 	 &page_cluster, sizeof(int), 0644, NULL, &proc_dointvec},
+	{VM_TRACE, "trace",
+	 &page_trace, sizeof(int), 0644, NULL, &proc_dointvec},
 	{0}
 };
 
diff --exclude *~ --new-file -u -r linux-2.4.4-orig/mm/memory.c linux-2.4.4-i386-pagetrace/mm/memory.c
--- linux-2.4.4-orig/mm/memory.c	Tue May  1 14:34:43 2001
+++ linux-2.4.4-i386-pagetrace/mm/memory.c	Sun Jun 24 15:51:19 2001
@@ -52,6 +52,7 @@
 unsigned long num_physpages;
 void * high_memory;
 struct page *highmem_start_page;
+int page_trace;
 
 /*
  * We special-case the C-O-W ZERO_PAGE, because it's such
@@ -1271,6 +1272,48 @@
 	return 2;	/* Major fault */
 }
 
+void trace_vma(struct vm_area_struct*vma,pte_t *avoid)
+{
+	unsigned long address;
+
+	if(!page_trace)
+		return;
+	
+	for(address = vma->vm_start;
+	    address < vma->vm_end;
+	    address += PAGE_SIZE)
+	{
+		pgd_t *pgd;
+		pmd_t *pmd;
+		pte_t * pte;
+
+		pgd = pgd_offset(vma->vm_mm, address);
+		if(pgd && !pgd_none(*pgd) && !pgd_bad(*pgd)){
+			pmd = pmd_offset(pgd, address);
+			if(pmd && !pmd_none(*pmd) && !pmd_bad(*pmd)){
+				pte = pte_offset(pmd, address);
+				if(pte && pte != avoid && !pte_none(*pte))
+					if(pte_present(*pte) && !pte_traced(*pte)) {
+						establish_pte(vma, address, pte, pte_mktrace(*pte));
+						flush_tlb_page(vma,address);
+					}
+				
+			}
+		}
+	}
+}
+
+void trace_mm(struct mm_struct*mm)
+{
+	struct vm_area_struct * mmap;
+
+	if(!page_trace)
+		return;
+	
+	for(mmap = mm->mmap;mmap;mmap = mmap->vm_next)
+		trace_vma(mmap,0);
+}
+
 /*
  * These routines also need to handle stuff like marking pages dirty
  * and/or accessed for architectures that don't do it in hardware (most
@@ -1294,19 +1337,25 @@
 	int write_access, pte_t * pte)
 {
 	pte_t entry;
-
+	
 	entry = *pte;
+	if(pte_traced(entry)) {
+		trace_vma(vma,pte);
+		entry = pte_untrace(entry);
+		establish_pte(vma, address, pte, entry);
+		return 1;
+	}
+
+	trace_vma(vma,pte);
+
 	if (!pte_present(entry)) {
-		/*
-		 * If it truly wasn't present, we know that kswapd
-		 * and the PTE updates will not touch it later. So
-		 * drop the lock.
-		 */
+
 		if (pte_none(entry))
 			return do_no_page(mm, vma, address, write_access, pte);
 		return do_swap_page(mm, vma, address, pte, pte_to_swp_entry(entry), write_access);
 	}
 
+	entry = pte_untrace(entry);
 	if (write_access) {
 		if (!pte_write(entry))
 			return do_wp_page(mm, vma, address, pte, entry);
@@ -1324,10 +1373,14 @@
 int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma,
 	unsigned long address, int write_access)
 {
+	extern void biglog_logfault(struct mm_struct *mm, struct vm_area_struct * vma,
+				    unsigned long address, int write_access);
 	int ret = -1;
 	pgd_t *pgd;
 	pmd_t *pmd;
 
+	biglog_logfault(mm,vma,address,write_access);
+	
 	current->state = TASK_RUNNING;
 	pgd = pgd_offset(mm, address);
 

[-- Attachment #3: Type: text/plain, Size: 143 bytes --]


-- 
PS. I'm desperately seeking a last-minute summer job in Europe due to an
unfortunate series of events. Please see http://ape.n3.net/cv.html

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: VM tuning through fault trace gathering [with actual code]
  2001-06-25 15:26 VM tuning through fault trace gathering [with actual code] John Fremlin
@ 2001-06-25 17:57 ` Rik van Riel
  2001-06-25 21:15   ` John Fremlin
  2001-06-26  0:53 ` Marcelo Tosatti
  1 sibling, 1 reply; 13+ messages in thread
From: Rik van Riel @ 2001-06-25 17:57 UTC (permalink / raw)
  To: John Fremlin; +Cc: linux-mm, linux-kernel

On 25 Jun 2001, John Fremlin wrote:

> Last year I had the idea of tracing the memory accesses of the system
> to improve the VM - the traces could be used to test algorithms in
> userspace. The difficulty is of course making all memory accesses
> fault without destroying system performance.

Sounds like a cool idea.  One thing you should keep in mind
though is to gather traces of the WHOLE SYSTEM and not of
individual applications.

There has to be a way to balance the eviction of pages from
applications against those of other applications.

regards,

Rik
--
Virtual memory is like a game you can't win;
However, without VM there's truly nothing to lose...

http://www.surriel.com/		http://distro.conectiva.com/

Send all your spam to aardvark@nl.linux.org (spam digging piggy)

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: VM tuning through fault trace gathering [with actual code]
  2001-06-25 17:57 ` Rik van Riel
@ 2001-06-25 21:15   ` John Fremlin
  2001-06-26 14:02     ` Scott F. Kaplan
  0 siblings, 1 reply; 13+ messages in thread
From: John Fremlin @ 2001-06-25 21:15 UTC (permalink / raw)
  To: Rik van Riel; +Cc: linux-mm, linux-kernel

Rik van Riel <riel@conectiva.com.br> writes:

> On 25 Jun 2001, John Fremlin wrote:
> 
> > Last year I had the idea of tracing the memory accesses of the
> > system to improve the VM - the traces could be used to test
> > algorithms in userspace. The difficulty is of course making all
> > memory accesses fault without destroying system performance.
> 
> Sounds like a cool idea.  One thing you should keep in mind though
> is to gather traces of the WHOLE SYSTEM and not of individual
> applications.

In the current patch all pagefaults are recorded from all sources. I'd
like to be able to catch read(2) and write(2) (buffer cache stuff) as
well but I don't know how . . . .

> There has to be a way to balance the eviction of pages from
> applications against those of other applications.

Of course! It is important not to regard each thread group as an
independent entity IMHO (had a big old argument about this).

[...]

-- 

	http://ape.n3.net
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: VM tuning through fault trace gathering [with actual code]
  2001-06-25 15:26 VM tuning through fault trace gathering [with actual code] John Fremlin
  2001-06-25 17:57 ` Rik van Riel
@ 2001-06-26  0:53 ` Marcelo Tosatti
  2001-06-26 12:54   ` John Fremlin
  1 sibling, 1 reply; 13+ messages in thread
From: Marcelo Tosatti @ 2001-06-26  0:53 UTC (permalink / raw)
  To: John Fremlin; +Cc: linux-mm, linux-kernel


On 25 Jun 2001, John Fremlin wrote:

> 
> Last year I had the idea of tracing the memory accesses of the system
> to improve the VM - the traces could be used to test algorithms in
> userspace. The difficulty is of course making all memory accesses
> fault without destroying system performance.
> 
> The following patch (i386 only) will dump all page faults to
> /dev/biglog (you need devfs for this node to appear). If you echo 1 >
> /proc/sys/vm/trace then *almost all* userspace memory accesses will
> take a soft fault. Note that this is a bit suicidal at the moment
> because of the staggeringly inefficient way it's implemented; on my box
> (K6-2 300MHz) only processes which do very little (e.g. /usr/bin/yes)
> running at highest priority are able to print anything to the console.
> 
> I think the best way would be to have only one valid l2 pte per
> process. I'll have a go at doing that in a day or two unless someone
> has a better idea?

Linux Trace Toolkit (http://www.opersys.com/LTT) does that. 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: VM tuning through fault trace gathering [with actual code]
  2001-06-26  0:53 ` Marcelo Tosatti
@ 2001-06-26 12:54   ` John Fremlin
  2001-06-26 13:52     ` Marcelo Tosatti
  0 siblings, 1 reply; 13+ messages in thread
From: John Fremlin @ 2001-06-26 12:54 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: linux-mm, linux-kernel

Marcelo Tosatti <marcelo@conectiva.com.br> writes:

> On 25 Jun 2001, John Fremlin wrote:
> 
> > 
> > Last year I had the idea of tracing the memory accesses of the system
> > to improve the VM - the traces could be used to test algorithms in
> > userspace. The difficulty is of course making all memory accesses
> > fault without destroying system performance.

[...]

> Linux Trace Toolkit (http://www.opersys.com/LTT) does that. 

I dld the ltt-usenix paper and skim read it. It didn't seem to talk
about page faults much. Where should I look?

-- 

	http://ape.n3.net
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: VM tuning through fault trace gathering [with actual code]
  2001-06-26 12:54   ` John Fremlin
@ 2001-06-26 13:52     ` Marcelo Tosatti
  2001-06-26 15:38       ` John Fremlin
  0 siblings, 1 reply; 13+ messages in thread
From: Marcelo Tosatti @ 2001-06-26 13:52 UTC (permalink / raw)
  To: John Fremlin; +Cc: linux-mm, linux-kernel


On 26 Jun 2001, John Fremlin wrote:

> Marcelo Tosatti <marcelo@conectiva.com.br> writes:
> 
> > On 25 Jun 2001, John Fremlin wrote:
> > 
> > > 
> > > Last year I had the idea of tracing the memory accesses of the system
> > > to improve the VM - the traces could be used to test algorithms in
> > > userspace. The difficulty is of course making all memory accesses
> > > fault without destroying system performance.
> 
> [...]
> 
> > Linux Trace Toolkit (http://www.opersys.com/LTT) does that. 
> 
> I dld the ltt-usenix paper and skim read it. It didn't seem to talk
> about page faults much. Where should I look?

Grab the source and try it out?

Example page fault trace: 

####################################################################
Event     	          Time                   PID     Length Description
####################################################################

Trap entry              991,299,585,597,016     678     12      TRAP: page fault; EIP : 0x40067785


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: VM tuning through fault trace gathering [with actual code]
  2001-06-25 21:15   ` John Fremlin
@ 2001-06-26 14:02     ` Scott F. Kaplan
  2001-06-26 19:29       ` John Fremlin
  0 siblings, 1 reply; 13+ messages in thread
From: Scott F. Kaplan @ 2001-06-26 14:02 UTC (permalink / raw)
  To: linux-mm

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

John Fremlin <vii@users.sourceforge.net> wrote:
> Rik van Riel <riel@conectiva.com.br> writes:
> > Sounds like a cool idea.  One thing you should keep in mind though
> > is to gather traces of the WHOLE SYSTEM and not of individual
> > applications.

Not to look a gift horse in the mouth, but the ability to trace selectively 
either the whole system OR an individual application would be useful.  
Certainly whole system traces would be new, as individual process traces can 
be gathered with other tools (although I don't know of one available on Linux 
- -- I'm stuck using ATOM under Alpha/Tru64.)

> In the current patch all pagefaults are recorded from all sources. I'd
> like to be able to catch read(2) and write(2) (buffer cache stuff) as
> well but I don't know how . . . .

Also a great idea.  Someone who works on the filesystem end of the kernel 
should be able to add support for this kind of thing without much trouble, 
don't you think?

> Of course! It is important not to regard each thread group as an
> independent entity IMHO (had a big old argument about this).

Yes, I was the other side of that argument! :-)  I'll still contend that, 
tracking references for each process is better than tracking it only for the 
whole system, and tracking references for each thread might be better still.  
When you track references from the whole-system view alone, pathological 
reference behavior of one process gets mixed in with other processes, making 
it impossible to identify that the one process should have its memory managed 
in a manner different from the others.  Grouping together behaviors just 
smooths their features.  Separating them offers an opportunity to identify 
anomolies, and anomolies are opportunities for better memory management.

Scott
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.0.4 (GNU/Linux)
Comment: For info see http://www.gnupg.org

iD8DBQE7OJX18eFdWQtoOmgRAmniAKCTFGVJmgMOXJWiHfA+UxVUiT37zQCfZywy
bRYZKRymeXfjhh6wX2SZb6I=
=5TTZ
-----END PGP SIGNATURE-----
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: VM tuning through fault trace gathering [with actual code]
  2001-06-26 13:52     ` Marcelo Tosatti
@ 2001-06-26 15:38       ` John Fremlin
  2001-06-27 10:09         ` Marcelo Tosatti
  0 siblings, 1 reply; 13+ messages in thread
From: John Fremlin @ 2001-06-26 15:38 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: linux-mm, linux-kernel

Marcelo Tosatti <marcelo@conectiva.com.br> writes:

> ####################################################################
> Event     	          Time                   PID     Length Description
> ####################################################################
> 
> Trap entry              991,299,585,597,016     678     12      TRAP: page fault; EIP : 0x40067785

That looks like just the generic interrupt handling. It does not do
what I want to do, i.e. record some more info about the fault saying
where it comes from.

-- 

	http://ape.n3.net
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: VM tuning through fault trace gathering [with actual code]
  2001-06-26 14:02     ` Scott F. Kaplan
@ 2001-06-26 19:29       ` John Fremlin
  0 siblings, 0 replies; 13+ messages in thread
From: John Fremlin @ 2001-06-26 19:29 UTC (permalink / raw)
  To: Scott F; +Cc: linux-mm

Hi people!

I just sent an updated version of the patch to Scott, which faults on
almost every mem access. Unfortunately that slows the system to a
crawl (doh), in fact so much of a crawl that nothing much
happens. Anybody have a turbofast P4/Athlon they want to lend or send
me ;-)

Scott F. Kaplan <sfkaplan@cs.amherst.edu> writes:

[...]

> Not to look a gift horse in the mouth, but the ability to trace
> selectively either the whole system OR an individual application
> would be useful.  Certainly whole system traces would be new, as
> individual process traces can be gathered with other tools (although
> I don't know of one available on Linux -- I'm stuck using ATOM under
> Alpha/Tru64.)

That looks like a very cool package (AFAICS it instruments the binary
to call a subroutine before every memory access).

The pagetrace patch has a slightly different goal however. The alpha
people seemed to want to tune their cache behaviour whereas I want to
tune the VM behaviour.

> > In the current patch all pagefaults are recorded from all
> > sources. I'd like to be able to catch read(2) and write(2) (buffer
> > cache stuff) as well but I don't know how . . . .
> 
> Also a great idea.  Someone who works on the filesystem end of the
> kernel should be able to add support for this kind of thing without
> much trouble, don't you think?

I'd really like a clue or two in this direction, certainly, because it's
difficult to simulate the VM if you don't know how big e.g. the
directory dcache is.

> > Of course! It is important not to regard each thread group as an
> > independent entity IMHO (had a big old argument about this).
> 
> Yes, I was the other side of that argument! :-)  I'll still contend that, 

Hehe. Let's not go into that right now ;-)

[...]

--
	http://ape.n3.net
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: VM tuning through fault trace gathering [with actual code]
  2001-06-26 15:38       ` John Fremlin
@ 2001-06-27 10:09         ` Marcelo Tosatti
  2001-06-27 12:47           ` Scott F. Kaplan
  0 siblings, 1 reply; 13+ messages in thread
From: Marcelo Tosatti @ 2001-06-27 10:09 UTC (permalink / raw)
  To: John Fremlin; +Cc: linux-mm, lkml


On 26 Jun 2001, John Fremlin wrote:

> Marcelo Tosatti <marcelo@conectiva.com.br> writes:
> 
> > ####################################################################
> > Event     	          Time                   PID     Length Description
> > ####################################################################
> > 
> > Trap entry              991,299,585,597,016     678     12      TRAP: page fault; EIP : 0x40067785
> 
> That looks like just the generic interrupt handling. It does not do
> what I want to do, i.e. record some more info about the fault saying
> where it comes from.

You can create custom events with LTT and then you can get them from a
"big buffer" to userlevel later, then. 

I just told you about LTT because I think you are redoing work by creating
the tracing facilities... 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: VM tuning through fault trace gathering [with actual code]
  2001-06-27 10:09         ` Marcelo Tosatti
@ 2001-06-27 12:47           ` Scott F. Kaplan
  2001-06-27 13:51             ` Marcelo Tosatti
  0 siblings, 1 reply; 13+ messages in thread
From: Scott F. Kaplan @ 2001-06-27 12:47 UTC (permalink / raw)
  To: linux-mm

-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

On Wednesday 27 June 2001 06:09 am, Marcelo Tosatti wrote:
> On 26 Jun 2001, John Fremlin wrote:
> > Marcelo Tosatti <marcelo@conectiva.com.br> writes:
> > > ####################################################################
> > > Event     	          Time                   PID     Length Description
> > > ####################################################################
> > >
> > > Trap entry              991,299,585,597,016     678     12      TRAP:
> > > page fault; EIP : 0x40067785
> >
> > That looks like just the generic interrupt handling. It does not do
> > what I want to do, i.e. record some more info about the fault saying
> > where it comes from.
>
> You can create custom events with LTT and then you can get them from a
> "big buffer" to userlevel later, then.

I guess that i have a different concern with this existing utility.  It seems 
that it will report page faults (minor or major) for the normal VM system 
configuration.  What if we want it to record all (or nearly) all page 
references, even ones to pages that *normally* wouldn't cause any kind of 
interrupt?  That ability seems new and unique to John's utility.

(I know, I need to read the LLT manual, as it may be able to do exactly what 
I'm describing.  However, I don't think that's the case.)

Scott
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.0.4 (GNU/Linux)
Comment: For info see http://www.gnupg.org

iD8DBQE7OdX28eFdWQtoOmgRAlhHAKCFHjgw62OlQmytkRiY+Zl9xaMz7gCfXSmm
mNsg0QUAwAhJnhwrL088IwI=
=CC+e
-----END PGP SIGNATURE-----
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: VM tuning through fault trace gathering [with actual code]
  2001-06-27 12:47           ` Scott F. Kaplan
@ 2001-06-27 13:51             ` Marcelo Tosatti
  2001-06-27 16:05               ` John Fremlin
  0 siblings, 1 reply; 13+ messages in thread
From: Marcelo Tosatti @ 2001-06-27 13:51 UTC (permalink / raw)
  To: Scott F.Kaplan; +Cc: linux-mm


On Wed, 27 Jun 2001, Scott F.Kaplan wrote:

> -----BEGIN PGP SIGNED MESSAGE-----
> Hash: SHA1
> 
> On Wednesday 27 June 2001 06:09 am, Marcelo Tosatti wrote:
> > On 26 Jun 2001, John Fremlin wrote:
> > > Marcelo Tosatti <marcelo@conectiva.com.br> writes:
> > > > ####################################################################
> > > > Event     	          Time                   PID     Length Description
> > > > ####################################################################
> > > >
> > > > Trap entry              991,299,585,597,016     678     12      TRAP:
> > > > page fault; EIP : 0x40067785
> > >
> > > That looks like just the generic interrupt handling. It does not do
> > > what I want to do, i.e. record some more info about the fault saying
> > > where it comes from.
> >
> > You can create custom events with LTT and then you can get them from a
> > "big buffer" to userlevel later, then.
> 
> I guess that i have a different concern with this existing utility.  It seems 
> that it will report page faults (minor or major) for the normal VM system 
> configuration.  What if we want it to record all (or nearly) all page 
> references, even ones to pages that *normally* wouldn't cause any kind of 
> interrupt?  That ability seems new and unique to John's utility.
> 
> (I know, I need to read the LLT manual, as it may be able to do exactly what 
> I'm describing.  However, I don't think that's the case.)

You are right here. 

But anyway, I think John can do what he wants without writing a whole
new tracing facility. 

IMHO. 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/

^ permalink raw reply	[flat|nested] 13+ messages in thread

* Re: VM tuning through fault trace gathering [with actual code]
  2001-06-27 13:51             ` Marcelo Tosatti
@ 2001-06-27 16:05               ` John Fremlin
  0 siblings, 0 replies; 13+ messages in thread
From: John Fremlin @ 2001-06-27 16:05 UTC (permalink / raw)
  To: Marcelo Tosatti; +Cc: linux-mm

[...]

> > (I know, I need to read the LLT manual, as it may be able to do
> > exactly what I'm describing.  However, I don't think that's the
> > case.)
> 
> You are right here. 
> 
> > But anyway, I think John can do what he wants without writing a
> whole new tracing facility.

That is unfortunate because the tracing facility is already written.
Oh well. I guess I should have asked again before restarting work on
it a day or so ago. At least it didn't take very long so not too many
man-hours wasted ;-)

-- 

	http://ape.n3.net
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/

^ permalink raw reply	[flat|nested] 13+ messages in thread

end of thread, other threads:[~2001-06-27 16:05 UTC | newest]

Thread overview: 13+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2001-06-25 15:26 VM tuning through fault trace gathering [with actual code] John Fremlin
2001-06-25 17:57 ` Rik van Riel
2001-06-25 21:15   ` John Fremlin
2001-06-26 14:02     ` Scott F. Kaplan
2001-06-26 19:29       ` John Fremlin
2001-06-26  0:53 ` Marcelo Tosatti
2001-06-26 12:54   ` John Fremlin
2001-06-26 13:52     ` Marcelo Tosatti
2001-06-26 15:38       ` John Fremlin
2001-06-27 10:09         ` Marcelo Tosatti
2001-06-27 12:47           ` Scott F. Kaplan
2001-06-27 13:51             ` Marcelo Tosatti
2001-06-27 16:05               ` John Fremlin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox