* Light weight counter 1/1 Framework
@ 2006-06-09 19:18 Christoph Lameter
2006-06-09 19:19 ` Light weight counter 2/2 counter conversion Christoph Lameter
2006-06-09 21:33 ` Light weight counter 1/1 Framework Andrew Morton
0 siblings, 2 replies; 6+ messages in thread
From: Christoph Lameter @ 2006-06-09 19:18 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel, linux-mm, npiggin, ak, hugh
The counters remaining in page_state after the zoned VM counter patch has been
applied exist only for display in /proc/vmstat. They have no essential function
for the VM itself, so we can make them lightweight by keeping them per cpu and
tolerating races. That lets us avoid disabling interrupts when updating these
counters.
The patchset also adds an off switch for embedded systems that allows building
kernels without these counters.
This may be one case where Andi's local_t could come into play to avoid
per-cpu races on x86_64 and i386 in a lightweight way. However, on all
other platforms we would not want the fallback to an atomic type.
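As a rough sketch of that idea (hypothetical names, assuming the asm/local.h
interface), a local_t based event counter could look like this:

	#include <linux/percpu.h>
	#include <asm/local.h>

	static DEFINE_PER_CPU(local_t, example_event);

	static inline void count_example_event(void)
	{
		/* On i386/x86_64 local_inc() compiles to a single
		 * incl/incq of a per cpu location. The generic
		 * fallback is an atomic_long_t operation, which is
		 * what we would want to avoid on other platforms. */
		local_inc(&__get_cpu_var(example_event));
	}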
These counters are implemented through inline code that typically results in
a simple increment of a global memory location.
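For example, callers end up with code like this (taken from the conversion
patch that follows):

	count_vm_event(PGFAULT);			/* one event */
	count_vm_events(PGFREE, 1 << order);		/* add a delta */
	count_zone_vm_events(PGALLOC, zone, 1 << order);	/* per zone item */

Each of these is a plain per cpu increment without locking or interrupt
disabling.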
Also
- Rename struct page_state to struct vm_event_state.
- Make the event state an array indexed by the event item, like the zoned
counters.
Caveat: if user space tools rely on these counters being accurate on SMP and
UP then this won't work.
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Index: linux-2.6.17-rc6-mm1/init/Kconfig
===================================================================
--- linux-2.6.17-rc6-mm1.orig/init/Kconfig 2006-06-08 15:20:11.226963282 -0700
+++ linux-2.6.17-rc6-mm1/init/Kconfig 2006-06-09 12:07:56.893459696 -0700
@@ -446,6 +446,15 @@ config SLOB
default !SLAB
bool
+config VM_EVENT_COUNTERS
+ default y
+ bool "Enable event counters for /proc/vmstat" if EMBEDDED
+ help
+ Event counters are only needed to display statistics. They
+ have no function for the kernel itself. This option allows
+ disabling the event counters; /proc/vmstat will then only
+ contain essential counters.
+
menu "Loadable module support"
config MODULES
Index: linux-2.6.17-rc6-mm1/mm/page_alloc.c
===================================================================
--- linux-2.6.17-rc6-mm1.orig/mm/page_alloc.c 2006-06-09 12:00:00.272620254 -0700
+++ linux-2.6.17-rc6-mm1/mm/page_alloc.c 2006-06-09 12:07:56.895412700 -0700
@@ -1582,79 +1582,50 @@ static void show_node(struct zone *zone)
#define show_node(zone) do { } while (0)
#endif
-/*
- * Accumulate the page_state information across all CPUs.
- * The result is unavoidably approximate - it can change
- * during and after execution of this function.
- */
-static DEFINE_PER_CPU(struct page_state, page_states) = {0};
+#ifdef CONFIG_VM_EVENT_COUNTERS
+DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
-static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask)
+void sum_vm_events(unsigned long *ret, cpumask_t *cpumask)
{
- unsigned cpu;
+ int cpu = 0;
+ int i;
- memset(ret, 0, nr * sizeof(unsigned long));
- cpus_and(*cpumask, *cpumask, cpu_online_map);
+ memset(ret, 0, NR_VM_EVENT_ITEMS * sizeof(unsigned long));
- for_each_cpu_mask(cpu, *cpumask) {
- unsigned long *in;
- unsigned long *out;
- unsigned off;
- unsigned next_cpu;
+ cpu = first_cpu(*cpumask);
+ while (cpu < NR_CPUS) {
+ struct vm_event_state *this = &per_cpu(vm_event_states, cpu);
- in = (unsigned long *)&per_cpu(page_states, cpu);
+ cpu = next_cpu(cpu, *cpumask);
- next_cpu = next_cpu(cpu, *cpumask);
- if (likely(next_cpu < NR_CPUS))
- prefetch(&per_cpu(page_states, next_cpu));
+ if (cpu < NR_CPUS)
+ prefetch(&per_cpu(vm_event_states, cpu));
- out = (unsigned long *)ret;
- for (off = 0; off < nr; off++)
- *out++ += *in++;
+
+ for (i = 0; i < NR_VM_EVENT_ITEMS; i++)
+ ret[i] += this->event[i];
}
}
+EXPORT_SYMBOL(sum_vm_events);
-void get_full_page_state(struct page_state *ret)
+void all_vm_events(unsigned long *ret)
{
- cpumask_t mask = CPU_MASK_ALL;
-
- __get_page_state(ret, sizeof(*ret) / sizeof(unsigned long), &mask);
+ sum_vm_events(ret, &cpu_online_map);
}
+EXPORT_SYMBOL(all_vm_events);
-unsigned long read_page_state_offset(unsigned long offset)
+unsigned long get_global_vm_events(enum vm_event_item e)
{
unsigned long ret = 0;
int cpu;
- for_each_online_cpu(cpu) {
- unsigned long in;
+ for_each_possible_cpu(cpu)
+ ret += per_cpu(vm_event_states, cpu).event[e];
- in = (unsigned long)&per_cpu(page_states, cpu) + offset;
- ret += *((unsigned long *)in);
- }
return ret;
}
-
-void __mod_page_state_offset(unsigned long offset, unsigned long delta)
-{
- void *ptr;
-
- ptr = &__get_cpu_var(page_states);
- *(unsigned long *)(ptr + offset) += delta;
-}
-EXPORT_SYMBOL(__mod_page_state_offset);
-
-void mod_page_state_offset(unsigned long offset, unsigned long delta)
-{
- unsigned long flags;
- void *ptr;
-
- local_irq_save(flags);
- ptr = &__get_cpu_var(page_states);
- *(unsigned long *)(ptr + offset) += delta;
- local_irq_restore(flags);
-}
-EXPORT_SYMBOL(mod_page_state_offset);
+EXPORT_SYMBOL(get_global_vm_events);
+#endif
void __get_zone_counts(unsigned long *active, unsigned long *inactive,
unsigned long *free, struct pglist_data *pgdat)
@@ -2792,7 +2763,7 @@ static char *vmstat_text[] = {
"nr_unstable",
"nr_bounce",
- /* Page state */
+#ifdef CONFIG_VM_EVENT_COUNTERS
"pgpgin",
"pgpgout",
"pswpin",
@@ -2838,28 +2809,36 @@ static char *vmstat_text[] = {
"allocstall",
"pgrotated"
+#endif
};
static void *vmstat_start(struct seq_file *m, loff_t *pos)
{
unsigned long *v;
- struct page_state *ps;
+#ifdef CONFIG_VM_EVENT_COUNTERS
+ unsigned long *e;
+#endif
int i;
if (*pos >= ARRAY_SIZE(vmstat_text))
return NULL;
-
+#ifdef CONFIG_VM_EVENT_COUNTERS
v = kmalloc(NR_STAT_ITEMS * sizeof(unsigned long)
- + sizeof(struct page_state), GFP_KERNEL);
+ + sizeof(struct vm_event_state), GFP_KERNEL);
+#else
+ v = kmalloc(NR_STAT_ITEMS * sizeof(unsigned long), GFP_KERNEL);
+#endif
m->private = v;
if (!v)
return ERR_PTR(-ENOMEM);
for (i = 0; i < NR_STAT_ITEMS; i++)
v[i] = global_page_state(i);
- ps = (struct page_state *)(v + NR_STAT_ITEMS);
- get_full_page_state(ps);
- ps->pgpgin /= 2; /* sectors -> kbytes */
- ps->pgpgout /= 2;
+#ifdef CONFIG_VM_EVENT_COUNTERS
+ e = v + NR_STAT_ITEMS;
+ all_vm_events(e);
+ e[PGPGIN] /= 2; /* sectors -> kbytes */
+ e[PGPGOUT] /= 2;
+#endif
return v + *pos;
}
Index: linux-2.6.17-rc6-mm1/include/linux/page-flags.h
===================================================================
--- linux-2.6.17-rc6-mm1.orig/include/linux/page-flags.h 2006-06-09 11:47:36.275737668 -0700
+++ linux-2.6.17-rc6-mm1/include/linux/page-flags.h 2006-06-09 12:07:56.896389202 -0700
@@ -104,121 +104,70 @@
#define PG_uncached 31 /* Page has been mapped as uncached */
#endif
+#ifdef CONFIG_VM_EVENT_COUNTERS
/*
- * Global page accounting. One instance per CPU. Only unsigned longs are
- * allowed.
+ * Light weight per cpu counter implementation.
*
- * - Fields can be modified with xxx_page_state and xxx_page_state_zone at
- * any time safely (which protects the instance from modification by
- * interrupt.
- * - The __xxx_page_state variants can be used safely when interrupts are
- * disabled.
- * - The __xxx_page_state variants can be used if the field is only
- * modified from process context and protected from preemption, or only
- * modified from interrupt context. In this case, the field should be
- * commented here.
+ * Note that these can race. We do not bother to disable preemption
+ * or care about interrupt races. All we care about is to have some
+ * approximate count of events.
+ *
+ * Counters should only be incremented and no critical kernel component
+ * should rely on the counter values.
+ *
+ * Counters are handled completely inline. On many platforms the code
+ * generated will simply be the increment of a global address.
*/
-struct page_state {
- /*
- * The below are zeroed by get_page_state(). Use get_full_page_state()
- * to add up all these.
- */
- unsigned long pgpgin; /* Disk reads */
- unsigned long pgpgout; /* Disk writes */
- unsigned long pswpin; /* swap reads */
- unsigned long pswpout; /* swap writes */
-
- unsigned long pgalloc_high; /* page allocations */
- unsigned long pgalloc_normal;
- unsigned long pgalloc_dma32;
- unsigned long pgalloc_dma;
-
- unsigned long pgfree; /* page freeings */
- unsigned long pgactivate; /* pages moved inactive->active */
- unsigned long pgdeactivate; /* pages moved active->inactive */
-
- unsigned long pgfault; /* faults (major+minor) */
- unsigned long pgmajfault; /* faults (major only) */
-
- unsigned long pgrefill_high; /* inspected in refill_inactive_zone */
- unsigned long pgrefill_normal;
- unsigned long pgrefill_dma32;
- unsigned long pgrefill_dma;
-
- unsigned long pgsteal_high; /* total highmem pages reclaimed */
- unsigned long pgsteal_normal;
- unsigned long pgsteal_dma32;
- unsigned long pgsteal_dma;
-
- unsigned long pgscan_kswapd_high;/* total highmem pages scanned */
- unsigned long pgscan_kswapd_normal;
- unsigned long pgscan_kswapd_dma32;
- unsigned long pgscan_kswapd_dma;
-
- unsigned long pgscan_direct_high;/* total highmem pages scanned */
- unsigned long pgscan_direct_normal;
- unsigned long pgscan_direct_dma32;
- unsigned long pgscan_direct_dma;
-
- unsigned long pginodesteal; /* pages reclaimed via inode freeing */
- unsigned long slabs_scanned; /* slab objects scanned */
- unsigned long kswapd_steal; /* pages reclaimed by kswapd */
- unsigned long kswapd_inodesteal;/* reclaimed via kswapd inode freeing */
- unsigned long pageoutrun; /* kswapd's calls to page reclaim */
- unsigned long allocstall; /* direct reclaim calls */
+#define FOR_ALL_ZONES(x) x##_DMA, x##_DMA32, x##_NORMAL, x##_HIGH
- unsigned long pgrotated; /* pages rotated to tail of the LRU */
+enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,
+ FOR_ALL_ZONES(PGALLOC),
+ PGFREE, PGACTIVATE, PGDEACTIVATE,
+ PGFAULT, PGMAJFAULT,
+ FOR_ALL_ZONES(PGREFILL),
+ FOR_ALL_ZONES(PGSTEAL),
+ FOR_ALL_ZONES(PGSCAN_KSWAPD),
+ FOR_ALL_ZONES(PGSCAN_DIRECT),
+ PGINODESTEAL, SLABS_SCANNED, KSWAPD_STEAL, KSWAPD_INODESTEAL,
+ PAGEOUTRUN, ALLOCSTALL, PGROTATED,
+ NR_VM_EVENT_ITEMS
};
-extern void get_full_page_state(struct page_state *ret);
-extern unsigned long read_page_state_offset(unsigned long offset);
-extern void mod_page_state_offset(unsigned long offset, unsigned long delta);
-extern void __mod_page_state_offset(unsigned long offset, unsigned long delta);
-
-#define read_page_state(member) \
- read_page_state_offset(offsetof(struct page_state, member))
-
-#define mod_page_state(member, delta) \
- mod_page_state_offset(offsetof(struct page_state, member), (delta))
-
-#define __mod_page_state(member, delta) \
- __mod_page_state_offset(offsetof(struct page_state, member), (delta))
-
-#define inc_page_state(member) mod_page_state(member, 1UL)
-#define dec_page_state(member) mod_page_state(member, 0UL - 1)
-#define add_page_state(member,delta) mod_page_state(member, (delta))
-#define sub_page_state(member,delta) mod_page_state(member, 0UL - (delta))
-
-#define __inc_page_state(member) __mod_page_state(member, 1UL)
-#define __dec_page_state(member) __mod_page_state(member, 0UL - 1)
-#define __add_page_state(member,delta) __mod_page_state(member, (delta))
-#define __sub_page_state(member,delta) __mod_page_state(member, 0UL - (delta))
-
-#define page_state(member) (*__page_state(offsetof(struct page_state, member)))
-
-#define state_zone_offset(zone, member) \
-({ \
- unsigned offset; \
- if (is_highmem(zone)) \
- offset = offsetof(struct page_state, member##_high); \
- else if (is_normal(zone)) \
- offset = offsetof(struct page_state, member##_normal); \
- else if (is_dma32(zone)) \
- offset = offsetof(struct page_state, member##_dma32); \
- else \
- offset = offsetof(struct page_state, member##_dma); \
- offset; \
-})
-
-#define __mod_page_state_zone(zone, member, delta) \
- do { \
- __mod_page_state_offset(state_zone_offset(zone, member), (delta)); \
- } while (0)
-
-#define mod_page_state_zone(zone, member, delta) \
- do { \
- mod_page_state_offset(state_zone_offset(zone, member), (delta)); \
- } while (0)
+struct vm_event_state {
+ unsigned long event[NR_VM_EVENT_ITEMS];
+};
+
+DECLARE_PER_CPU(struct vm_event_state, vm_event_states);
+
+extern unsigned long get_global_vm_events(enum vm_event_item e);
+extern void sum_vm_events(unsigned long *r, cpumask_t *cpumask);
+extern void all_vm_events(unsigned long *r);
+
+static inline unsigned long get_cpu_vm_events(enum vm_event_item item)
+{
+ return __get_cpu_var(vm_event_states).event[item];
+}
+
+static inline void count_vm_event(enum vm_event_item item)
+{
+ __get_cpu_var(vm_event_states).event[item]++;
+}
+
+static inline void count_vm_events(enum vm_event_item item, long delta)
+{
+ __get_cpu_var(vm_event_states).event[item] += delta;
+}
+
+#else
+/* Disable counters */
+#define get_cpu_vm_events(e) 0L
+#define get_global_vm_events(e) 0L
+#define count_vm_event(e) do { } while (0)
+#define count_vm_events(e,d) do { } while (0)
+#endif
+
+#define count_zone_vm_event(item, zone) count_vm_event(item##_DMA + zone_idx(zone))
+#define count_zone_vm_events(item, zone, delta) count_vm_events(item##_DMA + zone_idx(zone), delta)
/*
* Zone based accounting with per cpu differentials.
--
* Light weight counter 2/2 counter conversion
2006-06-09 19:18 Light weight counter 1/1 Framework Christoph Lameter
@ 2006-06-09 19:19 ` Christoph Lameter
2006-06-09 21:33 ` Light weight counter 1/1 Framework Andrew Morton
1 sibling, 0 replies; 6+ messages in thread
From: Christoph Lameter @ 2006-06-09 19:19 UTC (permalink / raw)
To: akpm; +Cc: linux-kernel, linux-mm, npiggin, ak, hugh
Convert inc/add page_state to count_vm_event(s)
Convert the page_state operations to count_vm_event(), count_vm_events() and
count_zone_vm_events().
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Index: linux-2.6.17-rc6-mm1/mm/page_alloc.c
===================================================================
--- linux-2.6.17-rc6-mm1.orig/mm/page_alloc.c 2006-06-09 12:07:56.895412700 -0700
+++ linux-2.6.17-rc6-mm1/mm/page_alloc.c 2006-06-09 12:09:10.642793011 -0700
@@ -471,7 +471,7 @@ static void __free_pages_ok(struct page
kernel_map_pages(page, 1 << order, 0);
local_irq_save(flags);
- __mod_page_state(pgfree, 1 << order);
+ count_vm_events(PGFREE, 1 << order);
free_one_page(page_zone(page), page, order);
local_irq_restore(flags);
}
@@ -1042,7 +1042,7 @@ static void fastcall free_hot_cold_page(
pcp = &zone_pcp(zone, get_cpu())->pcp[cold];
local_irq_save(flags);
- __inc_page_state(pgfree);
+ count_vm_event(PGFREE);
list_add(&page->lru, &pcp->list);
pcp->count++;
if (pcp->count >= pcp->high) {
@@ -1118,7 +1118,7 @@ again:
goto failed;
}
- __mod_page_state_zone(zone, pgalloc, 1 << order);
+ count_zone_vm_events(PGALLOC, zone, 1 << order);
zone_statistics(zonelist, zone, cpu);
local_irq_restore(flags);
put_cpu();
Index: linux-2.6.17-rc6-mm1/mm/vmscan.c
===================================================================
--- linux-2.6.17-rc6-mm1.orig/mm/vmscan.c 2006-06-09 11:30:34.159368970 -0700
+++ linux-2.6.17-rc6-mm1/mm/vmscan.c 2006-06-09 12:09:10.644746015 -0700
@@ -215,7 +215,7 @@ unsigned long shrink_slab(unsigned long
(nr_before - shrink_ret));
}
shrinker_stat_add(shrinker, nr_req, this_scan);
- mod_page_state(slabs_scanned, this_scan);
+ count_vm_events(SLABS_SCANNED, this_scan);
total_scan -= this_scan;
cond_resched();
@@ -573,7 +573,7 @@ keep:
list_splice(&ret_pages, page_list);
if (pagevec_count(&freed_pvec))
__pagevec_release_nonlru(&freed_pvec);
- mod_page_state(pgactivate, pgactivate);
+ count_vm_events(PGACTIVATE, pgactivate);
return nr_reclaimed;
}
@@ -664,11 +664,11 @@ static unsigned long shrink_inactive_lis
nr_reclaimed += nr_freed;
local_irq_disable();
if (current_is_kswapd()) {
- __mod_page_state_zone(zone, pgscan_kswapd, nr_scan);
- __mod_page_state(kswapd_steal, nr_freed);
+ count_zone_vm_events(PGSCAN_KSWAPD, zone, nr_scan);
+ count_vm_events(KSWAPD_STEAL, nr_freed);
} else
- __mod_page_state_zone(zone, pgscan_direct, nr_scan);
- __mod_page_state_zone(zone, pgsteal, nr_freed);
+ count_zone_vm_events(PGSCAN_DIRECT, zone, nr_scan);
+ count_zone_vm_events(PGSTEAL, zone, nr_freed);
if (nr_taken == 0)
goto done;
@@ -845,11 +845,10 @@ static void shrink_active_list(unsigned
}
}
zone->nr_active += pgmoved;
- spin_unlock(&zone->lru_lock);
+ spin_unlock_irq(&zone->lru_lock);
- __mod_page_state_zone(zone, pgrefill, pgscanned);
- __mod_page_state(pgdeactivate, pgdeactivate);
- local_irq_enable();
+ count_zone_vm_events(PGREFILL, zone, pgscanned);
+ count_vm_events(PGDEACTIVATE, pgdeactivate);
pagevec_release(&pvec);
}
@@ -983,7 +982,7 @@ unsigned long try_to_free_pages(struct z
delay_swap_prefetch();
- inc_page_state(allocstall);
+ count_vm_event(ALLOCSTALL);
for (i = 0; zones[i] != NULL; i++) {
struct zone *zone = zones[i];
@@ -1081,7 +1080,7 @@ loop_again:
nr_reclaimed = 0;
sc.may_writepage = !laptop_mode,
- inc_page_state(pageoutrun);
+ count_vm_event(PAGEOUTRUN);
for (i = 0; i < pgdat->nr_zones; i++) {
struct zone *zone = pgdat->node_zones + i;
Index: linux-2.6.17-rc6-mm1/block/ll_rw_blk.c
===================================================================
--- linux-2.6.17-rc6-mm1.orig/block/ll_rw_blk.c 2006-06-08 15:20:04.945125478 -0700
+++ linux-2.6.17-rc6-mm1/block/ll_rw_blk.c 2006-06-09 12:09:10.646699019 -0700
@@ -3168,9 +3168,9 @@ void submit_bio(int rw, struct bio *bio)
BIO_BUG_ON(!bio->bi_io_vec);
bio->bi_rw |= rw;
if (rw & WRITE)
- mod_page_state(pgpgout, count);
+ count_vm_events(PGPGOUT, count);
else
- mod_page_state(pgpgin, count);
+ count_vm_events(PGPGIN, count);
if (unlikely(block_dump)) {
char b[BDEVNAME_SIZE];
Index: linux-2.6.17-rc6-mm1/mm/page_io.c
===================================================================
--- linux-2.6.17-rc6-mm1.orig/mm/page_io.c 2006-06-05 17:57:02.000000000 -0700
+++ linux-2.6.17-rc6-mm1/mm/page_io.c 2006-06-09 12:09:10.646699019 -0700
@@ -101,7 +101,7 @@ int swap_writepage(struct page *page, st
}
if (wbc->sync_mode == WB_SYNC_ALL)
rw |= (1 << BIO_RW_SYNC);
- inc_page_state(pswpout);
+ count_vm_event(PSWPOUT);
set_page_writeback(page);
unlock_page(page);
submit_bio(rw, bio);
@@ -123,7 +123,7 @@ int swap_readpage(struct file *file, str
ret = -ENOMEM;
goto out;
}
- inc_page_state(pswpin);
+ count_vm_event(PSWPIN);
submit_bio(READ, bio);
out:
return ret;
Index: linux-2.6.17-rc6-mm1/mm/memory.c
===================================================================
--- linux-2.6.17-rc6-mm1.orig/mm/memory.c 2006-06-09 10:30:52.091239575 -0700
+++ linux-2.6.17-rc6-mm1/mm/memory.c 2006-06-09 12:09:10.648652023 -0700
@@ -1953,7 +1953,7 @@ static int do_swap_page(struct mm_struct
/* Had to read the page from swap area: Major fault */
ret = VM_FAULT_MAJOR;
- inc_page_state(pgmajfault);
+ count_vm_event(PGMAJFAULT);
grab_swap_token();
}
@@ -2327,7 +2327,7 @@ int __handle_mm_fault(struct mm_struct *
__set_current_state(TASK_RUNNING);
- inc_page_state(pgfault);
+ count_vm_event(PGFAULT);
if (unlikely(is_vm_hugetlb_page(vma)))
return hugetlb_fault(mm, vma, address, write_access);
Index: linux-2.6.17-rc6-mm1/fs/inode.c
===================================================================
--- linux-2.6.17-rc6-mm1.orig/fs/inode.c 2006-06-08 15:20:08.147075759 -0700
+++ linux-2.6.17-rc6-mm1/fs/inode.c 2006-06-09 12:09:10.649628524 -0700
@@ -458,9 +458,9 @@ static void prune_icache(int nr_to_scan)
mutex_unlock(&iprune_mutex);
if (current_is_kswapd())
- mod_page_state(kswapd_inodesteal, reap);
+ count_vm_events(KSWAPD_INODESTEAL, reap);
else
- mod_page_state(pginodesteal, reap);
+ count_vm_events(PGINODESTEAL, reap);
}
/*
Index: linux-2.6.17-rc6-mm1/mm/shmem.c
===================================================================
--- linux-2.6.17-rc6-mm1.orig/mm/shmem.c 2006-06-08 15:20:11.581433533 -0700
+++ linux-2.6.17-rc6-mm1/mm/shmem.c 2006-06-09 12:09:10.650605026 -0700
@@ -1049,7 +1049,7 @@ repeat:
spin_unlock(&info->lock);
/* here we actually do the io */
if (type && *type == VM_FAULT_MINOR) {
- inc_page_state(pgmajfault);
+ count_vm_event(PGMAJFAULT);
*type = VM_FAULT_MAJOR;
}
swappage = shmem_swapin(info, swap, idx);
Index: linux-2.6.17-rc6-mm1/mm/swap.c
===================================================================
--- linux-2.6.17-rc6-mm1.orig/mm/swap.c 2006-06-08 15:20:11.587292545 -0700
+++ linux-2.6.17-rc6-mm1/mm/swap.c 2006-06-09 12:09:10.651581528 -0700
@@ -88,7 +88,7 @@ int rotate_reclaimable_page(struct page
spin_lock_irqsave(&zone->lru_lock, flags);
if (PageLRU(page) && !PageActive(page)) {
list_move_tail(&page->lru, &zone->inactive_list);
- inc_page_state(pgrotated);
+ count_vm_event(PGROTATED);
}
if (!test_clear_page_writeback(page))
BUG();
@@ -108,7 +108,7 @@ void fastcall activate_page(struct page
del_page_from_inactive_list(zone, page);
SetPageActive(page);
add_page_to_active_list(zone, page);
- inc_page_state(pgactivate);
+ count_vm_event(PGACTIVATE);
}
spin_unlock_irq(&zone->lru_lock);
}
Index: linux-2.6.17-rc6-mm1/mm/filemap.c
===================================================================
--- linux-2.6.17-rc6-mm1.orig/mm/filemap.c 2006-06-09 10:30:51.850043561 -0700
+++ linux-2.6.17-rc6-mm1/mm/filemap.c 2006-06-09 12:09:10.652558030 -0700
@@ -1411,7 +1411,7 @@ retry_find:
*/
if (!did_readaround) {
majmin = VM_FAULT_MAJOR;
- inc_page_state(pgmajfault);
+ count_vm_event(PGMAJFAULT);
}
did_readaround = 1;
ra_pages = max_sane_readahead(file->f_ra.ra_pages);
@@ -1494,7 +1494,7 @@ no_cached_page:
page_not_uptodate:
if (!did_readaround) {
majmin = VM_FAULT_MAJOR;
- inc_page_state(pgmajfault);
+ count_vm_event(PGMAJFAULT);
}
lock_page(page);
Index: linux-2.6.17-rc6-mm1/fs/ncpfs/mmap.c
===================================================================
--- linux-2.6.17-rc6-mm1.orig/fs/ncpfs/mmap.c 2006-06-05 17:57:02.000000000 -0700
+++ linux-2.6.17-rc6-mm1/fs/ncpfs/mmap.c 2006-06-09 12:09:10.653534532 -0700
@@ -93,7 +93,7 @@ static struct page* ncp_file_mmap_nopage
*/
if (type)
*type = VM_FAULT_MAJOR;
- inc_page_state(pgmajfault);
+ count_vm_event(PGMAJFAULT);
return page;
}
Index: linux-2.6.17-rc6-mm1/drivers/parisc/led.c
===================================================================
--- linux-2.6.17-rc6-mm1.orig/drivers/parisc/led.c 2006-06-08 15:20:06.690134674 -0700
+++ linux-2.6.17-rc6-mm1/drivers/parisc/led.c 2006-06-09 12:09:10.653534532 -0700
@@ -411,14 +411,12 @@ static __inline__ int led_get_net_activi
static __inline__ int led_get_diskio_activity(void)
{
static unsigned long last_pgpgin, last_pgpgout;
- struct page_state pgstat;
int changed;
- get_full_page_state(&pgstat); /* get no of sectors in & out */
-
/* Just use a very simple calculation here. Do not care about overflow,
since we only want to know if there was activity or not. */
- changed = (pgstat.pgpgin != last_pgpgin) || (pgstat.pgpgout != last_pgpgout);
+ changed = (get_global_vm_events(PGPGIN) != last_pgpgin) ||
+ (get_global_vm_events(PGPGOUT) != last_pgpgout);
- last_pgpgin = pgstat.pgpgin;
- last_pgpgout = pgstat.pgpgout;
+ last_pgpgin = get_global_vm_events(PGPGIN);
+ last_pgpgout = get_global_vm_events(PGPGOUT);
--
* Re: Light weight counter 1/1 Framework
2006-06-09 19:18 Light weight counter 1/1 Framework Christoph Lameter
2006-06-09 19:19 ` Light weight counter 2/2 counter conversion Christoph Lameter
@ 2006-06-09 21:33 ` Andrew Morton
2006-06-09 22:38 ` Christoph Lameter
1 sibling, 1 reply; 6+ messages in thread
From: Andrew Morton @ 2006-06-09 21:33 UTC (permalink / raw)
To: Christoph Lameter; +Cc: linux-kernel, linux-mm, npiggin, ak, hugh
Christoph Lameter <clameter@sgi.com> wrote:
>
> -/*
> - * Accumulate the page_state information across all CPUs.
> - * The result is unavoidably approximate - it can change
> - * during and after execution of this function.
> - */
sob. How about updating the nice comment rather than removing it?
>
> -void get_full_page_state(struct page_state *ret)
> +void all_vm_events(unsigned long *ret)
> {
> - cpumask_t mask = CPU_MASK_ALL;
> -
> - __get_page_state(ret, sizeof(*ret) / sizeof(unsigned long), &mask);
> + sum_vm_events(ret, &cpu_online_map);
> }
> +EXPORT_SYMBOL(all_vm_events);
>
> -unsigned long read_page_state_offset(unsigned long offset)
> +unsigned long get_global_vm_events(enum vm_event_item e)
> {
> unsigned long ret = 0;
> int cpu;
>
> - for_each_online_cpu(cpu) {
> - unsigned long in;
> + for_each_possible_cpu(cpu)
> + ret += per_cpu(vm_event_states, cpu).event[e];
>
> - in = (unsigned long)&per_cpu(page_states, cpu) + offset;
> - ret += *((unsigned long *)in);
> - }
> return ret;
> }
Here. Some description of the difference between these two, and why one
would call one and not the other.
I'd be rather interested in reading that comment because afaict,
get_global_vm_events() has no callers.
And nor should it, please. It has potential to be seriously inefficient.
Much, much better to kill this function and to implement a CPU hotplug
notifier to spill the going-away CPU's stats into another CPU's
accumulators.
--
* Re: Light weight counter 1/1 Framework
2006-06-09 21:33 ` Light weight counter 1/1 Framework Andrew Morton
@ 2006-06-09 22:38 ` Christoph Lameter
2006-06-09 23:07 ` Andrew Morton
2006-06-09 23:15 ` Andrew Morton
0 siblings, 2 replies; 6+ messages in thread
From: Christoph Lameter @ 2006-06-09 22:38 UTC (permalink / raw)
To: Andrew Morton; +Cc: linux-kernel, linux-mm, npiggin, ak, hugh
Eventcounter fixups
- Add a comment to all_vm_events()
- Remove get_global_vm_events()
- Fold a dead cpu's events into our own on CPU hotplug
- Remove useless exports
Signed-off-by: Christoph Lameter <clameter@sgi.com>
Index: linux-2.6.17-rc6-mm1/mm/page_alloc.c
===================================================================
--- linux-2.6.17-rc6-mm1.orig/mm/page_alloc.c 2006-06-09 15:14:44.173612828 -0700
+++ linux-2.6.17-rc6-mm1/mm/page_alloc.c 2006-06-09 15:35:21.719837630 -0700
@@ -1585,7 +1585,7 @@ static void show_node(struct zone *zone)
#ifdef CONFIG_VM_EVENT_COUNTERS
DEFINE_PER_CPU(struct vm_event_state, vm_event_states) = {{0}};
-void sum_vm_events(unsigned long *ret, cpumask_t *cpumask)
+static void sum_vm_events(unsigned long *ret, cpumask_t *cpumask)
{
int cpu = 0;
int i;
@@ -1606,25 +1606,16 @@ void sum_vm_events(unsigned long *ret, c
ret[i] += this->event[i];
}
}
-EXPORT_SYMBOL(sum_vm_events);
-void all_vm_events(unsigned long *ret)
+/*
+ * Accumulate the vm event counters across all CPUs.
+ * The result is unavoidably approximate - it can change
+ * during and after execution of this function.
+ */
+static void all_vm_events(unsigned long *ret)
{
sum_vm_events(ret, &cpu_online_map);
}
-EXPORT_SYMBOL(all_vm_events);
-
-unsigned long get_global_vm_events(enum vm_event_item e)
-{
- unsigned long ret = 0;
- int cpu;
-
- for_each_possible_cpu(cpu)
- ret += per_cpu(vm_event_states, cpu).event[e];
-
- return ret;
-}
-EXPORT_SYMBOL(get_global_vm_events);
#endif
void __get_zone_counts(unsigned long *active, unsigned long *inactive,
@@ -2875,6 +2866,25 @@ struct seq_operations vmstat_op = {
#endif /* CONFIG_PROC_FS */
#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * Fold the foreign cpu states into our own.
+ *
+ * This is a pretty inconsistent thing to do since
+ * the event array is meant to count the events occurring
+ * on each processor. But we did this in the past
+ * so I guess that we need to continue.
+ */
+static void vm_events_fold_cpu(int cpu)
+{
+ struct vm_event_state *fold_state = &per_cpu(vm_event_states, cpu);
+ int i;
+
+ for (i = 0; i < NR_VM_EVENT_ITEMS; i++) {
+ count_vm_events(i, fold_state->event[i]);
+ fold_state->event[i] = 0;
+ }
+}
+
static int page_alloc_cpu_notify(struct notifier_block *self,
unsigned long action, void *hcpu)
{
@@ -2886,17 +2896,7 @@ static int page_alloc_cpu_notify(struct
local_irq_disable();
__drain_pages(cpu);
-
- /* Add dead cpu's page_states to our own. */
- dest = (unsigned long *)&__get_cpu_var(page_states);
- src = (unsigned long *)&per_cpu(page_states, cpu);
-
- for (i = 0; i < sizeof(struct page_state)/sizeof(unsigned long);
- i++) {
- dest[i] += src[i];
- src[i] = 0;
- }
-
+ vm_events_fold_cpu(cpu);
local_irq_enable();
refresh_cpu_vm_stats(cpu);
}
Index: linux-2.6.17-rc6-mm1/include/linux/page-flags.h
===================================================================
--- linux-2.6.17-rc6-mm1.orig/include/linux/page-flags.h 2006-06-09 15:10:30.611239764 -0700
+++ linux-2.6.17-rc6-mm1/include/linux/page-flags.h 2006-06-09 15:35:41.847495238 -0700
@@ -139,10 +139,6 @@ struct vm_event_state {
DECLARE_PER_CPU(struct vm_event_state, vm_event_states);
-extern unsigned long get_global_vm_events(enum vm_event_item e);
-extern void sum_vm_events(unsigned long *r, cpumask_t *cpumask);
-extern void all_vm_events(unsigned long *r);
-
static inline unsigned long get_cpu_vm_events(enum vm_event_item item)
{
return __get_cpu_var(vm_event_states).event[item];
@@ -161,7 +157,6 @@ static inline void count_vm_events(enum
#else
/* Disable counters */
#define get_cpu_vm_events(e) 0L
-#define get_global_vm_events(e) 0L
#define count_vm_event(e) do { } while (0)
#define count_vm_events(e,d) do { } while (0)
#endif
--
* Re: Light weight counter 1/1 Framework
2006-06-09 22:38 ` Christoph Lameter
@ 2006-06-09 23:07 ` Andrew Morton
2006-06-09 23:15 ` Andrew Morton
1 sibling, 0 replies; 6+ messages in thread
From: Andrew Morton @ 2006-06-09 23:07 UTC (permalink / raw)
To: Christoph Lameter; +Cc: linux-kernel, linux-mm, npiggin, ak, hugh
Christoph Lameter <clameter@sgi.com> wrote:
>
> Eventcounter fixups
This is getting to be a bit of a pain. Could you please spend more time
reviewing and testing patches before sending them?
Says he, staring at this:
mm/page_alloc.c: In function 'page_alloc_cpu_notify':
mm/page_alloc.c:2891: error: 'per_cpu__page_states' undeclared (first use in this function)
--
* Re: Light weight counter 1/1 Framework
2006-06-09 22:38 ` Christoph Lameter
2006-06-09 23:07 ` Andrew Morton
@ 2006-06-09 23:15 ` Andrew Morton
1 sibling, 0 replies; 6+ messages in thread
From: Andrew Morton @ 2006-06-09 23:15 UTC (permalink / raw)
To: Christoph Lameter; +Cc: linux-kernel, linux-mm, npiggin, ak, hugh
Christoph Lameter <clameter@sgi.com> wrote:
>
> Eventcounter fixups
And the kernel still doesn't actually compile with this patch applied. You
need to also apply light-weight-counters-counter-conversion.patch to make
page_alloc.c compile. So either we break git-bisect or I fold two
inappropriate patches together or I need to patchwrangle it somehow.
<checks>
Yes, I need to fold them all together.
And fix the unused-variable warnings.
--