linux-mm.kvack.org archive mirror
From: Ruthiano Simioni Munaretti <ruthiano@exatas.unisinos.br>
To: linux-mm@kvack.org
Cc: sisopiii-l@cscience.org
Subject: Non-Contiguous Memory Allocation Tests
Date: Tue, 9 Dec 2003 11:11:21 -0200
Message-ID: <200312091111.21349.ruthiano@exatas.unisinos.br>

[-- Attachment #1: Type: text/plain, Size: 1062 bytes --]

Hi,

A colleague and I have been running some tests on the non-contiguous memory 
allocator. We implemented VGNCA, an improvement to that allocator.

In the current non-contiguous memory allocator, each physical page is 
allocated one at a time, through a call to alloc_page(). Each of these calls 
carries the overhead of disabling and re-enabling interrupts.

The main idea of VGNCA is to disable and re-enable interrupts only once per 
allocation, reducing this overhead. The VGNCA allocation/deallocation 
functions are also a little simpler, because the checks on the allocation 
order (which is always 0 on this path) can be dropped.
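
A rough sketch of the difference (not the patch itself; variable names are
simplified): the stock __vmalloc() loop ends up disabling and re-enabling
interrupts inside every alloc_page() call, while VGNCA disables them once
around the whole loop and uses allocation variants that only take a plain
spin_lock() internally:

	/* stock path: one interrupt disable/enable per page */
	for (i = 0; i < area->nr_pages; i++)
		area->pages[i] = alloc_page(gfp_mask);

	/* VGNCA: interrupts toggled once for the whole allocation */
	local_irq_save(flags);
	for (i = 0; i < area->nr_pages; i++)
		area->pages[i] = vgnca_alloc_page(gfp_mask);
	local_irq_restore(flags);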

Our patch is intended as an experiment, to check whether this could bring 
enough benefit to justify a more careful implementation. We also included 
some code to benchmark allocations and deallocations, using the RDTSC 
instruction.
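
Each measurement brackets a single vmalloc()/vfree() pair with the
time-stamp counter, roughly as below (a sketch mirroring the
VMALLOC_THEN_VFREE macro in the patch; 'size' stands for the amount being
tested). The /proc handlers emit one "size, allocation cycles, free cycles"
line per tested size:

	uint64_t before, after;
	void *mem;

	rdtsc(before);                  /* read TSC into EDX:EAX */
	mem = vmalloc(size);
	rdtsc(after);
	/* after - before: cycles spent allocating 'size' bytes */

	if (mem) {
		rdtsc(before);
		vfree(mem);
		rdtsc(after);
		/* after - before: cycles spent freeing it again */
	}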

We are sending:
- A patch against 2.6.0-test11 with these modifications.
- Graphs with the performance measurements:
--- small-allocations.eps/small-frees.eps --> 1-128 kB
--- large-allocations.eps/large-frees.eps --> 1-64 MB

LMB, Ruthiano.

[-- Attachment #2: vgnca-test11.patch --]
[-- Type: text/x-diff, Size: 23105 bytes --]

diff -Naur a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
--- a/arch/i386/mm/pageattr.c	2003-11-26 18:43:41.000000000 -0200
+++ b/arch/i386/mm/pageattr.c	2003-12-04 17:00:25.000000000 -0200
@@ -175,6 +175,23 @@
 	return err;
 }
 
+int vgnca_change_page_attr(struct page *page, int numpages, pgprot_t prot)
+{
+	int err = 0; 
+	int i; 
+
+	/* (VGNCA) spin_lock_irqsave(&cpa_lock, flags); */
+   spin_lock(&cpa_lock);
+	for (i = 0; i < numpages; i++, page++) { 
+		err = __change_page_attr(page, prot);
+		if (err) 
+			break; 
+	} 	
+	/* (VGNCA) spin_unlock_irqrestore(&cpa_lock, flags); */
+   spin_unlock(&cpa_lock);
+	return err;
+}
+
 void global_flush_tlb(void)
 { 
 	LIST_HEAD(l);
@@ -208,6 +225,20 @@
 	 */
 	__flush_tlb_all();
 }
+
+void vgnca_kernel_map_pages(struct page *page, int numpages, int enable)
+{
+	if (PageHighMem(page))
+		return;
+	/* the return value is ignored - the calls cannot fail,
+	 * large pages are disabled at boot time.
+	 */
+	vgnca_change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
+	/* we should perform an IPI and flush all tlbs,
+	 * but that can deadlock->flush only current cpu.
+	 */
+	__flush_tlb_all();
+}
 EXPORT_SYMBOL(kernel_map_pages);
 #endif
 
diff -Naur a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
--- a/fs/proc/proc_misc.c	2003-11-26 18:43:07.000000000 -0200
+++ b/fs/proc/proc_misc.c	2003-12-04 17:04:22.000000000 -0200
@@ -237,6 +237,161 @@
 #undef K
 }
 
+
+/*************************
+   VGNCA: benchmark vmalloc
+ *************************/
+
+/* thanks to rnsanchez & felipewd */
+#define rdtsc(ticks) \
+    __asm__ volatile (".byte 0x0f, 0x31" : "=A" (ticks));
+
+
+#define VMALLOC_THEN_VFREE(AMOUNT_IN_BYTES)                                 \
+{                                                                           \
+   poff += sprintf(page+poff, "%d", (AMOUNT_IN_BYTES));                     \
+   rdtsc(ticks_before);                                                     \
+   mem = vmalloc((AMOUNT_IN_BYTES));                                        \
+   rdtsc(ticks_after);                                                      \
+   poff += sprintf(page+poff, "\t%lld", ticks_after - ticks_before);        \
+                                                                            \
+   if (!mem)                                                                \
+      poff += sprintf(page+poff, "\tallocation failed!\n");                 \
+   else                                                                     \
+   {                                                                        \
+      rdtsc(ticks_before);                                                  \
+      vfree(mem);                                                           \
+      rdtsc(ticks_after);                                                   \
+      poff += sprintf(page+poff, "\t%lld\n", ticks_after - ticks_before);   \
+   }                                                                        \
+}
+
+
+
+static int bm_vmalloc_read_proc_1(char *page, char **start, off_t off,
+                                  int count, int *eof, void *data)
+{
+	uint64_t ticks_before, ticks_after;
+        void* mem;
+        off_t poff = off;
+        int i;
+
+        if ((mem = vmalloc(1024)))
+           vfree (mem);
+
+        for (i = 1; i <= 32; ++i)
+           VMALLOC_THEN_VFREE(1024*i);
+
+        poff += sprintf(page+poff, "\n");
+
+        return proc_calc_metrics(page, start, off, count, eof, poff - off);
+}
+
+static int bm_vmalloc_read_proc_2(char *page, char **start, off_t off,
+                                  int count, int *eof, void *data)
+{
+	uint64_t ticks_before, ticks_after;
+        void* mem;
+        off_t poff = off;
+        int i;
+
+        if ((mem = vmalloc(1024)))
+           vfree (mem);
+
+        for (i = 33; i <= 64; ++i)
+           VMALLOC_THEN_VFREE(1024*i);
+
+        poff += sprintf(page+poff, "\n");
+
+        return proc_calc_metrics(page, start, off, count, eof, poff - off);
+}
+
+static int bm_vmalloc_read_proc_3(char *page, char **start, off_t off,
+                                  int count, int *eof, void *data)
+{
+	uint64_t ticks_before, ticks_after;
+        void* mem;
+        off_t poff = off;
+        int i;
+
+        if ((mem = vmalloc(1024)))
+           vfree (mem);
+
+        for (i = 65; i <= 96; ++i)
+           VMALLOC_THEN_VFREE(1024*i);
+
+        poff += sprintf(page+poff, "\n");
+
+        return proc_calc_metrics(page, start, off, count, eof, poff - off);
+}
+
+static int bm_vmalloc_read_proc_4(char *page, char **start, off_t off,
+                                  int count, int *eof, void *data)
+{
+	uint64_t ticks_before, ticks_after;
+        void* mem;
+        off_t poff = off;
+        int i = 0;
+
+        if ((mem = vmalloc(1024)))
+           vfree (mem);
+
+        for (i = 97; i <= 128; ++i)
+           VMALLOC_THEN_VFREE(1024*i);
+
+        poff += sprintf(page+poff, "\n");
+
+        return proc_calc_metrics(page, start, off, count, eof, poff - off);
+}
+
+static int bm_vmalloc_read_proc_m1(char *page, char **start, off_t off,
+                                   int count, int *eof, void *data)
+{
+	uint64_t ticks_before, ticks_after;
+        void* mem;
+        off_t poff = off;
+        int i = 0;
+
+        if ((mem = vmalloc(1024)))
+           vfree (mem);
+
+        for (i = 1; i <= 32; ++i)
+           VMALLOC_THEN_VFREE(1024*1024*i);
+
+        poff += sprintf(page+poff, "\n");
+
+        return proc_calc_metrics(page, start, off, count, eof, poff - off);
+}
+
+static int bm_vmalloc_read_proc_m2(char *page, char **start, off_t off,
+                                   int count, int *eof, void *data)
+{
+	uint64_t ticks_before, ticks_after;
+        void* mem;
+        off_t poff = off;
+        int i = 0;
+
+        if ((mem = vmalloc(1024)))
+           vfree (mem);
+
+        for (i = 33; i <= 64; ++i)
+           VMALLOC_THEN_VFREE(1024*1024*i);
+
+        poff += sprintf(page+poff, "\n");
+
+        return proc_calc_metrics(page, start, off, count, eof, poff - off);
+}
+
+
+#undef rdtsc
+#undef VMALLOC_THEN_VFREE
+
+
+/*************************
+   VGNCA: end of benchmark vmalloc
+ *************************/
+
+
 extern struct seq_operations fragmentation_op;
 static int fragmentation_open(struct inode *inode, struct file *file)
 {
@@ -663,6 +818,13 @@
 #endif
 		{"locks",	locks_read_proc},
 		{"execdomains",	execdomains_read_proc},
+		/* VGNCA: benchmark 'vmalloc()' */
+		{"bm-vmalloc-1",  bm_vmalloc_read_proc_1},
+		{"bm-vmalloc-2",  bm_vmalloc_read_proc_2},
+		{"bm-vmalloc-3",  bm_vmalloc_read_proc_3},
+		{"bm-vmalloc-4",  bm_vmalloc_read_proc_4},
+		{"bm-vmalloc-m1",  bm_vmalloc_read_proc_m1},
+		{"bm-vmalloc-m2",  bm_vmalloc_read_proc_m2},
 		{NULL,}
 	};
 	for (p = simple_ones; p->name; p++)
diff -Naur a/include/linux/gfp.h b/include/linux/gfp.h
--- a/include/linux/gfp.h	2003-11-26 18:43:26.000000000 -0200
+++ b/include/linux/gfp.h	2003-12-04 17:12:25.000000000 -0200
@@ -64,6 +64,10 @@
  * optimized to &contig_page_data at compile-time.
  */
 extern struct page * FASTCALL(__alloc_pages(unsigned int, unsigned int, struct zonelist *));
+
+
+/* VGNCA: this always allocates one page only, so the plural name is not good. */
+extern struct page * FASTCALL(__vgnca_alloc_pages(unsigned int, struct zonelist *));
 static inline struct page * alloc_pages_node(int nid, unsigned int gfp_mask, unsigned int order)
 {
 	if (unlikely(order >= MAX_ORDER))
@@ -87,13 +91,22 @@
 		__get_free_pages((gfp_mask) | GFP_DMA,(order))
 
 extern void FASTCALL(__free_pages(struct page *page, unsigned int order));
+extern void FASTCALL(__vgnca_free_pages(struct page *page /*, (VGNCA) unsigned int order*/));
 extern void FASTCALL(free_pages(unsigned long addr, unsigned int order));
 extern void FASTCALL(free_hot_page(struct page *page));
 extern void FASTCALL(free_cold_page(struct page *page));
 
 #define __free_page(page) __free_pages((page), 0)
+/* (VGNCA) no longer passing 'order' */
+#define __vgnca_free_page(page) __vgnca_free_pages((page))
 #define free_page(addr) free_pages((addr),0)
 
 void page_alloc_init(void);
 
+
+/* VGNCA: no longer pass the 'order' parameter (is always 0) */
+#define vgnca_alloc_page(gfp_mask) \
+	__vgnca_alloc_pages((gfp_mask), NODE_DATA(numa_node_id())->node_zonelists + ((gfp_mask) & GFP_ZONEMASK))
+
+
 #endif /* __LINUX_GFP_H */
diff -Naur a/include/linux/mm.h b/include/linux/mm.h
--- a/include/linux/mm.h	2003-11-26 18:42:55.000000000 -0200
+++ b/include/linux/mm.h	2003-12-04 17:14:17.000000000 -0200
@@ -620,6 +620,11 @@
 kernel_map_pages(struct page *page, int numpages, int enable)
 {
 }
+
+static inline void
+vgnca_kernel_map_pages(struct page *page, int numpages, int enable)
+{
+}
 #endif
 
 #endif /* __KERNEL__ */
diff -Naur a/include/linux/vmalloc.h b/include/linux/vmalloc.h
--- a/include/linux/vmalloc.h	2003-11-26 18:45:53.000000000 -0200
+++ b/include/linux/vmalloc.h	2003-12-04 17:15:10.000000000 -0200
@@ -26,6 +26,7 @@
 extern void *vmalloc_32(unsigned long size);
 extern void *__vmalloc(unsigned long size, int gfp_mask, pgprot_t prot);
 extern void vfree(void *addr);
+extern void vgnca_vfree(void *addr);
 
 extern void *vmap(struct page **pages, unsigned int count,
 			unsigned long flags, pgprot_t prot);
diff -Naur a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c	2003-11-26 18:42:56.000000000 -0200
+++ b/mm/page_alloc.c	2003-12-04 17:23:42.000000000 -0200
@@ -209,6 +209,51 @@
 	list_add(&(base + page_idx)->list, &area->free_list);
 }
 
+
+static inline void __vgnca_free_pages_bulk (struct page *page, struct page *base,
+		struct zone *zone, struct free_area *area, unsigned long mask
+		/*, (VGNCA) unsigned int order*/)
+{
+	unsigned long page_idx, index;
+
+/* (VGNCA)
+	if (order)
+		destroy_compound_page(page, order);
+*/
+	page_idx = page - base;
+	if (page_idx & ~mask)
+		BUG();
+	index = page_idx >> (1 /* (VGNCA) + order*/);
+
+	zone->free_pages -= mask;
+	while (mask + (1 << (MAX_ORDER-1))) {
+		struct page *buddy1, *buddy2;
+
+		BUG_ON(area >= zone->free_area + MAX_ORDER);
+		if (!__test_and_change_bit(index, area->map))
+			/*
+			 * the buddy page is still allocated.
+			 */
+			break;
+		/*
+		 * Move the buddy up one level.
+		 * This code is taking advantage of the identity:
+		 * 	-mask = 1+~mask
+		 */
+		buddy1 = base + (page_idx ^ -mask);
+		buddy2 = base + page_idx;
+		BUG_ON(bad_range(zone, buddy1));
+		BUG_ON(bad_range(zone, buddy2));
+		list_del(&buddy1->list);
+		mask <<= 1;
+		area++;
+		index >>= 1;
+		page_idx &= mask;
+	}
+	list_add(&(base + page_idx)->list, &area->free_list);
+}
+
+
 static inline void free_pages_check(const char *function, struct page *page)
 {
 	if (	page_mapped(page) ||
@@ -264,6 +309,36 @@
 	return ret;
 }
 
+
+static int
+vgnca_free_pages_bulk(struct zone *zone, int count,
+                      struct list_head *list /* (VGNCA), unsigned int order */)
+{
+	unsigned long mask;
+	struct free_area *area;
+	struct page *base, *page = NULL;
+	int ret = 0;
+
+	mask = (~0UL) /* (VGNCA) << order*/;
+	base = zone->zone_mem_map;
+	area = zone->free_area /* (VGNCA) + order */;
+	/* (VGNCA) spin_lock_irqsave(&zone->lock, flags); */
+   spin_lock(&zone->lock);
+	zone->all_unreclaimable = 0;
+	zone->pages_scanned = 0;
+	while (!list_empty(list) && count--) {
+		page = list_entry(list->prev, struct page, list);
+		/* have to delete it as __free_pages_bulk list manipulates */
+		list_del(&page->list);
+		__vgnca_free_pages_bulk(page, base, zone, area, mask /* (VGNCA) , order */);
+		ret++;
+	}
+	/* (VGNCA) spin_unlock_irqrestore(&zone->lock, flags); */
+   spin_unlock(&zone->lock);
+	return ret;
+}
+
+
 void __free_pages_ok(struct page *page, unsigned int order)
 {
 	LIST_HEAD(list);
@@ -389,6 +464,29 @@
 	return allocated;
 }
 
+
+static int vgnca_rmqueue_bulk(struct zone *zone, /* (VGNCA) unsigned int order, */
+			unsigned long count, struct list_head *list)
+{
+	int i;
+	int allocated = 0;
+	struct page *page;
+	
+	/* (VGNCA) spin_lock_irqsave(&zone->lock, flags); */
+   spin_lock(&zone->lock);
+	for (i = 0; i < count; ++i) {
+		page = __rmqueue(zone, 0 /*(VGNCA) order*/);
+		if (page == NULL)
+			break;
+		allocated++;
+		list_add_tail(&page->list, list);
+	}
+	/* (VGNCA) spin_unlock_irqrestore(&zone->lock, flags); */
+   spin_unlock(&zone->lock);
+	return allocated;
+}
+
+
 #ifdef CONFIG_PM
 int is_head_of_free_region(struct page *page)
 {
@@ -461,10 +559,34 @@
 	put_cpu();
 }
 
+static void FASTCALL(vgnca_free_hot_cold_page(struct page *page, int cold));
+static void vgnca_free_hot_cold_page(struct page *page, int cold)
+{
+	struct zone *zone = page_zone(page);
+	struct per_cpu_pages *pcp;
+
+	vgnca_kernel_map_pages(page, 1, 0);
+	inc_page_state(pgfree);
+	free_pages_check(__FUNCTION__, page);
+	pcp = &zone->pageset[get_cpu()].pcp[cold];
+	/* (VGNCA) local_irq_save(flags); */
+	if (pcp->count >= pcp->high)
+		pcp->count -= vgnca_free_pages_bulk(zone, pcp->batch, &pcp->list/* (VGNCA), 0*/);
+	list_add(&page->list, &pcp->list);
+	pcp->count++;
+	/* (VGNCA) local_irq_restore(flags); */
+	put_cpu();
+}
+
 void free_hot_page(struct page *page)
 {
 	free_hot_cold_page(page, 0);
 }
+
+void vgnca_free_hot_page(struct page *page)
+{
+	vgnca_free_hot_cold_page(page, 0);
+}
 	
 void free_cold_page(struct page *page)
 {
@@ -515,6 +637,52 @@
 	return page;
 }
 
+
+/*  VGNCA:
+ *  - No longer pass 'order' as parameter.
+ */
+static struct page *vgnca_buffered_rmqueue(struct zone *zone, int cold)
+{
+	struct page *page = NULL;
+
+/*	(VGNCA) if (order == 0) { */
+		struct per_cpu_pages *pcp;
+
+		pcp = &zone->pageset[get_cpu()].pcp[cold];
+		/* (VGNCA) local_irq_save(flags); */
+		if (pcp->count <= pcp->low)
+			pcp->count += vgnca_rmqueue_bulk(zone, /* (VGNCA) 0, */
+						pcp->batch, &pcp->list);
+		if (pcp->count) {
+			page = list_entry(pcp->list.next, struct page, list);
+			list_del(&page->list);
+			pcp->count--;
+		}
+		/* (VGNCA) local_irq_restore(flags);*/
+		put_cpu();
+/*	(VGNCA) } */
+
+	if (page == NULL) {
+		/* (VGNCA) spin_lock_irqsave(&zone->lock, flags); */
+      spin_lock(&zone->lock);
+		page = __rmqueue(zone, 0 /* (VGNCA) order */);
+		/* (VGNCA) spin_unlock_irqrestore(&zone->lock, flags); */
+      spin_unlock(&zone->lock);
+/* (VGNCA)
+		if (order && page)
+			prep_compound_page(page, order);
+*/
+	}
+
+	if (page != NULL) {
+		BUG_ON(bad_range(zone, page));
+		mod_page_state(pgalloc, 1 /* (VGNCA) << order */);
+		prep_new_page(page, 0 /* (VGNCA) order */);
+	}
+	return page;
+}
+
+
 /*
  * This is the 'heart' of the zoned buddy allocator.
  *
@@ -679,7 +847,166 @@
 	return page;
 }
 
+/* VGNCA:
+ *  - Removed the 'order' parameter (is always 0)
+ */
+struct page *
+__vgnca_alloc_pages(unsigned int gfp_mask, struct zonelist *zonelist)
+{
+	const int wait = gfp_mask & __GFP_WAIT;
+	unsigned long min;
+	struct zone **zones, *classzone;
+	struct page *page;
+	struct reclaim_state reclaim_state;
+	struct task_struct *p = current;
+	int i;
+	int cold;
+
+	might_sleep_if(wait);
+
+	cold = 0;
+	if (gfp_mask & __GFP_COLD)
+		cold = 1;
+
+	zones = zonelist->zones;  /* the list of zones suitable for gfp_mask */
+	classzone = zones[0]; 
+	if (classzone == NULL)    /* no zones in the zonelist */
+		return NULL;
+
+	/* Go through the zonelist once, looking for a zone with enough free */
+	min = 1; /* VGNCA--used to be 'min = 1UL << order;' */
+	for (i = 0; zones[i] != NULL; i++) {
+		struct zone *z = zones[i];
+		unsigned long local_low;
+
+		/*
+		 * This is the fabled 'incremental min'. We let real-time tasks
+		 * dip their real-time paws a little deeper into reserves.
+		 */
+		local_low = z->pages_low;
+		if (rt_task(p))
+			local_low >>= 1;
+		min += local_low;
+
+		if (z->free_pages >= min ||
+				(!wait && z->free_pages >= z->pages_high)) {
+			page = vgnca_buffered_rmqueue(z, cold);
+			if (page)
+		       		goto got_pg;
+		}
+		min += z->pages_low * sysctl_lower_zone_protection;
+	}
+
+	/* we're somewhat low on memory, failed to find what we needed */
+	for (i = 0; zones[i] != NULL; i++)
+		wakeup_kswapd(zones[i]);
+
+	/* Go through the zonelist again, taking __GFP_HIGH into account */
+	min = 1; /* (VGNCA) min = 1UL << order; */
+	for (i = 0; zones[i] != NULL; i++) {
+		unsigned long local_min;
+		struct zone *z = zones[i];
+
+		local_min = z->pages_min;
+		if (gfp_mask & __GFP_HIGH)
+			local_min >>= 2;
+		if (rt_task(p))
+			local_min >>= 1;
+		min += local_min;
+		if (z->free_pages >= min ||
+				(!wait && z->free_pages >= z->pages_high)) {
+			page = vgnca_buffered_rmqueue(z, cold);
+			if (page)
+				goto got_pg;
+		}
+		min += local_min * sysctl_lower_zone_protection;
+	}
+
+	/* here we're in the low on memory slow path */
+
+rebalance:
+	if ((p->flags & (PF_MEMALLOC | PF_MEMDIE)) && !in_interrupt()) {
+		/* go through the zonelist yet again, ignoring mins */
+		for (i = 0; zones[i] != NULL; i++) {
+			struct zone *z = zones[i];
+
+			page = vgnca_buffered_rmqueue(z, cold);
+			if (page)
+				goto got_pg;
+		}
+		goto nopage;
+	}
+
+	/* Atomic allocations - we can't balance anything */
+	if (!wait)
+		goto nopage;
+
+	p->flags |= PF_MEMALLOC;
+	reclaim_state.reclaimed_slab = 0;
+	p->reclaim_state = &reclaim_state;
+
+	try_to_free_pages(classzone, gfp_mask, 0 /*order*/);
+
+	p->reclaim_state = NULL;
+	p->flags &= ~PF_MEMALLOC;
+
+	/* go through the zonelist yet one more time */
+	min = 1; /* (VGNCA) min = 1UL << order; */
+	for (i = 0; zones[i] != NULL; i++) {
+		struct zone *z = zones[i];
+
+		min += z->pages_min;
+		if (z->free_pages >= min ||
+				(!wait && z->free_pages >= z->pages_high)) {
+			page = vgnca_buffered_rmqueue(z, cold);
+			if (page)
+				goto got_pg;
+		}
+		min += z->pages_low * sysctl_lower_zone_protection;
+	}
+
+	/*
+	 * Don't let big-order allocations loop unless the caller explicitly
+	 * requests that.  Wait for some write requests to complete then retry.
+	 *
+	 * In this implementation, __GFP_REPEAT means __GFP_NOFAIL, but that
+	 * may not be true in other implementations.
+	 */
+
+/* (VGNCA) 'do_retry' will always be 1, because 'order <= 3' (actually,
+           'order == 0'). So, a lot of code can be removed from here.
+
+	do_retry = 0;
+	if (!(gfp_mask & __GFP_NORETRY)) {
+		if ((order <= 3) || (gfp_mask & __GFP_REPEAT))
+			do_retry = 1;
+		if (gfp_mask & __GFP_NOFAIL)
+			do_retry = 1;
+	}
+	if (do_retry) {
+		blk_congestion_wait(WRITE, HZ/50);
+		goto rebalance;
+	}
+*/
+/* (Added by VGNCA) */
+	blk_congestion_wait(WRITE, HZ/50);
+	goto rebalance;
+/* (End of VGNCA) */
+
+nopage:
+	if (!(gfp_mask & __GFP_NOWARN)) {
+		printk("%s: page allocation failure."
+			" order:%d, mode:0x%x\n",
+			p->comm, 0, gfp_mask);
+	}
+	return NULL;
+got_pg:
+	vgnca_kernel_map_pages(page, 1 << 0 /* (VGNCA) order*/, 1);
+	return page;
+}
 EXPORT_SYMBOL(__alloc_pages);
+EXPORT_SYMBOL(__vgnca_alloc_pages);
+
 
 /*
  * Common helper functions.
@@ -735,7 +1062,23 @@
 	}
 }
 
+
+/* (VGNCA) no longer taking an 'order' parameter */
+void __vgnca_free_pages(struct page *page)
+{
+	if (!PageReserved(page) && put_page_testzero(page)) {
+/* (VGNCA)
+		if (order == 0) */
+			vgnca_free_hot_page(page);
+/*	(VGNCA)
+	else
+			__free_pages_ok(page, 0);
+*/
+	}
+}
+
 EXPORT_SYMBOL(__free_pages);
+EXPORT_SYMBOL(__vgnca_free_pages);
 
 void free_pages(unsigned long addr, unsigned int order)
 {
diff -Naur a/mm/vmalloc.c b/mm/vmalloc.c
--- a/mm/vmalloc.c	2003-11-26 18:44:23.000000000 -0200
+++ b/mm/vmalloc.c	2003-12-04 17:37:25.000000000 -0200
@@ -20,6 +20,13 @@
 #include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 
+#define CONFIG_USE_VGNCA
+
+#ifdef CONFIG_USE_VGNCA
+static int vgnca_alloc(struct vm_struct *area, int gfp_mask);
+static void vgnca_free(struct vm_struct *area);
+#endif
+
 
 rwlock_t vmlist_lock = RW_LOCK_UNLOCKED;
 struct vm_struct *vmlist;
@@ -309,6 +316,43 @@
 	return;
 }
 
+
+void __vgnca_vunmap(void *addr, int deallocate_pages)
+{
+	struct vm_struct *area;
+
+	if (!addr)
+		return;
+
+	if ((PAGE_SIZE-1) & (unsigned long)addr) {
+		printk(KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
+		return;
+	}
+
+	area = remove_vm_area(addr);
+	if (unlikely(!area)) {
+		printk(KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
+				addr);
+		return;
+	}
+	
+	if (deallocate_pages) {
+		int i;
+
+		for (i = 0; i < area->nr_pages; i++) {
+			if (unlikely(!area->pages[i]))
+				BUG();
+			__vgnca_free_page(area->pages[i]);
+		}
+
+		kfree(area->pages);
+	}
+
+	kfree(area);
+	return;
+}
+
+
 /**
  *	vfree  -  release memory allocated by vmalloc()
  *
@@ -325,7 +369,13 @@
 	__vunmap(addr, 1);
 }
 
+void vgnca_vfree(void *addr)
+{
+	BUG_ON(in_interrupt());
+	__vgnca_vunmap(addr, 1);
+}
 EXPORT_SYMBOL(vfree);
+EXPORT_SYMBOL(vgnca_vfree);
 
 /**
  *	vunmap  -  release virtual mapping obtained by vmap()
@@ -392,7 +442,10 @@
 {
 	struct vm_struct *area;
 	struct page **pages;
-	unsigned int nr_pages, array_size, i;
+	unsigned int nr_pages, array_size;
+#ifndef CONFIG_USE_VGNCA
+   unsigned int i;
+#endif
 
 	size = PAGE_ALIGN(size);
 	if (!size || (size >> PAGE_SHIFT) > num_physpages)
@@ -414,6 +467,7 @@
 	}
 	memset(area->pages, 0, array_size);
 
+#ifndef CONFIG_USE_VGNCA
 	for (i = 0; i < area->nr_pages; i++) {
 		area->pages[i] = alloc_page(gfp_mask);
 		if (unlikely(!area->pages[i])) {
@@ -422,13 +476,20 @@
 			goto fail;
 		}
 	}
-	
+#else /* CONFIG_USE_VGNCA */
+   if (!vgnca_alloc(area, gfp_mask))
+      goto fail;
+#endif /* CONFIG_USE_VGNCA */
 	if (map_vm_area(area, prot, &pages))
 		goto fail;
 	return area->addr;
 
 fail:
+#ifndef CONFIG_USE_VGNCA
 	vfree(area->addr);
+#else /* CONFIG_USE_VGNCA */
+	vgnca_free(area);
+#endif /* CONFIG_USE_VGNCA */
 	return NULL;
 }
 
@@ -541,3 +602,54 @@
 	read_unlock(&vmlist_lock);
 	return buf - buf_start;
 }
+
+#ifdef CONFIG_USE_VGNCA
+
+/**
+ * VGNCA Alloc  -  allocate some virtually contiguous pages at once.
+ *
+ * @area:      the struct vm_area used in this allocation.
+ * @gfp_mask:  flags for the page level allocator (TODO: is this right?)
+ *
+ * Allocate pages (hopefully) more efficiently than calling alloc_page()
+ * for each page. @area->nr_pages must be set to the number of pages that
+ * should be allocated.
+ *
+ * Returns 1 on success.  On failure, returns 0 and sets @area->nr_pages to
+ * the number of pages already allocated, so that they can be freed again.
+ */
+static int vgnca_alloc(struct vm_struct *area, int gfp_mask)
+{
+   unsigned int i;
+   unsigned long flags;
+
+   local_irq_save(flags);
+
+	for (i = 0; i < area->nr_pages; i++) {
+		area->pages[i] = vgnca_alloc_page(gfp_mask);
+		if (unlikely(!area->pages[i])) {
+			/* Successfully allocated i pages, free them in __vunmap() */
+			area->nr_pages = i;
+
+         local_irq_restore(flags);
+         return 0;
+		}
+	}
+   local_irq_restore(flags);
+
+   return 1;
+}
+
+
+/**
+ * VGNCA Free  -  frees memory allocated by very_good_non_contig_alloc
+ *
+ * @area:      the struct vm_area used in the allocation being freed.
+ *
+ */
+static void vgnca_free(struct vm_struct *area)
+{
+   vgnca_vfree(area->addr);
+}
+
+#endif /* CONFIG_USE_VGNCA */

[-- Attachment #3: large-frees.eps --]
[-- Type: image/x-eps, Size: 13942 bytes --]

[-- Attachment #4: large-allocations.eps --]
[-- Type: image/x-eps, Size: 13950 bytes --]

[-- Attachment #5: small-allocations.eps --]
[-- Type: image/x-eps, Size: 15609 bytes --]

[-- Attachment #6: small-frees.eps --]
[-- Type: image/x-eps, Size: 15829 bytes --]

Thread overview: 3+ messages
2003-12-09 13:11 Ruthiano Simioni Munaretti [this message]
2003-12-13  2:32 ` Rik van Riel
2003-12-10 14:42 Mark_H_Johnson
