From: Ruthiano Simioni Munaretti <ruthiano@exatas.unisinos.br>
To: linux-mm@kvack.org
Cc: sisopiii-l@cscience.org
Subject: Non-Contiguous Memory Allocation Tests
Date: Tue, 9 Dec 2003 11:11:21 -0200
Message-ID: <200312091111.21349.ruthiano@exatas.unisinos.br>
[-- Attachment #1: Type: text/plain, Size: 1062 bytes --]
Hi,

A colleague and I have been running tests on the non-contiguous memory allocator. We implemented VGNCA, an improvement to the non-contiguous memory allocator.
In the current non-contiguous memory allocator, each physical page is allocated one at a time through a call to alloc_page(), and each of these calls carries the overhead of disabling and re-enabling interrupts. The main idea in VGNCA is to disable and re-enable interrupts only once for the whole allocation, reducing that overhead. The VGNCA allocation/deallocation functions are also a little simpler, because the tests on the allocation size (the 'order' parameter, which is always 0 on this path) become unnecessary and were removed.
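
To make the idea concrete, here is a rough sketch of the difference (these are not the actual patch hunks; the helper names stock_fill() and vgnca_fill() are only for illustration, the real code lives in __vmalloc() and vgnca_alloc()):

#include <linux/mm.h>
#include <linux/vmalloc.h>
#include <asm/system.h>

/* Stock path: every alloc_page() call disables and re-enables interrupts
 * on its own, deep inside the page allocator. */
static int stock_fill(struct vm_struct *area, int gfp_mask)
{
	unsigned int i;

	for (i = 0; i < area->nr_pages; i++) {
		area->pages[i] = alloc_page(gfp_mask);	/* irqs off/on per page */
		if (!area->pages[i]) {
			area->nr_pages = i;
			return 0;
		}
	}
	return 1;
}

/* VGNCA path: interrupts are disabled once around the whole loop, and the
 * per-page helper (vgnca_alloc_page() from the patch) only takes plain
 * spinlocks. */
static int vgnca_fill(struct vm_struct *area, int gfp_mask)
{
	unsigned int i;
	unsigned long flags;

	local_irq_save(flags);			/* irqs off once */
	for (i = 0; i < area->nr_pages; i++) {
		area->pages[i] = vgnca_alloc_page(gfp_mask);
		if (!area->pages[i]) {
			area->nr_pages = i;	/* caller frees the i pages we got */
			local_irq_restore(flags);
			return 0;
		}
	}
	local_irq_restore(flags);		/* irqs on once */
	return 1;
}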
Our patch is intended as an experiment, to check whether this approach brings enough benefit to deserve a more careful implementation. We also included some code to benchmark allocations and deallocations using the RDTSC instruction, exported through a few /proc entries (bm-vmalloc-*).
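
The measurement boils down to something like the following (a simplified sketch of the VMALLOC_THEN_VFREE macro from the patch; the helper time_one_size() does not exist in the patch and is only for illustration):

#include <linux/vmalloc.h>
#include <linux/types.h>

/* RDTSC macro as used in the patch (i386 only). */
#define rdtsc(ticks) \
	__asm__ volatile (".byte 0x0f, 0x31" : "=A" (ticks))

static void time_one_size(unsigned long bytes,
			  uint64_t *alloc_ticks, uint64_t *free_ticks)
{
	uint64_t before, after;
	void *mem;

	rdtsc(before);
	mem = vmalloc(bytes);
	rdtsc(after);
	*alloc_ticks = after - before;

	*free_ticks = 0;
	if (mem) {
		rdtsc(before);
		vfree(mem);
		rdtsc(after);
		*free_ticks = after - before;
	}
}

Each /proc/bm-vmalloc-* file prints one such measurement per line (size, allocation ticks, free ticks); those are the numbers the attached graphs were produced from.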
We are sending:
- Patch against 2.6.0-test11 with these modifications.
- Graphs of the performance tests:
--- small-allocations.eps/small-frees.eps --> 1-128 kB
--- large-allocations.eps/large-frees.eps --> 1-64 MB
LMB, Ruthiano.
[-- Attachment #2: vgnca-test11.patch --]
[-- Type: text/x-diff, Size: 23105 bytes --]
diff -Naur a/arch/i386/mm/pageattr.c b/arch/i386/mm/pageattr.c
--- a/arch/i386/mm/pageattr.c 2003-11-26 18:43:41.000000000 -0200
+++ b/arch/i386/mm/pageattr.c 2003-12-04 17:00:25.000000000 -0200
@@ -175,6 +175,23 @@
return err;
}
+int vgnca_change_page_attr(struct page *page, int numpages, pgprot_t prot)
+{
+ int err = 0;
+ int i;
+
+ /* (VGNCA) spin_lock_irqsave(&cpa_lock, flags); */
+ spin_lock(&cpa_lock);
+ for (i = 0; i < numpages; i++, page++) {
+ err = __change_page_attr(page, prot);
+ if (err)
+ break;
+ }
+ /* (VGNCA) spin_unlock_irqrestore(&cpa_lock, flags); */
+ spin_unlock(&cpa_lock);
+ return err;
+}
+
void global_flush_tlb(void)
{
LIST_HEAD(l);
@@ -208,6 +225,20 @@
*/
__flush_tlb_all();
}
+
+void vgnca_kernel_map_pages(struct page *page, int numpages, int enable)
+{
+ if (PageHighMem(page))
+ return;
+ /* the return value is ignored - the calls cannot fail,
+ * large pages are disabled at boot time.
+ */
+ vgnca_change_page_attr(page, numpages, enable ? PAGE_KERNEL : __pgprot(0));
+ /* we should perform an IPI and flush all tlbs,
+ * but that can deadlock->flush only current cpu.
+ */
+ __flush_tlb_all();
+}
EXPORT_SYMBOL(kernel_map_pages);
#endif
diff -Naur a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c
--- a/fs/proc/proc_misc.c 2003-11-26 18:43:07.000000000 -0200
+++ b/fs/proc/proc_misc.c 2003-12-04 17:04:22.000000000 -0200
@@ -237,6 +237,161 @@
#undef K
}
+
+/*************************
+ VGNCA: benchmark vmalloc
+ *************************/
+
+/* thanks to rnsanchez & felipewd */
+#define rdtsc(ticks) \
+ __asm__ volatile (".byte 0x0f, 0x31" : "=A" (ticks));
+
+
+#define VMALLOC_THEN_VFREE(AMOUNT_IN_BYTES) \
+{ \
+ poff += sprintf(page+poff, "%d", (AMOUNT_IN_BYTES)); \
+ rdtsc(ticks_before); \
+ mem = vmalloc((AMOUNT_IN_BYTES)); \
+ rdtsc(ticks_after); \
+ poff += sprintf(page+poff, "\t%lld", ticks_after - ticks_before); \
+ \
+ if (!mem) \
+ poff += sprintf(page+poff, "\tallocation failed!\n"); \
+ else \
+ { \
+ rdtsc(ticks_before); \
+ vfree(mem); \
+ rdtsc(ticks_after); \
+ poff += sprintf(page+poff, "\t%lld\n", ticks_after - ticks_before); \
+ } \
+}
+
+
+
+static int bm_vmalloc_read_proc_1(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ uint64_t ticks_before, ticks_after;
+ void* mem;
+ off_t poff = off;
+ int i;
+
+ if ((mem = vmalloc(1024)))
+ vfree (mem);
+
+ for (i = 1; i <= 32; ++i)
+ VMALLOC_THEN_VFREE(1024*i);
+
+ poff += sprintf(page+poff, "\n");
+
+ return proc_calc_metrics(page, start, off, count, eof, poff - off);
+}
+
+static int bm_vmalloc_read_proc_2(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ uint64_t ticks_before, ticks_after;
+ void* mem;
+ off_t poff = off;
+ int i;
+
+ if ((mem = vmalloc(1024)))
+ vfree (mem);
+
+ for (i = 33; i <= 64; ++i)
+ VMALLOC_THEN_VFREE(1024*i);
+
+ poff += sprintf(page+poff, "\n");
+
+ return proc_calc_metrics(page, start, off, count, eof, poff - off);
+}
+
+static int bm_vmalloc_read_proc_3(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ uint64_t ticks_before, ticks_after;
+ void* mem;
+ off_t poff = off;
+ int i;
+
+ if ((mem = vmalloc(1024)))
+ vfree (mem);
+
+ for (i = 65; i <= 96; ++i)
+ VMALLOC_THEN_VFREE(1024*i);
+
+ poff += sprintf(page+poff, "\n");
+
+ return proc_calc_metrics(page, start, off, count, eof, poff - off);
+}
+
+static int bm_vmalloc_read_proc_4(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ uint64_t ticks_before, ticks_after;
+ void* mem;
+ off_t poff = off;
+ int i = 0;
+
+ if ((mem = vmalloc(1024)))
+ vfree (mem);
+
+ for (i = 97; i <= 128; ++i)
+ VMALLOC_THEN_VFREE(1024*i);
+
+ poff += sprintf(page+poff, "\n");
+
+ return proc_calc_metrics(page, start, off, count, eof, poff - off);
+}
+
+static int bm_vmalloc_read_proc_m1(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ uint64_t ticks_before, ticks_after;
+ void* mem;
+ off_t poff = off;
+ int i = 0;
+
+ if ((mem = vmalloc(1024)))
+ vfree (mem);
+
+ for (i = 1; i <= 32; ++i)
+ VMALLOC_THEN_VFREE(1024*1024*i);
+
+ poff += sprintf(page+poff, "\n");
+
+ return proc_calc_metrics(page, start, off, count, eof, poff - off);
+}
+
+static int bm_vmalloc_read_proc_m2(char *page, char **start, off_t off,
+ int count, int *eof, void *data)
+{
+ uint64_t ticks_before, ticks_after;
+ void* mem;
+ off_t poff = off;
+ int i = 0;
+
+ if ((mem = vmalloc(1024)))
+ vfree (mem);
+
+ for (i = 33; i <= 64; ++i)
+ VMALLOC_THEN_VFREE(1024*1024*i);
+
+ poff += sprintf(page+poff, "\n");
+
+ return proc_calc_metrics(page, start, off, count, eof, poff - off);
+}
+
+
+#undef rdtsc
+#undef VMALLOC_THEN_VFREE
+
+
+/*************************
+ VGNCA: end of benchmark vmalloc
+ *************************/
+
+
extern struct seq_operations fragmentation_op;
static int fragmentation_open(struct inode *inode, struct file *file)
{
@@ -663,6 +818,13 @@
#endif
{"locks", locks_read_proc},
{"execdomains", execdomains_read_proc},
+ /* VGNCA: benchmark 'vmalloc()' */
+ {"bm-vmalloc-1", bm_vmalloc_read_proc_1},
+ {"bm-vmalloc-2", bm_vmalloc_read_proc_2},
+ {"bm-vmalloc-3", bm_vmalloc_read_proc_3},
+ {"bm-vmalloc-4", bm_vmalloc_read_proc_4},
+ {"bm-vmalloc-m1", bm_vmalloc_read_proc_m1},
+ {"bm-vmalloc-m2", bm_vmalloc_read_proc_m2},
{NULL,}
};
for (p = simple_ones; p->name; p++)
diff -Naur a/include/linux/gfp.h b/include/linux/gfp.h
--- a/include/linux/gfp.h 2003-11-26 18:43:26.000000000 -0200
+++ b/include/linux/gfp.h 2003-12-04 17:12:25.000000000 -0200
@@ -64,6 +64,10 @@
* optimized to &contig_page_data at compile-time.
*/
extern struct page * FASTCALL(__alloc_pages(unsigned int, unsigned int, struct zonelist *));
+
+
+/* VGNCA: this always allocates a single page, so the plural name is misleading. */
+extern struct page * FASTCALL(__vgnca_alloc_pages(unsigned int, struct zonelist *));
static inline struct page * alloc_pages_node(int nid, unsigned int gfp_mask, unsigned int order)
{
if (unlikely(order >= MAX_ORDER))
@@ -87,13 +91,22 @@
__get_free_pages((gfp_mask) | GFP_DMA,(order))
extern void FASTCALL(__free_pages(struct page *page, unsigned int order));
+extern void FASTCALL(__vgnca_free_pages(struct page *page /*, (VGNCA) unsigned int order*/));
extern void FASTCALL(free_pages(unsigned long addr, unsigned int order));
extern void FASTCALL(free_hot_page(struct page *page));
extern void FASTCALL(free_cold_page(struct page *page));
#define __free_page(page) __free_pages((page), 0)
+/* (VGNCA) no longer passing 'order' */
+#define __vgnca_free_page(page) __vgnca_free_pages((page))
#define free_page(addr) free_pages((addr),0)
void page_alloc_init(void);
+
+/* VGNCA: the 'order' parameter is no longer passed (it is always 0) */
+#define vgnca_alloc_page(gfp_mask) \
+ __vgnca_alloc_pages((gfp_mask), NODE_DATA(numa_node_id())->node_zonelists + ((gfp_mask) & GFP_ZONEMASK))
+
+
#endif /* __LINUX_GFP_H */
diff -Naur a/include/linux/mm.h b/include/linux/mm.h
--- a/include/linux/mm.h 2003-11-26 18:42:55.000000000 -0200
+++ b/include/linux/mm.h 2003-12-04 17:14:17.000000000 -0200
@@ -620,6 +620,11 @@
kernel_map_pages(struct page *page, int numpages, int enable)
{
}
+
+static inline void
+vgnca_kernel_map_pages(struct page *page, int numpages, int enable)
+{
+}
#endif
#endif /* __KERNEL__ */
diff -Naur a/include/linux/vmalloc.h b/include/linux/vmalloc.h
--- a/include/linux/vmalloc.h 2003-11-26 18:45:53.000000000 -0200
+++ b/include/linux/vmalloc.h 2003-12-04 17:15:10.000000000 -0200
@@ -26,6 +26,7 @@
extern void *vmalloc_32(unsigned long size);
extern void *__vmalloc(unsigned long size, int gfp_mask, pgprot_t prot);
extern void vfree(void *addr);
+extern void vgnca_vfree(void *addr);
extern void *vmap(struct page **pages, unsigned int count,
unsigned long flags, pgprot_t prot);
diff -Naur a/mm/page_alloc.c b/mm/page_alloc.c
--- a/mm/page_alloc.c 2003-11-26 18:42:56.000000000 -0200
+++ b/mm/page_alloc.c 2003-12-04 17:23:42.000000000 -0200
@@ -209,6 +209,51 @@
list_add(&(base + page_idx)->list, &area->free_list);
}
+
+static inline void __vgnca_free_pages_bulk (struct page *page, struct page *base,
+ struct zone *zone, struct free_area *area, unsigned long mask
+ /*, (VGNCA) unsigned int order*/)
+{
+ unsigned long page_idx, index;
+
+/* (VGNCA)
+ if (order)
+ destroy_compound_page(page, order);
+*/
+ page_idx = page - base;
+ if (page_idx & ~mask)
+ BUG();
+ index = page_idx >> (1 /* (VGNCA) + order*/);
+
+ zone->free_pages -= mask;
+ while (mask + (1 << (MAX_ORDER-1))) {
+ struct page *buddy1, *buddy2;
+
+ BUG_ON(area >= zone->free_area + MAX_ORDER);
+ if (!__test_and_change_bit(index, area->map))
+ /*
+ * the buddy page is still allocated.
+ */
+ break;
+ /*
+ * Move the buddy up one level.
+ * This code is taking advantage of the identity:
+ * -mask = 1+~mask
+ */
+ buddy1 = base + (page_idx ^ -mask);
+ buddy2 = base + page_idx;
+ BUG_ON(bad_range(zone, buddy1));
+ BUG_ON(bad_range(zone, buddy2));
+ list_del(&buddy1->list);
+ mask <<= 1;
+ area++;
+ index >>= 1;
+ page_idx &= mask;
+ }
+ list_add(&(base + page_idx)->list, &area->free_list);
+}
+
+
static inline void free_pages_check(const char *function, struct page *page)
{
if ( page_mapped(page) ||
@@ -264,6 +309,36 @@
return ret;
}
+
+static int
+vgnca_free_pages_bulk(struct zone *zone, int count,
+ struct list_head *list /* (VGNCA), unsigned int order */)
+{
+ unsigned long mask;
+ struct free_area *area;
+ struct page *base, *page = NULL;
+ int ret = 0;
+
+ mask = (~0UL) /* (VGNCA) << order*/;
+ base = zone->zone_mem_map;
+ area = zone->free_area /* (VGNCA) + order */;
+ /* (VGNCA) spin_lock_irqsave(&zone->lock, flags); */
+ spin_lock(&zone->lock);
+ zone->all_unreclaimable = 0;
+ zone->pages_scanned = 0;
+ while (!list_empty(list) && count--) {
+ page = list_entry(list->prev, struct page, list);
+ /* have to delete it as __free_pages_bulk list manipulates */
+ list_del(&page->list);
+ __vgnca_free_pages_bulk(page, base, zone, area, mask /* (VGNCA) , order */);
+ ret++;
+ }
+ /* (VGNCA) spin_unlock_irqrestore(&zone->lock, flags); */
+ spin_unlock(&zone->lock);
+ return ret;
+}
+
+
void __free_pages_ok(struct page *page, unsigned int order)
{
LIST_HEAD(list);
@@ -389,6 +464,29 @@
return allocated;
}
+
+static int vgnca_rmqueue_bulk(struct zone *zone, /* (VGNCA) unsigned int order, */
+ unsigned long count, struct list_head *list)
+{
+ int i;
+ int allocated = 0;
+ struct page *page;
+
+ /* (VGNCA) spin_lock_irqsave(&zone->lock, flags); */
+ spin_lock(&zone->lock);
+ for (i = 0; i < count; ++i) {
+ page = __rmqueue(zone, 0 /*(VGNCA) order*/);
+ if (page == NULL)
+ break;
+ allocated++;
+ list_add_tail(&page->list, list);
+ }
+ /* (VGNCA) spin_unlock_irqrestore(&zone->lock, flags); */
+ spin_unlock(&zone->lock);
+ return allocated;
+}
+
+
#ifdef CONFIG_PM
int is_head_of_free_region(struct page *page)
{
@@ -461,10 +559,34 @@
put_cpu();
}
+static void FASTCALL(vgnca_free_hot_cold_page(struct page *page, int cold));
+static void vgnca_free_hot_cold_page(struct page *page, int cold)
+{
+ struct zone *zone = page_zone(page);
+ struct per_cpu_pages *pcp;
+
+ vgnca_kernel_map_pages(page, 1, 0);
+ inc_page_state(pgfree);
+ free_pages_check(__FUNCTION__, page);
+ pcp = &zone->pageset[get_cpu()].pcp[cold];
+ /* (VGNCA) local_irq_save(flags); */
+ if (pcp->count >= pcp->high)
+ pcp->count -= vgnca_free_pages_bulk(zone, pcp->batch, &pcp->list/* (VGNCA), 0*/);
+ list_add(&page->list, &pcp->list);
+ pcp->count++;
+ /* (VGNCA) local_irq_restore(flags); */
+ put_cpu();
+}
+
void free_hot_page(struct page *page)
{
free_hot_cold_page(page, 0);
}
+
+void vgnca_free_hot_page(struct page *page)
+{
+ vgnca_free_hot_cold_page(page, 0);
+}
void free_cold_page(struct page *page)
{
@@ -515,6 +637,52 @@
return page;
}
+
+/* VGNCA:
+ * - No longer pass 'order' as parameter.
+ */
+static struct page *vgnca_buffered_rmqueue(struct zone *zone, int cold)
+{
+ struct page *page = NULL;
+
+/* (VGNCA) if (order == 0) { */
+ struct per_cpu_pages *pcp;
+
+ pcp = &zone->pageset[get_cpu()].pcp[cold];
+ /* (VGNCA) local_irq_save(flags); */
+ if (pcp->count <= pcp->low)
+ pcp->count += vgnca_rmqueue_bulk(zone, /* (VGNCA) 0, */
+ pcp->batch, &pcp->list);
+ if (pcp->count) {
+ page = list_entry(pcp->list.next, struct page, list);
+ list_del(&page->list);
+ pcp->count--;
+ }
+ /* (VGNCA) local_irq_restore(flags);*/
+ put_cpu();
+/* (VGNCA) } */
+
+ if (page == NULL) {
+ /* (VGNCA) spin_lock_irqsave(&zone->lock, flags); */
+ spin_lock(&zone->lock);
+ page = __rmqueue(zone, 0 /* (VGNCA) order */);
+ /* (VGNCA) spin_unlock_irqrestore(&zone->lock, flags); */
+ spin_unlock(&zone->lock);
+/* (VGNCA)
+ if (order && page)
+ prep_compound_page(page, order);
+*/
+ }
+
+ if (page != NULL) {
+ BUG_ON(bad_range(zone, page));
+ mod_page_state(pgalloc, 1 /* (VGNCA) << order */);
+ prep_new_page(page, 0 /* (VGNCA) order */);
+ }
+ return page;
+}
+
+
/*
* This is the 'heart' of the zoned buddy allocator.
*
@@ -679,7 +847,166 @@
return page;
}
+/* VGNCA:
+ * - Removed the 'order' parameter (is always 0)
+ */
+struct page *
+__vgnca_alloc_pages(unsigned int gfp_mask, struct zonelist *zonelist)
+{
+ const int wait = gfp_mask & __GFP_WAIT;
+ unsigned long min;
+ struct zone **zones, *classzone;
+ struct page *page;
+ struct reclaim_state reclaim_state;
+ struct task_struct *p = current;
+ int i;
+ int cold;
+
+ might_sleep_if(wait);
+
+ cold = 0;
+ if (gfp_mask & __GFP_COLD)
+ cold = 1;
+
+ zones = zonelist->zones; /* the list of zones suitable for gfp_mask */
+ classzone = zones[0];
+ if (classzone == NULL) /* no zones in the zonelist */
+ return NULL;
+
+ /* Go through the zonelist once, looking for a zone with enough free */
+ min = 1; /* VGNCA--used to be 'min = 1UL << order;' */
+ for (i = 0; zones[i] != NULL; i++) {
+ struct zone *z = zones[i];
+ unsigned long local_low;
+
+ /*
+ * This is the fabled 'incremental min'. We let real-time tasks
+ * dip their real-time paws a little deeper into reserves.
+ */
+ local_low = z->pages_low;
+ if (rt_task(p))
+ local_low >>= 1;
+ min += local_low;
+
+ if (z->free_pages >= min ||
+ (!wait && z->free_pages >= z->pages_high)) {
+ page = vgnca_buffered_rmqueue(z, cold);
+ if (page)
+ goto got_pg;
+ }
+ min += z->pages_low * sysctl_lower_zone_protection;
+ }
+
+ /* we're somewhat low on memory, failed to find what we needed */
+ for (i = 0; zones[i] != NULL; i++)
+ wakeup_kswapd(zones[i]);
+
+ /* Go through the zonelist again, taking __GFP_HIGH into account */
+ min = 1; /* (VGNCA) min = 1UL << order; */
+ for (i = 0; zones[i] != NULL; i++) {
+ unsigned long local_min;
+ struct zone *z = zones[i];
+
+ local_min = z->pages_min;
+ if (gfp_mask & __GFP_HIGH)
+ local_min >>= 2;
+ if (rt_task(p))
+ local_min >>= 1;
+ min += local_min;
+ if (z->free_pages >= min ||
+ (!wait && z->free_pages >= z->pages_high)) {
+ page = vgnca_buffered_rmqueue(z, cold);
+ if (page)
+ goto got_pg;
+ }
+ min += local_min * sysctl_lower_zone_protection;
+ }
+
+ /* here we're in the low on memory slow path */
+
+rebalance:
+ if ((p->flags & (PF_MEMALLOC | PF_MEMDIE)) && !in_interrupt()) {
+ /* go through the zonelist yet again, ignoring mins */
+ for (i = 0; zones[i] != NULL; i++) {
+ struct zone *z = zones[i];
+
+ page = vgnca_buffered_rmqueue(z, cold);
+ if (page)
+ goto got_pg;
+ }
+ goto nopage;
+ }
+
+ /* Atomic allocations - we can't balance anything */
+ if (!wait)
+ goto nopage;
+
+ p->flags |= PF_MEMALLOC;
+ reclaim_state.reclaimed_slab = 0;
+ p->reclaim_state = &reclaim_state;
+
+ try_to_free_pages(classzone, gfp_mask, 0 /*order*/);
+
+ p->reclaim_state = NULL;
+ p->flags &= ~PF_MEMALLOC;
+
+ /* go through the zonelist yet one more time */
+ min = 1; /* (VGNCA) min = 1UL << order; */
+ for (i = 0; zones[i] != NULL; i++) {
+ struct zone *z = zones[i];
+
+ min += z->pages_min;
+ if (z->free_pages >= min ||
+ (!wait && z->free_pages >= z->pages_high)) {
+ page = vgnca_buffered_rmqueue(z, cold);
+ if (page)
+ goto got_pg;
+ }
+ min += z->pages_low * sysctl_lower_zone_protection;
+ }
+
+ /*
+ * Don't let big-order allocations loop unless the caller explicitly
+ * requests that. Wait for some write requests to complete then retry.
+ *
+ * In this implementation, __GFP_REPEAT means __GFP_NOFAIL, but that
+ * may not be true in other implementations.
+ */
+
+/* (VGNCA) 'do_retry' will always be 1, because 'order <= 3' (actually,
+ 'order == 0'). So, a lot of code can be removed from here.
+
+ do_retry = 0;
+ if (!(gfp_mask & __GFP_NORETRY)) {
+ if ((order <= 3) || (gfp_mask & __GFP_REPEAT))
+ do_retry = 1;
+ if (gfp_mask & __GFP_NOFAIL)
+ do_retry = 1;
+ }
+ if (do_retry) {
+ blk_congestion_wait(WRITE, HZ/50);
+ goto rebalance;
+ }
+*/
+/* (Added by VGNCA) */
+ blk_congestion_wait(WRITE, HZ/50);
+ goto rebalance;
+/* (End of VGNCA) */
+
+nopage:
+ if (!(gfp_mask & __GFP_NOWARN)) {
+ printk("%s: page allocation failure."
+ " order:%d, mode:0x%x\n",
+ p->comm, 0, gfp_mask);
+ }
+ return NULL;
+got_pg:
+ vgnca_kernel_map_pages(page, 1 << 0 /* (VGNCA) order*/, 1);
+ return page;
+}
EXPORT_SYMBOL(__alloc_pages);
+EXPORT_SYMBOL(__vgnca_alloc_pages);
+
/*
* Common helper functions.
@@ -735,7 +1062,23 @@
}
}
+
+/* (VGNCA) no longer taking an 'order' parameter */
+void __vgnca_free_pages(struct page *page)
+{
+ if (!PageReserved(page) && put_page_testzero(page)) {
+/* (VGNCA)
+ if (order == 0) */
+ vgnca_free_hot_page(page);
+/* (VGNCA)
+ else
+ __free_pages_ok(page, 0);
+*/
+ }
+}
+
EXPORT_SYMBOL(__free_pages);
+EXPORT_SYMBOL(__vgnca_free_pages);
void free_pages(unsigned long addr, unsigned int order)
{
diff -Naur a/mm/vmalloc.c b/mm/vmalloc.c
--- a/mm/vmalloc.c 2003-11-26 18:44:23.000000000 -0200
+++ b/mm/vmalloc.c 2003-12-04 17:37:25.000000000 -0200
@@ -20,6 +20,13 @@
#include <asm/pgalloc.h>
#include <asm/tlbflush.h>
+#define CONFIG_USE_VGNCA
+
+#ifdef CONFIG_USE_VGNCA
+static int vgnca_alloc(struct vm_struct *area, int gfp_mask);
+static void vgnca_free(struct vm_struct *area);
+#endif
+
rwlock_t vmlist_lock = RW_LOCK_UNLOCKED;
struct vm_struct *vmlist;
@@ -309,6 +316,43 @@
return;
}
+
+void __vgnca_vunmap(void *addr, int deallocate_pages)
+{
+ struct vm_struct *area;
+
+ if (!addr)
+ return;
+
+ if ((PAGE_SIZE-1) & (unsigned long)addr) {
+ printk(KERN_ERR "Trying to vfree() bad address (%p)\n", addr);
+ return;
+ }
+
+ area = remove_vm_area(addr);
+ if (unlikely(!area)) {
+ printk(KERN_ERR "Trying to vfree() nonexistent vm area (%p)\n",
+ addr);
+ return;
+ }
+
+ if (deallocate_pages) {
+ int i;
+
+ for (i = 0; i < area->nr_pages; i++) {
+ if (unlikely(!area->pages[i]))
+ BUG();
+ __vgnca_free_page(area->pages[i]);
+ }
+
+ kfree(area->pages);
+ }
+
+ kfree(area);
+ return;
+}
+
+
/**
* vfree - release memory allocated by vmalloc()
*
@@ -325,7 +369,13 @@
__vunmap(addr, 1);
}
+void vgnca_vfree(void *addr)
+{
+ BUG_ON(in_interrupt());
+ __vgnca_vunmap(addr, 1);
+}
EXPORT_SYMBOL(vfree);
+EXPORT_SYMBOL(vgnca_vfree);
/**
* vunmap - release virtual mapping obtained by vmap()
@@ -392,7 +442,10 @@
{
struct vm_struct *area;
struct page **pages;
- unsigned int nr_pages, array_size, i;
+ unsigned int nr_pages, array_size;
+#ifndef CONFIG_USE_VGNCA
+ unsigned int i;
+#endif
size = PAGE_ALIGN(size);
if (!size || (size >> PAGE_SHIFT) > num_physpages)
@@ -414,6 +467,7 @@
}
memset(area->pages, 0, array_size);
+#ifndef CONFIG_USE_VGNCA
for (i = 0; i < area->nr_pages; i++) {
area->pages[i] = alloc_page(gfp_mask);
if (unlikely(!area->pages[i])) {
@@ -422,13 +476,20 @@
goto fail;
}
}
-
+#else /* CONFIG_USE_VGNCA */
+ if (!vgnca_alloc(area, gfp_mask))
+ goto fail;
+#endif /* CONFIG_USE_VGNCA */
if (map_vm_area(area, prot, &pages))
goto fail;
return area->addr;
fail:
+#ifndef CONFIG_USE_VGNCA
vfree(area->addr);
+#else /* CONFIG_USE_VGNCA */
+ vgnca_free(area);
+#endif /* CONFIG_USE_VGNCA */
return NULL;
}
@@ -541,3 +602,54 @@
read_unlock(&vmlist_lock);
return buf - buf_start;
}
+
+#ifdef CONFIG_USE_VGNCA
+
+/**
+ * VGNCA Alloc - allocate, in one pass, the pages backing a virtually contiguous area.
+ *
+ * @area: the struct vm_area used in this allocation.
+ * @gfp_mask: flags for the page level allocator (TODO: is this right?)
+ *
+ * Allocate pages (hopefully) more efficiently than calling alloc_page()
+ * for each page. @area->nr_pages must be set to the number of pages that
+ * should be allocated.
+ *
+ * In case of failure returns 0 and sets @area->nr_pages to the number of
+ * pages successfully allocated. In case of success returns 1.
+ */
+static int vgnca_alloc(struct vm_struct *area, int gfp_mask)
+{
+ unsigned int i;
+ unsigned long flags;
+
+ local_irq_save(flags);
+
+ for (i = 0; i < area->nr_pages; i++) {
+ area->pages[i] = vgnca_alloc_page(gfp_mask);
+ if (unlikely(!area->pages[i])) {
+ /* Successfully allocated i pages; they are freed via __vgnca_vunmap() */
+ area->nr_pages = i;
+
+ local_irq_restore(flags);
+ return 0;
+ }
+ }
+ local_irq_restore(flags);
+
+ return 1;
+}
+
+
+/**
+ * VGNCA Free - frees the pages allocated by vgnca_alloc() and the associated vm area.
+ *
+ * @area: the struct vm_area used in the allocation being freed.
+ *
+ */
+static void vgnca_free(struct vm_struct *area)
+{
+ vgnca_vfree(area->addr);
+}
+
+#endif /* CONFIG_USE_VGNCA */
[-- Attachment #3: large-frees.eps --]
[-- Type: image/x-eps, Size: 13942 bytes --]
[-- Attachment #4: large-allocations.eps --]
[-- Type: image/x-eps, Size: 13950 bytes --]
[-- Attachment #5: small-allocations.eps --]
[-- Type: image/x-eps, Size: 15609 bytes --]
[-- Attachment #6: small-frees.eps --]
[-- Type: image/x-eps, Size: 15829 bytes --]