From: Christoph Lameter <clameter@sgi.com>
To: Martin Bligh <mbligh@mbligh.org>
Cc: Andi Kleen <ak@suse.de>, Alan Cox <alan@lxorguk.ukuu.org.uk>,
akpm@google.com, linux-kernel@vger.kernel.org,
Christoph Hellwig <hch@infradead.org>,
James Bottomley <James.Bottomley@steeleye.com>,
linux-mm@kvack.org
Subject: Re: [RFC] Initial alpha-0 for new page allocator API
Date: Fri, 22 Sep 2006 13:23:35 -0700 (PDT) [thread overview]
Message-ID: <Pine.LNX.4.64.0609221321280.9181@schroedinger.engr.sgi.com> (raw)
In-Reply-To: <4514441E.70207@mbligh.org>
Here is an iniitial patch of alloc_pages_range (untested, compiles).
Directed reclaim missing. Feedback wanted. There are some comments in the
patch where I am at the boundary of my knowledge and it would be good if
someone could supply the info needed.
Index: linux-2.6.18-rc7-mm1/arch/i386/kernel/pci-dma.c
===================================================================
--- linux-2.6.18-rc7-mm1.orig/arch/i386/kernel/pci-dma.c 2006-09-22 15:10:42.246731179 -0500
+++ linux-2.6.18-rc7-mm1/arch/i386/kernel/pci-dma.c 2006-09-22 15:11:10.449709078 -0500
@@ -26,6 +26,8 @@ void *dma_alloc_coherent(struct device *
dma_addr_t *dma_handle, gfp_t gfp)
{
void *ret;
+ unsigned long low = 0L;
+ unsigned long high = 0xffffffff;
struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
int order = get_order(size);
/* ignore region specifiers */
@@ -44,10 +46,14 @@ void *dma_alloc_coherent(struct device *
return NULL;
}
- if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))
- gfp |= GFP_DMA;
+ if (dev == NULL)
+ /* Apply safe ISA LIMITS */
+ high = 16*1024*1024L;
+ else
+ if (dev->coherent_dma_mask < 0xffffffff)
+ high = dev->coherent_dma_mask;
- ret = (void *)__get_free_pages(gfp, order);
+ ret = page_address(alloc_pages_range(low, high, gfp, order));
if (ret != NULL) {
memset(ret, 0, size);
Index: linux-2.6.18-rc7-mm1/include/linux/gfp.h
===================================================================
--- linux-2.6.18-rc7-mm1.orig/include/linux/gfp.h 2006-09-22 15:10:42.235994626 -0500
+++ linux-2.6.18-rc7-mm1/include/linux/gfp.h 2006-09-22 15:11:10.462397735 -0500
@@ -136,6 +136,9 @@ static inline struct page *alloc_pages_n
NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_mask));
}
+extern struct page *alloc_pages_range(unsigned long low, unsigned long high,
+ int nid, gfp_t gfp_mask, unsigned int order);
+
#ifdef CONFIG_NUMA
extern struct page *alloc_pages_current(gfp_t gfp_mask, unsigned order);
Index: linux-2.6.18-rc7-mm1/mm/page_alloc.c
===================================================================
--- linux-2.6.18-rc7-mm1.orig/mm/page_alloc.c 2006-09-22 15:10:53.973976539 -0500
+++ linux-2.6.18-rc7-mm1/mm/page_alloc.c 2006-09-22 15:19:59.996440889 -0500
@@ -1195,9 +1195,119 @@ got_pg:
#endif
return page;
}
-
EXPORT_SYMBOL(__alloc_pages);
+static struct page *rmqueue_range(unsigned long low, unsigned long high,
+ struct zone *zone, unsigned int order)
+{
+ struct free_area * area;
+ unsigned int current_order;
+ struct page *page;
+
+ for (current_order = order; current_order < MAX_ORDER; ++current_order) {
+ area = zone->free_area + current_order;
+ if (list_empty(&area->free_list))
+ continue;
+
+ list_for_each_entry(page, &area->free_list, lru) {
+ unsigned long addr = (unsigned long)page_address(page);
+
+ if (addr >= low ||
+ addr < high - (PAGE_SIZE << order))
+ goto found_match;
+ }
+ continue;
+found_match:
+ list_del(&page->lru);
+ rmv_page_order(page);
+ area->nr_free--;
+ zone->free_pages -= 1UL << order;
+ expand(zone, page, order, current_order, area);
+ return page;
+ }
+ return NULL;
+}
+
+struct page *alloc_pages_range(unsigned long low, unsigned long high, int node,
+ gfp_t gfp_flags, unsigned int order)
+{
+ const gfp_t wait = gfp_flags & __GFP_WAIT;
+ struct zonelist *zl;
+ struct zone **z;
+ struct page *page;
+
+#ifdef CONFIG_ZONE_DMA
+ if (high < MAX_DMA_ADDRESS)
+ return alloc_pages(gfp_flags | __GFP_DMA, order);
+#endif
+#ifdef CONFIG_ZONE_DMA32
+ if (high < MAX_DMA32_ADDRESS)
+ return alloc_pages(gfp_flags | __GFP_DMA32, order);
+#endif
+ /*
+ * Is there an upper/lower limit of installed memory that we could
+ * check against instead of -1 ? The less memory installed the less
+ * the chance that we would have to do the expensive range search.
+ */
+ if (high == -1L && low == 0L)
+ return alloc_pages(gfp_flags, order);
+
+ if (node == -1)
+ node = numa_node_id();
+
+ /*
+ * Scan in the page allocator for memory.
+ * We skip all the niceties of the page allocator since this is
+ * used for device allocations that require memory from a limited
+ * address range.
+ */
+
+ might_sleep_if(wait);
+
+ zl = &NODE_DATA(node)->node_zonelists[gfp_zone(gfp_flags)];
+
+ z = zl->zones;
+
+ if (unlikely(*z == NULL))
+ /* Should this ever happen?? */
+ return NULL;
+
+ do {
+ struct zone *zone = *z;
+ unsigned long flags;
+
+ spin_lock_irqsave(&zone->lock, flags);
+ page = rmqueue_range(low, high, zone, order);
+ spin_unlock(&zone->lock);
+ if (!page) {
+ local_irq_restore(flags);
+ put_cpu();
+ continue;
+ }
+ __count_zone_vm_events(PGALLOC, zone, 1 << order);
+ zone_statistics(zl, zone);
+ local_irq_restore(flags);
+ put_cpu();
+
+ VM_BUG_ON(bad_range(zone, page));
+ if (!prep_new_page(page, order, gfp_flags))
+ goto got_pg;
+
+ } while (*(++z) != NULL);
+
+ /*
+ * For now just give up. In the future we need something like
+ * directed reclaim here.
+ */
+ page = NULL;
+got_pg:
+#ifdef CONFIG_PAGE_OWNER
+ if (page)
+ set_page_owner(page, order, gfp_flags);
+#endif
+ return page;
+}
+
/*
* Common helper functions.
*/
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2006-09-22 20:23 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2006-09-22 4:02 Christoph Lameter
2006-09-22 6:17 ` Andi Kleen
2006-09-22 16:35 ` Christoph Lameter
2006-09-22 19:10 ` Andi Kleen
2006-09-22 19:17 ` Christoph Lameter
2006-09-22 19:24 ` Martin Bligh
2006-09-22 20:10 ` Alan Cox
2006-09-22 20:02 ` Andi Kleen
2006-09-22 20:14 ` Martin Bligh
2006-09-22 20:23 ` Christoph Lameter [this message]
2006-09-22 20:41 ` Jesse Barnes
2006-09-22 21:01 ` Christoph Lameter
2006-09-22 21:14 ` Jesse Barnes
2006-09-22 21:21 ` Christoph Lameter
2006-09-22 20:48 ` Andi Kleen
2006-09-22 21:13 ` Christoph Lameter
2006-09-22 21:32 ` Christoph Lameter
2006-09-22 23:34 ` More thoughts on getting rid of ZONE_DMA Andi Kleen
2006-09-23 0:23 ` Christoph Lameter
2006-09-23 0:39 ` Andi Kleen
2006-09-23 0:25 ` Christoph Lameter
2006-09-23 0:37 ` Andi Kleen
2006-09-24 2:13 ` Christoph Lameter
2006-09-24 2:36 ` Martin J. Bligh
2006-09-24 7:26 ` Andi Kleen
2006-09-24 7:19 ` Andi Kleen
2006-09-22 17:36 ` [RFC] Initial alpha-0 for new page allocator API Christoph Lameter
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=Pine.LNX.4.64.0609221321280.9181@schroedinger.engr.sgi.com \
--to=clameter@sgi.com \
--cc=James.Bottomley@steeleye.com \
--cc=ak@suse.de \
--cc=akpm@google.com \
--cc=alan@lxorguk.ukuu.org.uk \
--cc=hch@infradead.org \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-mm@kvack.org \
--cc=mbligh@mbligh.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox