From: Christoph Lameter <clameter@sgi.com>
To: Andi Kleen <ak@suse.de>
Cc: Martin Bligh <mbligh@mbligh.org>,
	Alan Cox <alan@lxorguk.ukuu.org.uk>,
	akpm@google.com, linux-kernel@vger.kernel.org,
	Christoph Hellwig <hch@infradead.org>,
	James Bottomley <James.Bottomley@steeleye.com>,
	linux-mm@kvack.org
Subject: Re: [RFC] Initial alpha-0 for new page allocator API
Date: Fri, 22 Sep 2006 14:13:20 -0700 (PDT)
Message-ID: <Pine.LNX.4.64.0609221401550.9370@schroedinger.engr.sgi.com>
In-Reply-To: <200609222248.27700.ak@suse.de>

Next try.

- Drop the node parameter: nodes cover distinct physical address
  ranges, so we can match on them using the high / low parameters
  alone (a caller-side sketch of the intended usage follows these
  notes).

- Check the boundaries of a node before searching the zones in the
  node. This includes checking the upper / lower boundary of present
  memory, so we can simply fall back to regular alloc_pages() if,
  e.g., we have an x86_64 box with all memory below 4GB and have
  configured ZONE_DMA and ZONE_DMA32 off.

- Still no reclaim.

- Hmmm... I have no floppy drive....
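
For reference, a caller-side sketch of the intended usage (illustrative
only; the device pointer and the error handling are assumptions, not
part of this patch):

	/*
	 * Allocate an order-0 page whose physical address satisfies
	 * the device's coherent DMA mask, instead of guessing a zone
	 * via GFP_DMA / GFP_DMA32.
	 */
	struct page *page;

	page = alloc_pages_range(0, dev->coherent_dma_mask,
					GFP_KERNEL, 0);
	if (!page)
		return -ENOMEM;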

Index: linux-2.6.18-rc7-mm1/arch/i386/kernel/pci-dma.c
===================================================================
--- linux-2.6.18-rc7-mm1.orig/arch/i386/kernel/pci-dma.c	2006-09-22 15:10:42.246731179 -0500
+++ linux-2.6.18-rc7-mm1/arch/i386/kernel/pci-dma.c	2006-09-22 15:37:41.464093162 -0500
@@ -26,6 +26,9 @@ void *dma_alloc_coherent(struct device *
 			   dma_addr_t *dma_handle, gfp_t gfp)
 {
 	void *ret;
+	struct page *page;
+	unsigned long low = 0L;
+	unsigned long high = 0xffffffff;
 	struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL;
 	int order = get_order(size);
 	/* ignore region specifiers */
@@ -44,10 +47,15 @@ void *dma_alloc_coherent(struct device *
 			return NULL;
 	}
 
-	if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff))
-		gfp |= GFP_DMA;
+	if (dev == NULL)
+		/* Apply the safe ISA limit of 16MB */
+		high = 16*1024*1024L;
+	else if (dev->coherent_dma_mask < 0xffffffff)
+		high = dev->coherent_dma_mask;
 
-	ret = (void *)__get_free_pages(gfp, order);
+	page = alloc_pages_range(low, high, gfp, order);
+	/* alloc_pages_range() may fail; do not pass a NULL page on */
+	ret = page ? page_address(page) : NULL;
 
 	if (ret != NULL) {
 		memset(ret, 0, size);
Index: linux-2.6.18-rc7-mm1/include/linux/gfp.h
===================================================================
--- linux-2.6.18-rc7-mm1.orig/include/linux/gfp.h	2006-09-22 15:10:42.235994626 -0500
+++ linux-2.6.18-rc7-mm1/include/linux/gfp.h	2006-09-22 15:58:53.385391317 -0500
@@ -136,6 +136,9 @@ static inline struct page *alloc_pages_n
 		NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_mask));
 }
 
+extern struct page *alloc_pages_range(unsigned long low, unsigned long high,
+				gfp_t gfp_mask, unsigned int order);
+
 #ifdef CONFIG_NUMA
 extern struct page *alloc_pages_current(gfp_t gfp_mask, unsigned order);
 
Index: linux-2.6.18-rc7-mm1/mm/page_alloc.c
===================================================================
--- linux-2.6.18-rc7-mm1.orig/mm/page_alloc.c	2006-09-22 15:10:53.973976539 -0500
+++ linux-2.6.18-rc7-mm1/mm/page_alloc.c	2006-09-22 16:10:13.940439657 -0500
@@ -1195,9 +1195,155 @@ got_pg:
 #endif
 	return page;
 }
-
 EXPORT_SYMBOL(__alloc_pages);
 
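+/*
+ * Find a free block in the zone whose physical placement falls
+ * entirely within [low, high]. The caller must hold zone->lock.
+ */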
+static struct page *rmqueue_range(unsigned long low, unsigned long high,
+				struct zone *zone, unsigned int order)
+{
+	struct free_area * area;
+	unsigned int current_order;
+	struct page *page;
+
+	for (current_order = order; current_order < MAX_ORDER; ++current_order) {
+		area = zone->free_area + current_order;
+		if (list_empty(&area->free_list))
+			continue;
+
+		list_for_each_entry(page, &area->free_list, lru) {
+			/* low and high are physical address limits */
+			unsigned long addr = page_to_pfn(page) << PAGE_SHIFT;
+
+			if (addr >= low &&
+				addr + (PAGE_SIZE << order) - 1 <= high)
+					goto found_match;
+		}
+		continue;
+found_match:
+		list_del(&page->lru);
+		rmv_page_order(page);
+		area->nr_free--;
+		zone->free_pages -= 1UL << order;
+		expand(zone, page, order, current_order, area);
+		return page;
+	}
+	return NULL;
+}
+
+static struct page *zonelist_alloc_range(unsigned long low, unsigned long high,
+				gfp_t gfp_flags, unsigned int order,
+				struct zonelist *zl)
+{
+	struct zone **z = zl->zones;
+	struct page *page;
+
+	if (unlikely(*z == NULL))
+		/* Should this ever happen?? */
+		return NULL;
+
+	do {
+		struct zone *zone = *z;
+		unsigned long flags;
+
+		spin_lock_irqsave(&zone->lock, flags);
+		page = rmqueue_range(low, high, zone, order);
+		spin_unlock(&zone->lock);
+		if (!page) {
+			local_irq_restore(flags);
+			continue;
+		}
+		__count_zone_vm_events(PGALLOC, zone, 1 << order);
+		zone_statistics(zl, zone);
+		local_irq_restore(flags);
+
+		VM_BUG_ON(bad_range(zone, page));
+		if (!prep_new_page(page, order, gfp_flags))
+			goto got_pg;
+
+	} while (*(++z) != NULL);
+
+	/*
+	 * For now just give up. In the future we need something like
+	 * directed reclaim here.
+	 */
+	page = NULL;
+got_pg:
+#ifdef CONFIG_PAGE_OWNER
+	if (page)
+		set_page_owner(page, order, gfp_flags);
+#endif
+	return page;
+}
+
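+/*
+ * Allocate pages whose physical addresses fall within [low, high].
+ * Falls back to the regular allocator when the range maps onto a
+ * DMA zone or covers all of installed memory.
+ */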
+struct page *alloc_pages_range(unsigned long low, unsigned long high,
+					gfp_t gfp_flags, unsigned int order)
+{
+	const gfp_t wait = gfp_flags & __GFP_WAIT;
+	struct page *page = NULL;
+	struct pglist_data *lastpgdat;
+	int node;
+
+#ifdef CONFIG_ZONE_DMA
+	if (high < MAX_DMA_ADDRESS)
+		return alloc_pages(gfp_flags | __GFP_DMA, order);
+#endif
+#ifdef CONFIG_ZONE_DMA32
+	if (high < MAX_DMA32_ADDRESS)
+		return alloc_pages(gfp_flags | __GFP_DMA32, order);
+#endif
+	/*
+	 * Is there an upper/lower limit of installed memory that we could
+	 * check against instead of -1 ? The less memory installed the less
+	 * the chance that we would have to do the expensive range search.
+	 */
+
+	/* This probably should check against the last online node in the future */
+	lastpgdat = NODE_DATA(MAX_NUMNODES - 1);
+
+	if (high >= (((lastpgdat->node_start_pfn +
+			lastpgdat->node_spanned_pages) << PAGE_SHIFT) - 1) &&
+		low <= (NODE_DATA(0)->node_start_pfn << PAGE_SHIFT))
+			return alloc_pages(gfp_flags, order);
+
+	/*
+	 * Scan in the page allocator for memory.
+	 * We skip all the niceties of the page allocator since this is
+	 * used for device allocations that require memory from a limited
+	 * address range.
+	 */
+
+	might_sleep_if(wait);
+
+	for_each_online_node(node) {
+		struct pglist_data *pgdat = NODE_DATA(node);
+
+		if (low > ((pgdat->node_start_pfn +
+				pgdat->node_spanned_pages) << PAGE_SHIFT))
+			continue;
+
+		/*
+		 * This check assumes that increasing node numbers go
+		 * along with increasing addresses!
+		 */
+		if (high < (pgdat->node_start_pfn << PAGE_SHIFT))
+			break;
+
+		page = zonelist_alloc_range(low, high, gfp_flags, order,
+			NODE_DATA(node)->node_zonelists + gfp_zone(gfp_flags));
+		if (page)
+			break;
+	}
+	return page;
+}
+
 /*
  * Common helper functions.
  */

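To illustrate the boundary checks with a worked example: with 4k pages,
a node with node_start_pfn = 0x100000 and node_spanned_pages = 0x80000
spans physical addresses 0x100000000 through 0x17fffffff, so a request
with high = 0xffffffff trips the lower-boundary check and we never even
take a zone->lock on that node. A hypothetical ISA-limited caller (e.g.
the floppy case that I cannot test here) might then look like:

	/* Sketch only: accept any pages below the 16MB ISA limit */
	struct page *page = alloc_pages_range(0, 16*1024*1024,
						GFP_KERNEL, order);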