From: Mel Gorman <mel@csn.ul.ie>
To: akpm@osdl.org
Cc: jschopp@austin.ibm.com, Mel Gorman <mel@csn.ul.ie>,
	kravetz@us.ibm.com, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org, lhms-devel@lists.sourceforge.net
Subject: [PATCH 5/8] Fragmentation Avoidance V17: 005_fallback
Date: Tue, 11 Oct 2005 16:12:47 +0100 (IST)
Message-ID: <20051011151246.16178.40148.sendpatchset@skynet.csn.ul.ie>
In-Reply-To: <20051011151221.16178.67130.sendpatchset@skynet.csn.ul.ie>

This patch implements fallback logic. In the event there are no 2^(MAX_ORDER-1)
blocks of pages left, it helps the system decide which free list to use. The
highlights of the patch are:

o Define an RCLM_FALLBACK type for fallbacks
o Use a percentage of each zone for fallbacks. When a reserved pool of pages
  is depleted, the allocator will try RCLM_FALLBACK before using anything
  else. This greatly reduces the number of fallbacks that cause fragmentation
  without needing complex balancing algorithms
o Add a fallback_reserve and fallback_balance so that the system knows how
  many 2^(MAX_ORDER-1) blocks are being used for fallbacks and whether more
  need to be reserved.
o Add a fallback_allocs[] array that determines the order in which free
  lists are used for each allocation type (a standalone sketch of how the
  table is read follows this list)

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
Signed-off-by: Mike Kravetz <kravetz@us.ibm.com>
Signed-off-by: Joel Schopp <jschopp@austin.ibm.com>
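
As a sanity check on the "about 12.5%" comment in free_area_init_core()
(last hunk below), assuming the common default of MAX_ORDER == 11: a zone of
realsize pages contains realsize >> (MAX_ORDER-1) blocks of 2^(MAX_ORDER-1)
pages, and fallback_balance starts at -(realsize >> (MAX_ORDER+2)), so the
target reserve is

	(realsize >> 13) / (realsize >> 10) = 1/8 = 12.5%

of the zone's large blocks. For example, in a 1GB zone with 4K pages
(realsize == 262144), fallback_balance starts at -32 against 256 blocks, and
min_fallback_reserved() is not satisfied until 33 blocks are held for
fallbacks.
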
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.14-rc3-004_markfree/include/linux/mmzone.h linux-2.6.14-rc3-005_fallback/include/linux/mmzone.h
--- linux-2.6.14-rc3-004_markfree/include/linux/mmzone.h	2005-10-11 12:08:07.000000000 +0100
+++ linux-2.6.14-rc3-005_fallback/include/linux/mmzone.h	2005-10-11 12:09:27.000000000 +0100
@@ -29,7 +29,8 @@
 #define RCLM_NORCLM   0
 #define RCLM_USER     1
 #define RCLM_KERN     2
-#define RCLM_TYPES    3
+#define RCLM_FALLBACK 3
+#define RCLM_TYPES    4
 #define BITS_PER_RCLM_TYPE 2
 
 #define for_each_rclmtype_order(type, order) \
@@ -163,6 +164,13 @@ struct zone {
 
 	struct free_area	free_area_lists[RCLM_TYPES][MAX_ORDER];
 
+	/*
+	 * Track what percentage of the zone should be used for fallbacks and
+	 * how much is being currently used
+	 */
+	unsigned long		fallback_reserve;
+	long			fallback_balance;
+
 	ZONE_PADDING(_pad1_)
 
 	/* Fields commonly accessed by the page reclaim scanner */
@@ -275,6 +283,17 @@ struct zonelist {
 	struct zone *zones[MAX_NUMNODES * MAX_NR_ZONES + 1]; // NULL delimited
 };
 
+static inline void inc_reserve_count(struct zone *zone, int type)
+{
+	if (type == RCLM_FALLBACK)
+		zone->fallback_reserve++;
+}
+
+static inline void dec_reserve_count(struct zone *zone, int type)
+{
+	if (type == RCLM_FALLBACK && zone->fallback_reserve)
+		zone->fallback_reserve--;
+}
 
 /*
  * The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.14-rc3-004_markfree/mm/page_alloc.c linux-2.6.14-rc3-005_fallback/mm/page_alloc.c
--- linux-2.6.14-rc3-004_markfree/mm/page_alloc.c	2005-10-11 12:08:47.000000000 +0100
+++ linux-2.6.14-rc3-005_fallback/mm/page_alloc.c	2005-10-11 12:09:27.000000000 +0100
@@ -53,6 +53,40 @@ unsigned long totalhigh_pages __read_mos
 long nr_swap_pages;
 
 /*
+ * fallback_allocs contains the fallback types for low memory conditions
+ * where the preferred allocation type is not available.
+ */
+int fallback_allocs[RCLM_TYPES][RCLM_TYPES+1] = {
+	{RCLM_NORCLM,	RCLM_FALLBACK, RCLM_KERN,   RCLM_USER, RCLM_TYPES},
+	{RCLM_KERN,     RCLM_FALLBACK, RCLM_NORCLM, RCLM_USER, RCLM_TYPES},
+	{RCLM_USER,     RCLM_FALLBACK, RCLM_NORCLM, RCLM_KERN, RCLM_TYPES},
+	{RCLM_FALLBACK, RCLM_NORCLM,   RCLM_KERN,   RCLM_USER, RCLM_TYPES}
+};
+
+/*
+ * Returns 1 if the required percentage of the zone is reserved for fallbacks
+ *
+ * fallback_balance and fallback_reserve are used to detect when the required
+ * percentage is reserved. fallback_balance is decremented when a
+ * 2^(MAX_ORDER-1) block is split and incremented when coalesced.
+ * fallback_reserve is incremented when a block is reserved for fallbacks
+ * and decremented when reassigned elsewhere.
+ *
+ */
+static inline int min_fallback_reserved(struct zone *zone) {
+	/* If fallback_balance is positive, we do not need to reserve */
+	if (zone->fallback_balance > 0)
+		return 1;
+
+	/*
+	 * When fallback_balance is negative, a reserve is required. The number
+	 * of reserved blocks required is related to the negative value of
+	 * fallback_balance
+	 */
+	return -(zone->fallback_balance) < zone->fallback_reserve;
+}
+
+/*
  * results with 256, 32 in the lowmem_reserve sysctl:
  *	1G machine -> (16M dma, 800M-16M normal, 1G-800M high)
  *	1G machine -> (16M dma, 784M normal, 224M high)
@@ -404,6 +438,8 @@ static inline void __free_pages_bulk (st
 		page_idx = combined_idx;
 		order++;
 	}
+	if (unlikely(order == MAX_ORDER-1))
+		zone->fallback_balance++;
 	set_page_order(page, order);
 	list_add_tail(&page->lru, &freelist[order].free_list);
 	freelist[order].nr_free++;
@@ -585,7 +621,12 @@ struct page* steal_largepage(struct zone
 	page = list_entry(area->free_list.next, struct page, lru);
 	area->nr_free--;
 
+	if (!min_fallback_reserved(zone))
+		alloctype = RCLM_FALLBACK;
+
 	set_pageblock_type(zone, page, (unsigned long)alloctype);
+	dec_reserve_count(zone, i);
+	inc_reserve_count(zone, alloctype);
 
 	return page;
 }
@@ -595,6 +636,8 @@ static inline struct page *
 remove_page(struct zone *zone, struct page *page, unsigned int order,
 		unsigned int current_order, struct free_area *area)
 {
+	if (unlikely(current_order == MAX_ORDER-1))
+		zone->fallback_balance--;
 	list_del(&page->lru);
 	rmv_page_order(page);
 	zone->free_pages -= 1UL << order;
@@ -602,6 +645,83 @@ remove_page(struct zone *zone, struct pa
 
 }
 
+/*
+ * If we are falling back and the allocation is RCLM_NORCLM,
+ * then reserve any buddies for the RCLM_NORCLM pool. These
+ * allocations fragment the worst, so this helps keep them
+ * in one place
+ */
+static inline struct free_area *
+fallback_buddy_reserve(int start_alloctype, struct zone *zone,
+			unsigned int current_order, struct page *page,
+			struct free_area *area)
+{
+	if (start_alloctype != RCLM_NORCLM)
+		return area;
+
+	area = &(zone->free_area_lists[RCLM_NORCLM][current_order]);
+
+	/* Reserve the whole block if this is a large split */
+	if (current_order >= MAX_ORDER / 2) {
+		int reserve_type = RCLM_NORCLM;
+		dec_reserve_count(zone, get_pageblock_type(zone,page));
+
+		/*
+		 * Use this block for fallbacks if the
+		 * minimum reserve is not being met
+		 */
+		if (!min_fallback_reserved(zone))
+			reserve_type = RCLM_FALLBACK;
+
+		set_pageblock_type(zone, page, (unsigned long)reserve_type);
+		inc_reserve_count(zone, reserve_type);
+	}
+	return area;
+}
+
+static struct page *
+fallback_alloc(int alloctype, struct zone *zone, unsigned int order)
+{
+	int *fallback_list;
+	int start_alloctype = alloctype;
+	struct free_area *area;
+	unsigned int current_order;
+	struct page *page;
+	int i;
+
+	/* Ok, pick the fallback order based on the type */
+	BUG_ON(alloctype >= RCLM_TYPES);
+	fallback_list = fallback_allocs[alloctype];
+
+	/*
+	 * Here, the free lists for the alloc type have been depleted, as
+	 * has the global pool, so fall back. When falling back, the largest
+	 * possible block is taken to keep the fallbacks clustered if possible
+	 */
+	for (i = 0; fallback_list[i] != RCLM_TYPES; i++) {
+		alloctype = fallback_list[i];
+
+		/* Find a block to allocate */
+		area = &(zone->free_area_lists[alloctype][MAX_ORDER-1]);
+		for (current_order = MAX_ORDER - 1; current_order > order;
+				current_order--, area--) {
+			if (list_empty(&area->free_list))
+				continue;
+
+			page = list_entry(area->free_list.next,
+						struct page, lru);
+			area->nr_free--;
+			area = fallback_buddy_reserve(start_alloctype, zone,
+					current_order, page, area);
+			return remove_page(zone, page, order,
+					current_order, area);
+
+		}
+	}
+
+	return NULL;
+}
+
 /* 
  * Do the hard work of removing an element from the buddy allocator.
  * Call me with the zone->lock already held.
@@ -628,7 +748,8 @@ static struct page *__rmqueue(struct zon
 	if (page != NULL)
 		return remove_page(zone, page, order, MAX_ORDER-1, area);
 
-	return NULL;
+	/* Try falling back */
+	return fallback_alloc(alloctype, zone, order);
 }
 
 /* 
@@ -2106,6 +2227,10 @@ static void __init free_area_init_core(s
 		spin_lock_init(&zone->lru_lock);
 		zone->zone_pgdat = pgdat;
 		zone->free_pages = 0;
+		zone->fallback_reserve = 0;
+
+		/* Set the balance so about 12.5% will be used for fallbacks */
+		zone->fallback_balance = -(realsize >> (MAX_ORDER+2));
 
 		zone->temp_priority = zone->prev_priority = DEF_PRIORITY;
 
