linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: menage@google.com
To: linux-mm@kvack.org
Cc: akpm@osdl.org
Subject: [RFC][PATCH 1/1] Expose per-node reclaim and migration to userspace
Date: Tue, 28 Nov 2006 19:06:56 -0800	[thread overview]
Message-ID: <20061129033826.268090000@menage.corp.google.com> (raw)
In-Reply-To: <20061129030655.941148000@menage.corp.google.com>

[-- Attachment #1: node_reclaim.patch --]
[-- Type: text/plain, Size: 8417 bytes --]

Currently the page migration APIs allow you to migrate pages from
particular processes, but don't provide a clean and efficient way to
migrate and/or reclaim memory from individual nodes.

This patch provides:

- an additional parameter to try_to_free_pages() to specify the
  priority at which the reclaim should give up if it doesn't make
  progress

- a way to trigger try_to_free_pages() for a given node with a given
  minimum priority, vy writing an integer to
  /sys/device/system/node/node<id>/try_to_free_pages

- a way to request that any migratable pages on a given node be
  migrated to availage pages on a specified set of nodes by writing a
  destination nodemask (in ASCII form) to
  /sys/device/system/node/node<id>/migrate_node

Signed-off-by: Paul Menage <menage@google.com>

---
 drivers/base/node.c       |   92 ++++++++++++++++++++++++++++++++++++++++++++++
 fs/buffer.c               |    2 -
 include/linux/mempolicy.h |    2 +
 include/linux/swap.h      |    2 -
 mm/mempolicy.c            |    3 -
 mm/page_alloc.c           |    2 -
 mm/vmscan.c               |    5 +-
 7 files changed, 101 insertions(+), 7 deletions(-)

Index: 2.6.19-node_reclaim/drivers/base/node.c
===================================================================
--- 2.6.19-node_reclaim.orig/drivers/base/node.c
+++ 2.6.19-node_reclaim/drivers/base/node.c
@@ -12,6 +12,8 @@
 #include <linux/topology.h>
 #include <linux/nodemask.h>
 #include <linux/cpu.h>
+#include <linux/swap.h>
+#include <linux/migrate.h>
 
 static struct sysdev_class node_class = {
 	set_kset_name("node"),
@@ -137,6 +139,92 @@ static ssize_t node_read_distance(struct
 static SYSDEV_ATTR(distance, S_IRUGO, node_read_distance, NULL);
 
 
+static ssize_t node_store_ttfp(struct sys_device *dev,
+			       struct sysdev_attribute *attr,
+			       const char *buf,
+			       size_t count) {
+	int nid = dev->id;
+	unsigned int priority;
+	struct zonelist *zl;
+	nodemask_t nodes;
+	ssize_t ret = count;
+
+	priority = max(0, min(DEF_PRIORITY, (int)simple_strtoul(buf, NULL, 0)));
+	printk(KERN_INFO "Calling try_to_free_pages(%d, %d)\n",
+	       nid, priority);
+
+	nodes_clear(nodes);
+	node_set(nid, nodes);
+	zl = bind_zonelist(&nodes);
+
+	if (!try_to_free_pages(zl->zones, GFP_USER, priority))
+		ret = -ENOMEM;
+
+	kfree(zl);
+
+	return ret;
+}
+
+static SYSDEV_ATTR(try_to_free_pages, 0200, NULL, node_store_ttfp);
+
+static struct page *migrate_from_node_page(struct page *page,
+					   unsigned long private,
+					   int **result) {
+	struct zonelist *zl = (struct zonelist *) private;
+	return __alloc_pages(GFP_HIGHUSER & ~__GFP_WAIT, 0, zl);
+}
+
+static ssize_t node_store_migrate_node(struct sys_device *dev,
+				       struct sysdev_attribute *attr,
+				       const char *buf,
+				       size_t count) {
+	int nid = dev->id;
+	nodemask_t nodes;
+	ssize_t ret;
+	struct zonelist *zl;
+	struct pglist_data *pgdat = NODE_DATA(nid);
+	int i;
+	int pagecount = 0, failcount = 0;
+	LIST_HEAD(pagelist);
+
+	ret = nodelist_parse(buf, nodes);
+	if (ret)
+		return ret;
+
+	zl = bind_zonelist(&nodes);
+
+	migrate_prep();
+
+	for (i = 0; i < pgdat->node_spanned_pages; ++i) {
+		struct page *page = pgdat_page_nr(pgdat, i);
+		if (!isolate_lru_page(page, &pagelist)) {
+			pagecount++;
+		} else {
+			failcount++;
+		}
+	}
+
+	ret = count;
+	printk(KERN_INFO "Migrating %d pages from node %d\n", pagecount, nid);
+	if (!list_empty(&pagelist)) {
+		int migrate_ret = migrate_pages(&pagelist,
+						migrate_from_node_page,
+						(unsigned long)zl);
+
+		printk(KERN_INFO "migrate_pages returned %d\n", migrate_ret);
+		if (migrate_ret < 0) {
+			ret = migrate_ret;
+		}
+	} else {
+		printk(KERN_INFO "No pages to migrate. Failcount = %d!\n",
+		       failcount++);
+	}
+
+	kfree(zl);
+	return ret;
+}
+
+static SYSDEV_ATTR(migrate_node, 0200, NULL, node_store_migrate_node);
 /*
  * register_node - Setup a driverfs device for a node.
  * @num - Node number to use when creating the device.
@@ -156,6 +244,8 @@ int register_node(struct node *node, int
 		sysdev_create_file(&node->sysdev, &attr_meminfo);
 		sysdev_create_file(&node->sysdev, &attr_numastat);
 		sysdev_create_file(&node->sysdev, &attr_distance);
+		sysdev_create_file(&node->sysdev, &attr_try_to_free_pages);
+		sysdev_create_file(&node->sysdev, &attr_migrate_node);
 	}
 	return error;
 }
@@ -173,6 +263,8 @@ void unregister_node(struct node *node)
 	sysdev_remove_file(&node->sysdev, &attr_meminfo);
 	sysdev_remove_file(&node->sysdev, &attr_numastat);
 	sysdev_remove_file(&node->sysdev, &attr_distance);
+	sysdev_remove_file(&node->sysdev, &attr_try_to_free_pages);
+	sysdev_remove_file(&node->sysdev, &attr_migrate_node);
 
 	sysdev_unregister(&node->sysdev);
 }
Index: 2.6.19-node_reclaim/fs/buffer.c
===================================================================
--- 2.6.19-node_reclaim.orig/fs/buffer.c
+++ 2.6.19-node_reclaim/fs/buffer.c
@@ -374,7 +374,7 @@ static void free_more_memory(void)
 	for_each_online_pgdat(pgdat) {
 		zones = pgdat->node_zonelists[gfp_zone(GFP_NOFS)].zones;
 		if (*zones)
-			try_to_free_pages(zones, GFP_NOFS);
+			try_to_free_pages(zones, GFP_NOFS, 0);
 	}
 }
 
Index: 2.6.19-node_reclaim/include/linux/mempolicy.h
===================================================================
--- 2.6.19-node_reclaim.orig/include/linux/mempolicy.h
+++ 2.6.19-node_reclaim/include/linux/mempolicy.h
@@ -175,6 +175,8 @@ int do_migrate_pages(struct mm_struct *m
 
 extern void *cpuset_being_rebound;	/* Trigger mpol_copy vma rebind */
 
+struct zonelist *bind_zonelist(nodemask_t *nodes);
+
 #else
 
 struct mempolicy {};
Index: 2.6.19-node_reclaim/include/linux/swap.h
===================================================================
--- 2.6.19-node_reclaim.orig/include/linux/swap.h
+++ 2.6.19-node_reclaim/include/linux/swap.h
@@ -187,7 +187,7 @@ extern int rotate_reclaimable_page(struc
 extern void swap_setup(void);
 
 /* linux/mm/vmscan.c */
-extern unsigned long try_to_free_pages(struct zone **, gfp_t);
+extern unsigned long try_to_free_pages(struct zone **, gfp_t, int priority);
 extern unsigned long shrink_all_memory(unsigned long nr_pages);
 extern int vm_swappiness;
 extern int remove_mapping(struct address_space *mapping, struct page *page);
Index: 2.6.19-node_reclaim/mm/mempolicy.c
===================================================================
--- 2.6.19-node_reclaim.orig/mm/mempolicy.c
+++ 2.6.19-node_reclaim/mm/mempolicy.c
@@ -134,7 +134,7 @@ static int mpol_check_policy(int mode, n
 }
 
 /* Generate a custom zonelist for the BIND policy. */
-static struct zonelist *bind_zonelist(nodemask_t *nodes)
+struct zonelist *bind_zonelist(nodemask_t *nodes)
 {
 	struct zonelist *zl;
 	int num, max, nd;
@@ -1908,4 +1908,3 @@ out:
 		m->version = (vma != priv->tail_vma) ? vma->vm_start : 0;
 	return 0;
 }
-
Index: 2.6.19-node_reclaim/mm/page_alloc.c
===================================================================
--- 2.6.19-node_reclaim.orig/mm/page_alloc.c
+++ 2.6.19-node_reclaim/mm/page_alloc.c
@@ -1371,7 +1371,7 @@ nofail_alloc:
 	reclaim_state.reclaimed_slab = 0;
 	p->reclaim_state = &reclaim_state;
 
-	did_some_progress = try_to_free_pages(zonelist->zones, gfp_mask);
+	did_some_progress = try_to_free_pages(zonelist->zones, gfp_mask, 0);
 
 	p->reclaim_state = NULL;
 
Index: 2.6.19-node_reclaim/mm/vmscan.c
===================================================================
--- 2.6.19-node_reclaim.orig/mm/vmscan.c
+++ 2.6.19-node_reclaim/mm/vmscan.c
@@ -1014,7 +1014,8 @@ static unsigned long shrink_zones(int pr
  * holds filesystem locks which prevent writeout this might not work, and the
  * allocation attempt will fail.
  */
-unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask)
+unsigned long try_to_free_pages(struct zone **zones, gfp_t gfp_mask,
+				int min_priority)
 {
 	int priority;
 	int ret = 0;
@@ -1057,7 +1058,7 @@ unsigned long try_to_free_pages(struct z
 		lru_pages += zone->nr_active + zone->nr_inactive;
 	}
 
-	for (priority = DEF_PRIORITY; priority >= 0; priority--) {
+	for (priority = DEF_PRIORITY; priority >= min_priority; priority--) {
 		sc.nr_scanned = 0;
 		if (!priority)
 			disable_swap_token();

--

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2006-11-29  3:06 UTC|newest]

Thread overview: 54+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-11-29  3:06 [RFC][PATCH 0/1] Node-based reclaim/migration menage
2006-11-29  3:06 ` menage [this message]
2006-11-29  6:07   ` [RFC][PATCH 1/1] Expose per-node reclaim and migration to userspace Nick Piggin
2006-11-29 21:57     ` Paul Menage
2006-11-30  4:13       ` Christoph Lameter
2006-11-30  4:18         ` Paul Menage
2006-11-30  7:38       ` Nick Piggin
2006-11-30  7:57         ` Paul Menage
2006-11-30  8:26           ` Nick Piggin
2006-11-30  8:39             ` Paul Menage
2006-11-30  8:55               ` Nick Piggin
2006-11-30  9:06                 ` Paul Menage
2006-11-30  9:21                   ` Nick Piggin
2006-11-30  9:45                     ` Paul Menage
2006-11-30 10:15                       ` Nick Piggin
2006-11-30 10:40                         ` Paul Menage
2006-11-30 11:04                           ` Nick Piggin
2006-11-30 11:23                             ` Paul Menage
2006-11-30 11:35                               ` Nick Piggin
2006-11-30  0:18   ` KAMEZAWA Hiroyuki
2006-11-30  0:25     ` Paul Menage
2006-11-30  0:38       ` KAMEZAWA Hiroyuki
2006-11-30  4:15       ` Christoph Lameter
2006-11-30  4:10   ` Christoph Lameter
2006-11-30  0:31 ` [RFC][PATCH 0/1] Node-based reclaim/migration KAMEZAWA Hiroyuki
2006-11-30  0:31   ` Paul Menage
2006-11-30  4:11     ` KAMEZAWA Hiroyuki
2006-11-30  4:17     ` Christoph Lameter
2006-11-30 10:45       ` Paul Menage
2006-11-30 11:12         ` KAMEZAWA Hiroyuki
2006-11-30 11:25           ` Paul Menage
2006-11-30 12:18             ` KAMEZAWA Hiroyuki
2006-11-30 18:28             ` Christoph Lameter
2006-11-30 18:35               ` Paul Menage
2006-11-30 18:39                 ` Christoph Lameter
2006-11-30 19:09                   ` Paul Menage
2006-11-30 19:42                     ` Christoph Lameter
2006-11-30 19:53                       ` Paul Menage
2006-11-30 20:00                         ` Christoph Lameter
2006-11-30 20:07                           ` Paul Menage
2006-11-30 20:15                             ` Christoph Lameter
2006-11-30 21:33                               ` Paul Menage
2006-11-30 23:41                                 ` Christoph Lameter
2006-11-30 23:48                                   ` Paul Menage
2006-12-01  2:23                                     ` Christoph Lameter
2006-12-01 19:32                                       ` Paul Menage
2006-12-01 19:56                                         ` Christoph Lameter
2006-12-01  2:44                                     ` KAMEZAWA Hiroyuki
2006-12-01  2:43                                       ` Christoph Lameter
2006-12-01  2:59                                         ` KAMEZAWA Hiroyuki
2006-12-01  2:44                                       ` Christoph Lameter
2006-12-01  3:10                                         ` KAMEZAWA Hiroyuki
2006-12-01  5:28                                           ` Christoph Lameter
2006-11-30  4:04 ` Christoph Lameter

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20061129033826.268090000@menage.corp.google.com \
    --to=menage@google.com \
    --cc=akpm@osdl.org \
    --cc=linux-mm@kvack.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox