linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: KUROSAWA Takahiro <kurosawa@valinux.co.jp>
To: ckrm-tech@lists.sourceforge.net
Cc: linux-mm@kvack.org, KUROSAWA Takahiro <kurosawa@valinux.co.jp>
Subject: [PATCH 8/8] Add a CKRM memory resource controller using pzones
Date: Tue, 31 Jan 2006 11:30:40 +0900 (JST)	[thread overview]
Message-ID: <20060131023040.7915.47443.sendpatchset@debian> (raw)
In-Reply-To: <20060131023000.7915.71955.sendpatchset@debian>

This patch implements the CKRM memory resource controller using
pzones.  This patch requires CKRM-patched source code.

CKRM patches can be obtained from
 http://sourceforge.net/project/showfiles.php?group_id=85838&package_id=163747

The CKRM patches require configfs-patched source code:
 http://oss.oracle.com/projects/ocfs2/dist/files/patches/2.6.15-rc5/2005-12-14/01_configfs.patch

Signed-off-by: MAEDA Naoaki <maeda.naoaki@jp.fujitsu.com>

---
 include/linux/gfp.h |   31 ++
 mm/Kconfig          |    8 
 mm/Makefile         |    2 
 mm/mem_rc_pzone.c   |  632 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 mm/mempolicy.c      |   10 
 5 files changed, 680 insertions(+), 3 deletions(-)

diff -urNp b/include/linux/gfp.h c/include/linux/gfp.h
--- b/include/linux/gfp.h	2006-01-26 18:08:29.000000000 +0900
+++ c/include/linux/gfp.h	2006-01-26 17:52:06.000000000 +0900
@@ -104,12 +104,43 @@ static inline void arch_free_page(struct
 extern struct page *
 FASTCALL(__alloc_pages(gfp_t, unsigned int, struct zonelist *));
 
+#ifdef CONFIG_MEM_RC
+static inline int mem_rc_available(gfp_t gfp_mask, unsigned int order)
+{
+	gfp_mask &= GFP_LEVEL_MASK & ~(__GFP_HIGHMEM | __GFP_COLD);
+	return gfp_mask == GFP_USER && order == 0;
+}
+
+extern struct page *alloc_page_mem_rc(int nid, gfp_t gfp_mask);
+extern struct zonelist *mem_rc_get_zonelist(int nd, gfp_t gfp_mask,
+		unsigned int order);
+#else
+static inline int mem_rc_available(gfp_t gfp_mask, unsigned int order)
+{
+	return 0;
+}
+
+static inline struct page *alloc_page_mem_rc(int nid, gfp_t gfp_mask)
+{
+	return NULL;
+}
+
+static inline struct zonelist *mem_rc_get_zonelist(int nd, gfp_t gfp_mask,
+		unsigned int order)
+{
+	return NULL;
+}
+#endif
+
 static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
 						unsigned int order)
 {
 	if (unlikely(order >= MAX_ORDER))
 		return NULL;
 
+	if (mem_rc_available(gfp_mask, order))
+		return alloc_page_mem_rc(nid, gfp_mask);
+
 	return __alloc_pages(gfp_mask, order,
 		NODE_DATA(nid)->node_zonelists + gfp_zone(gfp_mask));
 }
diff -urNp b/mm/Kconfig c/mm/Kconfig
--- b/mm/Kconfig	2006-01-26 18:09:01.000000000 +0900
+++ c/mm/Kconfig	2006-01-26 18:07:11.000000000 +0900
@@ -138,3 +138,11 @@ config PSEUDO_ZONE
 	help
 	  This option provides pseudo zone creation from a non-pseudo zone.
 	  Pseudo zones could be used for memory resource management.
+
+config MEM_RC
+	bool "Memory resource controller"
+	select PSEUDO_ZONE
+	depends on CPUMETER || CKRM
+	help
+	  This option will let you control the memory resource by using
+	  the pseudo zone.
diff -urNp b/mm/Makefile c/mm/Makefile
--- b/mm/Makefile	2006-01-26 18:09:11.000000000 +0900
+++ c/mm/Makefile	2006-01-26 17:52:06.000000000 +0900
@@ -20,3 +20,5 @@ obj-$(CONFIG_SHMEM) += shmem.o
 obj-$(CONFIG_TINY_SHMEM) += tiny-shmem.o
 obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
 obj-$(CONFIG_FS_XIP) += filemap_xip.o
+
+obj-$(CONFIG_MEM_RC) += mem_rc_pzone.o
diff -urNp b/mm/mem_rc_pzone.c c/mm/mem_rc_pzone.c
--- b/mm/mem_rc_pzone.c	1970-01-01 09:00:00.000000000 +0900
+++ c/mm/mem_rc_pzone.c	2006-01-26 18:07:11.000000000 +0900
@@ -0,0 +1,632 @@
+/*
+ *  mm/mem_rc_pzone.c
+ *
+ *  Memory resource controller by using pzones.
+ *
+ *  Copyright 2005 FUJITSU LIMITED
+ *
+ *  This file is subject to the terms and conditions of the GNU General Public
+ *  License.  See the file COPYING in the main directory of the Linux
+ *  distribution for more details.
+ */
+
+#include <linux/config.h>
+#include <linux/stddef.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/mmzone.h>
+#include <linux/cpuset.h>
+#include <linux/bitops.h>
+#include <linux/cpumask.h>
+#include <linux/nodemask.h>
+#include <linux/ckrm_rc.h>
+
+#include <asm/semaphore.h>
+
+#define MEM_RC_METER_BASE	100
+#define MEM_RC_METER_TO_PAGES(_rcd, _node, _zidx, _val) \
+	((_rcd)->zone_pages[(_node)][(_zidx)] * (_val) / MEM_RC_METER_BASE)
+
+struct mem_rc_domain {
+	struct semaphore sem;
+	nodemask_t nodes;
+	unsigned long *zone_pages[MAX_NUMNODES];
+};
+
+struct mem_rc {
+	unsigned long guarantee;
+	struct mem_rc_domain *rcd;
+	struct zone **zones[MAX_NUMNODES];
+	struct zonelist *zonelists[MAX_NUMNODES];
+};
+
+
+struct ckrm_mem {
+	struct ckrm_class *class;	/* the class I belong to */
+	struct ckrm_class *parent;	/* parent of the class above. */
+	struct ckrm_shares shares;
+	spinlock_t cnt_lock;	/* always grab parent's lock before child's */
+	struct mem_rc	*mem_rc;	/* mem resource controller */
+	int 	cnt_total_guarantee; 	/* total guarantee behind the class */
+};
+
+static struct mem_rc_domain *grcd; /* system wide resource controller domain */
+static struct ckrm_res_ctlr rcbs; /* resource controller callback structure */
+
+static void mem_rc_destroy_rcdomain(void *arg)
+{
+	struct mem_rc_domain *rcd = arg;
+	int node;
+
+	for_each_node_mask(node, rcd->nodes) {
+		if (rcd->zone_pages[node])
+			kfree(rcd->zone_pages[node]);
+	}
+
+	kfree(rcd);
+}
+
+static void *mem_rc_create_rcdomain(struct cpuset *cs,
+					cpumask_t cpus, nodemask_t mems)
+{
+	struct mem_rc_domain *rcd;
+	struct zone *z;
+	pg_data_t *pgdat;
+	unsigned long *pp;
+	int i, node, allocn;
+
+	allocn = first_node(mems);
+	rcd = kmalloc_node(sizeof(*rcd), GFP_KERNEL, allocn);
+	if (!rcd)
+		return NULL;
+
+	memset(rcd, 0, sizeof(*rcd));
+
+	init_MUTEX(&rcd->sem);
+	rcd->nodes = mems;
+	for_each_node_mask(node, mems) {
+		pgdat = NODE_DATA(node);
+
+		pp = kmalloc_node(sizeof(unsigned long) * MAX_NR_ZONES,
+				  GFP_KERNEL, allocn);
+		if (!pp)
+			goto failed;
+
+		rcd->zone_pages[node] = pp;
+		for (i = 0; i < MAX_NR_ZONES; i++) {
+			if (i == ZONE_DMA) {
+				pp[i] = 0;
+				continue;
+			}
+			z = pgdat->node_zones + i;
+			pp[i] = z->present_pages;
+		}
+	}
+
+	return rcd;
+
+failed:
+	mem_rc_destroy_rcdomain(rcd);
+
+	return NULL;
+}
+
+
+static void *mem_rc_create(void *arg, char *name)
+{
+	struct mem_rc_domain *rcd = arg;
+	struct mem_rc *mr;
+	struct zonelist *zl, *zl_ref;
+	struct zone *parent, *z, *z_ref;
+	pg_data_t *pgdat;
+	int node, allocn;
+	int i, j;
+
+	allocn = first_node(rcd->nodes);
+	mr = kmalloc_node(sizeof(*mr), GFP_KERNEL, allocn);
+	if (!mr)
+		return NULL;
+
+	memset(mr, 0, sizeof(*mr));
+
+	down(&rcd->sem);
+	mr->rcd = rcd;
+	for_each_node_mask(node, rcd->nodes) {
+		pgdat = NODE_DATA(node);
+
+		mr->zones[node]
+			= kmalloc_node(sizeof(*mr->zones[node]) * MAX_NR_ZONES,
+				       GFP_KERNEL, allocn);
+		if (!mr->zones[node])
+			goto failed;
+
+		memset(mr->zones[node], 0,
+		       sizeof(*mr->zones[node]) * MAX_NR_ZONES);
+
+		mr->zonelists[node]
+			= kmalloc_node(sizeof(*mr->zonelists[node]),
+				       GFP_KERNEL, allocn);
+		if (!mr->zonelists[node])
+			goto failed;
+
+		memset(mr->zonelists[node], 0, sizeof(*mr->zonelists[node]));
+
+		for (i = 0; i < MAX_NR_ZONES; i++) {
+			parent = pgdat->node_zones + i;
+			if (rcd->zone_pages[node][i] == 0)
+				continue;
+
+			z = pzone_create(parent, name, 0);
+			if (!z)
+				goto failed;
+			mr->zones[node][i] = z;
+		}
+	}
+
+	for_each_node_mask(node, rcd->nodes) {
+		/* NORMAL zones and DMA zones also in HIGHMEM zonelist. */
+		zl_ref = NODE_DATA(node)->node_zonelists + __GFP_HIGHMEM;
+		zl = mr->zonelists[node];
+
+		for (j = i = 0; i < ARRAY_SIZE(zl_ref->zones); i++) {
+			z_ref = zl_ref->zones[i];
+			if (!z_ref)
+				break;
+
+			z = mr->zones[node][zone_idx(z_ref)];
+			if (!z)
+				continue;
+			zl->zones[j++] = z;
+		}
+		zl->zones[j] = NULL;
+	}
+	up(&rcd->sem);
+
+	return mr;
+
+failed:
+	for_each_node_mask(node, rcd->nodes) {
+		if (mr->zonelists[node])
+			kfree(mr->zonelists[node]);
+
+		if (!mr->zones[node])
+			continue;
+
+		for (i = 0; i < MAX_NR_ZONES; i++) {
+			z = mr->zones[node][i];
+			if (!z)
+				continue;
+			pzone_destroy(z);
+		}
+		kfree(mr->zones[node]);
+	}
+	up(&rcd->sem);
+	kfree(mr);
+
+	return NULL;
+}
+
+/* Tear down @p: destroy its pzones and free its tables.  NULL is a no-op. */
+static void mem_rc_destroy(void *p)
+{
+	struct mem_rc *mr = p;
+	struct mem_rc_domain *rcd;
+	int node, i;
+
+	if (!mr)	/* mem_res_free() passes NULL when no controller exists */
+		return;
+
+	rcd = mr->rcd;
+	down(&rcd->sem);
+	for (node = 0; node < MAX_NUMNODES; node++) {
+		kfree(mr->zonelists[node]);	/* kfree(NULL) is a no-op */
+		if (!mr->zones[node])
+			continue;
+
+		for (i = 0; i < MAX_NR_ZONES; i++) {
+			if (mr->zones[node][i])
+				pzone_destroy(mr->zones[node][i]);
+		}
+		kfree(mr->zones[node]);
+	}
+	up(&rcd->sem);
+
+	kfree(mr);
+}
+
+/*
+ * Resize every pzone of @ctldata to @val/MEM_RC_METER_BASE of the page count
+ * the domain recorded for its zone.  Returns 0, or the error after rollback.
+ */
+static int mem_rc_set_guar(void *ctldata, unsigned long val)
+{
+	struct mem_rc *mr = ctldata;
+	struct mem_rc_domain *rcd = mr->rcd;
+	struct zone *z;
+	nodemask_t nodes_done;
+	int err, node, i;
+
+	down(&rcd->sem);
+	nodes_clear(nodes_done);
+	for_each_node_mask(node, rcd->nodes) {
+		for (i = 0; i < MAX_NR_ZONES; i++) {
+			z = mr->zones[node][i];
+			if (!z)
+				continue;
+
+			err = pzone_set_numpages(z,
+				MEM_RC_METER_TO_PAGES(rcd, node, i, val));
+			if (err)
+				goto undo;
+		}
+		node_set(node, nodes_done);
+	}
+
+	mr->guarantee = val;
+	up(&rcd->sem);
+	return 0;
+
+undo:
+	/* Restore the zones of the failing node that were already resized. */
+	for (i--; i >= 0; i--) {
+		z = mr->zones[node][i];
+		if (z)
+			pzone_set_numpages(z, MEM_RC_METER_TO_PAGES(rcd,
+						node, i, mr->guarantee));
+	}
+	/* Restore every node that was completed before the failure. */
+	for_each_node_mask(node, nodes_done) {
+		for (i = 0; i < MAX_NR_ZONES; i++) {
+			z = mr->zones[node][i];
+			if (z)
+				pzone_set_numpages(z, MEM_RC_METER_TO_PAGES(
+						rcd, node, i, mr->guarantee));
+		}
+	}
+	up(&rcd->sem);
+	return err;
+}
+
+static int mem_rc_get_cur(void *ctldata, unsigned long *valp)
+{
+	struct mem_rc *mr = ctldata;
+	struct mem_rc_domain *rcd = mr->rcd;
+	struct zone *z;
+	unsigned long total, used;
+	int node;
+	int i;
+
+	total = used = 0;
+	for_each_node_mask(node, rcd->nodes) {
+		for (i = 0; i < MAX_NR_ZONES; i++) {
+			z = mr->zones[node][i];
+			if (!z)
+				continue;
+			total += z->present_pages;
+			used += z->present_pages - z->free_pages;
+		}
+	}
+
+	if (total > 0)
+		*valp = mr->guarantee * used / total;
+	else
+		*valp = 0;
+
+	return 0;
+}
+
+#if 0
+static struct cpumeter_ctlr mem_rc_ctlr = {
+	.name = "mem",
+	.create_rcdomain = mem_rc_create_rcdomain,
+	.destroy_rcdomain = mem_rc_destroy_rcdomain,
+	.create = mem_rc_create,
+	.destroy = mem_rc_destroy,
+	.set_guar = mem_rc_set_guar,
+	.set_lim = NULL,
+	.get_cur = mem_rc_get_cur,
+};
+
+int mem_rc_init(void)
+{
+	int err;
+
+	err = cpumeter_register_controller(&mem_rc_ctlr);
+	if (err)
+		printk(KERN_ERR "mem_rc: register to cpumeter failed\n");
+	else
+		printk(KERN_INFO
+		       "mem_rc: memory resource controller registered.\n");
+
+	return err;
+}
+
+__initcall(mem_rc_init);
+
+void *mem_rc_get(task_t *tsk)
+{
+	return cpumeter_get_controller_data(tsk->cpuset, &mem_rc_ctlr);
+}
+
+#endif
+
+struct mem_rc *mem_rc_get(task_t *tsk)
+{
+	struct ckrm_class *class = tsk->class;
+	struct ckrm_mem *res;
+
+	if (unlikely(class == NULL))
+		return NULL;
+
+	res = ckrm_get_res_class(class, rcbs.resid, struct ckrm_mem);
+
+	if (unlikely(res == NULL))
+		return NULL;
+
+	return res->mem_rc;
+}
+EXPORT_SYMBOL(mem_rc_get);
+
+struct page *alloc_page_mem_rc(int nid, gfp_t gfpmask)
+{
+	struct mem_rc *mr;
+
+	mr = mem_rc_get(current);
+	if (!mr)
+		return __alloc_pages(gfpmask, 0,
+				     NODE_DATA(nid)->node_zonelists
+				     + (gfpmask & GFP_ZONEMASK));
+
+	return __alloc_pages(gfpmask, 0, mr->zonelists[nid]);
+}
+EXPORT_SYMBOL(alloc_page_mem_rc);
+
+struct zonelist *mem_rc_get_zonelist(int nd, gfp_t gfpmask,
+				     unsigned int order)
+{
+	struct mem_rc *mr;
+
+	if (!mem_rc_available(gfpmask, order))
+		return NULL;
+
+	mr = mem_rc_get(current);
+	if (!mr)
+		return NULL;
+
+	return mr->zonelists[nd];
+}
+
+/* Push @val to the memory controller attached to @res, if it has one. */
+static void mem_rc_set_guarantee(struct ckrm_mem *res, int val)
+{
+	int	rc;
+
+	if (res->mem_rc == NULL)
+		return;
+	/* mem_rc_set_guar() records the new value itself, only on success. */
+	rc = mem_rc_set_guar(res->mem_rc, (unsigned long)val);
+	if (rc)
+		printk(KERN_ERR "mem_rc_set_guar failed, err = %d\n", rc);
+}
+
+static void mem_res_initcls_one(struct ckrm_mem * res)
+{
+	res->shares.my_guarantee = 0;
+	res->shares.my_limit = CKRM_SHARE_DONTCARE;
+	res->shares.total_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
+	res->shares.max_limit = CKRM_SHARE_DONTCARE;
+	res->shares.unused_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
+	res->cnt_total_guarantee = 0;
+
+	return;
+}
+
+static void *mem_res_alloc(struct ckrm_class *class,
+				struct ckrm_class *parent)
+{
+	struct ckrm_mem *res;
+
+	res = kmalloc(sizeof(struct ckrm_mem), GFP_ATOMIC);
+
+	if (res) {
+		memset(res, 0, sizeof(struct ckrm_mem));
+		res->class = class;
+		res->parent = parent;
+		mem_res_initcls_one(res);
+		res->cnt_lock = SPIN_LOCK_UNLOCKED;
+		if (!parent)	{	/* root class */
+			res->cnt_total_guarantee = CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
+			res->shares.my_guarantee = CKRM_SHARE_DONTCARE;
+		} else {
+			res->mem_rc = (struct mem_rc *)mem_rc_create(grcd, class->name);
+			if (res->mem_rc == NULL)
+				printk(KERN_ERR "mem_rc_create failed\n");
+		}
+	} else {
+		printk(KERN_ERR
+		       "mem_res_alloc: failed GFP_ATOMIC alloc\n");
+	}
+	return res;
+}
+
+/* Free @my_res after handing its guarantee back to the parent class. */
+static void mem_res_free(void *my_res)
+{
+	struct ckrm_mem *res = my_res, *parres;
+	u64	temp = 0;
+
+	if (!res)
+		return;
+	parres = ckrm_get_res_class(res->parent, rcbs.resid, struct ckrm_mem);
+	if (parres) {	/* guard the lookup as recalc_and_propagate() does */
+		spin_lock(&parres->cnt_lock);
+		ckrm_child_guarantee_changed(&parres->shares, res->shares.my_guarantee, 0);
+		if (parres->shares.total_guarantee) {
+			temp = (u64) parres->shares.unused_guarantee
+					* parres->cnt_total_guarantee;
+			do_div(temp, parres->shares.total_guarantee);
+		}
+		mem_rc_set_guarantee(parres, temp);
+		spin_unlock(&parres->cnt_lock);
+	}
+	if (res->mem_rc)	/* unset for the root class or a failed create */
+		mem_rc_destroy(res->mem_rc);
+	kfree(res);
+}
+
+static void
+recalc_and_propagate(struct ckrm_mem * res)
+{
+	struct ckrm_class *child = NULL;
+	struct ckrm_mem *parres, *childres;
+	u64	cnt_total = 0,	cnt_guar = 0;
+
+	parres = ckrm_get_res_class(res->parent, rcbs.resid, struct ckrm_mem);
+
+	if (parres) {
+		struct ckrm_shares *par = &parres->shares;
+		struct ckrm_shares *self = &res->shares;
+
+		/* calculate total and current guarantee */
+		if (par->total_guarantee && self->total_guarantee) {
+			cnt_total = (u64) self->my_guarantee
+					 * parres->cnt_total_guarantee;
+			do_div(cnt_total, par->total_guarantee);
+			cnt_guar = (u64) self->unused_guarantee * cnt_total;
+			do_div(cnt_guar, self->total_guarantee);
+		}
+		mem_rc_set_guarantee(res, (int) cnt_guar);
+		res->cnt_total_guarantee = (int ) cnt_total;
+	}
+
+	/* propagate to children */
+	ckrm_lock_hier(res->class);
+	while ((child = ckrm_get_next_child(res->class, child)) != NULL) {
+		childres =
+			ckrm_get_res_class(child, rcbs.resid, struct ckrm_mem);
+		if (childres) {
+		    spin_lock(&childres->cnt_lock);
+		    recalc_and_propagate(childres);
+		    spin_unlock(&childres->cnt_lock);
+		}
+	}
+	ckrm_unlock_hier(res->class);
+	return;
+}
+
+static int mem_set_share_values(void *my_res, struct ckrm_shares *new)
+{
+	struct ckrm_mem *parres, *res = my_res;
+	struct ckrm_shares *cur = &res->shares, *par;
+	int rc = -EINVAL;
+	u64	temp = 0;
+
+	if (!res)
+		return rc;
+
+	if (res->parent) {
+		parres =
+		   ckrm_get_res_class(res->parent, rcbs.resid, struct ckrm_mem);
+		spin_lock(&parres->cnt_lock);
+		spin_lock(&res->cnt_lock);
+		par = &parres->shares;
+	} else {
+		spin_lock(&res->cnt_lock);
+		par = NULL;
+		parres = NULL;
+	}
+
+	rc = ckrm_set_shares(new, cur, par);
+
+	if (rc)
+		goto share_err;
+
+	if (parres) {
+		/* adjust parent's unused guarantee */
+		if (par->total_guarantee) {
+			temp = (u64) par->unused_guarantee
+					* parres->cnt_total_guarantee;
+			do_div(temp, par->total_guarantee);
+		}
+		mem_rc_set_guarantee(parres, temp);
+	} else {
+		/* adjust root class's unused guarantee */
+		temp = (u64) cur->unused_guarantee
+				* CKRM_SHARE_DFLT_TOTAL_GUARANTEE;
+		do_div(temp, cur->total_guarantee);
+		mem_rc_set_guarantee(res, temp);
+	}
+	recalc_and_propagate(res);
+
+share_err:
+	spin_unlock(&res->cnt_lock);
+	if (res->parent)
+		spin_unlock(&parres->cnt_lock);
+	return rc;
+}
+
+static int mem_get_share_values(void *my_res, struct ckrm_shares *shares)
+{
+	struct ckrm_mem *res = my_res;
+
+	if (!res)
+		return -EINVAL;
+	*shares = res->shares;
+	return 0;
+}
+
+/* Format the class's current usage into @buf; returns the sprintf() count. */
+static ssize_t mem_show_stats(void *my_res, char *buf)
+{
+	struct ckrm_mem *res = my_res;
+	unsigned long val;
+
+	if (!res)
+		return -EINVAL;
+
+	if (res->mem_rc == NULL)	/* no controller: nothing to report */
+		return 0;
+
+	mem_rc_get_cur(res->mem_rc, &val);
+	/* val is unsigned long, so print it with %lu rather than %ld */
+	return sprintf(buf, "mem:current=%lu\n", val);
+}
+
+static struct ckrm_res_ctlr rcbs = {
+	.res_name = "mem",
+	.resid = -1,
+	.res_alloc = mem_res_alloc,
+	.res_free = mem_res_free,
+	.set_share_values = mem_set_share_values,
+	.get_share_values = mem_get_share_values,
+	.show_stats = mem_show_stats,
+};
+
+static void init_global_rcd(void)
+{
+	grcd = (struct mem_rc_domain *) mem_rc_create_rcdomain((struct cpuset *)NULL, cpu_online_map, node_online_map);
+	if (grcd == NULL)
+		printk("mem_rc_create_rcdomain failed\n");
+}
+
+int __init init_ckrm_mem_res(void)
+{
+	init_global_rcd();
+	if (rcbs.resid == CKRM_NO_RES)	{
+		ckrm_register_res_ctlr(&rcbs);
+	}
+	return 0;
+}
+
+void __exit exit_ckrm_mem_res(void)
+{
+	ckrm_unregister_res_ctlr(&rcbs);
+	mem_rc_destroy_rcdomain(grcd);
+}
+
+module_init(init_ckrm_mem_res);
+module_exit(exit_ckrm_mem_res);
+
+MODULE_LICENSE("GPL");	/* the macros do not supply the terminating ';' */
diff -urNp b/mm/mempolicy.c c/mm/mempolicy.c
--- b/mm/mempolicy.c	2006-01-03 12:21:10.000000000 +0900
+++ c/mm/mempolicy.c	2006-01-26 17:52:06.000000000 +0900
@@ -726,8 +726,10 @@ get_vma_policy(struct task_struct *task,
 }
 
 /* Return a zonelist representing a mempolicy */
-static struct zonelist *zonelist_policy(gfp_t gfp, struct mempolicy *policy)
+static struct zonelist *zonelist_policy(gfp_t gfp, int order,
+		struct mempolicy *policy)
 {
+	struct zonelist *zl;
 	int nd;
 
 	switch (policy->policy) {
@@ -746,6 +748,8 @@ static struct zonelist *zonelist_policy(
 	case MPOL_INTERLEAVE: /* should not happen */
 	case MPOL_DEFAULT:
 		nd = numa_node_id();
+		if ((zl = mem_rc_get_zonelist(nd, gfp, order)) != NULL)
+			return zl;
 		break;
 	default:
 		nd = 0;
@@ -844,7 +848,7 @@ alloc_page_vma(gfp_t gfp, struct vm_area
 		}
 		return alloc_page_interleave(gfp, 0, nid);
 	}
-	return __alloc_pages(gfp, 0, zonelist_policy(gfp, pol));
+	return __alloc_pages(gfp, 0, zonelist_policy(gfp, 0, pol));
 }
 
 /**
@@ -876,7 +880,7 @@ struct page *alloc_pages_current(gfp_t g
 		pol = &default_policy;
 	if (pol->policy == MPOL_INTERLEAVE)
 		return alloc_page_interleave(gfp, order, interleave_nodes(pol));
-	return __alloc_pages(gfp, order, zonelist_policy(gfp, pol));
+	return __alloc_pages(gfp, order, zonelist_policy(gfp, order, pol));
 }
 EXPORT_SYMBOL(alloc_pages_current);
 

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2006-01-31  2:30 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2006-01-19  8:04 [PATCH 0/2] Pzone based CKRM memory resource controller KUROSAWA Takahiro
2006-01-19  8:04 ` [PATCH 1/2] Add the pzone KUROSAWA Takahiro
2006-01-19 18:04   ` Andy Whitcroft
2006-01-19 23:42     ` KUROSAWA Takahiro
2006-01-20  9:17       ` Andy Whitcroft
2006-01-20  7:08   ` KAMEZAWA Hiroyuki
2006-01-20  8:22     ` KUROSAWA Takahiro
2006-01-20  8:30       ` KAMEZAWA Hiroyuki
2006-01-19  8:04 ` [PATCH 2/2] Add CKRM memory resource controller using pzones KUROSAWA Takahiro
2006-01-31  2:30 ` [PATCH 0/8] Pzone based CKRM memory resource controller KUROSAWA Takahiro
2006-01-31  2:30   ` [PATCH 1/8] Add the __GFP_NOLRU flag KUROSAWA Takahiro
2006-01-31 18:18     ` [ckrm-tech] " Dave Hansen
2006-02-01  5:06       ` KUROSAWA Takahiro
2006-01-31  2:30   ` [PATCH 2/8] Keep the number of zones while zone iterator loop KUROSAWA Takahiro
2006-01-31  2:30   ` [PATCH 3/8] Add for_each_zone_in_node macro KUROSAWA Takahiro
2006-01-31  2:30   ` [PATCH 4/8] Extract zone specific routines as functions KUROSAWA Takahiro
2006-01-31  2:30   ` [PATCH 5/8] Add the pzone_create() function KUROSAWA Takahiro
2006-01-31  2:30   ` [PATCH 6/8] Add the pzone_destroy() function KUROSAWA Takahiro
2006-01-31  2:30   ` [PATCH 7/8] Make the number of pages in pzones resizable KUROSAWA Takahiro
2006-01-31  2:30   ` KUROSAWA Takahiro [this message]
2006-02-01  2:58   ` [ckrm-tech] [PATCH 0/8] Pzone based CKRM memory resource controller chandra seetharaman
2006-02-01  5:39     ` KUROSAWA Takahiro
2006-02-01  6:16       ` Hirokazu Takahashi
2006-02-02  1:26       ` chandra seetharaman
2006-02-02  3:54         ` KUROSAWA Takahiro
2006-02-03  0:37           ` chandra seetharaman
2006-02-03  0:51             ` KUROSAWA Takahiro
2006-02-03  1:01               ` chandra seetharaman
2006-02-01  3:07   ` chandra seetharaman
2006-02-01  5:54     ` KUROSAWA Takahiro
2006-02-03  1:33     ` KUROSAWA Takahiro
2006-02-03  9:37       ` KUROSAWA Takahiro

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20060131023040.7915.47443.sendpatchset@debian \
    --to=kurosawa@valinux.co.jp \
    --cc=ckrm-tech@lists.sourceforge.net \
    --cc=linux-mm@kvack.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox