linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 5/6] CKRM: Add config support for mem controller
@ 2005-06-24 22:26 Chandra Seetharaman
  0 siblings, 0 replies; 3+ messages in thread
From: Chandra Seetharaman @ 2005-06-24 22:26 UTC (permalink / raw)
  To: ckrm-tech, linux-mm

Patch 5 of 6 patches to support memory controller under CKRM framework.
Provides some config parameter support. Details about the config parameters
are in the Documentation patch.
----------------------------------------

 include/linux/ckrm_mem.h        |   14 ++++
 include/linux/ckrm_mem_inline.h |    4 +
 kernel/ckrm/ckrm_memcore.c      |  127 ++++++++++++++++++++++++++++++++++++++--
 kernel/ckrm/ckrm_memctlr.c      |   49 +++++++++++++++
 mm/vmscan.c                     |   96 ++++++++++++++++++++++++++++--
 5 files changed, 279 insertions(+), 11 deletions(-)

Content-Disposition: inline; filename=11-05-mem_guar-config

Index: linux-2.6.12/include/linux/ckrm_mem.h
===================================================================
--- linux-2.6.12.orig/include/linux/ckrm_mem.h
+++ linux-2.6.12/include/linux/ckrm_mem.h
@@ -63,16 +63,28 @@ struct ckrm_mem_res {
 	int nr_dontcare;		/* # of dont care children */
 
 	struct ckrm_zone ckrm_zone[MAX_NR_ZONES];
+
+ 	struct list_head shrink_list;	/* list of classes that are near
+				 	 * limit and need to be shrunk
+					 */
+	int shrink_count;
+	unsigned long last_shrink;
 };
 
+#define CLS_AT_LIMIT		(1)
+
 extern atomic_t ckrm_mem_real_count;
 extern struct ckrm_res_ctlr mem_rcbs;
 extern struct ckrm_mem_res *ckrm_mem_root_class;
 extern struct list_head ckrm_memclass_list;
+extern struct list_head ckrm_shrink_list;
 extern spinlock_t ckrm_mem_lock;
 extern spinlock_t ckrm_overguar_lock[MAX_NR_ZONES];
 extern int ckrm_nr_mem_classes;
 extern unsigned int ckrm_tot_lru_pages;
+extern int ckrm_mem_shrink_count;
+extern int ckrm_mem_shrink_to;
+extern int ckrm_mem_shrink_interval;
 
 extern void ckrm_mem_migrate_mm(struct mm_struct *, struct ckrm_mem_res *);
 extern void ckrm_mem_migrate_all_pages(struct ckrm_mem_res *,
@@ -84,6 +96,8 @@ extern int ckrm_class_limit_ok(struct ck
 
 extern struct ckrm_zone *ckrm_get_max_overguar_czone(int);
 
+extern void ckrm_shrink_atlimit(struct ckrm_mem_res *);
+
 #else
 
 #define ckrm_mem_migrate_mm(a, b)			do {} while (0)
Index: linux-2.6.12/include/linux/ckrm_mem_inline.h
===================================================================
--- linux-2.6.12.orig/include/linux/ckrm_mem_inline.h
+++ linux-2.6.12/include/linux/ckrm_mem_inline.h
@@ -26,6 +26,8 @@
 
 #ifdef CONFIG_CKRM_RES_MEM
 
+#define ckrm_shrink_list_empty() list_empty(&ckrm_shrink_list)
+
 static inline struct ckrm_mem_res *
 ckrm_task_memclass(struct task_struct *tsk)
 {
@@ -324,6 +326,8 @@ static inline void ckrm_add_tail_inactiv
 
 #else
 
+#define ckrm_shrink_list_empty()		(1)
+
 static inline void *
 ckrm_task_memclass(struct task_struct *tsk)
 {
Index: linux-2.6.12/kernel/ckrm/ckrm_memcore.c
===================================================================
--- linux-2.6.12.orig/kernel/ckrm/ckrm_memcore.c
+++ linux-2.6.12/kernel/ckrm/ckrm_memcore.c
@@ -42,6 +42,7 @@ LIST_HEAD(ckrm_memclass_list);
 spinlock_t ckrm_mem_lock; /* protects list above */
 unsigned int ckrm_tot_lru_pages; /* # of pages in the system */
 int ckrm_nr_mem_classes = 0;
+int ckrm_mem_state = 0;
 struct ckrm_mem_res *ckrm_mem_root_class;
 atomic_t ckrm_mem_real_count = ATOMIC_INIT(0);
 
@@ -52,6 +53,7 @@ EXPORT_SYMBOL_GPL(ckrm_memclass_list);
 EXPORT_SYMBOL_GPL(ckrm_mem_lock);
 EXPORT_SYMBOL_GPL(ckrm_tot_lru_pages);
 EXPORT_SYMBOL_GPL(ckrm_nr_mem_classes);
+EXPORT_SYMBOL_GPL(ckrm_mem_state);
 EXPORT_SYMBOL_GPL(ckrm_mem_root_class);
 EXPORT_SYMBOL_GPL(ckrm_mem_real_count);
 
@@ -101,8 +103,10 @@ mem_res_initcls_one(struct ckrm_mem_res 
 	res->pg_guar = CKRM_SHARE_DONTCARE;
 	res->pg_limit = CKRM_SHARE_DONTCARE;
 	res->implicit_guar = CKRM_SHARE_DONTCARE;
+	res->last_shrink = jiffies;
 
 	INIT_LIST_HEAD(&res->mcls_list);
+	INIT_LIST_HEAD(&res->shrink_list);
 
 	for_each_zone(zone) {
 		INIT_LIST_HEAD(&res->ckrm_zone[zindex].active_list);
@@ -248,6 +252,11 @@ mem_res_alloc(struct ckrm_core_class *co
 		ckrm_nr_mem_classes++;
 	} else
 		printk(KERN_ERR "MEM_RC: alloc: GFP_ATOMIC failed\n");
+
+	/* enable the controller if the user defined atleast 1 class */
+	if (ckrm_nr_mem_classes > 1)
+		ckrm_mem_state = 1;
+
 	return res;
 }
 
@@ -402,6 +411,9 @@ mem_set_share_values(void *my_res, struc
 		set_impl_guar_children(parres);
 	}
 
+	/* If the user has changed the shares, enable the controller */
+	ckrm_mem_state = 1;
+
 	return rc;
 }
 
@@ -499,6 +511,23 @@ mem_change_resclass(void *tsk, void *old
 	return;
 }
 
+#define MEM_STATE "state"
+#define MEM_FAIL_OVER "fail_over"
+#define MEM_SHRINK_AT "shrink_at"
+#define MEM_SHRINK_TO "shrink_to"
+#define MEM_SHRINK_COUNT "num_shrinks"
+#define MEM_SHRINK_INTERVAL "shrink_interval"
+
+int ckrm_mem_fail_at = 110;
+int ckrm_mem_shrink_at = 90;
+int ckrm_mem_shrink_to = 80;
+int ckrm_mem_shrink_count = 10;
+int ckrm_mem_shrink_interval = 10;
+
+EXPORT_SYMBOL_GPL(ckrm_mem_fail_at);
+EXPORT_SYMBOL_GPL(ckrm_mem_shrink_at);
+EXPORT_SYMBOL_GPL(ckrm_mem_shrink_to);
+
 static int
 mem_show_config(void *my_res, struct seq_file *sfile)
 {
@@ -506,24 +535,110 @@ mem_show_config(void *my_res, struct seq
 
 	if (!res)
 		return -EINVAL;
-	printk(KERN_INFO "show_config called for %s resource of class %s\n",
-			MEM_RES_NAME, res->core->name);
 
-	seq_printf(sfile, "res=%s", MEM_RES_NAME);
+	seq_printf(sfile, "res=%s,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d\n",
+		MEM_RES_NAME,
+		MEM_STATE, ckrm_mem_state,
+		MEM_FAIL_OVER, ckrm_mem_fail_at,
+		MEM_SHRINK_AT, ckrm_mem_shrink_at,
+		MEM_SHRINK_TO, ckrm_mem_shrink_to,
+		MEM_SHRINK_COUNT, ckrm_mem_shrink_count,
+		MEM_SHRINK_INTERVAL, ckrm_mem_shrink_interval);
 
 	return 0;
 }
 
+typedef int __bitwise memclass_token_t;
+
+enum memclass_token {
+	mem_state = (__force memclass_token_t) 1,
+	mem_fail_over = (__force memclass_token_t) 2,
+	mem_shrink_at = (__force memclass_token_t) 3,
+	mem_shrink_to = (__force memclass_token_t) 4,
+	mem_shrink_count = (__force memclass_token_t) 5,
+	mem_shrink_interval = (__force memclass_token_t) 6,
+	mem_err = (__force memclass_token_t) 7
+};
+
+static match_table_t mem_tokens = {
+	{mem_state, MEM_STATE "=%d"},
+	{mem_fail_over, MEM_FAIL_OVER "=%d"},
+	{mem_shrink_at, MEM_SHRINK_AT "=%d"},
+	{mem_shrink_to, MEM_SHRINK_TO "=%d"},
+	{mem_shrink_count, MEM_SHRINK_COUNT "=%d"},
+	{mem_shrink_interval, MEM_SHRINK_INTERVAL "=%d"},
+	{mem_err, NULL},
+};
+
 static int
 mem_set_config(void *my_res, const char *cfgstr)
 {
+	char *p;
 	struct ckrm_mem_res *res = my_res;
+	int err = 0, val;
 
 	if (!res)
 		return -EINVAL;
-	printk(KERN_INFO "set_config called for %s resource of class %s\n",
-			MEM_RES_NAME, res->core->name);
-	return 0;
+
+	while ((p = strsep((char**)&cfgstr, ",")) != NULL) {
+		substring_t args[MAX_OPT_ARGS];
+		int token;
+		if (!*p)
+			continue;
+
+		token = match_token(p, mem_tokens, args);
+		switch (token) {
+		case mem_state:
+			err = -EINVAL;
+			match_int(args, &val);
+			switch (val) {
+			case 0:
+				if (ckrm_nr_mem_classes > 1)
+					break;
+				/* FALLTHRU */
+			case 1:
+				ckrm_mem_state = val;
+				err = 0;
+				break;
+			default:
+				break;
+			}
+			break;
+		case mem_fail_over:
+			if (match_int(args, &val) || (val <= 0))
+				err = -EINVAL;
+			else
+				ckrm_mem_fail_at = val;
+			break;
+		case mem_shrink_at:
+			if (match_int(args, &val) || (val <= 0))
+				err = -EINVAL;
+			else
+				ckrm_mem_shrink_at = val;
+			break;
+		case mem_shrink_to:
+			if (match_int(args, &val) || (val < 0) || (val > 100))
+				err = -EINVAL;
+			else
+				ckrm_mem_shrink_to = val;
+			break;
+		case mem_shrink_count:
+			if (match_int(args, &val) || (val <= 0))
+				err = -EINVAL;
+			else
+				ckrm_mem_shrink_count = val;
+			break;
+		case mem_shrink_interval:
+			if (match_int(args, &val) || (val <= 0))
+				err = -EINVAL;
+			else
+				ckrm_mem_shrink_interval = val;
+			break;
+		default:
+			err = -EINVAL;
+		}
+	}
+	return err;
 }
 
 static int
Index: linux-2.6.12/kernel/ckrm/ckrm_memctlr.c
===================================================================
--- linux-2.6.12.orig/kernel/ckrm/ckrm_memctlr.c
+++ linux-2.6.12/kernel/ckrm/ckrm_memctlr.c
@@ -59,10 +59,13 @@ ckrm_del_from_guar_list(struct ckrm_zone
 	}
 }
 
+extern int ckrm_mem_state;
+
 void
 add_use_count(struct ckrm_mem_res *cls, int borrow, int zindex, int cnt)
 {
 	int i, pg_total = 0;
+	extern int ckrm_mem_shrink_at;
 	struct ckrm_mem_res *parcls = ckrm_memclass(cls->parent);
 
 	if (!cls)
@@ -82,6 +85,12 @@ add_use_count(struct ckrm_mem_res *cls, 
 	} else
 		atomic_add(cnt, &ckrm_mem_real_count);
 	ckrm_add_to_guar_list(&cls->ckrm_zone[zindex], zindex);
+
+	if (ckrm_mem_state && (cls->pg_limit != CKRM_SHARE_DONTCARE) &&
+			(pg_total >=
+			((ckrm_mem_shrink_at * cls->pg_limit) / 100)) &&
+			(!test_bit(CLS_AT_LIMIT, &cls->flags)))
+		ckrm_shrink_atlimit(cls);
 	return;
 }
 
@@ -113,7 +122,7 @@ ckrm_class_limit_ok(struct ckrm_mem_res 
 {
 	int ret, i, pg_total = 0;
 
-	if ((mem_rcbs.resid == -1) || !cls)
+	if ((ckrm_mem_state == 0) || (mem_rcbs.resid == -1) || !cls)
 		return 1;
 	for (i = 0; i < MAX_NR_ZONES; i++)
 		pg_total += cls->pg_total[i];
@@ -123,6 +132,10 @@ ckrm_class_limit_ok(struct ckrm_mem_res 
 	} else
 		ret = (pg_total <= cls->pg_limit);
 
+	/* If we are failing, just nudge the back end */
+	if (ret == 0)
+		ckrm_shrink_atlimit(cls);
+
 	return ret;
 }
 
@@ -407,3 +420,37 @@ ckrm_get_max_overguar_czone(int zindex)
 
 	return maxczone;
 }
+LIST_HEAD(ckrm_shrink_list);
+
+void
+ckrm_shrink_atlimit(struct ckrm_mem_res *cls)
+{
+	struct zone *zone;
+	unsigned long flags;
+	int order;
+
+	if (!cls || (cls->pg_limit == CKRM_SHARE_DONTCARE))
+		return;
+	if (test_and_set_bit(CLS_AT_LIMIT, &cls->flags))
+		return;
+	if (time_after(jiffies, cls->last_shrink +
+				ckrm_mem_shrink_interval * HZ)) {
+		cls->last_shrink = jiffies;
+		cls->shrink_count = 0;
+	}
+	cls->shrink_count++;
+	if (cls->shrink_count > ckrm_mem_shrink_count) {
+		clear_bit(CLS_AT_LIMIT, &cls->flags);
+		return;
+	}
+	spin_lock_irqsave(&ckrm_mem_lock, flags);
+	list_add(&cls->shrink_list, &ckrm_shrink_list);
+	spin_unlock_irqrestore(&ckrm_mem_lock, flags);
+	for_each_zone(zone) {
+		/* This is just a number to get to wakeup kswapd */
+		order = cls->pg_total[0] -
+			((ckrm_mem_shrink_to * cls->pg_limit) / 100);
+		wakeup_kswapd(zone, order);
+		break; /* only once is enough */
+	}
+}
Index: linux-2.6.12/mm/vmscan.c
===================================================================
--- linux-2.6.12.orig/mm/vmscan.c
+++ linux-2.6.12/mm/vmscan.c
@@ -869,6 +869,88 @@ shrink_ckrmzone(struct ckrm_zone *czone,
 		}
 	}
 }
+
+/* FIXME: This function needs to be given more thought. */
+static void
+ckrm_shrink_class(struct ckrm_mem_res *cls)
+{
+	struct scan_control sc;
+	struct zone *zone;
+	int zindex = 0, cnt, act_credit = 0, inact_credit = 0;
+
+	sc.nr_mapped = read_page_state(nr_mapped);
+	sc.nr_scanned = 0;
+	sc.nr_reclaimed = 0;
+	sc.priority = 0; /* always very high priority */
+	sc.swap_cluster_max = SWAP_CLUSTER_MAX;
+
+	for_each_zone(zone) {
+		int zone_total, zone_limit, active_limit,
+					inactive_limit, clszone_limit;
+		struct ckrm_zone *czone;
+		u64 temp;
+
+		czone = &cls->ckrm_zone[zindex];
+
+		zone->temp_priority = zone->prev_priority;
+		zone->prev_priority = sc.priority;
+
+		zone_total = zone->nr_active + zone->nr_inactive
+						+ zone->free_pages;
+
+		temp = (u64) cls->pg_limit * zone_total;
+		do_div(temp, ckrm_tot_lru_pages);
+		zone_limit = (int) temp;
+		clszone_limit = (ckrm_mem_shrink_to * zone_limit) / 100;
+		active_limit = (2 * clszone_limit) / 3; /* 2/3rd in active */
+		inactive_limit = clszone_limit / 3; /* 1/3rd in inactive */
+
+		sc.ckrm_active = 0;
+		cnt = czone->nr_active + act_credit - active_limit;
+		if (cnt > 0) {
+			sc.ckrm_active = (unsigned long) cnt;
+			act_credit = 0;
+		} else
+			act_credit += cnt;
+
+		sc.ckrm_inactive = 0;
+		cnt = sc.ckrm_active + inact_credit +
+					(czone->nr_inactive - inactive_limit);
+		if (cnt > 0) {
+			sc.ckrm_inactive = (unsigned long) cnt;
+			inact_credit = 0;
+		} else
+			inact_credit += cnt;
+
+		if (sc.ckrm_active || sc.ckrm_inactive) {
+			sc.nr_to_reclaim = sc.ckrm_inactive;
+			shrink_ckrmzone(czone, &sc);
+		}
+		zone->prev_priority = zone->temp_priority;
+		zindex++;
+	}
+}
+
+static void
+ckrm_shrink_classes(void)
+{
+	struct ckrm_mem_res *cls;
+
+	spin_lock_irq(&ckrm_mem_lock);
+	while (!ckrm_shrink_list_empty()) {
+		cls =  list_entry(ckrm_shrink_list.next, struct ckrm_mem_res,
+				shrink_list);
+		list_del(&cls->shrink_list);
+		spin_unlock_irq(&ckrm_mem_lock);
+		ckrm_shrink_class(cls);
+		clear_bit(CLS_AT_LIMIT, &cls->flags);
+		spin_lock_irq(&ckrm_mem_lock);
+	}
+	spin_unlock_irq(&ckrm_mem_lock);
+}
+
+#else
+#define ckrm_shrink_classes()	do { } while(0)
 #endif
 
 /*
@@ -1135,7 +1217,8 @@ loop_again:
 					continue;
 
 				if (!zone_watermark_ok(zone, order,
-						zone->pages_high, 0, 0, 0)) {
+						zone->pages_high, 0, 0, 0) &&
+						ckrm_shrink_list_empty()) {
 					end_zone = i;
 					goto scan;
 				}
@@ -1171,7 +1254,8 @@ scan:
 
 			if (nr_pages == 0) {	/* Not software suspend */
 				if (!zone_watermark_ok(zone, order,
-						zone->pages_high, end_zone, 0, 0))
+					zone->pages_high, end_zone, 0, 0) &&
+						ckrm_shrink_list_empty())
 					all_zones_ok = 0;
 			}
 			zone->temp_priority = priority;
@@ -1300,7 +1384,10 @@ static int kswapd(void *p)
 		}
 		finish_wait(&pgdat->kswapd_wait, &wait);
 
-		balance_pgdat(pgdat, 0, order);
+		if (!ckrm_shrink_list_empty())
+			ckrm_shrink_classes();
+		else
+			balance_pgdat(pgdat, 0, order);
 	}
 	return 0;
 }
@@ -1316,7 +1403,8 @@ void wakeup_kswapd(struct zone *zone, in
 		return;
 
 	pgdat = zone->zone_pgdat;
-	if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0, 0))
+	if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0, 0) &&
+			ckrm_shrink_list_empty())
 		return;
 	if (pgdat->kswapd_max_order < order)
 		pgdat->kswapd_max_order = order;

-- 

----------------------------------------------------------------------
    Chandra Seetharaman               | Be careful what you choose....
              - sekharan@us.ibm.com   |      .......you may get it.
----------------------------------------------------------------------


--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>

^ permalink raw reply	[flat|nested] 3+ messages in thread
* [Patch 5/6] CKRM: Add config support for mem controller
@ 2005-05-19  0:33 Chandra Seetharaman
  0 siblings, 0 replies; 3+ messages in thread
From: Chandra Seetharaman @ 2005-05-19  0:33 UTC (permalink / raw)
  To: ckrm-tech, linux-mm

Patch 5 of 6 patches to support memory controller under CKRM framework.
Provides some config parameter support. Details about the config parameters
in the Documentation patch.
----------------------------------------

 include/linux/ckrm_mem.h        |   14 ++++
 include/linux/ckrm_mem_inline.h |    4 +
 kernel/ckrm/ckrm_memcore.c      |  126 ++++++++++++++++++++++++++++++++++++++--
 kernel/ckrm/ckrm_memctlr.c      |   49 +++++++++++++++
 mm/vmscan.c                     |   95 ++++++++++++++++++++++++++++--
 5 files changed, 277 insertions(+), 11 deletions(-)

Content-Disposition: inline; filename=11-05-mem_guar-config

Index: linux-2612-rc3/include/linux/ckrm_mem.h
===================================================================
--- linux-2612-rc3.orig/include/linux/ckrm_mem.h
+++ linux-2612-rc3/include/linux/ckrm_mem.h
@@ -63,16 +63,28 @@ struct ckrm_mem_res {
 	int nr_dontcare;		/* # of dont care children */
 
 	struct ckrm_zone ckrm_zone[MAX_NR_ZONES];
+
+ 	struct list_head shrink_list;	/* list of classes that are near
+				 	 * limit and need to be shrunk
+					 */
+	int shrink_count;
+	unsigned long last_shrink;
 };
 
+#define CLS_AT_LIMIT		(1)
+
 extern atomic_t ckrm_mem_real_count;
 extern struct ckrm_res_ctlr mem_rcbs;
 extern struct ckrm_mem_res *ckrm_mem_root_class;
 extern struct list_head ckrm_memclass_list;
+extern struct list_head ckrm_shrink_list;
 extern spinlock_t ckrm_mem_lock;
 extern spinlock_t ckrm_overguar_lock[MAX_NR_ZONES];
 extern int ckrm_nr_mem_classes;
 extern unsigned int ckrm_tot_lru_pages;
+extern int ckrm_mem_shrink_count;
+extern int ckrm_mem_shrink_to;
+extern int ckrm_mem_shrink_interval;
 
 extern void ckrm_mem_migrate_mm(struct mm_struct *, struct ckrm_mem_res *);
 extern void ckrm_mem_migrate_all_pages(struct ckrm_mem_res *,
@@ -84,6 +96,8 @@ extern int ckrm_class_limit_ok(struct ck
 
 extern struct ckrm_zone *ckrm_get_max_overguar_czone(int);
 
+extern void ckrm_shrink_atlimit(struct ckrm_mem_res *);
+
 #else
 
 #define ckrm_mem_migrate_mm(a, b)			do {} while (0)
Index: linux-2612-rc3/include/linux/ckrm_mem_inline.h
===================================================================
--- linux-2612-rc3.orig/include/linux/ckrm_mem_inline.h
+++ linux-2612-rc3/include/linux/ckrm_mem_inline.h
@@ -26,6 +26,8 @@
 
 #ifdef CONFIG_CKRM_RES_MEM
 
+#define ckrm_shrink_list_empty() list_empty(&ckrm_shrink_list)
+
 static inline struct ckrm_mem_res *
 ckrm_task_memclass(struct task_struct *tsk)
 {
@@ -324,6 +326,8 @@ static inline void ckrm_add_tail_inactiv
 
 #else
 
+#define ckrm_shrink_list_empty()		(1)
+
 static inline void *
 ckrm_task_memclass(struct task_struct *tsk)
 {
Index: linux-2612-rc3/kernel/ckrm/ckrm_memcore.c
===================================================================
--- linux-2612-rc3.orig/kernel/ckrm/ckrm_memcore.c
+++ linux-2612-rc3/kernel/ckrm/ckrm_memcore.c
@@ -42,6 +42,7 @@ LIST_HEAD(ckrm_memclass_list);
 spinlock_t ckrm_mem_lock; /* protects list above */
 unsigned int ckrm_tot_lru_pages; /* # of pages in the system */
 int ckrm_nr_mem_classes = 0;
+int ckrm_mem_state = 0;
 struct ckrm_mem_res *ckrm_mem_root_class;
 atomic_t ckrm_mem_real_count = ATOMIC_INIT(0);
 
@@ -52,6 +53,7 @@ EXPORT_SYMBOL_GPL(ckrm_memclass_list);
 EXPORT_SYMBOL_GPL(ckrm_mem_lock);
 EXPORT_SYMBOL_GPL(ckrm_tot_lru_pages);
 EXPORT_SYMBOL_GPL(ckrm_nr_mem_classes);
+EXPORT_SYMBOL_GPL(ckrm_mem_state);
 EXPORT_SYMBOL_GPL(ckrm_mem_root_class);
 EXPORT_SYMBOL_GPL(ckrm_mem_real_count);
 
@@ -103,6 +105,7 @@ mem_res_initcls_one(struct ckrm_mem_res 
 	res->implicit_guar = CKRM_SHARE_DONTCARE;
 
 	INIT_LIST_HEAD(&res->mcls_list);
+	INIT_LIST_HEAD(&res->shrink_list);
 
 	for_each_zone(zone) {
 		INIT_LIST_HEAD(&res->ckrm_zone[zindex].active_list);
@@ -248,6 +251,11 @@ mem_res_alloc(struct ckrm_core_class *co
 		ckrm_nr_mem_classes++;
 	} else
 		printk(KERN_ERR "MEM_RC: alloc: GFP_ATOMIC failed\n");
+
+	/* enable the controller if the user defined atleast 1 class */
+	if (ckrm_nr_mem_classes > 1)
+		ckrm_mem_state = 1;
+	 
 	return res;
 }
 
@@ -402,6 +410,9 @@ mem_set_share_values(void *my_res, struc
 		set_impl_guar_children(parres);
 	}
 
+	/* If the user has changed the shares, enable the controller */
+	ckrm_mem_state = 1;
+
 	return rc;
 }
 
@@ -499,6 +510,23 @@ mem_change_resclass(void *tsk, void *old
 	return;
 }
 
+#define MEM_STATE "state"
+#define MEM_FAIL_OVER "fail_over"
+#define MEM_SHRINK_AT "shrink_at"
+#define MEM_SHRINK_TO "shrink_to"
+#define MEM_SHRINK_COUNT "num_shrinks"
+#define MEM_SHRINK_INTERVAL "shrink_interval"
+
+int ckrm_mem_fail_at = 110;
+int ckrm_mem_shrink_at = 90;
+int ckrm_mem_shrink_to = 80;
+int ckrm_mem_shrink_count = 10;
+int ckrm_mem_shrink_interval = 10;
+
+EXPORT_SYMBOL_GPL(ckrm_mem_fail_at);
+EXPORT_SYMBOL_GPL(ckrm_mem_shrink_at);
+EXPORT_SYMBOL_GPL(ckrm_mem_shrink_to);
+
 static int
 mem_show_config(void *my_res, struct seq_file *sfile)
 {
@@ -506,24 +534,110 @@ mem_show_config(void *my_res, struct seq
 
 	if (!res)
 		return -EINVAL;
-	printk(KERN_INFO "show_config called for %s resource of class %s\n",
-			MEM_RES_NAME, res->core->name);
 
-	seq_printf(sfile, "res=%s", MEM_RES_NAME);
+	seq_printf(sfile, "res=%s,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d\n",
+		MEM_RES_NAME,
+		MEM_STATE, ckrm_mem_state,
+		MEM_FAIL_OVER, ckrm_mem_fail_at,
+		MEM_SHRINK_AT, ckrm_mem_shrink_at,
+		MEM_SHRINK_TO, ckrm_mem_shrink_to,
+		MEM_SHRINK_COUNT, ckrm_mem_shrink_count,
+		MEM_SHRINK_INTERVAL, ckrm_mem_shrink_interval);
 
 	return 0;
 }
 
+typedef int __bitwise memclass_token_t;
+
+enum memclass_token {
+	mem_state = (__force memclass_token_t) 1,
+	mem_fail_over = (__force memclass_token_t) 2,
+	mem_shrink_at = (__force memclass_token_t) 3,
+	mem_shrink_to = (__force memclass_token_t) 4,
+	mem_shrink_count = (__force memclass_token_t) 5,
+	mem_shrink_interval = (__force memclass_token_t) 6,
+	mem_err = (__force memclass_token_t) 7
+};
+
+static match_table_t mem_tokens = {
+	{mem_state, MEM_STATE "=%d"},
+	{mem_fail_over, MEM_FAIL_OVER "=%d"},
+	{mem_shrink_at, MEM_SHRINK_AT "=%d"},
+	{mem_shrink_to, MEM_SHRINK_TO "=%d"},
+	{mem_shrink_count, MEM_SHRINK_COUNT "=%d"},
+	{mem_shrink_interval, MEM_SHRINK_INTERVAL "=%d"},
+	{mem_err, NULL},
+};
+
 static int
 mem_set_config(void *my_res, const char *cfgstr)
 {
+	char *p;
 	struct ckrm_mem_res *res = my_res;
+	int err = 0, val;
 
 	if (!res)
 		return -EINVAL;
-	printk(KERN_INFO "set_config called for %s resource of class %s\n",
-			MEM_RES_NAME, res->core->name);
-	return 0;
+
+	while ((p = strsep((char**)&cfgstr, ",")) != NULL) {
+		substring_t args[MAX_OPT_ARGS];
+		int token;
+		if (!*p)
+			continue;
+
+		token = match_token(p, mem_tokens, args);
+		switch (token) {
+		case mem_state:
+			err = -EINVAL;
+			match_int(args, &val);
+			switch (val) {
+			case 0:
+				if (ckrm_nr_mem_classes > 1)
+					break;
+				/* FALLTHRU */
+			case 1:
+				ckrm_mem_state = val;
+				err = 0;
+				break;
+			default:
+				break;
+			}
+			break;
+		case mem_fail_over:
+			if (match_int(args, &val) || (val <= 0))
+				err = -EINVAL;
+			else
+				ckrm_mem_fail_at = val;
+			break;
+		case mem_shrink_at:
+			if (match_int(args, &val) || (val <= 0))
+				err = -EINVAL;
+			else
+				ckrm_mem_shrink_at = val;
+			break;
+		case mem_shrink_to:
+			if (match_int(args, &val) || (val < 0) || (val > 100))
+				err = -EINVAL;
+			else
+				ckrm_mem_shrink_to = val;
+			break;
+		case mem_shrink_count:
+			if (match_int(args, &val) || (val <= 0))
+				err = -EINVAL;
+			else
+				ckrm_mem_shrink_count = val;
+			break;
+		case mem_shrink_interval:
+			if (match_int(args, &val) || (val <= 0))
+				err = -EINVAL;
+			else
+				ckrm_mem_shrink_interval = val;
+			break;
+		default:
+			err = -EINVAL;
+		}
+	}
+	return err;
 }
 
 static int
Index: linux-2612-rc3/kernel/ckrm/ckrm_memctlr.c
===================================================================
--- linux-2612-rc3.orig/kernel/ckrm/ckrm_memctlr.c
+++ linux-2612-rc3/kernel/ckrm/ckrm_memctlr.c
@@ -59,10 +59,13 @@ ckrm_del_from_guar_list(struct ckrm_zone
 	}
 }
 
+extern int ckrm_mem_state;
+
 void
 add_use_count(struct ckrm_mem_res *cls, int borrow, int zindex, int cnt)
 {
 	int i, pg_total = 0;
+	extern int ckrm_mem_shrink_at;
 	struct ckrm_mem_res *parcls = ckrm_memclass(cls->parent);
 
 	if (!cls)
@@ -82,6 +85,12 @@ add_use_count(struct ckrm_mem_res *cls, 
 	} else
 		atomic_add(cnt, &ckrm_mem_real_count);
 	ckrm_add_to_guar_list(&cls->ckrm_zone[zindex], zindex);
+
+	if (ckrm_mem_state && (cls->pg_limit != CKRM_SHARE_DONTCARE) &&
+			(pg_total >= 
+			((ckrm_mem_shrink_at * cls->pg_limit) / 100)) &&
+			(test_bit(CLS_AT_LIMIT, &cls->flags)))
+		ckrm_shrink_atlimit(cls);
 	return;
 }
 
@@ -113,7 +122,7 @@ ckrm_class_limit_ok(struct ckrm_mem_res 
 {
 	int ret, i, pg_total = 0;
 
-	if ((mem_rcbs.resid == -1) || !cls)
+	if ((ckrm_mem_state == 0) || (mem_rcbs.resid == -1) || !cls)
 		return 1;
 	for (i = 0; i < MAX_NR_ZONES; i++)
 		pg_total += cls->pg_total[i];
@@ -123,6 +132,10 @@ ckrm_class_limit_ok(struct ckrm_mem_res 
 	} else
 		ret = (pg_total <= cls->pg_limit);
 
+	/* If we are failing, just nudge the back end */
+	if (ret == 0)
+		ckrm_shrink_atlimit(cls);
+
 	return ret;
 }
 
@@ -404,3 +417,37 @@ ckrm_get_max_overguar_czone(int zindex)
 
 	return maxczone;
 }
+LIST_HEAD(ckrm_shrink_list);
+
+void
+ckrm_shrink_atlimit(struct ckrm_mem_res *cls)
+{
+	struct zone *zone;
+	unsigned long flags;
+	int order;
+
+	if (!cls || (cls->pg_limit == CKRM_SHARE_DONTCARE))
+		return;
+	if (test_and_set_bit(CLS_AT_LIMIT, &cls->flags))
+		return;
+	if (time_after(cls->last_shrink + ckrm_mem_shrink_interval * HZ, 
+								jiffies)) {
+		cls->last_shrink = jiffies;
+		cls->shrink_count = 0;
+	}
+	cls->shrink_count++;
+	if (cls->shrink_count > ckrm_mem_shrink_count) {
+		clear_bit(CLS_AT_LIMIT, &cls->flags);
+		return;
+	}
+	spin_lock_irqsave(&ckrm_mem_lock, flags);
+	list_add(&cls->shrink_list, &ckrm_shrink_list);
+	spin_unlock_irqrestore(&ckrm_mem_lock, flags);
+	for_each_zone(zone) {
+		/* This is just a number to get to wakeup kswapd */
+		order = cls->pg_total[0] -
+			((ckrm_mem_shrink_to * cls->pg_limit) / 100);
+		wakeup_kswapd(zone, order);
+		break; /* only once is enough */
+	}
+}
Index: linux-2612-rc3/mm/vmscan.c
===================================================================
--- linux-2612-rc3.orig/mm/vmscan.c
+++ linux-2612-rc3/mm/vmscan.c
@@ -869,6 +869,87 @@ shrink_ckrmzone(struct ckrm_zone *czone,
 		}
 	}
 }
+
+/* FIXME: This function needs to be given more thought. */
+static void
+ckrm_shrink_class(struct ckrm_mem_res *cls)
+{
+	struct scan_control sc;
+	struct zone *zone;
+	int zindex = 0, cnt, act_credit = 0, inact_credit = 0;
+
+	sc.nr_mapped = read_page_state(nr_mapped);
+	sc.nr_scanned = 0;
+	sc.nr_reclaimed = 0;
+	sc.priority = 0; /* always very high priority */
+
+	for_each_zone(zone) {
+		int zone_total, zone_limit, active_limit,
+					inactive_limit, clszone_limit;
+		struct ckrm_zone *czone;
+		u64 temp;
+
+		czone = &cls->ckrm_zone[zindex];
+
+		zone->temp_priority = zone->prev_priority;
+		zone->prev_priority = sc.priority;
+
+		zone_total = zone->nr_active + zone->nr_inactive 
+						+ zone->free_pages;
+
+		temp = (u64) cls->pg_limit * zone_total;
+		do_div(temp, ckrm_tot_lru_pages);
+		zone_limit = (int) temp;
+		clszone_limit = (ckrm_mem_shrink_to * zone_limit) / 100;
+		active_limit = (2 * clszone_limit) / 3; /* 2/3rd in active */
+		inactive_limit = clszone_limit / 3; /* 1/3rd in inactive */
+
+		sc.ckrm_active = 0;
+		cnt = czone->nr_active + act_credit - active_limit;
+		if (cnt > 0) {
+			sc.ckrm_active = (unsigned long) cnt;
+			act_credit = 0;
+		} else
+			act_credit += cnt;
+
+		sc.ckrm_inactive = 0;
+		cnt = sc.ckrm_active + inact_credit +
+					(czone->nr_inactive - inactive_limit);
+		if (cnt > 0) {
+			sc.ckrm_inactive = (unsigned long) cnt;
+			inact_credit = 0;
+		} else
+			inact_credit += cnt;
+
+		if (sc.ckrm_active || sc.ckrm_inactive) {
+			sc.nr_to_reclaim = sc.ckrm_inactive;
+			shrink_ckrmzone(czone, &sc);
+		}
+		zone->prev_priority = zone->temp_priority;
+		zindex++;
+	}
+}
+
+static void
+ckrm_shrink_classes(void)
+{
+	struct ckrm_mem_res *cls;
+
+	spin_lock_irq(&ckrm_mem_lock);
+	while (!ckrm_shrink_list_empty()) {
+		cls =  list_entry(ckrm_shrink_list.next, struct ckrm_mem_res,
+				shrink_list);
+		list_del(&cls->shrink_list);
+		spin_unlock_irq(&ckrm_mem_lock);
+		ckrm_shrink_class(cls);
+		clear_bit(CLS_AT_LIMIT, &cls->flags);
+		spin_lock_irq(&ckrm_mem_lock);
+	}
+	spin_unlock_irq(&ckrm_mem_lock);
+}
+
+#else
+#define ckrm_shrink_classes()	do { } while(0)
 #endif
 
 /*
@@ -1135,7 +1216,8 @@ loop_again:
 					continue;
 
 				if (!zone_watermark_ok(zone, order,
-						zone->pages_high, 0, 0, 0)) {
+						zone->pages_high, 0, 0, 0) &&
+						ckrm_shrink_list_empty()) {
 					end_zone = i;
 					goto scan;
 				}
@@ -1171,7 +1253,8 @@ scan:
 
 			if (nr_pages == 0) {	/* Not software suspend */
 				if (!zone_watermark_ok(zone, order,
-						zone->pages_high, end_zone, 0, 0))
+					zone->pages_high, end_zone, 0, 0) &&
+						ckrm_shrink_list_empty())
 					all_zones_ok = 0;
 			}
 			zone->temp_priority = priority;
@@ -1300,7 +1383,10 @@ static int kswapd(void *p)
 		}
 		finish_wait(&pgdat->kswapd_wait, &wait);
 
-		balance_pgdat(pgdat, 0, order);
+		if (!ckrm_shrink_list_empty())
+			ckrm_shrink_classes();
+		else 
+			balance_pgdat(pgdat, 0, order);
 	}
 	return 0;
 }
@@ -1316,7 +1402,8 @@ void wakeup_kswapd(struct zone *zone, in
 		return;
 
 	pgdat = zone->zone_pgdat;
-	if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0, 0))
+	if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0, 0) &&
+			ckrm_shrink_list_empty())
 		return;
 	if (pgdat->kswapd_max_order < order)
 		pgdat->kswapd_max_order = order;
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>

^ permalink raw reply	[flat|nested] 3+ messages in thread
* [Patch 5/6] CKRM: Add config support for mem controller
@ 2005-04-02  3:15 Chandra Seetharaman
  0 siblings, 0 replies; 3+ messages in thread
From: Chandra Seetharaman @ 2005-04-02  3:15 UTC (permalink / raw)
  To: ckrm-tech, linux-mm

[-- Attachment #1: Type: text/plain, Size: 287 bytes --]


-- 

----------------------------------------------------------------------
    Chandra Seetharaman               | Be careful what you choose....
              - sekharan@us.ibm.com   |      .......you may get it.
----------------------------------------------------------------------

[-- Attachment #2: 11-05-mem_guar-config --]
[-- Type: text/plain, Size: 11889 bytes --]

Patch 5 of 6 patches to support memory controller under CKRM framework.
Provides some config parameter support. Details about the config parameters
in the Documentation patch.

 include/linux/ckrm_mem.h        |   13 +++++
 include/linux/ckrm_mem_inline.h |    4 +
 kernel/ckrm/ckrm_memcore.c      |  101 +++++++++++++++++++++++++++++++++++++---
 kernel/ckrm/ckrm_memctlr.c      |   46 ++++++++++++++++++
 mm/vmscan.c                     |  101 ++++++++++++++++++++++++++++++++++++++--
 5 files changed, 255 insertions(+), 10 deletions(-)

Index: linux-2.6.12-rc1/include/linux/ckrm_mem.h
===================================================================
--- linux-2.6.12-rc1.orig/include/linux/ckrm_mem.h
+++ linux-2.6.12-rc1/include/linux/ckrm_mem.h
@@ -69,17 +69,29 @@ struct ckrm_mem_res {
 	int nr_dontcare;		/* # of dont care children */
 
 	struct ckrm_zone ckrm_zone[MAX_NR_ZONES];
+
+	struct list_head shrink_list;	/* list of classes that are near
+				 	 * limit and need to be shrunk
+					 */
+	int shrink_count;
+	unsigned long last_shrink;
 };
 
 #define CLS_SHRINK_BIT		(1)
 
+#define CLS_AT_LIMIT		(1)
+
 extern atomic_t ckrm_mem_real_count;
 extern struct ckrm_res_ctlr mem_rcbs;
 extern struct ckrm_mem_res *ckrm_mem_root_class;
 extern struct list_head ckrm_memclass_list;
+extern struct list_head ckrm_shrink_list;
 extern spinlock_t ckrm_mem_lock;
 extern int ckrm_nr_mem_classes;
 extern unsigned int ckrm_tot_lru_pages;
+extern int ckrm_mem_shrink_count;
+extern int ckrm_mem_shrink_to;
+extern int ckrm_mem_shrink_interval ;
 
 extern void ckrm_mem_migrate_mm(struct mm_struct *, struct ckrm_mem_res *);
 extern void ckrm_mem_migrate_all_pages(struct ckrm_mem_res *,
@@ -91,6 +103,7 @@ extern int ckrm_class_limit_ok(struct ck
 
 extern void shrink_get_victims(struct zone *, unsigned long ,
 				unsigned long, struct list_head *);
+extern void ckrm_shrink_atlimit(struct ckrm_mem_res *);
 #else
 
 #define ckrm_mem_migrate_mm(a, b)			do {} while (0)
Index: linux-2.6.12-rc1/include/linux/ckrm_mem_inline.h
===================================================================
--- linux-2.6.12-rc1.orig/include/linux/ckrm_mem_inline.h
+++ linux-2.6.12-rc1/include/linux/ckrm_mem_inline.h
@@ -26,6 +26,8 @@
 
 #ifdef CONFIG_CKRM_RES_MEM
 
+#define ckrm_shrink_list_empty() list_empty(&ckrm_shrink_list)
+
 static inline struct ckrm_mem_res *
 ckrm_get_mem_class(struct task_struct *tsk)
 {
@@ -331,6 +333,8 @@ static inline void ckrm_add_tail_inactiv
 
 #else
 
+#define ckrm_shrink_list_empty()		(1)
+
 static inline void *
 ckrm_get_memclass(struct task_struct *tsk)
 {
Index: linux-2.6.12-rc1/kernel/ckrm/ckrm_memcore.c
===================================================================
--- linux-2.6.12-rc1.orig/kernel/ckrm/ckrm_memcore.c
+++ linux-2.6.12-rc1/kernel/ckrm/ckrm_memcore.c
@@ -99,6 +99,7 @@ mem_res_initcls_one(struct ckrm_mem_res 
 	res->pg_limit = CKRM_SHARE_DONTCARE;
 
 	INIT_LIST_HEAD(&res->mcls_list);
+	INIT_LIST_HEAD(&res->shrink_list);
 
 	for_each_zone(zone) {
 		INIT_LIST_HEAD(&res->ckrm_zone[zindex].active_list);
@@ -454,6 +455,22 @@ mem_change_resclass(void *tsk, void *old
 	return;
 }
 
+#define MEM_FAIL_OVER "fail_over"
+#define MEM_SHRINK_AT "shrink_at"
+#define MEM_SHRINK_TO "shrink_to"
+#define MEM_SHRINK_COUNT "num_shrinks"
+#define MEM_SHRINK_INTERVAL "shrink_interval"
+
+int ckrm_mem_fail_at = 110;
+int ckrm_mem_shrink_at = 90;
+int ckrm_mem_shrink_to = 80;
+int ckrm_mem_shrink_count = 10;
+int ckrm_mem_shrink_interval = 10;
+
+EXPORT_SYMBOL_GPL(ckrm_mem_fail_at);
+EXPORT_SYMBOL_GPL(ckrm_mem_shrink_at);
+EXPORT_SYMBOL_GPL(ckrm_mem_shrink_to);
+
 static int
 mem_show_config(void *my_res, struct seq_file *sfile)
 {
@@ -461,24 +478,91 @@ mem_show_config(void *my_res, struct seq
 
 	if (!res)
 		return -EINVAL;
-	printk(KERN_INFO "show_config called for %s resource of class %s\n",
-			MEM_RES_NAME, res->core->name);
 
-	seq_printf(sfile, "res=%s", MEM_RES_NAME);
+	seq_printf(sfile, "res=%s,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d\n",
+		MEM_RES_NAME,
+		MEM_FAIL_OVER, ckrm_mem_fail_at,
+		MEM_SHRINK_AT, ckrm_mem_shrink_at,
+		MEM_SHRINK_TO, ckrm_mem_shrink_to,
+		MEM_SHRINK_COUNT, ckrm_mem_shrink_count,
+		MEM_SHRINK_INTERVAL, ckrm_mem_shrink_interval);
 
 	return 0;
 }
 
+typedef int __bitwise memclass_token_t;
+
+enum memclass_token {
+	mem_fail_over = (__force memclass_token_t) 1,
+	mem_shrink_at = (__force memclass_token_t) 2,
+	mem_shrink_to = (__force memclass_token_t) 3,
+	mem_shrink_count = (__force memclass_token_t) 4,
+	mem_shrink_interval = (__force memclass_token_t) 5,
+	mem_err = (__force memclass_token_t) 6
+};
+
+static match_table_t mem_tokens = {
+	{mem_fail_over, MEM_FAIL_OVER "=%d"},
+	{mem_shrink_at, MEM_SHRINK_AT "=%d"},
+	{mem_shrink_to, MEM_SHRINK_TO "=%d"},
+	{mem_shrink_count, MEM_SHRINK_COUNT "=%d"},
+	{mem_shrink_interval, MEM_SHRINK_INTERVAL "=%d"},
+	{mem_err, NULL},
+};
+
 static int
 mem_set_config(void *my_res, const char *cfgstr)
 {
+	char *p;	/* current "key=value" field cut out of cfgstr */
 	struct ckrm_mem_res *res = my_res;
+	int err = 0, val;
 
 	if (!res)
 		return -EINVAL;
-	printk(KERN_INFO "set_config called for %s resource of class %s\n",
-			MEM_RES_NAME, res->core->name);
-	return 0;
+
+	while ((p = strsep((char**)&cfgstr, ",")) != NULL) {
+		substring_t args[MAX_OPT_ARGS];
+		int token;
+		if (!*p)	/* empty field, e.g. ",," */
+			continue;
+
+		token = match_token(p, mem_tokens, args);
+		switch (token) {
+		case mem_fail_over:
+			if (match_int(args, &val) || (val <= 0))
+				err = -EINVAL;	/* remembered; parsing goes on */
+			else
+				ckrm_mem_fail_at = val;
+			break;
+		case mem_shrink_at:
+			if (match_int(args, &val) || (val <= 0))
+				err = -EINVAL;
+			else
+				ckrm_mem_shrink_at = val;
+			break;
+		case mem_shrink_to:
+			if (match_int(args, &val) || (val < 0) || (val > 100))
+				err = -EINVAL;
+			else
+				ckrm_mem_shrink_to = val;
+			break;
+		case mem_shrink_count:
+			if (match_int(args, &val) || (val <= 0))
+				err = -EINVAL;
+			else
+				ckrm_mem_shrink_count = val;
+			break;
+		case mem_shrink_interval:
+			if (match_int(args, &val) || (val <= 0))
+				err = -EINVAL;
+			else
+				ckrm_mem_shrink_interval = val;
+			break;
+		default:	/* key not listed in mem_tokens */
+			err = -EINVAL;
+		}
+	}
+	return err;	/* -EINVAL if any field was bad; good fields were applied */
 }
 
 static int
Index: linux-2.6.12-rc1/kernel/ckrm/ckrm_memctlr.c
===================================================================
--- linux-2.6.12-rc1.orig/kernel/ckrm/ckrm_memctlr.c
+++ linux-2.6.12-rc1/kernel/ckrm/ckrm_memctlr.c
@@ -24,6 +24,7 @@ incr_use_count(struct ckrm_mem_res *cls,
 	int i, pg_total = 0;
 	struct ckrm_mem_res *parcls = ckrm_get_res_class(cls->parent,
 				mem_rcbs.resid, struct ckrm_mem_res);
+	extern int ckrm_mem_shrink_at;
 
 	if (!cls)
 		return;
@@ -42,6 +43,12 @@ incr_use_count(struct ckrm_mem_res *cls,
 		cls->pg_borrowed[zindex]++;
 	} else
 		atomic_inc(&ckrm_mem_real_count);
+
+	if ((cls->pg_limit != CKRM_SHARE_DONTCARE) &&
+			(pg_total >= 
+			((ckrm_mem_shrink_at * cls->pg_limit) / 100)) &&
+			((cls->flags & CLS_AT_LIMIT) != CLS_AT_LIMIT))
+		ckrm_shrink_atlimit(cls);
 	return;
 }
 
@@ -81,6 +88,10 @@ ckrm_class_limit_ok(struct ckrm_mem_res 
 	} else
 		ret = (pg_total <= cls->pg_limit);
 
+	/* If we are failing, just nudge the back end */
+	if (ret == 0)
+		ckrm_shrink_atlimit(cls);
+
 	return ret;
 }
 
@@ -467,3 +478,35 @@ shrink_get_victims(struct zone *zone, un
 		pos = pos->next;
 	}
 }
+LIST_HEAD(ckrm_shrink_list);
+/* Queue cls on ckrm_shrink_list and wake kswapd, at a limited rate. */
+void
+ckrm_shrink_atlimit(struct ckrm_mem_res *cls)
+{
+	struct zone *zone;
+	unsigned long flags;
+	int order;
+
+	if (!cls || (cls->pg_limit == CKRM_SHARE_DONTCARE) ||
+			((cls->flags & CLS_AT_LIMIT) == CLS_AT_LIMIT))
+		return;
+	if (time_after(jiffies, cls->last_shrink +
+			ckrm_mem_shrink_interval * HZ)) { /* window expired */
+		cls->last_shrink = jiffies;
+		cls->shrink_count = 0;
+	}
+	cls->shrink_count++;
+	if (cls->shrink_count > ckrm_mem_shrink_count)
+		return;	/* already asked often enough in this window */
+	spin_lock_irqsave(&ckrm_mem_lock, flags);
+	list_add(&cls->shrink_list, &ckrm_shrink_list);
+	cls->flags |= CLS_AT_LIMIT; /* set under ckrm_mem_lock, like the clear */
+	spin_unlock_irqrestore(&ckrm_mem_lock, flags);
+	for_each_zone(zone) {
+		/* This is just a number to get to wakeup kswapd */
+		order = cls->pg_total[0] -
+			((ckrm_mem_shrink_to * cls->pg_limit) / 100);
+		wakeup_kswapd(zone, order);
+		break; /* only once is enough */
+	}
+}
Index: linux-2.6.12-rc1/mm/vmscan.c
===================================================================
--- linux-2.6.12-rc1.orig/mm/vmscan.c
+++ linux-2.6.12-rc1/mm/vmscan.c
@@ -860,6 +860,90 @@ shrink_ckrmzone(struct ckrm_zone *czone,
 		}
 	}
 }
+
+/* FIXME: This function needs to be given more thought. */
+static void
+ckrm_shrink_class(struct ckrm_mem_res *cls)
+{
+	struct scan_control sc;
+	struct zone *zone;
+	int zindex = 0, cnt, act_credit = 0, inact_credit = 0;
+
+	sc.nr_mapped = read_page_state(nr_mapped);
+	sc.nr_scanned = 0;
+	sc.nr_reclaimed = 0;
+	sc.priority = 0; /* always very high priority */
+
+	for_each_zone(zone) {
+		int zone_total, zone_limit, active_limit,
+					inactive_limit, clszone_limit;
+		struct ckrm_zone *czone;
+		u64 temp;
+
+		/* bump zindex here so a skipped zone cannot desync it */
+		czone = &cls->ckrm_zone[zindex++];
+		if (ckrm_test_set_shrink(czone))
+			continue;
+
+		zone->temp_priority = zone->prev_priority; /* restored below */
+		zone->prev_priority = sc.priority;
+
+		zone_total = zone->nr_active + zone->nr_inactive
+						+ zone->free_pages;
+
+		temp = (u64) cls->pg_limit * zone_total; /* scale by zone size */
+		do_div(temp, ckrm_tot_lru_pages);
+		zone_limit = (int) temp;
+		clszone_limit = (ckrm_mem_shrink_to * zone_limit) / 100;
+		active_limit = (2 * clszone_limit) / 3; /* 2/3rd in active */
+		inactive_limit = clszone_limit / 3; /* 1/3rd in inactive */
+
+		czone->shrink_active = 0;
+		cnt = czone->nr_active + act_credit - active_limit;
+		if (cnt > 0) {
+			czone->shrink_active = (unsigned long) cnt;
+			act_credit = 0;
+		} else
+			act_credit += cnt; /* headroom carried to next zone */
+
+		czone->shrink_inactive = 0;
+		cnt = czone->shrink_active + inact_credit +
+					(czone->nr_inactive - inactive_limit);
+		if (cnt > 0) {
+			czone->shrink_inactive = (unsigned long) cnt;
+			inact_credit = 0;
+		} else
+			inact_credit += cnt;
+
+		if (czone->shrink_active || czone->shrink_inactive) {
+			sc.nr_to_reclaim = czone->shrink_inactive;
+			shrink_ckrmzone(czone, &sc);
+		}
+		zone->prev_priority = zone->temp_priority;
+		ckrm_clear_shrink(czone);
+	}
+}
+
+static void
+ckrm_shrink_classes(void)	/* shrink every class on ckrm_shrink_list */
+{
+	struct ckrm_mem_res *cls;
+
+	spin_lock_irq(&ckrm_mem_lock);
+	while (!ckrm_shrink_list_empty()) {
+		cls =  list_entry(ckrm_shrink_list.next, struct ckrm_mem_res,
+				shrink_list);
+		list_del(&cls->shrink_list);
+		spin_unlock_irq(&ckrm_mem_lock); /* not held while shrinking */
+		ckrm_shrink_class(cls);
+		spin_lock_irq(&ckrm_mem_lock);
+		cls->flags &= ~CLS_AT_LIMIT; /* class may be queued again */
+	}
+	spin_unlock_irq(&ckrm_mem_lock);
+}
+
+#else
+#define ckrm_shrink_classes()	do { } while(0)
 #endif
 
 /*
@@ -1133,7 +1217,8 @@ loop_again:
 					continue;
 
 				if (!zone_watermark_ok(zone, order,
-						zone->pages_high, 0, 0, 0)) {
+						zone->pages_high, 0, 0, 0) &&
+						ckrm_shrink_list_empty()) {
 					end_zone = i;
 					goto scan;
 				}
@@ -1169,7 +1254,8 @@ scan:
 
 			if (nr_pages == 0) {	/* Not software suspend */
 				if (!zone_watermark_ok(zone, order,
-						zone->pages_high, end_zone, 0, 0))
+					zone->pages_high, end_zone, 0, 0) &&
+						ckrm_shrink_list_empty())
 					all_zones_ok = 0;
 			}
 			zone->temp_priority = priority;
@@ -1298,7 +1384,10 @@ static int kswapd(void *p)
 		}
 		finish_wait(&pgdat->kswapd_wait, &wait);
 
-		balance_pgdat(pgdat, 0, order);
+		if (!ckrm_shrink_list_empty())
+			ckrm_shrink_classes();
+		else 
+			balance_pgdat(pgdat, 0, order);
 	}
 	return 0;
 }
@@ -1314,7 +1403,8 @@ void wakeup_kswapd(struct zone *zone, in
 		return;
 
 	pgdat = zone->zone_pgdat;
-	if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0, 0))
+	if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0, 0) &&
+			ckrm_shrink_list_empty())
 		return;
 	if (pgdat->kswapd_max_order < order)
 		pgdat->kswapd_max_order = order;

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2005-06-24 22:26 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-06-24 22:26 [PATCH 5/6] CKRM: Add config support for mem controller Chandra Seetharaman
  -- strict thread matches above, loose matches on Subject: below --
2005-05-19  0:33 [Patch " Chandra Seetharaman
2005-04-02  3:15 Chandra Seetharaman

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox