linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
* [Patch 5/6] CKRM: Add config support for mem controller
@ 2005-05-19  0:33 Chandra Seetharaman
  2005-05-19  1:26 ` [ckrm-tech] " Dave Hansen
  0 siblings, 1 reply; 5+ messages in thread
From: Chandra Seetharaman @ 2005-05-19  0:33 UTC (permalink / raw)
  To: ckrm-tech, linux-mm

Patch 5 of 6 patches to support the memory controller under the CKRM framework.
Provides some config parameter support. Details about the config parameters
are in the Documentation patch.
----------------------------------------

 include/linux/ckrm_mem.h        |   14 ++++
 include/linux/ckrm_mem_inline.h |    4 +
 kernel/ckrm/ckrm_memcore.c      |  126 ++++++++++++++++++++++++++++++++++++++--
 kernel/ckrm/ckrm_memctlr.c      |   49 +++++++++++++++
 mm/vmscan.c                     |   95 ++++++++++++++++++++++++++++--
 5 files changed, 277 insertions(+), 11 deletions(-)

Content-Disposition: inline; filename=11-05-mem_guar-config

Index: linux-2612-rc3/include/linux/ckrm_mem.h
===================================================================
--- linux-2612-rc3.orig/include/linux/ckrm_mem.h
+++ linux-2612-rc3/include/linux/ckrm_mem.h
@@ -63,16 +63,28 @@ struct ckrm_mem_res {
 	int nr_dontcare;		/* # of dont care children */
 
 	struct ckrm_zone ckrm_zone[MAX_NR_ZONES];
+
+ 	struct list_head shrink_list;	/* list of classes that are near
+				 	 * limit and need to be shrunk
+					 */
+	int shrink_count;
+	unsigned long last_shrink;
 };
 
+#define CLS_AT_LIMIT		(1)
+
 extern atomic_t ckrm_mem_real_count;
 extern struct ckrm_res_ctlr mem_rcbs;
 extern struct ckrm_mem_res *ckrm_mem_root_class;
 extern struct list_head ckrm_memclass_list;
+extern struct list_head ckrm_shrink_list;
 extern spinlock_t ckrm_mem_lock;
 extern spinlock_t ckrm_overguar_lock[MAX_NR_ZONES];
 extern int ckrm_nr_mem_classes;
 extern unsigned int ckrm_tot_lru_pages;
+extern int ckrm_mem_shrink_count;
+extern int ckrm_mem_shrink_to;
+extern int ckrm_mem_shrink_interval;
 
 extern void ckrm_mem_migrate_mm(struct mm_struct *, struct ckrm_mem_res *);
 extern void ckrm_mem_migrate_all_pages(struct ckrm_mem_res *,
@@ -84,6 +96,8 @@ extern int ckrm_class_limit_ok(struct ck
 
 extern struct ckrm_zone *ckrm_get_max_overguar_czone(int);
 
+extern void ckrm_shrink_atlimit(struct ckrm_mem_res *);
+
 #else
 
 #define ckrm_mem_migrate_mm(a, b)			do {} while (0)
Index: linux-2612-rc3/include/linux/ckrm_mem_inline.h
===================================================================
--- linux-2612-rc3.orig/include/linux/ckrm_mem_inline.h
+++ linux-2612-rc3/include/linux/ckrm_mem_inline.h
@@ -26,6 +26,8 @@
 
 #ifdef CONFIG_CKRM_RES_MEM
 
+#define ckrm_shrink_list_empty() list_empty(&ckrm_shrink_list)
+
 static inline struct ckrm_mem_res *
 ckrm_task_memclass(struct task_struct *tsk)
 {
@@ -324,6 +326,8 @@ static inline void ckrm_add_tail_inactiv
 
 #else
 
+#define ckrm_shrink_list_empty()		(1)
+
 static inline void *
 ckrm_task_memclass(struct task_struct *tsk)
 {
Index: linux-2612-rc3/kernel/ckrm/ckrm_memcore.c
===================================================================
--- linux-2612-rc3.orig/kernel/ckrm/ckrm_memcore.c
+++ linux-2612-rc3/kernel/ckrm/ckrm_memcore.c
@@ -42,6 +42,7 @@ LIST_HEAD(ckrm_memclass_list);
 spinlock_t ckrm_mem_lock; /* protects list above */
 unsigned int ckrm_tot_lru_pages; /* # of pages in the system */
 int ckrm_nr_mem_classes = 0;
+int ckrm_mem_state = 0;
 struct ckrm_mem_res *ckrm_mem_root_class;
 atomic_t ckrm_mem_real_count = ATOMIC_INIT(0);
 
@@ -52,6 +53,7 @@ EXPORT_SYMBOL_GPL(ckrm_memclass_list);
 EXPORT_SYMBOL_GPL(ckrm_mem_lock);
 EXPORT_SYMBOL_GPL(ckrm_tot_lru_pages);
 EXPORT_SYMBOL_GPL(ckrm_nr_mem_classes);
+EXPORT_SYMBOL_GPL(ckrm_mem_state);
 EXPORT_SYMBOL_GPL(ckrm_mem_root_class);
 EXPORT_SYMBOL_GPL(ckrm_mem_real_count);
 
@@ -103,6 +105,7 @@ mem_res_initcls_one(struct ckrm_mem_res 
 	res->implicit_guar = CKRM_SHARE_DONTCARE;
 
 	INIT_LIST_HEAD(&res->mcls_list);
+	INIT_LIST_HEAD(&res->shrink_list);
 
 	for_each_zone(zone) {
 		INIT_LIST_HEAD(&res->ckrm_zone[zindex].active_list);
@@ -248,6 +251,11 @@ mem_res_alloc(struct ckrm_core_class *co
 		ckrm_nr_mem_classes++;
 	} else
 		printk(KERN_ERR "MEM_RC: alloc: GFP_ATOMIC failed\n");
+
+	/* enable the controller if the user defined atleast 1 class */
+	if (ckrm_nr_mem_classes > 1)
+		ckrm_mem_state = 1;
+	 
 	return res;
 }
 
@@ -402,6 +410,9 @@ mem_set_share_values(void *my_res, struc
 		set_impl_guar_children(parres);
 	}
 
+	/* If the user has changed the shares, enable the controller */
+	ckrm_mem_state = 1;
+
 	return rc;
 }
 
@@ -499,6 +510,23 @@ mem_change_resclass(void *tsk, void *old
 	return;
 }
 
+#define MEM_STATE "state"
+#define MEM_FAIL_OVER "fail_over"
+#define MEM_SHRINK_AT "shrink_at"
+#define MEM_SHRINK_TO "shrink_to"
+#define MEM_SHRINK_COUNT "num_shrinks"
+#define MEM_SHRINK_INTERVAL "shrink_interval"
+
+int ckrm_mem_fail_at = 110;
+int ckrm_mem_shrink_at = 90;
+int ckrm_mem_shrink_to = 80;
+int ckrm_mem_shrink_count = 10;
+int ckrm_mem_shrink_interval = 10;
+
+EXPORT_SYMBOL_GPL(ckrm_mem_fail_at);
+EXPORT_SYMBOL_GPL(ckrm_mem_shrink_at);
+EXPORT_SYMBOL_GPL(ckrm_mem_shrink_to);
+
 static int
 mem_show_config(void *my_res, struct seq_file *sfile)
 {
@@ -506,24 +534,110 @@ mem_show_config(void *my_res, struct seq
 
 	if (!res)
 		return -EINVAL;
-	printk(KERN_INFO "show_config called for %s resource of class %s\n",
-			MEM_RES_NAME, res->core->name);
 
-	seq_printf(sfile, "res=%s", MEM_RES_NAME);
+	seq_printf(sfile, "res=%s,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d\n",
+		MEM_RES_NAME,
+		MEM_STATE, ckrm_mem_state,
+		MEM_FAIL_OVER, ckrm_mem_fail_at,
+		MEM_SHRINK_AT, ckrm_mem_shrink_at,
+		MEM_SHRINK_TO, ckrm_mem_shrink_to,
+		MEM_SHRINK_COUNT, ckrm_mem_shrink_count,
+		MEM_SHRINK_INTERVAL, ckrm_mem_shrink_interval);
 
 	return 0;
 }
 
+typedef int __bitwise memclass_token_t;
+
+enum memclass_token {
+	mem_state = (__force memclass_token_t) 1,
+	mem_fail_over = (__force memclass_token_t) 2,
+	mem_shrink_at = (__force memclass_token_t) 3,
+	mem_shrink_to = (__force memclass_token_t) 4,
+	mem_shrink_count = (__force memclass_token_t) 5,
+	mem_shrink_interval = (__force memclass_token_t) 6,
+	mem_err = (__force memclass_token_t) 7
+};
+
+static match_table_t mem_tokens = {
+	{mem_state, MEM_STATE "=%d"},
+	{mem_fail_over, MEM_FAIL_OVER "=%d"},
+	{mem_shrink_at, MEM_SHRINK_AT "=%d"},
+	{mem_shrink_to, MEM_SHRINK_TO "=%d"},
+	{mem_shrink_count, MEM_SHRINK_COUNT "=%d"},
+	{mem_shrink_interval, MEM_SHRINK_INTERVAL "=%d"},
+	{mem_err, NULL},
+};
+
 static int
 mem_set_config(void *my_res, const char *cfgstr)
 {
+	char *p;
 	struct ckrm_mem_res *res = my_res;
+	int err = 0, val;
 
 	if (!res)
 		return -EINVAL;
-	printk(KERN_INFO "set_config called for %s resource of class %s\n",
-			MEM_RES_NAME, res->core->name);
-	return 0;
+
+	while ((p = strsep((char**)&cfgstr, ",")) != NULL) {
+		substring_t args[MAX_OPT_ARGS];
+		int token;
+		if (!*p)
+			continue;
+
+		token = match_token(p, mem_tokens, args);
+		switch (token) {
+		case mem_state:
+			err = -EINVAL;
+			match_int(args, &val);
+			switch (val) {
+			case 0:
+				if (ckrm_nr_mem_classes > 1)
+					break;
+				/* FALLTHRU */
+			case 1:
+				ckrm_mem_state = val;
+				err = 0;
+				break;
+			default:
+				break;
+			}
+			break;
+		case mem_fail_over:
+			if (match_int(args, &val) || (val <= 0))
+				err = -EINVAL;
+			else
+				ckrm_mem_fail_at = val;
+			break;
+		case mem_shrink_at:
+			if (match_int(args, &val) || (val <= 0))
+				err = -EINVAL;
+			else
+				ckrm_mem_shrink_at = val;
+			break;
+		case mem_shrink_to:
+			if (match_int(args, &val) || (val < 0) || (val > 100))
+				err = -EINVAL;
+			else
+				ckrm_mem_shrink_to = val;
+			break;
+		case mem_shrink_count:
+			if (match_int(args, &val) || (val <= 0))
+				err = -EINVAL;
+			else
+				ckrm_mem_shrink_count = val;
+			break;
+		case mem_shrink_interval:
+			if (match_int(args, &val) || (val <= 0))
+				err = -EINVAL;
+			else
+				ckrm_mem_shrink_interval = val;
+			break;
+		default:
+			err = -EINVAL;
+		}
+	}
+	return err;
 }
 
 static int
Index: linux-2612-rc3/kernel/ckrm/ckrm_memctlr.c
===================================================================
--- linux-2612-rc3.orig/kernel/ckrm/ckrm_memctlr.c
+++ linux-2612-rc3/kernel/ckrm/ckrm_memctlr.c
@@ -59,10 +59,13 @@ ckrm_del_from_guar_list(struct ckrm_zone
 	}
 }
 
+extern int ckrm_mem_state;
+
 void
 add_use_count(struct ckrm_mem_res *cls, int borrow, int zindex, int cnt)
 {
 	int i, pg_total = 0;
+	extern int ckrm_mem_shrink_at;
 	struct ckrm_mem_res *parcls = ckrm_memclass(cls->parent);
 
 	if (!cls)
@@ -82,6 +85,12 @@ add_use_count(struct ckrm_mem_res *cls, 
 	} else
 		atomic_add(cnt, &ckrm_mem_real_count);
 	ckrm_add_to_guar_list(&cls->ckrm_zone[zindex], zindex);
+
+	if (ckrm_mem_state && (cls->pg_limit != CKRM_SHARE_DONTCARE) &&
+			(pg_total >= 
+			((ckrm_mem_shrink_at * cls->pg_limit) / 100)) &&
+			(test_bit(CLS_AT_LIMIT, &cls->flags)))
+		ckrm_shrink_atlimit(cls);
 	return;
 }
 
@@ -113,7 +122,7 @@ ckrm_class_limit_ok(struct ckrm_mem_res 
 {
 	int ret, i, pg_total = 0;
 
-	if ((mem_rcbs.resid == -1) || !cls)
+	if ((ckrm_mem_state == 0) || (mem_rcbs.resid == -1) || !cls)
 		return 1;
 	for (i = 0; i < MAX_NR_ZONES; i++)
 		pg_total += cls->pg_total[i];
@@ -123,6 +132,10 @@ ckrm_class_limit_ok(struct ckrm_mem_res 
 	} else
 		ret = (pg_total <= cls->pg_limit);
 
+	/* If we are failing, just nudge the back end */
+	if (ret == 0)
+		ckrm_shrink_atlimit(cls);
+
 	return ret;
 }
 
@@ -404,3 +417,37 @@ ckrm_get_max_overguar_czone(int zindex)
 
 	return maxczone;
 }
+LIST_HEAD(ckrm_shrink_list);
+
+void
+ckrm_shrink_atlimit(struct ckrm_mem_res *cls)
+{
+	struct zone *zone;
+	unsigned long flags;
+	int order;
+
+	if (!cls || (cls->pg_limit == CKRM_SHARE_DONTCARE))
+		return;
+	if (test_and_set_bit(CLS_AT_LIMIT, &cls->flags))
+		return;
+	if (time_after(cls->last_shrink + ckrm_mem_shrink_interval * HZ, 
+								jiffies)) {
+		cls->last_shrink = jiffies;
+		cls->shrink_count = 0;
+	}
+	cls->shrink_count++;
+	if (cls->shrink_count > ckrm_mem_shrink_count) {
+		clear_bit(CLS_AT_LIMIT, &cls->flags);
+		return;
+	}
+	spin_lock_irqsave(&ckrm_mem_lock, flags);
+	list_add(&cls->shrink_list, &ckrm_shrink_list);
+	spin_unlock_irqrestore(&ckrm_mem_lock, flags);
+	for_each_zone(zone) {
+		/* This is just a number to get to wakeup kswapd */
+		order = cls->pg_total[0] -
+			((ckrm_mem_shrink_to * cls->pg_limit) / 100);
+		wakeup_kswapd(zone, order);
+		break; /* only once is enough */
+	}
+}
Index: linux-2612-rc3/mm/vmscan.c
===================================================================
--- linux-2612-rc3.orig/mm/vmscan.c
+++ linux-2612-rc3/mm/vmscan.c
@@ -869,6 +869,87 @@ shrink_ckrmzone(struct ckrm_zone *czone,
 		}
 	}
 }
+
+/* FIXME: This function needs to be given more thought. */
+static void
+ckrm_shrink_class(struct ckrm_mem_res *cls)
+{
+	struct scan_control sc;
+	struct zone *zone;
+	int zindex = 0, cnt, act_credit = 0, inact_credit = 0;
+
+	sc.nr_mapped = read_page_state(nr_mapped);
+	sc.nr_scanned = 0;
+	sc.nr_reclaimed = 0;
+	sc.priority = 0; /* always very high priority */
+
+	for_each_zone(zone) {
+		int zone_total, zone_limit, active_limit,
+					inactive_limit, clszone_limit;
+		struct ckrm_zone *czone;
+		u64 temp;
+
+		czone = &cls->ckrm_zone[zindex];
+
+		zone->temp_priority = zone->prev_priority;
+		zone->prev_priority = sc.priority;
+
+		zone_total = zone->nr_active + zone->nr_inactive 
+						+ zone->free_pages;
+
+		temp = (u64) cls->pg_limit * zone_total;
+		do_div(temp, ckrm_tot_lru_pages);
+		zone_limit = (int) temp;
+		clszone_limit = (ckrm_mem_shrink_to * zone_limit) / 100;
+		active_limit = (2 * clszone_limit) / 3; /* 2/3rd in active */
+		inactive_limit = clszone_limit / 3; /* 1/3rd in inactive */
+
+		sc.ckrm_active = 0;
+		cnt = czone->nr_active + act_credit - active_limit;
+		if (cnt > 0) {
+			sc.ckrm_active = (unsigned long) cnt;
+			act_credit = 0;
+		} else
+			act_credit += cnt;
+
+		sc.ckrm_inactive = 0;
+		cnt = sc.ckrm_active + inact_credit +
+					(czone->nr_inactive - inactive_limit);
+		if (cnt > 0) {
+			sc.ckrm_inactive = (unsigned long) cnt;
+			inact_credit = 0;
+		} else
+			inact_credit += cnt;
+
+		if (sc.ckrm_active || sc.ckrm_inactive) {
+			sc.nr_to_reclaim = sc.ckrm_inactive;
+			shrink_ckrmzone(czone, &sc);
+		}
+		zone->prev_priority = zone->temp_priority;
+		zindex++;
+	}
+}
+
+static void
+ckrm_shrink_classes(void)
+{
+	struct ckrm_mem_res *cls;
+
+	spin_lock_irq(&ckrm_mem_lock);
+	while (!ckrm_shrink_list_empty()) {
+		cls =  list_entry(ckrm_shrink_list.next, struct ckrm_mem_res,
+				shrink_list);
+		list_del(&cls->shrink_list);
+		spin_unlock_irq(&ckrm_mem_lock);
+		ckrm_shrink_class(cls);
+		clear_bit(CLS_AT_LIMIT, &cls->flags);
+		spin_lock_irq(&ckrm_mem_lock);
+	}
+	spin_unlock_irq(&ckrm_mem_lock);
+}
+
+#else
+#define ckrm_shrink_classes()	do { } while(0)
 #endif
 
 /*
@@ -1135,7 +1216,8 @@ loop_again:
 					continue;
 
 				if (!zone_watermark_ok(zone, order,
-						zone->pages_high, 0, 0, 0)) {
+						zone->pages_high, 0, 0, 0) &&
+						ckrm_shrink_list_empty()) {
 					end_zone = i;
 					goto scan;
 				}
@@ -1171,7 +1253,8 @@ scan:
 
 			if (nr_pages == 0) {	/* Not software suspend */
 				if (!zone_watermark_ok(zone, order,
-						zone->pages_high, end_zone, 0, 0))
+					zone->pages_high, end_zone, 0, 0) &&
+						ckrm_shrink_list_empty())
 					all_zones_ok = 0;
 			}
 			zone->temp_priority = priority;
@@ -1300,7 +1383,10 @@ static int kswapd(void *p)
 		}
 		finish_wait(&pgdat->kswapd_wait, &wait);
 
-		balance_pgdat(pgdat, 0, order);
+		if (!ckrm_shrink_list_empty())
+			ckrm_shrink_classes();
+		else 
+			balance_pgdat(pgdat, 0, order);
 	}
 	return 0;
 }
@@ -1316,7 +1402,8 @@ void wakeup_kswapd(struct zone *zone, in
 		return;
 
 	pgdat = zone->zone_pgdat;
-	if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0, 0))
+	if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0, 0) &&
+			ckrm_shrink_list_empty())
 		return;
 	if (pgdat->kswapd_max_order < order)
 		pgdat->kswapd_max_order = order;
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [ckrm-tech] [Patch 5/6] CKRM: Add config support for mem controller
  2005-05-19  0:33 [Patch 5/6] CKRM: Add config support for mem controller Chandra Seetharaman
@ 2005-05-19  1:26 ` Dave Hansen
  2005-05-19 16:26   ` Chandra Seetharaman
  0 siblings, 1 reply; 5+ messages in thread
From: Dave Hansen @ 2005-05-19  1:26 UTC (permalink / raw)
  To: Chandra Seetharaman; +Cc: ckrm-tech, linux-mm

There still appear to be some serious issues in the patch with respect
to per-zone accounting.  There is only accounting in each ckrm_mem_res
for each *kind* of zone, not each zone.

For instance, the accounting for a page appears to be the same no matter
which zone it came from; it depends only on which kind of zone.

Then, when it comes to actually using some of the information, the kswapd
wakeup just throws a completely unrelated number into wakeup_kswapd().
ZONE_DMA (zone 0) tends to be *MUCH* smaller than ZONE_HIGHMEM, for
instance.  It doesn't make a whole lot of logical sense to me to be
waking up kswapd for a possibly 16GB zone with data from a 16MB zone.

+       for_each_zone(zone) {
+               /* This is just a number to get to wakeup kswapd */
+               order = cls->pg_total[0] -
+                       ((ckrm_mem_shrink_to * cls->pg_limit) / 100);
+               wakeup_kswapd(zone, order);
+               break; /* only once is enough */
+       }

If the number doesn't matter, why not just pass 0 into it?

Could you explain what advantages keeping a per-zone-type count has over
actually doing one count for each zone?  Also, why bother tracking it
per-zone-type anyway?  Would a single count work the same way?

-- Dave

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [ckrm-tech] [Patch 5/6] CKRM: Add config support for mem controller
  2005-05-19  1:26 ` [ckrm-tech] " Dave Hansen
@ 2005-05-19 16:26   ` Chandra Seetharaman
  2005-05-19 16:43     ` Dave Hansen
  0 siblings, 1 reply; 5+ messages in thread
From: Chandra Seetharaman @ 2005-05-19 16:26 UTC (permalink / raw)
  To: Dave Hansen; +Cc: ckrm-tech, linux-mm

On Wed, May 18, 2005 at 06:26:50PM -0700, Dave Hansen wrote:
> There appears to still be some serious issues in the patch with respect
> to per-zone accounting.  There is only accounting in each ckrm_mem_res
> for each *kind* of zone, not each zone.

In the absence of NUMA/DISCONTIGMEM, aren't 'kind of zone' and 'zone'
the same? Correct me if this assumption is wrong.

> 
> For instance, the accounting for a page appears to be the same no matter
> which zone it came from, just which kind of zone
> 
> Then, when it comes to actually use some of the information, the kswapd
> wakeup just throws a completely unrelated number into wakeup_kswapd().
> ZONE_DMA (zone 0) tends to be *MUCH* smaller than ZONE_HIGHMEM, for
> instance.  It doesn't make a whole lot of logical sense to me to be
> waking up kswapd for a possibly 16GB zone with data from a 16MB zone.

When kswapd is woken up via wakeup_kswapd(), it looks at the over-limit list
(ckrm_shrink_list) and works only on the classes on that list, completely
ignoring the arguments that were passed to wakeup_kswapd().

I did it this way (instead of having my own logic) to use existing code.
> 
> +       for_each_zone(zone) {
> +               /* This is just a number to get to wakeup kswapd */
> +               order = cls->pg_total[0] -
> +                       ((ckrm_mem_shrink_to * cls->pg_limit) / 100);
> +               wakeup_kswapd(zone, order);
> +               break; /* only once is enough */
> +       }
> 
> If the number doesn't matter, why not just pass 0 into it?

Yes, I could. Will do it.
> 
> Could you explain what advantages keeping a per-zone-type count has over
> actually doing one count for each zone?  Also, why bother tracking it
> per-zone-type anyway?  Would a single count work the same way

It fits the NUMA/DISCONTIGMEM issue discussed above.

> 
> -- Dave
> 

-- 

----------------------------------------------------------------------
    Chandra Seetharaman               | Be careful what you choose....
              - sekharan@us.ibm.com   |      .......you may get it.
----------------------------------------------------------------------
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [ckrm-tech] [Patch 5/6] CKRM: Add config support for mem controller
  2005-05-19 16:26   ` Chandra Seetharaman
@ 2005-05-19 16:43     ` Dave Hansen
  2005-05-19 16:49       ` Chandra Seetharaman
  0 siblings, 1 reply; 5+ messages in thread
From: Dave Hansen @ 2005-05-19 16:43 UTC (permalink / raw)
  To: Chandra Seetharaman; +Cc: ckrm-tech, linux-mm

On Thu, 2005-05-19 at 09:26 -0700, Chandra Seetharaman wrote:
> On Wed, May 18, 2005 at 06:26:50PM -0700, Dave Hansen wrote:
> > There appears to still be some serious issues in the patch with respect
> > to per-zone accounting.  There is only accounting in each ckrm_mem_res
> > for each *kind* of zone, not each zone.
> 
> In the absense of NUMA/DISCONTIGMEM, isn't 'kind of zone' and 'zone'
> the same ? Correct me if this assumption is wrong.

Yes, that is correct.  Do you not expect your code to work with NUMA or
DISCONTIGMEM?

> > Could you explain what advantages keeping a per-zone-type count has over
> > actually doing one count for each zone?  Also, why bother tracking it
> > per-zone-type anyway?  Would a single count work the same way
> 
> fits the NUMA/DISCONTIGMEM issue discussed above.

I don't think it fits it very well; it kind of just glosses over it.  A
great fit would be something that tracked how much each class was using
in each zone, not each kind of zone.  Perhaps a controller would like to
keep an individual class from using too much memory in any particular
NUMA node.  The current memory controller design would make that
impossible.

-- Dave

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [ckrm-tech] [Patch 5/6] CKRM: Add config support for mem controller
  2005-05-19 16:43     ` Dave Hansen
@ 2005-05-19 16:49       ` Chandra Seetharaman
  0 siblings, 0 replies; 5+ messages in thread
From: Chandra Seetharaman @ 2005-05-19 16:49 UTC (permalink / raw)
  To: Dave Hansen; +Cc: ckrm-tech, linux-mm

On Thu, May 19, 2005 at 09:43:10AM -0700, Dave Hansen wrote:
> On Thu, 2005-05-19 at 09:26 -0700, Chandra Seetharaman wrote:
> > On Wed, May 18, 2005 at 06:26:50PM -0700, Dave Hansen wrote:
> > > There appears to still be some serious issues in the patch with respect
> > > to per-zone accounting.  There is only accounting in each ckrm_mem_res
> > > for each *kind* of zone, not each zone.
> > 
> > In the absense of NUMA/DISCONTIGMEM, isn't 'kind of zone' and 'zone'
> > the same ? Correct me if this assumption is wrong.
> 
> Yes, that is correct.  Do you not expect your code to work with NUMA or
> DISCONTIGMEM?

not yet...
> 
> > > Could you explain what advantages keeping a per-zone-type count has over
> > > actually doing one count for each zone?  Also, why bother tracking it
> > > per-zone-type anyway?  Would a single count work the same way
> > 
> > fits the NUMA/DISCONTIGMEM issue discussed above.
> 
> I don't think it fits it very well, it kinda just glosses over it.  A
> great fit would be something that tracked how much each class was using
> in each zone, not each kind of zone.  Perhaps a controller would like to
> keep an individual class from using too much memory in any particular
> NUMA node.  The current memory controller design would keep that from
> happening.

This is one of the "things to consider" in our "NUMA support".
> 
> -- Dave
> 

-- 

----------------------------------------------------------------------
    Chandra Seetharaman               | Be careful what you choose....
              - sekharan@us.ibm.com   |      .......you may get it.
----------------------------------------------------------------------
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2005-05-19 16:55 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-05-19  0:33 [Patch 5/6] CKRM: Add config support for mem controller Chandra Seetharaman
2005-05-19  1:26 ` [ckrm-tech] " Dave Hansen
2005-05-19 16:26   ` Chandra Seetharaman
2005-05-19 16:43     ` Dave Hansen
2005-05-19 16:49       ` Chandra Seetharaman

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox