From: Chandra Seetharaman <sekharan@us.ibm.com>
To: ckrm-tech <ckrm-tech@lists.sourceforge.net>,
linux-mm <linux-mm@kvack.org>
Subject: [PATCH 5/6] CKRM: Add config support for mem controller
Date: Fri, 24 Jun 2005 15:26:38 -0700 [thread overview]
Message-ID: <1119651998.5105.23.camel@linuxchandra> (raw)
Patch 5 of 6 to support the memory controller under the CKRM framework.
This patch adds support for the controller's config parameters. The
parameters themselves are described in the Documentation patch of this
series.
----------------------------------------
include/linux/ckrm_mem.h | 14 ++++
include/linux/ckrm_mem_inline.h | 4 +
kernel/ckrm/ckrm_memcore.c | 127 ++++++++++++++++++++++++++++++++++++++--
kernel/ckrm/ckrm_memctlr.c | 49 +++++++++++++++
mm/vmscan.c | 96 ++++++++++++++++++++++++++++--
5 files changed, 279 insertions(+), 11 deletions(-)
Content-Disposition: inline; filename=11-05-mem_guar-config
Index: linux-2.6.12/include/linux/ckrm_mem.h
===================================================================
--- linux-2.6.12.orig/include/linux/ckrm_mem.h
+++ linux-2.6.12/include/linux/ckrm_mem.h
@@ -63,16 +63,28 @@ struct ckrm_mem_res {
int nr_dontcare; /* # of dont care children */
struct ckrm_zone ckrm_zone[MAX_NR_ZONES];
+
+ struct list_head shrink_list; /* list of classes that are near
+ * limit and need to be shrunk
+ */
+ int shrink_count;
+ unsigned long last_shrink;
};
+#define CLS_AT_LIMIT (1)
+
extern atomic_t ckrm_mem_real_count;
extern struct ckrm_res_ctlr mem_rcbs;
extern struct ckrm_mem_res *ckrm_mem_root_class;
extern struct list_head ckrm_memclass_list;
+extern struct list_head ckrm_shrink_list;
extern spinlock_t ckrm_mem_lock;
extern spinlock_t ckrm_overguar_lock[MAX_NR_ZONES];
extern int ckrm_nr_mem_classes;
extern unsigned int ckrm_tot_lru_pages;
+extern int ckrm_mem_shrink_count;
+extern int ckrm_mem_shrink_to;
+extern int ckrm_mem_shrink_interval;
extern void ckrm_mem_migrate_mm(struct mm_struct *, struct ckrm_mem_res
*);
extern void ckrm_mem_migrate_all_pages(struct ckrm_mem_res *,
@@ -84,6 +96,8 @@ extern int ckrm_class_limit_ok(struct ck
extern struct ckrm_zone *ckrm_get_max_overguar_czone(int);
+extern void ckrm_shrink_atlimit(struct ckrm_mem_res *);
+
#else
#define ckrm_mem_migrate_mm(a, b) do {} while (0)
Index: linux-2.6.12/include/linux/ckrm_mem_inline.h
===================================================================
--- linux-2.6.12.orig/include/linux/ckrm_mem_inline.h
+++ linux-2.6.12/include/linux/ckrm_mem_inline.h
@@ -26,6 +26,8 @@
#ifdef CONFIG_CKRM_RES_MEM
+#define ckrm_shrink_list_empty() list_empty(&ckrm_shrink_list)
+
static inline struct ckrm_mem_res *
ckrm_task_memclass(struct task_struct *tsk)
{
@@ -324,6 +326,8 @@ static inline void ckrm_add_tail_inactiv
#else
+#define ckrm_shrink_list_empty() (1)
+
static inline void *
ckrm_task_memclass(struct task_struct *tsk)
{
Index: linux-2.6.12/kernel/ckrm/ckrm_memcore.c
===================================================================
--- linux-2.6.12.orig/kernel/ckrm/ckrm_memcore.c
+++ linux-2.6.12/kernel/ckrm/ckrm_memcore.c
@@ -42,6 +42,7 @@ LIST_HEAD(ckrm_memclass_list);
spinlock_t ckrm_mem_lock; /* protects list above */
unsigned int ckrm_tot_lru_pages; /* # of pages in the system */
int ckrm_nr_mem_classes = 0;
+int ckrm_mem_state = 0;
struct ckrm_mem_res *ckrm_mem_root_class;
atomic_t ckrm_mem_real_count = ATOMIC_INIT(0);
@@ -52,6 +53,7 @@ EXPORT_SYMBOL_GPL(ckrm_memclass_list);
EXPORT_SYMBOL_GPL(ckrm_mem_lock);
EXPORT_SYMBOL_GPL(ckrm_tot_lru_pages);
EXPORT_SYMBOL_GPL(ckrm_nr_mem_classes);
+EXPORT_SYMBOL_GPL(ckrm_mem_state);
EXPORT_SYMBOL_GPL(ckrm_mem_root_class);
EXPORT_SYMBOL_GPL(ckrm_mem_real_count);
@@ -101,8 +103,10 @@ mem_res_initcls_one(struct ckrm_mem_res
res->pg_guar = CKRM_SHARE_DONTCARE;
res->pg_limit = CKRM_SHARE_DONTCARE;
res->implicit_guar = CKRM_SHARE_DONTCARE;
+ res->last_shrink = jiffies;
INIT_LIST_HEAD(&res->mcls_list);
+ INIT_LIST_HEAD(&res->shrink_list);
for_each_zone(zone) {
INIT_LIST_HEAD(&res->ckrm_zone[zindex].active_list);
@@ -248,6 +252,11 @@ mem_res_alloc(struct ckrm_core_class *co
ckrm_nr_mem_classes++;
} else
printk(KERN_ERR "MEM_RC: alloc: GFP_ATOMIC failed\n");
+
+ /* enable the controller if the user defined atleast 1 class */
+ if (ckrm_nr_mem_classes > 1)
+ ckrm_mem_state = 1;
+
return res;
}
@@ -402,6 +411,9 @@ mem_set_share_values(void *my_res, struc
set_impl_guar_children(parres);
}
+ /* If the user has changed the shares, enable the controller */
+ ckrm_mem_state = 1;
+
return rc;
}
@@ -499,6 +511,23 @@ mem_change_resclass(void *tsk, void *old
return;
}
+#define MEM_STATE "state"
+#define MEM_FAIL_OVER "fail_over"
+#define MEM_SHRINK_AT "shrink_at"
+#define MEM_SHRINK_TO "shrink_to"
+#define MEM_SHRINK_COUNT "num_shrinks"
+#define MEM_SHRINK_INTERVAL "shrink_interval"
+
+int ckrm_mem_fail_at = 110;
+int ckrm_mem_shrink_at = 90;
+int ckrm_mem_shrink_to = 80;
+int ckrm_mem_shrink_count = 10;
+int ckrm_mem_shrink_interval = 10;
+
+EXPORT_SYMBOL_GPL(ckrm_mem_fail_at);
+EXPORT_SYMBOL_GPL(ckrm_mem_shrink_at);
+EXPORT_SYMBOL_GPL(ckrm_mem_shrink_to);
+
static int
mem_show_config(void *my_res, struct seq_file *sfile)
{
@@ -506,24 +535,110 @@ mem_show_config(void *my_res, struct seq
if (!res)
return -EINVAL;
- printk(KERN_INFO "show_config called for %s resource of class %s\n",
- MEM_RES_NAME, res->core->name);
- seq_printf(sfile, "res=%s", MEM_RES_NAME);
+ seq_printf(sfile, "res=%s,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d,%s=%d\n",
+ MEM_RES_NAME,
+ MEM_STATE, ckrm_mem_state,
+ MEM_FAIL_OVER, ckrm_mem_fail_at,
+ MEM_SHRINK_AT, ckrm_mem_shrink_at,
+ MEM_SHRINK_TO, ckrm_mem_shrink_to,
+ MEM_SHRINK_COUNT, ckrm_mem_shrink_count,
+ MEM_SHRINK_INTERVAL, ckrm_mem_shrink_interval);
return 0;
}
+typedef int __bitwise memclass_token_t;
+
+enum memclass_token {
+ mem_state = (__force memclass_token_t) 1,
+ mem_fail_over = (__force memclass_token_t) 2,
+ mem_shrink_at = (__force memclass_token_t) 3,
+ mem_shrink_to = (__force memclass_token_t) 4,
+ mem_shrink_count = (__force memclass_token_t) 5,
+ mem_shrink_interval = (__force memclass_token_t) 6,
+ mem_err = (__force memclass_token_t) 7
+};
+
+static match_table_t mem_tokens = {
+ {mem_state, MEM_STATE "=%d"},
+ {mem_fail_over, MEM_FAIL_OVER "=%d"},
+ {mem_shrink_at, MEM_SHRINK_AT "=%d"},
+ {mem_shrink_to, MEM_SHRINK_TO "=%d"},
+ {mem_shrink_count, MEM_SHRINK_COUNT "=%d"},
+ {mem_shrink_interval, MEM_SHRINK_INTERVAL "=%d"},
+ {mem_err, NULL},
+};
+
static int
mem_set_config(void *my_res, const char *cfgstr)
{
+ char *p;
struct ckrm_mem_res *res = my_res;
+ int err = 0, val;
if (!res)
return -EINVAL;
- printk(KERN_INFO "set_config called for %s resource of class %s\n",
- MEM_RES_NAME, res->core->name);
- return 0;
+
+ while ((p = strsep((char**)&cfgstr, ",")) != NULL) {
+ substring_t args[MAX_OPT_ARGS];
+ int token;
+ if (!*p)
+ continue;
+
+ token = match_token(p, mem_tokens, args);
+ switch (token) {
+ case mem_state:
+ err = -EINVAL;
+ match_int(args, &val);
+ switch (val) {
+ case 0:
+ if (ckrm_nr_mem_classes > 1)
+ break;
+ /* FALLTHRU */
+ case 1:
+ ckrm_mem_state = val;
+ err = 0;
+ break;
+ default:
+ break;
+ }
+ break;
+ case mem_fail_over:
+ if (match_int(args, &val) || (val <= 0))
+ err = -EINVAL;
+ else
+ ckrm_mem_fail_at = val;
+ break;
+ case mem_shrink_at:
+ if (match_int(args, &val) || (val <= 0))
+ err = -EINVAL;
+ else
+ ckrm_mem_shrink_at = val;
+ break;
+ case mem_shrink_to:
+ if (match_int(args, &val) || (val < 0) || (val > 100))
+ err = -EINVAL;
+ else
+ ckrm_mem_shrink_to = val;
+ break;
+ case mem_shrink_count:
+ if (match_int(args, &val) || (val <= 0))
+ err = -EINVAL;
+ else
+ ckrm_mem_shrink_count = val;
+ break;
+ case mem_shrink_interval:
+ if (match_int(args, &val) || (val <= 0))
+ err = -EINVAL;
+ else
+ ckrm_mem_shrink_interval = val;
+ break;
+ default:
+ err = -EINVAL;
+ }
+ }
+ return err;
}
static int
Index: linux-2.6.12/kernel/ckrm/ckrm_memctlr.c
===================================================================
--- linux-2.6.12.orig/kernel/ckrm/ckrm_memctlr.c
+++ linux-2.6.12/kernel/ckrm/ckrm_memctlr.c
@@ -59,10 +59,13 @@ ckrm_del_from_guar_list(struct ckrm_zone
}
}
+extern int ckrm_mem_state;
+
void
add_use_count(struct ckrm_mem_res *cls, int borrow, int zindex, int
cnt)
{
int i, pg_total = 0;
+ extern int ckrm_mem_shrink_at;
struct ckrm_mem_res *parcls = ckrm_memclass(cls->parent);
if (!cls)
@@ -82,6 +85,12 @@ add_use_count(struct ckrm_mem_res *cls,
} else
atomic_add(cnt, &ckrm_mem_real_count);
ckrm_add_to_guar_list(&cls->ckrm_zone[zindex], zindex);
+
+ if (ckrm_mem_state && (cls->pg_limit != CKRM_SHARE_DONTCARE) &&
+ (pg_total >=
+ ((ckrm_mem_shrink_at * cls->pg_limit) / 100)) &&
+ (!test_bit(CLS_AT_LIMIT, &cls->flags)))
+ ckrm_shrink_atlimit(cls);
return;
}
@@ -113,7 +122,7 @@ ckrm_class_limit_ok(struct ckrm_mem_res
{
int ret, i, pg_total = 0;
- if ((mem_rcbs.resid == -1) || !cls)
+ if ((ckrm_mem_state == 0) || (mem_rcbs.resid == -1) || !cls)
return 1;
for (i = 0; i < MAX_NR_ZONES; i++)
pg_total += cls->pg_total[i];
@@ -123,6 +132,10 @@ ckrm_class_limit_ok(struct ckrm_mem_res
} else
ret = (pg_total <= cls->pg_limit);
+ /* If we are failing, just nudge the back end */
+ if (ret == 0)
+ ckrm_shrink_atlimit(cls);
+
return ret;
}
@@ -407,3 +420,37 @@ ckrm_get_max_overguar_czone(int zindex)
return maxczone;
}
+LIST_HEAD(ckrm_shrink_list);
+
+void
+ckrm_shrink_atlimit(struct ckrm_mem_res *cls)
+{
+ struct zone *zone;
+ unsigned long flags;
+ int order;
+
+ if (!cls || (cls->pg_limit == CKRM_SHARE_DONTCARE))
+ return;
+ if (test_and_set_bit(CLS_AT_LIMIT, &cls->flags))
+ return;
+ if (time_after(jiffies, cls->last_shrink +
+ ckrm_mem_shrink_interval * HZ)) {
+ cls->last_shrink = jiffies;
+ cls->shrink_count = 0;
+ }
+ cls->shrink_count++;
+ if (cls->shrink_count > ckrm_mem_shrink_count) {
+ clear_bit(CLS_AT_LIMIT, &cls->flags);
+ return;
+ }
+ spin_lock_irqsave(&ckrm_mem_lock, flags);
+ list_add(&cls->shrink_list, &ckrm_shrink_list);
+ spin_unlock_irqrestore(&ckrm_mem_lock, flags);
+ for_each_zone(zone) {
+ /* This is just a number to get to wakeup kswapd */
+ order = cls->pg_total[0] -
+ ((ckrm_mem_shrink_to * cls->pg_limit) / 100);
+ wakeup_kswapd(zone, order);
+ break; /* only once is enough */
+ }
+}
Index: linux-2.6.12/mm/vmscan.c
===================================================================
--- linux-2.6.12.orig/mm/vmscan.c
+++ linux-2.6.12/mm/vmscan.c
@@ -869,6 +869,88 @@ shrink_ckrmzone(struct ckrm_zone *czone,
}
}
}
+
+/* FIXME: This function needs to be given more thought. */
+static void
+ckrm_shrink_class(struct ckrm_mem_res *cls)
+{
+ struct scan_control sc;
+ struct zone *zone;
+ int zindex = 0, cnt, act_credit = 0, inact_credit = 0;
+
+ sc.nr_mapped = read_page_state(nr_mapped);
+ sc.nr_scanned = 0;
+ sc.nr_reclaimed = 0;
+ sc.priority = 0; /* always very high priority */
+ sc.swap_cluster_max = SWAP_CLUSTER_MAX;
+
+ for_each_zone(zone) {
+ int zone_total, zone_limit, active_limit,
+ inactive_limit, clszone_limit;
+ struct ckrm_zone *czone;
+ u64 temp;
+
+ czone = &cls->ckrm_zone[zindex];
+
+ zone->temp_priority = zone->prev_priority;
+ zone->prev_priority = sc.priority;
+
+ zone_total = zone->nr_active + zone->nr_inactive
+ + zone->free_pages;
+
+ temp = (u64) cls->pg_limit * zone_total;
+ do_div(temp, ckrm_tot_lru_pages);
+ zone_limit = (int) temp;
+ clszone_limit = (ckrm_mem_shrink_to * zone_limit) / 100;
+ active_limit = (2 * clszone_limit) / 3; /* 2/3rd in active */
+ inactive_limit = clszone_limit / 3; /* 1/3rd in inactive */
+
+ sc.ckrm_active = 0;
+ cnt = czone->nr_active + act_credit - active_limit;
+ if (cnt > 0) {
+ sc.ckrm_active = (unsigned long) cnt;
+ act_credit = 0;
+ } else
+ act_credit += cnt;
+
+ sc.ckrm_inactive = 0;
+ cnt = sc.ckrm_active + inact_credit +
+ (czone->nr_inactive - inactive_limit);
+ if (cnt > 0) {
+ sc.ckrm_inactive = (unsigned long) cnt;
+ inact_credit = 0;
+ } else
+ inact_credit += cnt;
+
+ if (sc.ckrm_active || sc.ckrm_inactive) {
+ sc.nr_to_reclaim = sc.ckrm_inactive;
+ shrink_ckrmzone(czone, &sc);
+ }
+ zone->prev_priority = zone->temp_priority;
+ zindex++;
+ }
+}
+
+static void
+ckrm_shrink_classes(void)
+{
+ struct ckrm_mem_res *cls;
+
+ spin_lock_irq(&ckrm_mem_lock);
+ while (!ckrm_shrink_list_empty()) {
+ cls = list_entry(ckrm_shrink_list.next, struct ckrm_mem_res,
+ shrink_list);
+ list_del(&cls->shrink_list);
+ spin_unlock_irq(&ckrm_mem_lock);
+ ckrm_shrink_class(cls);
+ clear_bit(CLS_AT_LIMIT, &cls->flags);
+ spin_lock_irq(&ckrm_mem_lock);
+ }
+ spin_unlock_irq(&ckrm_mem_lock);
+}
+
+#else
+#define ckrm_shrink_classes() do { } while(0)
#endif
/*
@@ -1135,7 +1217,8 @@ loop_again:
continue;
if (!zone_watermark_ok(zone, order,
- zone->pages_high, 0, 0, 0)) {
+ zone->pages_high, 0, 0, 0) &&
+ ckrm_shrink_list_empty()) {
end_zone = i;
goto scan;
}
@@ -1171,7 +1254,8 @@ scan:
if (nr_pages == 0) { /* Not software suspend */
if (!zone_watermark_ok(zone, order,
- zone->pages_high, end_zone, 0, 0))
+ zone->pages_high, end_zone, 0, 0) &&
+ ckrm_shrink_list_empty())
all_zones_ok = 0;
}
zone->temp_priority = priority;
@@ -1300,7 +1384,10 @@ static int kswapd(void *p)
}
finish_wait(&pgdat->kswapd_wait, &wait);
- balance_pgdat(pgdat, 0, order);
+ if (!ckrm_shrink_list_empty())
+ ckrm_shrink_classes();
+ else
+ balance_pgdat(pgdat, 0, order);
}
return 0;
}
@@ -1316,7 +1403,8 @@ void wakeup_kswapd(struct zone *zone, in
return;
pgdat = zone->zone_pgdat;
- if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0, 0))
+ if (zone_watermark_ok(zone, order, zone->pages_low, 0, 0, 0) &&
+ ckrm_shrink_list_empty())
return;
if (pgdat->kswapd_max_order < order)
pgdat->kswapd_max_order = order;
--
----------------------------------------------------------------------
Chandra Seetharaman | Be careful what you choose....
- sekharan@us.ibm.com | .......you may get it.
----------------------------------------------------------------------
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"aart@kvack.org"> aart@kvack.org </a>
next reply other threads:[~2005-06-24 22:26 UTC|newest]
Thread overview: 3+ messages / expand[flat|nested] mbox.gz Atom feed top
2005-06-24 22:26 Chandra Seetharaman [this message]
-- strict thread matches above, loose matches on Subject: below --
2005-05-19 0:33 [Patch " Chandra Seetharaman
2005-04-02 3:15 Chandra Seetharaman
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1119651998.5105.23.camel@linuxchandra \
--to=sekharan@us.ibm.com \
--cc=ckrm-tech@lists.sourceforge.net \
--cc=linux-mm@kvack.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox