From: Chandra Seetharaman <sekharan@us.ibm.com>
To: ckrm-tech@lists.sourceforge.net, linux-mm@kvack.org
Subject: [PATCH 4/6] CKRM: Add guarantee support for mem controller
Date: Fri, 1 Apr 2005 19:14:34 -0800	[thread overview]
Message-ID: <20050402031434.GE23284@chandralinux.beaverton.ibm.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 287 bytes --]


-- 

----------------------------------------------------------------------
    Chandra Seetharaman               | Be careful what you choose....
              - sekharan@us.ibm.com   |      .......you may get it.
----------------------------------------------------------------------

[-- Attachment #2: 11-04-mem_limit-guar --]
[-- Type: text/plain, Size: 34604 bytes --]

Patch 4 of 6 in the series adding memory controller support under the CKRM
framework. This patch adds guarantee support to the controller: each class
gets its own per-zone LRU lists, and page reclaim chooses victim classes
according to how far each is over its guaranteed share.

 include/linux/ckrm_mem.h        |   31 ++++
 include/linux/ckrm_mem_inline.h |  244 ++++++++++++++++++++++++--------
 include/linux/mm.h              |    2 
 include/linux/mm_inline.h       |   10 +
 include/linux/mmzone.h          |    2 
 kernel/ckrm/ckrm_memcore.c      |   29 +++
 kernel/ckrm/ckrm_memctlr.c      |  299 +++++++++++++++++++++++++++++++++++++---
 mm/page_alloc.c                 |    3 
 mm/swap.c                       |    3 
 mm/vmscan.c                     |  114 +++++++++++++--
 10 files changed, 631 insertions(+), 106 deletions(-)
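
A note on the guarantee arithmetic (not part of the patch itself):
shrink_weight() below scales a class's page guarantee to each zone in
proportion to the zone's share of the system-wide LRU pages, and whatever
the class uses in that zone beyond the scaled guarantee becomes its shrink
weight. A minimal standalone sketch of that calculation follows, with
made-up names and numbers; the real code uses do_div() and the
ckrm_zone/ckrm_mem_res fields added by this patch.

#include <stdio.h>

/*
 * Illustrative only: scale a class-wide page guarantee to one zone and
 * report how many of that class's pages in the zone exceed it.  Mirrors
 * the arithmetic in shrink_weight(); names and numbers are hypothetical.
 */
static unsigned long
zone_pages_over_guarantee(unsigned long class_guar,    /* pages guaranteed to the class */
			  unsigned long zone_total,    /* LRU + free pages in the zone */
			  unsigned long tot_lru_pages, /* LRU pages system-wide */
			  unsigned long class_usage)   /* class's pages in this zone */
{
	/* the zone's proportional slice of the class guarantee */
	unsigned long zone_guar = (unsigned long)
		((unsigned long long)class_guar * zone_total / tot_lru_pages);

	return class_usage > zone_guar ? class_usage - zone_guar : 0;
}

int main(void)
{
	/* hypothetical numbers: class guaranteed 10000 pages, zone holds a
	 * quarter of the system's LRU pages, class uses 4000 pages in it */
	printf("over guarantee: %lu pages\n",
	       zone_pages_over_guarantee(10000, 25000, 100000, 4000));
	return 0;
}

With these numbers the zone's slice of the guarantee is 2500 pages, so the
class is 1500 pages over in that zone and becomes a reclaim victim there.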

Index: linux-2.6.12-rc1/include/linux/ckrm_mem.h
===================================================================
--- linux-2.6.12-rc1.orig/include/linux/ckrm_mem.h
+++ linux-2.6.12-rc1/include/linux/ckrm_mem.h
@@ -26,6 +26,27 @@
 #include <linux/mmzone.h>
 #include <linux/ckrm_rc.h>
 
+struct ckrm_zone {
+	struct list_head active_list;
+	struct list_head inactive_list;
+
+	unsigned long nr_active;
+	unsigned long nr_inactive;
+	unsigned long active_over;
+	unsigned long inactive_over;
+
+	unsigned long shrink_active;
+	unsigned long shrink_inactive;
+	long shrink_weight;
+	unsigned long shrink_flag;
+	struct list_head victim_list;	/* list of ckrm_zones chosen for
+					 * shrinking. These are over their
+					 * 'guarantee'
+					 */
+	struct zone *zone;
+	struct ckrm_mem_res *memcls;
+};
+
 struct ckrm_mem_res {
 	unsigned long flags;
 	struct ckrm_core_class *core;	/* the core i am part of... */
@@ -46,11 +67,19 @@ struct ckrm_mem_res {
 	int hier;			/* hiearchy level, root = 0 */
 	int impl_guar;			/* for classes with don't care guar */
 	int nr_dontcare;		/* # of dont care children */
+
+	struct ckrm_zone ckrm_zone[MAX_NR_ZONES];
 };
 
+#define CLS_SHRINK_BIT		(1)
+
 extern atomic_t ckrm_mem_real_count;
 extern struct ckrm_res_ctlr mem_rcbs;
 extern struct ckrm_mem_res *ckrm_mem_root_class;
+extern struct list_head ckrm_memclass_list;
+extern spinlock_t ckrm_mem_lock;
+extern int ckrm_nr_mem_classes;
+extern unsigned int ckrm_tot_lru_pages;
 
 extern void ckrm_mem_migrate_mm(struct mm_struct *, struct ckrm_mem_res *);
 extern void ckrm_mem_migrate_all_pages(struct ckrm_mem_res *,
@@ -60,6 +89,8 @@ extern void incr_use_count(struct ckrm_m
 extern void decr_use_count(struct ckrm_mem_res *, int, int);
 extern int ckrm_class_limit_ok(struct ckrm_mem_res *);
 
+extern void shrink_get_victims(struct zone *, unsigned long ,
+				unsigned long, struct list_head *);
 #else
 
 #define ckrm_mem_migrate_mm(a, b)			do {} while (0)
Index: linux-2.6.12-rc1/include/linux/ckrm_mem_inline.h
===================================================================
--- linux-2.6.12-rc1.orig/include/linux/ckrm_mem_inline.h
+++ linux-2.6.12-rc1/include/linux/ckrm_mem_inline.h
@@ -34,16 +34,75 @@ ckrm_get_mem_class(struct task_struct *t
 }
 
 static inline void
+ckrm_set_shrink(struct ckrm_zone *cz)
+{
+	set_bit(CLS_SHRINK_BIT, &cz->shrink_flag);
+}
+
+static inline int
+ckrm_test_set_shrink(struct ckrm_zone *cz)
+{
+	return test_and_set_bit(CLS_SHRINK_BIT, &cz->shrink_flag);
+}
+
+static inline void 
+ckrm_clear_shrink(struct ckrm_zone *cz)
+{
+	clear_bit(CLS_SHRINK_BIT, &cz->shrink_flag);
+}
+
+static inline void
+set_page_ckrmzone( struct page *page, struct ckrm_zone *cz)
+{
+	page->ckrm_zone = cz;
+}
+
+static inline struct ckrm_zone *
+page_ckrmzone(struct page *page)
+{
+	return page->ckrm_zone;
+}
+
+/*
+ * Currently, a page shared by multiple classes is charged to the class
+ * with the maximum available guarantee. Simply replace this function
+ * to implement other policies.
+ */
+static inline int
+ckrm_mem_share_compare(struct ckrm_mem_res *a, struct ckrm_mem_res *b)
+{
+	if (a == NULL)
+		return -(b != NULL);
+	if (b == NULL)
+		return 1;
+	if (a->pg_guar == b->pg_guar)
+		return 0;
+	if (a->pg_guar == CKRM_SHARE_DONTCARE)
+		return 1;
+	if (b->pg_guar == CKRM_SHARE_DONTCARE)
+		return -1;
+	return (a->pg_unused - b->pg_unused);
+}
+
+static inline void
 ckrm_set_page_class(struct page *page, struct ckrm_mem_res *cls)
 {
+	struct ckrm_zone *new_czone, *old_czone;
+
 	if (!cls) {
-		if (!ckrm_mem_root_class)
+		if (!ckrm_mem_root_class) {
+			set_page_ckrmzone(page, NULL);
 			return;
+		}
 		cls = ckrm_mem_root_class;
 	}
-	if (page->ckrm_class)
-		kref_put(&page->ckrm_class->nr_users, memclass_release);
-	page->ckrm_class = cls;
+	new_czone = &cls->ckrm_zone[page_zonenum(page)];
+	old_czone = page_ckrmzone(page);
+	
+	if (old_czone)
+		kref_put(&old_czone->memcls->nr_users, memclass_release);
+
+	set_page_ckrmzone(page, new_czone);
 	kref_get(&cls->nr_users);
 	incr_use_count(cls, 0, page_zonenum(page));
 	SetPageCkrmAccount(page);
@@ -52,7 +111,8 @@ ckrm_set_page_class(struct page *page, s
 static inline void
 ckrm_change_page_class(struct page *page, struct ckrm_mem_res *newcls)
 {
-	struct ckrm_mem_res *oldcls = page->ckrm_class;
+	struct ckrm_zone *old_czone = page_ckrmzone(page), *new_czone;
+	struct ckrm_mem_res *oldcls;
 	int zindex = page_zonenum(page);
 
 	if  (!newcls) {
@@ -61,6 +121,7 @@ ckrm_change_page_class(struct page *page
 		newcls = ckrm_mem_root_class;
 	}
 
+	oldcls = old_czone->memcls;
 	if (oldcls == newcls)
 		return;
 
@@ -69,20 +130,35 @@ ckrm_change_page_class(struct page *page
 		decr_use_count(oldcls, 0, zindex);
 	}
 
-	page->ckrm_class = newcls;
+	new_czone = &newcls->ckrm_zone[page_zonenum(page)];
+	set_page_ckrmzone(page, new_czone);
 	kref_get(&newcls->nr_users);
 	incr_use_count(newcls, 0, zindex);
+
+	list_del(&page->lru);
+	if (PageActive(page)) {
+		old_czone->nr_active--;
+		new_czone->nr_active++;
+		list_add(&page->lru, &new_czone->active_list);
+	} else {
+		old_czone->nr_inactive--;
+		new_czone->nr_inactive++;
+		list_add(&page->lru, &new_czone->inactive_list);
+	}
 }
 
 static inline void
 ckrm_clear_page_class(struct page *page)
 {
-	struct ckrm_mem_res *cls = page->ckrm_class;
-	if (cls && PageCkrmAccount(page)) {
-		decr_use_count(cls, 0, page_zonenum(page));
-		ClearPageCkrmAccount(page);
-		kref_put(&cls->nr_users, memclass_release);
-	}
+	struct ckrm_zone *czone = page_ckrmzone(page);
+	if (czone != NULL) {
+		if (PageCkrmAccount(page)) {
+			decr_use_count(czone->memcls, 0, page_zonenum(page));
+			ClearPageCkrmAccount(page);
+		}
+		kref_put(&czone->memcls->nr_users, memclass_release);
+		set_page_ckrmzone(page, NULL);
+  	}
 }
 
 static inline void
@@ -91,17 +167,27 @@ ckrm_mem_inc_active(struct page *page)
 	struct ckrm_mem_res *cls = ckrm_get_mem_class(current)
 						?: ckrm_mem_root_class;
 
-	if (!cls)
-		return;
-	ckrm_set_page_class(page, cls);
+	struct ckrm_zone *czone;
+  
+	if (cls == NULL)
+  		return;
+
+  	ckrm_set_page_class(page, cls);
+	czone = page_ckrmzone(page);
+	czone->nr_active++;
+	list_add(&page->lru, &czone->active_list);
 }
 
 static inline void
 ckrm_mem_dec_active(struct page *page)
 {
-	if (page->ckrm_class == NULL)
-		return;
-	ckrm_clear_page_class(page);
+	struct ckrm_zone *czone = page_ckrmzone(page);
+	if (czone == NULL)
+  		return;
+
+	list_del(&page->lru);
+	czone->nr_active--;
+  	ckrm_clear_page_class(page);
 }
 
 static inline void
@@ -109,25 +195,58 @@ ckrm_mem_inc_inactive(struct page *page)
 {
 	struct ckrm_mem_res *cls = ckrm_get_mem_class(current)
 					?: ckrm_mem_root_class;
-
-	if (!cls)
-		return;
-	ckrm_set_page_class(page, cls);
+	struct ckrm_zone *czone;
+  
+	if (cls == NULL)
+  		return;
+
+  	ckrm_set_page_class(page, cls);
+	czone = page_ckrmzone(page);
+	czone->nr_inactive++;
+	list_add(&page->lru, &czone->inactive_list);
 }
 
 static inline void
 ckrm_mem_dec_inactive(struct page *page)
 {
-	if (!page->ckrm_class)
-		return;
-	ckrm_clear_page_class(page);
+	struct ckrm_zone *czone = page_ckrmzone(page);
+	if (czone == NULL)
+  		return;
+
+	czone->nr_inactive--;
+	list_del(&page->lru);
+  	ckrm_clear_page_class(page);
 }
 
 static inline void
 ckrm_page_init(struct page *page)
 {
 	page->flags &= ~(1 << PG_ckrm_account);
-	page->ckrm_class = NULL;
+	set_page_ckrmzone(page, NULL);
+}
+
+static inline void
+ckrm_zone_add_active(struct ckrm_zone *czone, int cnt)
+{
+	czone->nr_active += cnt;
+}
+
+static inline void
+ckrm_zone_add_inactive(struct ckrm_zone *czone, int cnt)
+{
+	czone->nr_inactive += cnt;
+}
+
+static inline void
+ckrm_zone_sub_active(struct ckrm_zone *czone, int cnt)
+{
+	czone->nr_active -= cnt;
+}
+
+static inline void
+ckrm_zone_sub_inactive(struct ckrm_zone *czone, int cnt)
+{
+	czone->nr_inactive -= cnt;
 }
 
 
@@ -202,28 +321,15 @@ ckrm_mm_clearclass(struct mm_struct *mm)
 	}
 }
 
-#else
-
-static inline void
-ckrm_task_mm_init(struct task_struct *tsk)
-{
-}
+static inline void ckrm_init_lists(struct zone *zone) 			{}
 
-static inline void
-ckrm_task_mm_set(struct mm_struct * mm, struct task_struct *task)
+static inline void ckrm_add_tail_inactive(struct page *page)
 {
+	 struct ckrm_zone *ckrm_zone = page_ckrmzone(page);
+	 list_add_tail(&page->lru, &ckrm_zone->inactive_list);
 }
 
-static inline void
-ckrm_task_mm_change(struct task_struct *tsk,
-		struct mm_struct *oldmm, struct mm_struct *newmm)
-{
-}
-
-static inline void
-ckrm_task_mm_clear(struct task_struct *tsk, struct mm_struct *mm)
-{
-}
+#else
 
 static inline void *
 ckrm_get_memclass(struct task_struct *tsk)
@@ -231,27 +337,47 @@ ckrm_get_memclass(struct task_struct *ts
 	return NULL;
 }
 
-static inline void
-ckrm_mm_init(struct mm_struct *mm)
-{
-}
+static inline void ckrm_clear_page_class(struct page *p)		{}
+
+static inline void ckrm_mem_inc_active(struct page *p)			{}
+static inline void ckrm_mem_dec_active(struct page *p)			{}
+static inline void ckrm_mem_inc_inactive(struct page *p)		{}
+static inline void ckrm_mem_dec_inactive(struct page *p)		{}
+
+#define ckrm_zone_add_active(a, b)	do {} while (0)
+#define ckrm_zone_add_inactive(a, b)	do {} while (0)
+#define ckrm_zone_sub_active(a, b)	do {} while (0)
+#define ckrm_zone_sub_inactive(a, b)	do {} while (0)
+
+#define ckrm_class_limit_ok(a)						(1)
+
+static inline void ckrm_page_init(struct page *p)			{}
+static inline void ckrm_task_mm_init(struct task_struct *tsk)		{}
+static inline void ckrm_task_mm_set(struct mm_struct * mm,
+					struct task_struct *task)	{}
+static inline void ckrm_task_mm_change(struct task_struct *tsk,
+		struct mm_struct *oldmm, struct mm_struct *newmm)	{}
+static inline void ckrm_task_mm_clear(struct task_struct *tsk,
+						struct mm_struct *mm)	{}
+
+static inline void ckrm_mm_init(struct mm_struct *mm)			{}
 
 /* using #define instead of static inline as the prototype requires   *
  * data structures that is available only with the controller enabled */
-#define ckrm_mm_setclass(a, b) do { } while(0)
-#define ckrm_class_limit_ok(a)	(1)
+#define ckrm_mm_setclass(a, b) 					do {} while(0)
 
-static inline void
-ckrm_mm_clearclass(struct mm_struct *mm)
+static inline void ckrm_mm_clearclass(struct mm_struct *mm)		{}
+
+static inline void ckrm_init_lists(struct zone *zone)
 {
+	INIT_LIST_HEAD(&zone->active_list);
+	INIT_LIST_HEAD(&zone->inactive_list);
 }
 
-static inline void ckrm_mem_inc_active(struct page *p)		{}
-static inline void ckrm_mem_dec_active(struct page *p)		{}
-static inline void ckrm_mem_inc_inactive(struct page *p)	{}
-static inline void ckrm_mem_dec_inactive(struct page *p)	{}
-static inline void ckrm_page_init(struct page *p)		{}
-static inline void ckrm_clear_page_class(struct page *p)	{}
-
+static inline void ckrm_add_tail_inactive(struct page *page)
+{
+	 struct zone *zone = page_zone(page);
+	 list_add_tail(&page->lru, &zone->inactive_list);
+}
 #endif 
 #endif /* _LINUX_CKRM_MEM_INLINE_H_ */
Index: linux-2.6.12-rc1/include/linux/mm.h
===================================================================
--- linux-2.6.12-rc1.orig/include/linux/mm.h
+++ linux-2.6.12-rc1/include/linux/mm.h
@@ -262,7 +262,7 @@ struct page {
 					   not kmapped, ie. highmem) */
 #endif /* WANT_PAGE_VIRTUAL */
 #ifdef CONFIG_CKRM_RES_MEM
-	struct ckrm_mem_res *ckrm_class;
+	struct ckrm_zone *ckrm_zone;
 #endif
 };
 
Index: linux-2.6.12-rc1/include/linux/mm_inline.h
===================================================================
--- linux-2.6.12-rc1.orig/include/linux/mm_inline.h
+++ linux-2.6.12-rc1/include/linux/mm_inline.h
@@ -3,7 +3,9 @@
 static inline void
 add_page_to_active_list(struct zone *zone, struct page *page)
 {
+#ifndef CONFIG_CKRM_RES_MEM
 	list_add(&page->lru, &zone->active_list);
+#endif
 	zone->nr_active++;
 	ckrm_mem_inc_active(page);
 }
@@ -11,7 +13,9 @@ add_page_to_active_list(struct zone *zon
 static inline void
 add_page_to_inactive_list(struct zone *zone, struct page *page)
 {
+#ifndef CONFIG_CKRM_RES_MEM
 	list_add(&page->lru, &zone->inactive_list);
+#endif
 	zone->nr_inactive++;
 	ckrm_mem_inc_inactive(page);
 }
@@ -19,7 +23,9 @@ add_page_to_inactive_list(struct zone *z
 static inline void
 del_page_from_active_list(struct zone *zone, struct page *page)
 {
+#ifndef CONFIG_CKRM_RES_MEM
 	list_del(&page->lru);
+#endif
 	zone->nr_active--;
 	ckrm_mem_dec_active(page);
 }
@@ -27,7 +33,9 @@ del_page_from_active_list(struct zone *z
 static inline void
 del_page_from_inactive_list(struct zone *zone, struct page *page)
 {
+#ifndef CONFIG_CKRM_RES_MEM
 	list_del(&page->lru);
+#endif
 	zone->nr_inactive--;
 	ckrm_mem_dec_inactive(page);
 }
@@ -35,7 +43,9 @@ del_page_from_inactive_list(struct zone 
 static inline void
 del_page_from_lru(struct zone *zone, struct page *page)
 {
+#ifndef CONFIG_CKRM_RES_MEM
 	list_del(&page->lru);
+#endif
 	if (PageActive(page)) {
 		ClearPageActive(page);
 		zone->nr_active--;
Index: linux-2.6.12-rc1/include/linux/mmzone.h
===================================================================
--- linux-2.6.12-rc1.orig/include/linux/mmzone.h
+++ linux-2.6.12-rc1/include/linux/mmzone.h
@@ -135,8 +135,10 @@ struct zone {
 
 	/* Fields commonly accessed by the page reclaim scanner */
 	spinlock_t		lru_lock;	
+#ifndef CONFIG_CKRM_RES_MEM
 	struct list_head	active_list;
 	struct list_head	inactive_list;
+#endif
 	unsigned long		nr_scan_active;
 	unsigned long		nr_scan_inactive;
 	unsigned long		nr_active;
Index: linux-2.6.12-rc1/kernel/ckrm/ckrm_memcore.c
===================================================================
--- linux-2.6.12-rc1.orig/kernel/ckrm/ckrm_memcore.c
+++ linux-2.6.12-rc1/kernel/ckrm/ckrm_memcore.c
@@ -38,14 +38,17 @@
 #define CKRM_MEM_MAX_HIERARCHY 2 /* allows only upto 2 levels - 0, 1 & 2 */
 
 /* all 1-level memory_share_class are chained together */
-static LIST_HEAD(ckrm_memclass_list);
-static spinlock_t ckrm_mem_lock; /* protects list above */
-static unsigned int ckrm_tot_lru_pages; /* # of pages in the system */
-
-static int ckrm_nr_mem_classes = 0;
-
+LIST_HEAD(ckrm_memclass_list);
+spinlock_t ckrm_mem_lock; /* protects list above */
+unsigned int ckrm_tot_lru_pages; /* # of pages in the system */
+int ckrm_nr_mem_classes = 0;
 struct ckrm_mem_res *ckrm_mem_root_class;
 atomic_t ckrm_mem_real_count = ATOMIC_INIT(0);
+
+EXPORT_SYMBOL_GPL(ckrm_memclass_list);
+EXPORT_SYMBOL_GPL(ckrm_mem_lock);
+EXPORT_SYMBOL_GPL(ckrm_tot_lru_pages);
+EXPORT_SYMBOL_GPL(ckrm_nr_mem_classes);
 EXPORT_SYMBOL_GPL(ckrm_mem_root_class);
 EXPORT_SYMBOL_GPL(ckrm_mem_real_count);
 
@@ -80,6 +83,9 @@ set_ckrm_tot_pages(void)
 static void
 mem_res_initcls_one(struct ckrm_mem_res *res)
 {
+	int zindex = 0;
+	struct zone *zone;
+
 	memset(res, 0, sizeof(struct ckrm_mem_res));
 
 	res->shares.my_guarantee     = CKRM_SHARE_DONTCARE;
@@ -94,6 +100,17 @@ mem_res_initcls_one(struct ckrm_mem_res 
 
 	INIT_LIST_HEAD(&res->mcls_list);
 
+	for_each_zone(zone) {
+		INIT_LIST_HEAD(&res->ckrm_zone[zindex].active_list);
+		INIT_LIST_HEAD(&res->ckrm_zone[zindex].inactive_list);
+		INIT_LIST_HEAD(&res->ckrm_zone[zindex].victim_list);
+		res->ckrm_zone[zindex].nr_active = 0;
+		res->ckrm_zone[zindex].nr_inactive = 0;
+		res->ckrm_zone[zindex].zone = zone;
+		res->ckrm_zone[zindex].memcls = res;
+		zindex++;
+	}
+
 	res->pg_unused = 0;
 	res->nr_dontcare = 1; /* for default class */
 	kref_init(&res->nr_users);
Index: linux-2.6.12-rc1/kernel/ckrm/ckrm_memctlr.c
===================================================================
--- linux-2.6.12-rc1.orig/kernel/ckrm/ckrm_memctlr.c
+++ linux-2.6.12-rc1/kernel/ckrm/ckrm_memctlr.c
@@ -14,6 +14,8 @@
  *
  */
 
+#include <linux/swap.h>
+#include <linux/pagemap.h>
 #include <linux/ckrm_mem_inline.h>
 
 void
@@ -82,8 +84,88 @@ ckrm_class_limit_ok(struct ckrm_mem_res 
 	return ret;
 }
 
+static int
+ckrm_mem_evaluate_page_anon(struct page* page)
+{
+	struct ckrm_mem_res* pgcls = page_ckrmzone(page)->memcls;
+	struct ckrm_mem_res* maxshareclass = NULL;
+	struct anon_vma *anon_vma = (struct anon_vma *) page->mapping;
+	struct vm_area_struct *vma;
+	struct mm_struct* mm;
+	int ret = 0;
+
+	if (!spin_trylock(&anon_vma->lock))
+		return 0;
+	BUG_ON(list_empty(&anon_vma->head));
+	list_for_each_entry(vma, &anon_vma->head, anon_vma_node) {
+		mm = vma->vm_mm;
+		if (!maxshareclass || ckrm_mem_share_compare(maxshareclass,
+				mm->memclass) < 0) {
+			maxshareclass = mm->memclass;
+		}
+	}
+	spin_unlock(&anon_vma->lock);
+
+	if (!maxshareclass)
+		maxshareclass = ckrm_mem_root_class;
+	if (pgcls != maxshareclass) {
+		ckrm_change_page_class(page, maxshareclass);
+		ret = 1;
+	}
+	return ret;
+}
+
+static int
+ckrm_mem_evaluate_page_file(struct page* page)
+{
+	struct ckrm_mem_res* pgcls = page_ckrmzone(page)->memcls;
+	struct ckrm_mem_res* maxshareclass = NULL;
+	struct address_space *mapping = page->mapping;
+	struct vm_area_struct *vma = NULL;
+	pgoff_t pgoff = page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT);
+	struct prio_tree_iter iter;
+	struct mm_struct* mm;
+	int ret = 0;
+
+	if (!mapping)
+		return 0;
+
+	if (!spin_trylock(&mapping->i_mmap_lock))
+		return 0;
+
+	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap,
+					pgoff, pgoff) {
+		mm = vma->vm_mm;
+		if (!maxshareclass || ckrm_mem_share_compare(maxshareclass,
+				mm->memclass)<0)
+			maxshareclass = mm->memclass;
+	}
+	spin_unlock(&mapping->i_mmap_lock);
+
+	if (!maxshareclass)
+		maxshareclass = ckrm_mem_root_class;
+	if (pgcls != maxshareclass) {
+		ckrm_change_page_class(page, maxshareclass);
+		ret = 1;
+	}
+	return ret;
+}
+
+static int
+ckrm_mem_evaluate_page(struct page* page)
+{
+	int ret = 0;
+	if (page->mapping) {
+		if (PageAnon(page))
+			ret = ckrm_mem_evaluate_page_anon(page);
+		else
+			ret = ckrm_mem_evaluate_page_file(page);
+	}
+	return ret;
+}
+
 static void migrate_list(struct list_head *list,
-	struct ckrm_mem_res* from, struct ckrm_mem_res* to)
+	struct ckrm_mem_res* from, struct ckrm_mem_res* def)
 {
 	struct page *page;
 	struct list_head *pos, *next;
@@ -92,21 +174,26 @@ static void migrate_list(struct list_hea
 	while (pos != list) {
 		next = pos->next;
 		page = list_entry(pos, struct page, lru);
-		if (page->ckrm_class == from) 
-			ckrm_change_page_class(page, to);
+		if (ckrm_mem_evaluate_page(page))
+			ckrm_change_page_class(page, def);
 		pos = next;
 	}
 }
 
 void
-ckrm_mem_migrate_all_pages(struct ckrm_mem_res* from, struct ckrm_mem_res* to)
+ckrm_mem_migrate_all_pages(struct ckrm_mem_res* from,
+					struct ckrm_mem_res* def)
 {
+	int i;
 	struct zone *zone;
-
-	for_each_zone(zone) {
+	struct ckrm_zone *ckrm_zone;
+  
+	for (i = 0; i < MAX_NR_ZONES; i++) {
+		ckrm_zone = &from->ckrm_zone[i];
+		zone = ckrm_zone->zone;
 		spin_lock_irq(&zone->lru_lock);
-		migrate_list(&zone->inactive_list, from, to);
-		migrate_list(&zone->active_list, from, to);
+		migrate_list(&ckrm_zone->inactive_list, from, def);
+		migrate_list(&ckrm_zone->active_list, from, def);
 		spin_unlock_irq(&zone->lru_lock);
 	}
 	return;
@@ -131,8 +218,13 @@ class_migrate_pmd(struct mm_struct* mm, 
 		pte = pte_offset_map(pmdir, address);
 		if (pte_present(*pte)) {
 			struct page *page = pte_page(*pte);
-			if (page->mapping)
+			struct ckrm_zone *czone = page_ckrmzone(page);
+			if (page->mapping && czone) {
+				struct zone *zone = czone->zone;
+				spin_lock_irq(&zone->lru_lock);
 				ckrm_change_page_class(page, mm->memclass);
+				spin_unlock_irq(&zone->lru_lock);
+			}
 		}
 		address += PAGE_SIZE;
 		pte_unmap(pte);
@@ -190,7 +282,9 @@ class_migrate_vma(struct mm_struct* mm, 
 void
 ckrm_mem_migrate_mm(struct mm_struct* mm, struct ckrm_mem_res *def)
 {
+	struct task_struct *task;
 	struct vm_area_struct *vma;
+	struct ckrm_mem_res *maxshareclass = def;
 
 	/* We leave the mm->memclass untouched since we believe that one
 	 * mm with no task associated will be deleted soon or attach
@@ -199,18 +293,177 @@ ckrm_mem_migrate_mm(struct mm_struct* mm
 	if (list_empty(&mm->tasklist))
 		return;
 
-	if (mm->memclass)
-		kref_put(&mm->memclass->nr_users, memclass_release);
-	mm->memclass = def ?: ckrm_mem_root_class;
-	kref_get(&mm->memclass->nr_users);
-
-	/* Go through all VMA to migrate pages */
-	down_read(&mm->mmap_sem);
-	vma = mm->mmap;
-	while(vma) {
-		class_migrate_vma(mm, vma);
-		vma = vma->vm_next;
+	list_for_each_entry(task, &mm->tasklist, mm_peers) {
+		struct ckrm_mem_res* cls = ckrm_get_mem_class(task);
+		if (!cls)
+			continue;
+		if (!maxshareclass ||
+				ckrm_mem_share_compare(maxshareclass,cls)<0 )
+			maxshareclass = cls;
+	}
+
+	if (maxshareclass && (mm->memclass != maxshareclass)) {
+		if (mm->memclass) {
+			kref_put(&mm->memclass->nr_users, memclass_release);
+		}
+		mm->memclass = maxshareclass;
+		kref_get(&maxshareclass->nr_users);
+
+		/* Go through all VMA to migrate pages */
+		down_read(&mm->mmap_sem);
+		vma = mm->mmap;
+		while(vma) {
+			class_migrate_vma(mm, vma);
+			vma = vma->vm_next;
+		}
+		up_read(&mm->mmap_sem);
 	}
-	up_read(&mm->mmap_sem);
 	return;
 }
+
+static int
+shrink_weight(struct ckrm_zone *czone)
+{
+	u64 temp;
+	struct zone *zone = czone->zone;
+	struct ckrm_mem_res *cls = czone->memcls;
+	int zone_usage, zone_guar, zone_total, guar, ret, cnt;
+
+	zone_usage = czone->nr_active + czone->nr_inactive;
+	czone->active_over = czone->inactive_over = 0;
+
+	if (zone_usage < SWAP_CLUSTER_MAX * 4)
+		return 0;
+
+	if (cls->pg_guar == CKRM_SHARE_DONTCARE)
+		/* no guarantee for this class. use implicit guarantee */
+		guar = cls->impl_guar / cls->nr_dontcare;
+	else
+		guar = cls->pg_unused / cls->nr_dontcare;
+	zone_total = zone->nr_active + zone->nr_inactive + zone->free_pages;
+	temp = (u64) guar * zone_total;
+	do_div(temp, ckrm_tot_lru_pages);
+	zone_guar = (int) temp;
+
+	ret = ((zone_usage - zone_guar) > SWAP_CLUSTER_MAX) ?
+				(zone_usage - zone_guar) : 0;
+	if (ret) {
+		cnt = czone->nr_active - (2 * zone_guar / 3);
+		if (cnt > 0)
+			czone->active_over = cnt;
+		cnt = czone->active_over + czone->nr_inactive
+					- zone_guar / 3;
+		if (cnt > 0)
+			czone->inactive_over = cnt;
+	}
+	return ret;
+}
+
+/* insert an entry into the list, keeping it sorted in descending order */
+static void
+list_add_sort(struct list_head *entry, struct list_head *head)
+{
+	struct ckrm_zone *czone, *new =
+			list_entry(entry, struct ckrm_zone, victim_list);
+	struct list_head* pos = head->next;
+
+	while (pos != head) {
+		czone = list_entry(pos, struct ckrm_zone, victim_list);
+		if (new->shrink_weight > czone->shrink_weight) {
+			__list_add(entry, pos->prev, pos);
+			return;
+		}
+		pos = pos->next;
+  	}
+	list_add_tail(entry, head);
+	return;	
+}
+
+static void
+shrink_choose_victims(struct list_head *victims,
+		unsigned long nr_active, unsigned long nr_inactive)
+{
+	unsigned long nr;
+	struct ckrm_zone* czone;
+	struct list_head *pos, *next;
+  
+	pos = victims->next;
+	while ((pos != victims) && (nr_active || nr_inactive)) {
+		czone = list_entry(pos, struct ckrm_zone, victim_list);
+		
+		if (nr_active && czone->active_over) {
+			nr = min(nr_active, czone->active_over);
+			czone->shrink_active += nr;
+			czone->active_over -= nr;
+			nr_active -= nr;
+		}
+
+		if (nr_inactive && czone->inactive_over) {
+			nr = min(nr_inactive, czone->inactive_over);
+			czone->shrink_inactive += nr;
+			czone->inactive_over -= nr;
+			nr_inactive -= nr;
+		}
+		pos = pos->next;
+  	}
+
+	pos = victims->next;
+	while (pos != victims) {
+		czone = list_entry(pos, struct ckrm_zone, victim_list);
+		next = pos->next;
+		if (czone->shrink_active == 0 && czone->shrink_inactive == 0) {
+			list_del_init(pos);
+			ckrm_clear_shrink(czone);
+		}
+		pos = next;
+	}	
+  	return;
+  }
+
+void
+shrink_get_victims(struct zone *zone, unsigned long nr_active,
+		unsigned long nr_inactive, struct list_head *victims)
+{
+	struct list_head *pos;
+	struct ckrm_mem_res *cls;
+	struct ckrm_zone *czone;
+	int zoneindex = zone_idx(zone);
+	
+	if (ckrm_nr_mem_classes <= 1) {
+		if (ckrm_mem_root_class) {
+			czone = ckrm_mem_root_class->ckrm_zone + zoneindex;
+			if (!ckrm_test_set_shrink(czone)) {
+				list_add(&czone->victim_list, victims);
+				czone->shrink_active = nr_active;
+				czone->shrink_inactive = nr_inactive;
+			}
+		}
+		return;
+	}
+	spin_lock_irq(&ckrm_mem_lock);
+	list_for_each_entry(cls, &ckrm_memclass_list, mcls_list) {
+		czone = cls->ckrm_zone + zoneindex;
+		if (ckrm_test_set_shrink(czone))
+			continue;
+
+		czone->shrink_active = 0;
+		czone->shrink_inactive = 0;
+		czone->shrink_weight = shrink_weight(czone);
+		if (czone->shrink_weight)
+			list_add_sort(&czone->victim_list, victims);
+		else
+			ckrm_clear_shrink(czone);
+	}
+	pos = victims->next;
+	while (pos != victims) {
+		czone = list_entry(pos, struct ckrm_zone, victim_list);
+		pos = pos->next;
+	}
+	shrink_choose_victims(victims, nr_active, nr_inactive);
+	spin_unlock_irq(&ckrm_mem_lock);
+	pos = victims->next;
+	while (pos != victims) {
+		czone = list_entry(pos, struct ckrm_zone, victim_list);
+		pos = pos->next;
+	}
+}
Index: linux-2.6.12-rc1/mm/page_alloc.c
===================================================================
--- linux-2.6.12-rc1.orig/mm/page_alloc.c
+++ linux-2.6.12-rc1/mm/page_alloc.c
@@ -1693,8 +1693,7 @@ static void __init free_area_init_core(s
 		}
 		printk(KERN_DEBUG "  %s zone: %lu pages, LIFO batch:%lu\n",
 				zone_names[j], realsize, batch);
-		INIT_LIST_HEAD(&zone->active_list);
-		INIT_LIST_HEAD(&zone->inactive_list);
+		ckrm_init_lists(zone);
 		zone->nr_scan_active = 0;
 		zone->nr_scan_inactive = 0;
 		zone->nr_active = 0;
Index: linux-2.6.12-rc1/mm/swap.c
===================================================================
--- linux-2.6.12-rc1.orig/mm/swap.c
+++ linux-2.6.12-rc1/mm/swap.c
@@ -30,6 +30,7 @@
 #include <linux/cpu.h>
 #include <linux/notifier.h>
 #include <linux/init.h>
+#include <linux/ckrm_mem_inline.h>
 
 /* How many pages do we try to swap or page in/out together? */
 int page_cluster;
@@ -87,7 +88,7 @@ int rotate_reclaimable_page(struct page 
 	spin_lock_irqsave(&zone->lru_lock, flags);
 	if (PageLRU(page) && !PageActive(page)) {
 		list_del(&page->lru);
-		list_add_tail(&page->lru, &zone->inactive_list);
+		ckrm_add_tail_inactive(page);
 		inc_page_state(pgrotated);
 	}
 	if (!test_clear_page_writeback(page))
Index: linux-2.6.12-rc1/mm/vmscan.c
===================================================================
--- linux-2.6.12-rc1.orig/mm/vmscan.c
+++ linux-2.6.12-rc1/mm/vmscan.c
@@ -33,6 +33,7 @@
 #include <linux/cpuset.h>
 #include <linux/notifier.h>
 #include <linux/rwsem.h>
+#include <linux/ckrm_mem.h>
 
 #include <asm/tlbflush.h>
 #include <asm/div64.h>
@@ -555,11 +556,23 @@ keep:
  * For pagecache intensive workloads, the first loop here is the hottest spot
  * in the kernel (apart from the copy_*_user functions).
  */
+#ifdef CONFIG_CKRM_RES_MEM
+static void shrink_cache(struct ckrm_zone *ckrm_zone, struct scan_control *sc)
+#else
 static void shrink_cache(struct zone *zone, struct scan_control *sc)
+#endif
 {
 	LIST_HEAD(page_list);
 	struct pagevec pvec;
 	int max_scan = sc->nr_to_scan;
+#ifdef CONFIG_CKRM_RES_MEM
+	struct zone *zone = ckrm_zone->zone;
+	struct list_head *inactive_list = &ckrm_zone->inactive_list;
+	struct list_head *active_list = &ckrm_zone->active_list;
+#else
+	struct list_head *inactive_list = &zone->inactive_list;
+	struct list_head *active_list = &zone->active_list;
+#endif
 
 	pagevec_init(&pvec, 1);
 
@@ -572,11 +585,10 @@ static void shrink_cache(struct zone *zo
 		int nr_freed;
 
 		while (nr_scan++ < sc->swap_cluster_max &&
-				!list_empty(&zone->inactive_list)) {
-			page = lru_to_page(&zone->inactive_list);
+				!list_empty(inactive_list)) {
+			page = lru_to_page(inactive_list);
 
-			prefetchw_prev_lru_page(page,
-						&zone->inactive_list, flags);
+			prefetchw_prev_lru_page(page, inactive_list, flags);
 
 			if (!TestClearPageLRU(page))
 				BUG();
@@ -587,13 +599,14 @@ static void shrink_cache(struct zone *zo
 				 */
 				__put_page(page);
 				SetPageLRU(page);
-				list_add(&page->lru, &zone->inactive_list);
+				list_add(&page->lru, inactive_list);
 				continue;
 			}
 			list_add(&page->lru, &page_list);
 			nr_taken++;
 		}
 		zone->nr_inactive -= nr_taken;
+		ckrm_zone_sub_inactive(ckrm_zone, nr_taken);
 		zone->pages_scanned += nr_scan;
 		spin_unlock_irq(&zone->lru_lock);
 
@@ -620,10 +633,15 @@ static void shrink_cache(struct zone *zo
 			if (TestSetPageLRU(page))
 				BUG();
 			list_del(&page->lru);
-			if (PageActive(page))
-				add_page_to_active_list(zone, page);
-			else
-				add_page_to_inactive_list(zone, page);
+			if (PageActive(page)) {
+				ckrm_zone_add_active(ckrm_zone, 1);
+				zone->nr_active++;
+				list_add(&page->lru, active_list);
+			} else {
+				ckrm_zone_add_inactive(ckrm_zone, 1);
+				zone->nr_inactive++;
+				list_add(&page->lru, inactive_list);
+			}
 			if (!pagevec_add(&pvec, page)) {
 				spin_unlock_irq(&zone->lru_lock);
 				__pagevec_release(&pvec);
@@ -654,7 +672,11 @@ done:
  * But we had to alter page->flags anyway.
  */
 static void
+#ifdef CONFIG_CKRM_RES_MEM
+refill_inactive_zone(struct ckrm_zone *ckrm_zone, struct scan_control *sc)
+#else
 refill_inactive_zone(struct zone *zone, struct scan_control *sc)
+#endif
 {
 	int pgmoved;
 	int pgdeactivate = 0;
@@ -669,13 +691,21 @@ refill_inactive_zone(struct zone *zone, 
 	long mapped_ratio;
 	long distress;
 	long swap_tendency;
+#ifdef CONFIG_CKRM_RES_MEM
+	struct zone *zone = ckrm_zone->zone;
+	struct list_head *active_list = &ckrm_zone->active_list;
+	struct list_head *inactive_list = &ckrm_zone->inactive_list;
+#else
+	struct list_head *active_list = &zone->active_list;
+	struct list_head *inactive_list = &zone->inactive_list;
+#endif
 
 	lru_add_drain();
 	pgmoved = 0;
 	spin_lock_irq(&zone->lru_lock);
-	while (pgscanned < nr_pages && !list_empty(&zone->active_list)) {
-		page = lru_to_page(&zone->active_list);
-		prefetchw_prev_lru_page(page, &zone->active_list, flags);
+	while (pgscanned < nr_pages && !list_empty(active_list)) {
+		page = lru_to_page(active_list);
+		prefetchw_prev_lru_page(page, active_list, flags);
 		if (!TestClearPageLRU(page))
 			BUG();
 		list_del(&page->lru);
@@ -688,7 +718,7 @@ refill_inactive_zone(struct zone *zone, 
 			 */
 			__put_page(page);
 			SetPageLRU(page);
-			list_add(&page->lru, &zone->active_list);
+			list_add(&page->lru, active_list);
 		} else {
 			list_add(&page->lru, &l_hold);
 			pgmoved++;
@@ -697,6 +727,7 @@ refill_inactive_zone(struct zone *zone, 
 	}
 	zone->pages_scanned += pgscanned;
 	zone->nr_active -= pgmoved;
+	ckrm_zone_sub_active(ckrm_zone, pgmoved);
 	spin_unlock_irq(&zone->lru_lock);
 
 	/*
@@ -755,10 +786,11 @@ refill_inactive_zone(struct zone *zone, 
 			BUG();
 		if (!TestClearPageActive(page))
 			BUG();
-		list_move(&page->lru, &zone->inactive_list);
+		list_move(&page->lru, inactive_list);
 		pgmoved++;
 		if (!pagevec_add(&pvec, page)) {
 			zone->nr_inactive += pgmoved;
+			ckrm_zone_add_inactive(ckrm_zone, pgmoved);
 			spin_unlock_irq(&zone->lru_lock);
 			pgdeactivate += pgmoved;
 			pgmoved = 0;
@@ -769,6 +801,7 @@ refill_inactive_zone(struct zone *zone, 
 		}
 	}
 	zone->nr_inactive += pgmoved;
+	ckrm_zone_add_inactive(ckrm_zone, pgmoved);
 	pgdeactivate += pgmoved;
 	if (buffer_heads_over_limit) {
 		spin_unlock_irq(&zone->lru_lock);
@@ -783,10 +816,11 @@ refill_inactive_zone(struct zone *zone, 
 		if (TestSetPageLRU(page))
 			BUG();
 		BUG_ON(!PageActive(page));
-		list_move(&page->lru, &zone->active_list);
+		list_move(&page->lru, active_list);
 		pgmoved++;
 		if (!pagevec_add(&pvec, page)) {
 			zone->nr_active += pgmoved;
+			ckrm_zone_add_active(ckrm_zone, pgmoved);
 			pgmoved = 0;
 			spin_unlock_irq(&zone->lru_lock);
 			__pagevec_release(&pvec);
@@ -794,6 +828,7 @@ refill_inactive_zone(struct zone *zone, 
 		}
 	}
 	zone->nr_active += pgmoved;
+	ckrm_zone_add_active(ckrm_zone, pgmoved);
 	spin_unlock_irq(&zone->lru_lock);
 	pagevec_release(&pvec);
 
@@ -801,6 +836,32 @@ refill_inactive_zone(struct zone *zone, 
 	mod_page_state(pgdeactivate, pgdeactivate);
 }
 
+#ifdef CONFIG_CKRM_RES_MEM
+static void
+shrink_ckrmzone(struct ckrm_zone *czone, struct scan_control *sc)
+{
+	while (czone->shrink_active || czone->shrink_inactive) {
+		if (czone->shrink_active) {
+			sc->nr_to_scan = min(czone->shrink_active,
+					(unsigned long)SWAP_CLUSTER_MAX);
+			czone->shrink_active -= sc->nr_to_scan;
+			refill_inactive_zone(czone, sc);
+		}
+		if (czone->shrink_inactive) {
+			sc->nr_to_scan = min(czone->shrink_inactive,
+					(unsigned long)SWAP_CLUSTER_MAX);
+			czone->shrink_inactive -= sc->nr_to_scan;
+			shrink_cache(czone, sc);
+			if (sc->nr_to_reclaim <= 0) {
+				czone->shrink_active = 0;
+				czone->shrink_inactive = 0;
+				break;
+			}
+		}
+	}
+}
+#endif
+
 /*
  * This is a basic per-zone page freer.  Used by both kswapd and direct reclaim.
  */
@@ -809,6 +870,9 @@ shrink_zone(struct zone *zone, struct sc
 {
 	unsigned long nr_active;
 	unsigned long nr_inactive;
+#ifdef CONFIG_CKRM_RES_MEM
+	struct ckrm_zone *czone;
+#endif
 
 	/*
 	 * Add one to `nr_to_scan' just to make sure that the kernel will
@@ -830,6 +894,24 @@ shrink_zone(struct zone *zone, struct sc
 
 	sc->nr_to_reclaim = sc->swap_cluster_max;
 
+#ifdef CONFIG_CKRM_RES_MEM
+	if (nr_active || nr_inactive) {
+		struct list_head *pos, *next;
+		LIST_HEAD(victims);
+
+		shrink_get_victims(zone, nr_active, nr_inactive, &victims);
+		pos = victims.next;
+		while (pos != &victims) {
+			czone = list_entry(pos, struct ckrm_zone, victim_list);
+			next = pos->next;
+			list_del_init(pos);
+			sc->nr_to_reclaim = czone->shrink_inactive;
+			shrink_ckrmzone(czone, sc);
+			ckrm_clear_shrink(czone);
+			pos = next;
+		}
+	}
+#else 
 	while (nr_active || nr_inactive) {
 		if (nr_active) {
 			sc->nr_to_scan = min(nr_active,
@@ -847,6 +929,7 @@ shrink_zone(struct zone *zone, struct sc
 				break;
 		}
 	}
+#endif
 
 	throttle_vm_writeout();
 }
