From: Balbir Singh <balbir@linux.vnet.ibm.com>
To: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: "linux-mm@kvack.org" <linux-mm@kvack.org>,
xemul@openvz.org, "hugh@veritas.com" <hugh@veritas.com>
Subject: Re: [PATCH 2/7] charge/uncharge
Date: Mon, 17 Mar 2008 07:16:01 +0530 [thread overview]
Message-ID: <20080317014601.GB24473@balbir.in.ibm.com> (raw)
In-Reply-To: <20080314190622.0e147b43.kamezawa.hiroyu@jp.fujitsu.com>
* KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> [2008-03-14 19:06:22]:
> Because bit spin lock is removed and spinlock is added to page_cgroup.
> There are some amount of changes.
>
> This patch does
> - modify charge/uncharge to adjust it to the new lock.
> - Added simple lock rule comments.
>
> Major changes from current(-mm) version is
> - pc->refcnt is set as "1" after the charge is done.
>
> Changelog
> - Rebased to rc5-mm1
>
> Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
>
Hi, KAMEZAWA-San,
The build continues to be broken, even after this patch is applied.
We will have to find another way to refactor the code, so that we
don't break git-bisect.
>
> mm/memcontrol.c | 136 +++++++++++++++++++++++++-------------------------------
> 1 file changed, 62 insertions(+), 74 deletions(-)
>
> Index: mm-2.6.25-rc5-mm1/mm/memcontrol.c
> ===================================================================
> --- mm-2.6.25-rc5-mm1.orig/mm/memcontrol.c
> +++ mm-2.6.25-rc5-mm1/mm/memcontrol.c
> @@ -34,6 +34,16 @@
>
> #include <asm/uaccess.h>
>
> +/*
> + * Lock Rule
> + * zone->lru_lcok (global LRU)
> + * -> pc->lock (page_cgroup's lock)
> + * -> mz->lru_lock (mem_cgroup's per_zone lock.)
> + *
> + * At least, mz->lru_lock and pc->lock should be acquired irq off.
> + *
> + */
> +
I think the rule applies even to the zone's lru_lock, so we could just
state that these two locks should be acquired with IRQs off.
> struct cgroup_subsys mem_cgroup_subsys;
> static const int MEM_CGROUP_RECLAIM_RETRIES = 5;
>
> @@ -479,33 +489,22 @@ static int mem_cgroup_charge_common(stru
> if (mem_cgroup_subsys.disabled)
> return 0;
>
> + pc = get_page_cgroup(page, gfp_mask, true);
> + if (!pc || IS_ERR(pc))
> + return PTR_ERR(pc);
> +
> + spin_lock_irqsave(&pc->lock, flags);
> /*
> - * Should page_cgroup's go to their own slab?
> - * One could optimize the performance of the charging routine
> - * by saving a bit in the page_flags and using it as a lock
> - * to see if the cgroup page already has a page_cgroup associated
> - * with it
> - */
> -retry:
> - lock_page_cgroup(page);
> - pc = page_get_page_cgroup(page);
> - /*
> - * The page_cgroup exists and
> - * the page has already been accounted.
> + * Has the page already been accounted ?
> */
> - if (pc) {
> - VM_BUG_ON(pc->page != page);
> - VM_BUG_ON(pc->ref_cnt <= 0);
> -
> - pc->ref_cnt++;
> - unlock_page_cgroup(page);
> - goto done;
> + if (pc->refcnt > 0) {
> + pc->refcnt++;
> + spin_unlock_irqrestore(&pc->lock, flags);
> + goto success;
> }
> - unlock_page_cgroup(page);
> + spin_unlock_irqrestore(&pc->lock, flags);
>
> - pc = kzalloc(sizeof(struct page_cgroup), gfp_mask);
> - if (pc == NULL)
> - goto err;
> + /* Note: pc->refcnt is still 0 here. */
>
I think the comment can be updated to say that, for new pc's, the
refcnt is 0.
> /*
> * We always charge the cgroup the mm_struct belongs to.
> @@ -526,7 +525,7 @@ retry:
>
> while (res_counter_charge(&mem->res, PAGE_SIZE)) {
> if (!(gfp_mask & __GFP_WAIT))
> - goto out;
> + goto nomem;
>
> if (try_to_free_mem_cgroup_pages(mem, gfp_mask))
> continue;
> @@ -543,45 +542,40 @@ retry:
>
> if (!nr_retries--) {
> mem_cgroup_out_of_memory(mem, gfp_mask);
> - goto out;
> + goto nomem;
> }
> congestion_wait(WRITE, HZ/10);
> }
> -
> - pc->ref_cnt = 1;
> + /*
> + * We have to acquire 2 spinlocks.
> + */
> + spin_lock_irqsave(&pc->lock, flags);
> + if (pc->refcnt) {
> + /* Someone charged this page while we released the lock */
> + ++pc->refcnt;
We used pc->refcnt++ earlier; for consistency, we could use that here
as well.
> + spin_unlock_irqrestore(&pc->lock, flags);
> + res_counter_uncharge(&mem->res, PAGE_SIZE);
> + css_put(&mem->css);
> + goto success;
> + }
> + /* Anyone doesn't touch this. */
> + VM_BUG_ON(pc->mem_cgroup);
> + VM_BUG_ON(!list_empty(&pc->lru));
> + pc->refcnt = 1;
> pc->mem_cgroup = mem;
> - pc->page = page;
> pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
> if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE)
> pc->flags |= PAGE_CGROUP_FLAG_CACHE;
> -
> - lock_page_cgroup(page);
> - if (page_get_page_cgroup(page)) {
> - unlock_page_cgroup(page);
> - /*
> - * Another charge has been added to this page already.
> - * We take lock_page_cgroup(page) again and read
> - * page->cgroup, increment refcnt.... just retry is OK.
> - */
> - res_counter_uncharge(&mem->res, PAGE_SIZE);
> - css_put(&mem->css);
> - kfree(pc);
> - goto retry;
> - }
> - page_assign_page_cgroup(page, pc);
> -
> mz = page_cgroup_zoneinfo(pc);
> - spin_lock_irqsave(&mz->lru_lock, flags);
> + spin_lock(&mz->lru_lock);
> __mem_cgroup_add_list(pc);
> - spin_unlock_irqrestore(&mz->lru_lock, flags);
> + spin_unlock(&mz->lru_lock);
> + spin_unlock_irqrestore(&pc->lock, flags);
>
> - unlock_page_cgroup(page);
> -done:
> +success:
> return 0;
> -out:
> +nomem:
> css_put(&mem->css);
> - kfree(pc);
> -err:
> return -ENOMEM;
> }
>
> @@ -617,33 +611,27 @@ void mem_cgroup_uncharge_page(struct pag
> /*
> * Check if our page_cgroup is valid
> */
> - lock_page_cgroup(page);
> - pc = page_get_page_cgroup(page);
> + pc = get_page_cgroup(page, GFP_ATOMIC, false); /* No allocation */
> if (!pc)
> - goto unlock;
> -
> - VM_BUG_ON(pc->page != page);
> - VM_BUG_ON(pc->ref_cnt <= 0);
> -
> - if (--(pc->ref_cnt) == 0) {
> - mz = page_cgroup_zoneinfo(pc);
> - spin_lock_irqsave(&mz->lru_lock, flags);
> - __mem_cgroup_remove_list(pc);
> - spin_unlock_irqrestore(&mz->lru_lock, flags);
> -
> - page_assign_page_cgroup(page, NULL);
> - unlock_page_cgroup(page);
> -
> - mem = pc->mem_cgroup;
> - res_counter_uncharge(&mem->res, PAGE_SIZE);
> - css_put(&mem->css);
> -
> - kfree(pc);
> + return;
> + spin_lock_irqsave(&pc->lock, flags);
> + if (!pc->refcnt || --pc->refcnt > 0) {
> + spin_unlock_irqrestore(&pc->lock, flags);
> return;
> }
> + VM_BUG_ON(pc->page != page);
> + mz = page_cgroup_zoneinfo(pc);
> + mem = pc->mem_cgroup;
>
> -unlock:
> - unlock_page_cgroup(page);
> + spin_lock(&mz->lru_lock);
> + __mem_cgroup_remove_list(pc);
> + spin_unlock(&mz->lru_lock);
> +
> + pc->flags = 0;
> + pc->mem_cgroup = 0;
> + res_counter_uncharge(&mem->res, PAGE_SIZE);
> + css_put(&mem->css);
> + spin_unlock_irqrestore(&pc->lock, flags);
> }
>
--
Warm Regards,
Balbir Singh
Linux Technology Center
IBM, ISTL
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2008-03-17 1:47 UTC|newest]
Thread overview: 53+ messages / expand[flat|nested] mbox.gz Atom feed top
2008-03-14 9:59 [PATCH 0/7] memcg: radix-tree page_cgroup KAMEZAWA Hiroyuki
2008-03-14 10:03 ` [PATCH 1/7] re-define page_cgroup KAMEZAWA Hiroyuki
2008-03-16 14:15 ` Balbir Singh
2008-03-18 1:10 ` KAMEZAWA Hiroyuki
2008-03-17 0:21 ` Li Zefan
2008-03-18 1:12 ` KAMEZAWA Hiroyuki
2008-03-17 2:07 ` Li Zefan
2008-03-18 1:11 ` KAMEZAWA Hiroyuki
2008-03-14 10:06 ` [PATCH 2/7] charge/uncharge KAMEZAWA Hiroyuki
2008-03-17 1:46 ` Balbir Singh [this message]
2008-03-18 1:14 ` KAMEZAWA Hiroyuki
2008-03-17 2:26 ` Li Zefan
2008-03-18 1:15 ` KAMEZAWA Hiroyuki
2008-03-14 10:07 ` [PATCH 3/7] memcg: move_lists KAMEZAWA Hiroyuki
2008-03-18 16:44 ` Balbir Singh
2008-03-19 2:34 ` KAMEZAWA Hiroyuki
2008-03-14 10:15 ` [PATCH 4/7] memcg: page migration KAMEZAWA Hiroyuki
2008-03-17 2:36 ` Li Zefan
2008-03-18 1:17 ` KAMEZAWA Hiroyuki
2008-03-18 18:11 ` Balbir Singh
2008-03-19 2:44 ` KAMEZAWA Hiroyuki
2008-03-14 10:17 ` [PATCH 5/7] radix-tree page cgroup KAMEZAWA Hiroyuki
2008-03-17 2:56 ` Li Zefan
2008-03-17 3:26 ` Li Zefan
2008-03-18 1:18 ` KAMEZAWA Hiroyuki
2008-03-18 1:23 ` KAMEZAWA Hiroyuki
2008-03-19 2:05 ` Balbir Singh
2008-03-19 2:51 ` KAMEZAWA Hiroyuki
2008-03-19 3:14 ` Balbir Singh
2008-03-19 3:24 ` KAMEZAWA Hiroyuki
2008-03-19 21:11 ` Peter Zijlstra
2008-03-20 4:45 ` KAMEZAWA Hiroyuki
2008-03-20 5:09 ` KAMEZAWA Hiroyuki
2008-03-14 10:18 ` [PATCH 6/7] memcg: speed up by percpu KAMEZAWA Hiroyuki
2008-03-17 3:03 ` Li Zefan
2008-03-18 1:25 ` KAMEZAWA Hiroyuki
2008-03-18 23:55 ` Li Zefan
2008-03-19 2:51 ` KAMEZAWA Hiroyuki
2008-03-19 21:19 ` Peter Zijlstra
2008-03-19 21:41 ` Peter Zijlstra
2008-03-20 9:08 ` Andy Whitcroft
2008-03-20 4:46 ` KAMEZAWA Hiroyuki
2008-03-14 10:22 ` [PATCH 7/7] memcg: freeing page_cgroup at suitable chance KAMEZAWA Hiroyuki
2008-03-17 3:10 ` Li Zefan
2008-03-18 1:30 ` KAMEZAWA Hiroyuki
2008-03-19 21:33 ` Peter Zijlstra
2008-03-20 5:07 ` KAMEZAWA Hiroyuki
2008-03-20 7:55 ` Peter Zijlstra
2008-03-20 14:49 ` kamezawa.hiroyu
2008-03-20 16:04 ` kamezawa.hiroyu
2008-03-20 16:09 ` Peter Zijlstra
2008-03-20 16:15 ` kamezawa.hiroyu
2008-03-15 6:15 ` [PATCH 0/7] memcg: radix-tree page_cgroup Balbir Singh
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20080317014601.GB24473@balbir.in.ibm.com \
--to=balbir@linux.vnet.ibm.com \
--cc=hugh@veritas.com \
--cc=kamezawa.hiroyu@jp.fujitsu.com \
--cc=linux-mm@kvack.org \
--cc=xemul@openvz.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox