linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Balbir Singh <balbir@linux.vnet.ibm.com>
To: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: "linux-mm@kvack.org" <linux-mm@kvack.org>,
	xemul@openvz.org, "hugh@veritas.com" <hugh@veritas.com>
Subject: Re: [PATCH 2/7] charge/uncharge
Date: Mon, 17 Mar 2008 07:16:01 +0530	[thread overview]
Message-ID: <20080317014601.GB24473@balbir.in.ibm.com> (raw)
In-Reply-To: <20080314190622.0e147b43.kamezawa.hiroyu@jp.fujitsu.com>

* KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com> [2008-03-14 19:06:22]:

> Because the bit spin lock is removed and a spinlock is added to page_cgroup,
> there are a number of changes.
> 
> This patch does
> 	- modify charge/uncharge to adjust it to the new lock.
> 	- Added simple lock rule comments.
> 
> The major change from the current (-mm) version is
> 	- pc->refcnt is set as "1" after the charge is done.
> 
> Changelog
>   - Rebased to rc5-mm1
> 
> Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
>

Hi, KAMEZAWA-San,

The build continues to be broken, even after this patch is applied.
We will have to find another way to refactor the code, so that we
don't break git-bisect.
 
> 
>  mm/memcontrol.c |  136 +++++++++++++++++++++++++-------------------------------
>  1 file changed, 62 insertions(+), 74 deletions(-)
> 
> Index: mm-2.6.25-rc5-mm1/mm/memcontrol.c
> ===================================================================
> --- mm-2.6.25-rc5-mm1.orig/mm/memcontrol.c
> +++ mm-2.6.25-rc5-mm1/mm/memcontrol.c
> @@ -34,6 +34,16 @@
> 
>  #include <asm/uaccess.h>
> 
> +/*
> + * Lock Rule
> + * zone->lru_lock (global LRU)
> + *	-> pc->lock (page_cgroup's lock)
> + *		-> mz->lru_lock (mem_cgroup's per_zone lock.)
> + *
> + * At least, mz->lru_lock and pc->lock should be acquired irq off.
> + *
> + */
> +

I think the rule applies even to the zone's lru_lock, so we could just
state that all of these locks should be acquired with IRQs off.

>  struct cgroup_subsys mem_cgroup_subsys;
>  static const int MEM_CGROUP_RECLAIM_RETRIES = 5;
> 
> @@ -479,33 +489,22 @@ static int mem_cgroup_charge_common(stru
>  	if (mem_cgroup_subsys.disabled)
>  		return 0;
> 
> +	pc = get_page_cgroup(page, gfp_mask, true);
> +	if (!pc || IS_ERR(pc))
> +		return PTR_ERR(pc);
> +
> +	spin_lock_irqsave(&pc->lock, flags);
>  	/*
> -	 * Should page_cgroup's go to their own slab?
> -	 * One could optimize the performance of the charging routine
> -	 * by saving a bit in the page_flags and using it as a lock
> -	 * to see if the cgroup page already has a page_cgroup associated
> -	 * with it
> -	 */
> -retry:
> -	lock_page_cgroup(page);
> -	pc = page_get_page_cgroup(page);
> -	/*
> -	 * The page_cgroup exists and
> -	 * the page has already been accounted.
> +	 * Has the page already been accounted ?
>  	 */
> -	if (pc) {
> -		VM_BUG_ON(pc->page != page);
> -		VM_BUG_ON(pc->ref_cnt <= 0);
> -
> -		pc->ref_cnt++;
> -		unlock_page_cgroup(page);
> -		goto done;
> +	if (pc->refcnt > 0) {
> +		pc->refcnt++;
> +		spin_unlock_irqrestore(&pc->lock, flags);
> +		goto success;
>  	}
> -	unlock_page_cgroup(page);
> +	spin_unlock_irqrestore(&pc->lock, flags);
> 
> -	pc = kzalloc(sizeof(struct page_cgroup), gfp_mask);
> -	if (pc == NULL)
> -		goto err;
> +	/* Note: pc->refcnt is still 0 here. */
>

I think the comment can be updated to say that, for a new pc, the
refcnt is 0.
 
>  	/*
>  	 * We always charge the cgroup the mm_struct belongs to.
> @@ -526,7 +525,7 @@ retry:
> 
>  	while (res_counter_charge(&mem->res, PAGE_SIZE)) {
>  		if (!(gfp_mask & __GFP_WAIT))
> -			goto out;
> +			goto nomem;
> 
>  		if (try_to_free_mem_cgroup_pages(mem, gfp_mask))
>  			continue;
> @@ -543,45 +542,40 @@ retry:
> 
>  		if (!nr_retries--) {
>  			mem_cgroup_out_of_memory(mem, gfp_mask);
> -			goto out;
> +			goto nomem;
>  		}
>  		congestion_wait(WRITE, HZ/10);
>  	}
> -
> -	pc->ref_cnt = 1;
> +	/*
> + 	 * We have to acquire 2 spinlocks.
> +	 */
> +	spin_lock_irqsave(&pc->lock, flags);
> +	if (pc->refcnt) {
> +		/* Someone charged this page while we released the lock */
> +		++pc->refcnt;

We used pc->refcnt++ earlier; for consistency, we could use that here
as well.

> +		spin_unlock_irqrestore(&pc->lock, flags);
> +		res_counter_uncharge(&mem->res, PAGE_SIZE);
> +		css_put(&mem->css);
> +		goto success;
> +	}
> +	/* No one else touches this. */
> +	VM_BUG_ON(pc->mem_cgroup);
> +	VM_BUG_ON(!list_empty(&pc->lru));
> +	pc->refcnt = 1;
>  	pc->mem_cgroup = mem;
> -	pc->page = page;
>  	pc->flags = PAGE_CGROUP_FLAG_ACTIVE;
>  	if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE)
>  		pc->flags |= PAGE_CGROUP_FLAG_CACHE;
> -
> -	lock_page_cgroup(page);
> -	if (page_get_page_cgroup(page)) {
> -		unlock_page_cgroup(page);
> -		/*
> -		 * Another charge has been added to this page already.
> -		 * We take lock_page_cgroup(page) again and read
> -		 * page->cgroup, increment refcnt.... just retry is OK.
> -		 */
> -		res_counter_uncharge(&mem->res, PAGE_SIZE);
> -		css_put(&mem->css);
> -		kfree(pc);
> -		goto retry;
> -	}
> -	page_assign_page_cgroup(page, pc);
> -
>  	mz = page_cgroup_zoneinfo(pc);
> -	spin_lock_irqsave(&mz->lru_lock, flags);
> +	spin_lock(&mz->lru_lock);
>  	__mem_cgroup_add_list(pc);
> -	spin_unlock_irqrestore(&mz->lru_lock, flags);
> +	spin_unlock(&mz->lru_lock);
> +	spin_unlock_irqrestore(&pc->lock, flags);
> 
> -	unlock_page_cgroup(page);
> -done:
> +success:
>  	return 0;
> -out:
> +nomem:
>  	css_put(&mem->css);
> -	kfree(pc);
> -err:
>  	return -ENOMEM;
>  }
> 
> @@ -617,33 +611,27 @@ void mem_cgroup_uncharge_page(struct pag
>  	/*
>  	 * Check if our page_cgroup is valid
>  	 */
> -	lock_page_cgroup(page);
> -	pc = page_get_page_cgroup(page);
> +	pc = get_page_cgroup(page, GFP_ATOMIC, false); /* No allocation */
>  	if (!pc)
> -		goto unlock;
> -
> -	VM_BUG_ON(pc->page != page);
> -	VM_BUG_ON(pc->ref_cnt <= 0);
> -
> -	if (--(pc->ref_cnt) == 0) {
> -		mz = page_cgroup_zoneinfo(pc);
> -		spin_lock_irqsave(&mz->lru_lock, flags);
> -		__mem_cgroup_remove_list(pc);
> -		spin_unlock_irqrestore(&mz->lru_lock, flags);
> -
> -		page_assign_page_cgroup(page, NULL);
> -		unlock_page_cgroup(page);
> -
> -		mem = pc->mem_cgroup;
> -		res_counter_uncharge(&mem->res, PAGE_SIZE);
> -		css_put(&mem->css);
> -
> -		kfree(pc);
> +		return;
> +	spin_lock_irqsave(&pc->lock, flags);
> +	if (!pc->refcnt || --pc->refcnt > 0) {
> +		spin_unlock_irqrestore(&pc->lock, flags);
>  		return;
>  	}
> +	VM_BUG_ON(pc->page != page);
> +	mz = page_cgroup_zoneinfo(pc);
> +	mem = pc->mem_cgroup;
> 
> -unlock:
> -	unlock_page_cgroup(page);
> +	spin_lock(&mz->lru_lock);
> +	__mem_cgroup_remove_list(pc);
> +	spin_unlock(&mz->lru_lock);
> +
> +	pc->flags = 0;
> +	pc->mem_cgroup = 0;
> +	res_counter_uncharge(&mem->res, PAGE_SIZE);
> +	css_put(&mem->css);
> +	spin_unlock_irqrestore(&pc->lock, flags);
>  }
>


-- 
	Warm Regards,
	Balbir Singh
	Linux Technology Center
	IBM, ISTL

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  reply	other threads:[~2008-03-17  1:47 UTC|newest]

Thread overview: 53+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2008-03-14  9:59 [PATCH 0/7] memcg: radix-tree page_cgroup KAMEZAWA Hiroyuki
2008-03-14 10:03 ` [PATCH 1/7] re-define page_cgroup KAMEZAWA Hiroyuki
2008-03-16 14:15   ` Balbir Singh
2008-03-18  1:10     ` KAMEZAWA Hiroyuki
2008-03-17  0:21   ` Li Zefan
2008-03-18  1:12     ` KAMEZAWA Hiroyuki
2008-03-17  2:07   ` Li Zefan
2008-03-18  1:11     ` KAMEZAWA Hiroyuki
2008-03-14 10:06 ` [PATCH 2/7] charge/uncharge KAMEZAWA Hiroyuki
2008-03-17  1:46   ` Balbir Singh [this message]
2008-03-18  1:14     ` KAMEZAWA Hiroyuki
2008-03-17  2:26   ` Li Zefan
2008-03-18  1:15     ` KAMEZAWA Hiroyuki
2008-03-14 10:07 ` [PATCH 3/7] memcg: move_lists KAMEZAWA Hiroyuki
2008-03-18 16:44   ` Balbir Singh
2008-03-19  2:34     ` KAMEZAWA Hiroyuki
2008-03-14 10:15 ` [PATCH 4/7] memcg: page migration KAMEZAWA Hiroyuki
2008-03-17  2:36   ` Li Zefan
2008-03-18  1:17     ` KAMEZAWA Hiroyuki
2008-03-18 18:11   ` Balbir Singh
2008-03-19  2:44     ` KAMEZAWA Hiroyuki
2008-03-14 10:17 ` [PATCH 5/7] radix-tree page cgroup KAMEZAWA Hiroyuki
2008-03-17  2:56   ` Li Zefan
2008-03-17  3:26     ` Li Zefan
2008-03-18  1:18       ` KAMEZAWA Hiroyuki
2008-03-18  1:23     ` KAMEZAWA Hiroyuki
2008-03-19  2:05   ` Balbir Singh
2008-03-19  2:51     ` KAMEZAWA Hiroyuki
2008-03-19  3:14   ` Balbir Singh
2008-03-19  3:24     ` KAMEZAWA Hiroyuki
2008-03-19 21:11   ` Peter Zijlstra
2008-03-20  4:45     ` KAMEZAWA Hiroyuki
2008-03-20  5:09       ` KAMEZAWA Hiroyuki
2008-03-14 10:18 ` [PATCH 6/7] memcg: speed up by percpu KAMEZAWA Hiroyuki
2008-03-17  3:03   ` Li Zefan
2008-03-18  1:25     ` KAMEZAWA Hiroyuki
2008-03-18 23:55       ` Li Zefan
2008-03-19  2:51         ` KAMEZAWA Hiroyuki
2008-03-19 21:19   ` Peter Zijlstra
2008-03-19 21:41     ` Peter Zijlstra
2008-03-20  9:08       ` Andy Whitcroft
2008-03-20  4:46     ` KAMEZAWA Hiroyuki
2008-03-14 10:22 ` [PATCH 7/7] memcg: freeing page_cgroup at suitable chance KAMEZAWA Hiroyuki
2008-03-17  3:10   ` Li Zefan
2008-03-18  1:30     ` KAMEZAWA Hiroyuki
2008-03-19 21:33   ` Peter Zijlstra
2008-03-20  5:07     ` KAMEZAWA Hiroyuki
2008-03-20  7:55       ` Peter Zijlstra
2008-03-20 14:49       ` kamezawa.hiroyu
2008-03-20 16:04       ` kamezawa.hiroyu
2008-03-20 16:09         ` Peter Zijlstra
2008-03-20 16:15         ` kamezawa.hiroyu
2008-03-15  6:15 ` [PATCH 0/7] memcg: radix-tree page_cgroup Balbir Singh

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20080317014601.GB24473@balbir.in.ibm.com \
    --to=balbir@linux.vnet.ibm.com \
    --cc=hugh@veritas.com \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=linux-mm@kvack.org \
    --cc=xemul@openvz.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox