From mboxrd@z Thu Jan 1 00:00:00 1970 Received: from d28relay02.in.ibm.com (d28relay02.in.ibm.com [9.184.220.59]) by e28smtp02.in.ibm.com (8.13.1/8.13.1) with ESMTP id m2H1lSqX004509 for ; Mon, 17 Mar 2008 07:17:28 +0530 Received: from d28av02.in.ibm.com (d28av02.in.ibm.com [9.184.220.64]) by d28relay02.in.ibm.com (8.13.8/8.13.8/NCO v8.7) with ESMTP id m2H1lRaW1183826 for ; Mon, 17 Mar 2008 07:17:28 +0530 Received: from d28av02.in.ibm.com (loopback [127.0.0.1]) by d28av02.in.ibm.com (8.13.1/8.13.3) with ESMTP id m2H1lRFG010308 for ; Mon, 17 Mar 2008 01:47:27 GMT Date: Mon, 17 Mar 2008 07:16:01 +0530 From: Balbir Singh Subject: Re: [PATCH 2/7] charge/uncharge Message-ID: <20080317014601.GB24473@balbir.in.ibm.com> Reply-To: balbir@linux.vnet.ibm.com References: <20080314185954.5cd51ff6.kamezawa.hiroyu@jp.fujitsu.com> <20080314190622.0e147b43.kamezawa.hiroyu@jp.fujitsu.com> MIME-Version: 1.0 Content-Type: text/plain; charset=iso-8859-1 Content-Disposition: inline In-Reply-To: <20080314190622.0e147b43.kamezawa.hiroyu@jp.fujitsu.com> Sender: owner-linux-mm@kvack.org Return-Path: To: KAMEZAWA Hiroyuki Cc: "linux-mm@kvack.org" , xemul@openvz.org, "hugh@veritas.com" List-ID: * KAMEZAWA Hiroyuki [2008-03-14 19:06:22]: > Because bit spin lock is removed and spinlock is added to page_cgroup. > There are some amount of changes. > > This patch does > - modify charge/uncharge to adjust it to the new lock. > - Added simple lock rule comments. > > Major changes from current(-mm) version is > - pc->refcnt is set as "1" after the charge is done. > > Changelog > - Rebased to rc5-mm1 > > Signed-off-by: KAMEZAWA Hiroyuki > Hi, KAMEZAWA-San, The build continues to be broken, even after this patch is applied. We will have to find another way to refactor the code, so that we don't break git-bisect. > > mm/memcontrol.c | 136 +++++++++++++++++++++++++------------------------------- > 1 file changed, 62 insertions(+), 74 deletions(-) > > Index: mm-2.6.25-rc5-mm1/mm/memcontrol.c > =================================================================== > --- mm-2.6.25-rc5-mm1.orig/mm/memcontrol.c > +++ mm-2.6.25-rc5-mm1/mm/memcontrol.c > @@ -34,6 +34,16 @@ > > #include > > +/* > + * Lock Rule > + * zone->lru_lcok (global LRU) > + * -> pc->lock (page_cgroup's lock) > + * -> mz->lru_lock (mem_cgroup's per_zone lock.) > + * > + * At least, mz->lru_lock and pc->lock should be acquired irq off. > + * > + */ > + I think the rule applies to even the zone's lru_lock, so we could just state that these two locks should be acquired with irq's off. > struct cgroup_subsys mem_cgroup_subsys; > static const int MEM_CGROUP_RECLAIM_RETRIES = 5; > > @@ -479,33 +489,22 @@ static int mem_cgroup_charge_common(stru > if (mem_cgroup_subsys.disabled) > return 0; > > + pc = get_page_cgroup(page, gfp_mask, true); > + if (!pc || IS_ERR(pc)) > + return PTR_ERR(pc); > + > + spin_lock_irqsave(&pc->lock, flags); > /* > - * Should page_cgroup's go to their own slab? > - * One could optimize the performance of the charging routine > - * by saving a bit in the page_flags and using it as a lock > - * to see if the cgroup page already has a page_cgroup associated > - * with it > - */ > -retry: > - lock_page_cgroup(page); > - pc = page_get_page_cgroup(page); > - /* > - * The page_cgroup exists and > - * the page has already been accounted. > + * Has the page already been accounted ? > */ > - if (pc) { > - VM_BUG_ON(pc->page != page); > - VM_BUG_ON(pc->ref_cnt <= 0); > - > - pc->ref_cnt++; > - unlock_page_cgroup(page); > - goto done; > + if (pc->refcnt > 0) { > + pc->refcnt++; > + spin_unlock_irqrestore(&pc->lock, flags); > + goto success; > } > - unlock_page_cgroup(page); > + spin_unlock_irqrestore(&pc->lock, flags); > > - pc = kzalloc(sizeof(struct page_cgroup), gfp_mask); > - if (pc == NULL) > - goto err; > + /* Note: pc->refcnt is still 0 here. */ > I think the comment can be updated to say for new pc's the refcnt is 0. > /* > * We always charge the cgroup the mm_struct belongs to. > @@ -526,7 +525,7 @@ retry: > > while (res_counter_charge(&mem->res, PAGE_SIZE)) { > if (!(gfp_mask & __GFP_WAIT)) > - goto out; > + goto nomem; > > if (try_to_free_mem_cgroup_pages(mem, gfp_mask)) > continue; > @@ -543,45 +542,40 @@ retry: > > if (!nr_retries--) { > mem_cgroup_out_of_memory(mem, gfp_mask); > - goto out; > + goto nomem; > } > congestion_wait(WRITE, HZ/10); > } > - > - pc->ref_cnt = 1; > + /* > + * We have to acquire 2 spinlocks. > + */ > + spin_lock_irqsave(&pc->lock, flags); > + if (pc->refcnt) { > + /* Someone charged this page while we released the lock */ > + ++pc->refcnt; We used pc->refcnt++ earlier, for consistency we could use that here as well. > + spin_unlock_irqrestore(&pc->lock, flags); > + res_counter_uncharge(&mem->res, PAGE_SIZE); > + css_put(&mem->css); > + goto success; > + } > + /* Anyone doesn't touch this. */ > + VM_BUG_ON(pc->mem_cgroup); > + VM_BUG_ON(!list_empty(&pc->lru)); > + pc->refcnt = 1; > pc->mem_cgroup = mem; > - pc->page = page; > pc->flags = PAGE_CGROUP_FLAG_ACTIVE; > if (ctype == MEM_CGROUP_CHARGE_TYPE_CACHE) > pc->flags |= PAGE_CGROUP_FLAG_CACHE; > - > - lock_page_cgroup(page); > - if (page_get_page_cgroup(page)) { > - unlock_page_cgroup(page); > - /* > - * Another charge has been added to this page already. > - * We take lock_page_cgroup(page) again and read > - * page->cgroup, increment refcnt.... just retry is OK. > - */ > - res_counter_uncharge(&mem->res, PAGE_SIZE); > - css_put(&mem->css); > - kfree(pc); > - goto retry; > - } > - page_assign_page_cgroup(page, pc); > - > mz = page_cgroup_zoneinfo(pc); > - spin_lock_irqsave(&mz->lru_lock, flags); > + spin_lock(&mz->lru_lock); > __mem_cgroup_add_list(pc); > - spin_unlock_irqrestore(&mz->lru_lock, flags); > + spin_unlock(&mz->lru_lock); > + spin_unlock_irqrestore(&pc->lock, flags); > > - unlock_page_cgroup(page); > -done: > +success: > return 0; > -out: > +nomem: > css_put(&mem->css); > - kfree(pc); > -err: > return -ENOMEM; > } > > @@ -617,33 +611,27 @@ void mem_cgroup_uncharge_page(struct pag > /* > * Check if our page_cgroup is valid > */ > - lock_page_cgroup(page); > - pc = page_get_page_cgroup(page); > + pc = get_page_cgroup(page, GFP_ATOMIC, false); /* No allocation */ > if (!pc) > - goto unlock; > - > - VM_BUG_ON(pc->page != page); > - VM_BUG_ON(pc->ref_cnt <= 0); > - > - if (--(pc->ref_cnt) == 0) { > - mz = page_cgroup_zoneinfo(pc); > - spin_lock_irqsave(&mz->lru_lock, flags); > - __mem_cgroup_remove_list(pc); > - spin_unlock_irqrestore(&mz->lru_lock, flags); > - > - page_assign_page_cgroup(page, NULL); > - unlock_page_cgroup(page); > - > - mem = pc->mem_cgroup; > - res_counter_uncharge(&mem->res, PAGE_SIZE); > - css_put(&mem->css); > - > - kfree(pc); > + return; > + spin_lock_irqsave(&pc->lock, flags); > + if (!pc->refcnt || --pc->refcnt > 0) { > + spin_unlock_irqrestore(&pc->lock, flags); > return; > } > + VM_BUG_ON(pc->page != page); > + mz = page_cgroup_zoneinfo(pc); > + mem = pc->mem_cgroup; > > -unlock: > - unlock_page_cgroup(page); > + spin_lock(&mz->lru_lock); > + __mem_cgroup_remove_list(pc); > + spin_unlock(&mz->lru_lock); > + > + pc->flags = 0; > + pc->mem_cgroup = 0; > + res_counter_uncharge(&mem->res, PAGE_SIZE); > + css_put(&mem->css); > + spin_unlock_irqrestore(&pc->lock, flags); > } > -- Warm Regards, Balbir Singh Linux Technology Center IBM, ISTL -- To unsubscribe, send a message with 'unsubscribe linux-mm' in the body to majordomo@kvack.org. For more info on Linux MM, see: http://www.linux-mm.org/ . Don't email: email@kvack.org