From: Ying Han <yinghan@google.com>
To: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
Cc: "linux-mm@kvack.org" <linux-mm@kvack.org>,
	"hugh.dickins@tiscali.co.uk" <hugh.dickins@tiscali.co.uk>,
	"hannes@cmpxchg.org" <hannes@cmpxchg.org>,
	Michal Hocko <mhocko@suse.cz>,
	cgroups@vger.kernel.org,
	"bsingharora@gmail.com" <bsingharora@gmail.com>
Subject: Re: [RFC] [PATCH 3/7 v2] memcg: remove PCG_MOVE_LOCK flag from pc->flags
Date: Mon, 23 Jan 2012 14:02:48 -0800
Message-ID: <CALWz4izasaECifCYoRXL45x1YXYzACC=kUHQivnGZKRH+ySjuw@mail.gmail.com>
In-Reply-To: <20120113174019.8dff3fc1.kamezawa.hiroyu@jp.fujitsu.com>

On Fri, Jan 13, 2012 at 12:40 AM, KAMEZAWA Hiroyuki
<kamezawa.hiroyu@jp.fujitsu.com> wrote:
>
> From 1008e84d94245b1e7c4d237802ff68ff00757736 Mon Sep 17 00:00:00 2001
> From: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> Date: Thu, 12 Jan 2012 15:53:24 +0900
> Subject: [PATCH 3/7] memcg: remove PCG_MOVE_LOCK flag from pc->flags.
>
> The PCG_MOVE_LOCK bit implements a bit spinlock that avoids races between
> memcg account moving and page state statistics updates.
>
> In the page-statistics update path, which is very hot, this lock is
> taken only when someone is moving accounts (or for PageTransHuge() pages).
> And, now, all account moving between memcgroups (by task-move)
> is serialized.

This might be a side question, but can you clarify the serialization here?
Does it mean that we only allow one task-move at a time, system-wide?

Thanks

--Ying
>
> So, it seems too costly to dedicate one bit per page to this purpose.
>
> This patch removes PCG_MOVE_LOCK and adds a hashed rwlock array
> in its place. This works well enough. Even when we need to take the
> lock, the hot path does not need to disable IRQs, because readers
> of an rwlock do not block each other.
>
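
Side note: stripped of the memcg details, the scheme described above is just
an array of rwlocks indexed by a hash of the page frame number. A minimal
userspace sketch of that pattern (pthread rwlocks instead of the kernel's
rwlock_t, a made-up array size, and names invented to mirror the patch, so
not the real code):

#include <pthread.h>
#include <stdio.h>

#define NR_MOVE_ACCOUNT_LOCKS 16             /* the patch sizes this by NR_CPUS */

static pthread_rwlock_t move_account_locks[NR_MOVE_ACCOUNT_LOCKS];

/* stand-in for __mem_cgroup_account_move_lock(page) */
static pthread_rwlock_t *account_move_lock(unsigned long pfn)
{
	return &move_account_locks[pfn % NR_MOVE_ACCOUNT_LOCKS];
}

int main(void)
{
	unsigned long pfn = 123456;           /* stand-in for page_to_pfn(page) */

	for (int i = 0; i < NR_MOVE_ACCOUNT_LOCKS; i++)
		pthread_rwlock_init(&move_account_locks[i], NULL);

	/* statistics updater: shared (read) side */
	pthread_rwlock_rdlock(account_move_lock(pfn));
	/* ... update a per-memcg page statistic ... */
	pthread_rwlock_unlock(account_move_lock(pfn));

	/* account mover: exclusive (write) side */
	pthread_rwlock_wrlock(account_move_lock(pfn));
	/* ... move pc->mem_cgroup and the stats that go with it ... */
	pthread_rwlock_unlock(account_move_lock(pfn));

	printf("pfn %lu hashes to lock %lu\n", pfn, pfn % NR_MOVE_ACCOUNT_LOCKS);
	return 0;
}

Readers never block other readers, so the common statistics-update path stays
cheap even while a move is in flight; only the mover takes the exclusive side.
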
> Signed-off-by: KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>
> ---
>  include/linux/page_cgroup.h |   19 -----------
>  mm/memcontrol.c             |   72 ++++++++++++++++++++++++++++++++++++++----
>  2 files changed, 65 insertions(+), 26 deletions(-)
>
> diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h
> index a2d1177..5dba799 100644
> --- a/include/linux/page_cgroup.h
> +++ b/include/linux/page_cgroup.h
> @@ -8,7 +8,6 @@ enum {
>        PCG_USED, /* this object is in use. */
>        PCG_MIGRATION, /* under page migration */
>        /* flags for mem_cgroup and file and I/O status */
> -       PCG_MOVE_LOCK, /* For race between move_account v.s. following bits */
>        PCG_FILE_MAPPED, /* page is accounted as "mapped" */
>        __NR_PCG_FLAGS,
>  };
> @@ -95,24 +94,6 @@ static inline void unlock_page_cgroup(struct page_cgroup *pc)
>        bit_spin_unlock(PCG_LOCK, &pc->flags);
>  }
>
> -static inline void move_lock_page_cgroup(struct page_cgroup *pc,
> -       unsigned long *flags)
> -{
> -       /*
> -        * We know updates to pc->flags of page cache's stats are from both of
> -        * usual context or IRQ context. Disable IRQ to avoid deadlock.
> -        */
> -       local_irq_save(*flags);
> -       bit_spin_lock(PCG_MOVE_LOCK, &pc->flags);
> -}
> -
> -static inline void move_unlock_page_cgroup(struct page_cgroup *pc,
> -       unsigned long *flags)
> -{
> -       bit_spin_unlock(PCG_MOVE_LOCK, &pc->flags);
> -       local_irq_restore(*flags);
> -}
> -
>  #else /* CONFIG_CGROUP_MEM_RES_CTLR */
>  struct page_cgroup;
>
> diff --git a/mm/memcontrol.c b/mm/memcontrol.c
> index 9019069..61e276f 100644
> --- a/mm/memcontrol.c
> +++ b/mm/memcontrol.c
> @@ -1338,6 +1338,65 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg)
>        return false;
>  }
>
> +/*
> + * When moving accounting information between cgroups, we race with
> + * page status accounting. To avoid that, we need some locks. In general,
> + * adding atomic ops to a hot path is very bad, so we use 2-level logic.
> + *
> + * When a thread starts moving account information, the per-cpu
> + * MEM_CGROUP_ON_MOVE value is set. If MEM_CGROUP_ON_MOVE==0, there is no
> + * race and page status updates can be done without any locks. If
> + * MEM_CGROUP_ON_MOVE>0, we use the following hashed rwlocks:
> + * - while updating information, the read lock is held.
> + * - while a page is picked up and being moved, the write lock is held.
> + *
> + * This logic works well enough because moving accounts is not a usual event.
> + */
> +
> +/*
> + * This rwlock is accessed only when MEM_CGROUP_ON_MOVE > 0.
> + */
> +#define NR_MOVE_ACCOUNT_LOCKS  (NR_CPUS)
> +#define move_account_hash(page) ((page_to_pfn(page) % NR_MOVE_ACCOUNT_LOCKS))
> +static rwlock_t move_account_locks[NR_MOVE_ACCOUNT_LOCKS];
> +
> +static rwlock_t *__mem_cgroup_account_move_lock(struct page *page)
> +{
> +       int hnum = move_account_hash(page);
> +
> +       return &move_account_locks[hnum];
> +}
> +
> +static void mem_cgroup_account_move_rlock(struct page *page)
> +{
> +       read_lock(__mem_cgroup_account_move_lock(page));
> +}
> +
> +static void mem_cgroup_account_move_runlock(struct page *page)
> +{
> +       read_unlock(__mem_cgroup_account_move_lock(page));
> +}
> +
> +static void mem_cgroup_account_move_wlock(struct page *page,
> +               unsigned long *flags)
> +{
> +       write_lock_irqsave(__mem_cgroup_account_move_lock(page), *flags);
> +}
> +
> +static void mem_cgroup_account_move_wunlock(struct page *page,
> +               unsigned long flags)
> +{
> +       write_unlock_irqrestore(__mem_cgroup_account_move_lock(page), flags);
> +}
> +
> +static void mem_cgroup_account_move_lock_init(void)
> +{
> +       int num;
> +
> +       for (num = 0; num < NR_MOVE_ACCOUNT_LOCKS; num++)
> +               rwlock_init(&move_account_locks[num]);
> +}
> +
>  /**
>  * mem_cgroup_print_oom_info: Called from OOM with tasklist_lock held in read mode.
>  * @memcg: The memory cgroup that went over limit
> @@ -1859,7 +1918,6 @@ void mem_cgroup_update_page_stat(struct page *page,
>        struct mem_cgroup *memcg;
>        struct page_cgroup *pc = lookup_page_cgroup(page);
>        bool need_unlock = false;
> -       unsigned long uninitialized_var(flags);
>
>        if (mem_cgroup_disabled())
>                return;
> @@ -1871,7 +1929,7 @@ void mem_cgroup_update_page_stat(struct page *page,
>        /* pc->mem_cgroup is unstable ? */
>        if (unlikely(mem_cgroup_stealed(memcg))) {
>                /* take a lock against to access pc->mem_cgroup */
> -               move_lock_page_cgroup(pc, &flags);
> +               mem_cgroup_account_move_rlock(page);
>                need_unlock = true;
>                memcg = pc->mem_cgroup;
>                if (!memcg || !PageCgroupUsed(pc))
> @@ -1894,7 +1952,7 @@ void mem_cgroup_update_page_stat(struct page *page,
>
>  out:
>        if (unlikely(need_unlock))
> -               move_unlock_page_cgroup(pc, &flags);
> +               mem_cgroup_account_move_runlock(page);
>        rcu_read_unlock();
>        return;
>  }
> @@ -2457,8 +2515,7 @@ static void __mem_cgroup_commit_charge(struct mem_cgroup *memcg,
>
>  #ifdef CONFIG_TRANSPARENT_HUGEPAGE
>
> -#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MOVE_LOCK) |\
> -                       (1 << PCG_MIGRATION))
> +#define PCGF_NOCOPY_AT_SPLIT ((1 << PCG_LOCK) | (1 << PCG_MIGRATION))
>  /*
>  * Because tail pages are not marked as "used", set it. We're under
>  * zone->lru_lock, 'splitting on pmd' and compound_lock.
> @@ -2537,7 +2594,7 @@ static int mem_cgroup_move_account(struct page *page,
>        if (!PageCgroupUsed(pc) || pc->mem_cgroup != from)
>                goto unlock;
>
> -       move_lock_page_cgroup(pc, &flags);
> +       mem_cgroup_account_move_wlock(page, &flags);
>
>        if (PageCgroupFileMapped(pc)) {
>                /* Update mapped_file data for mem_cgroup */
> @@ -2561,7 +2618,7 @@ static int mem_cgroup_move_account(struct page *page,
>         * guaranteed that "to" is never removed. So, we don't check rmdir
>         * status here.
>         */
> -       move_unlock_page_cgroup(pc, &flags);
> +       mem_cgroup_account_move_wunlock(page, flags);
>        ret = 0;
>  unlock:
>        unlock_page_cgroup(pc);
> @@ -4938,6 +4995,7 @@ mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont)
>                        INIT_WORK(&stock->work, drain_local_stock);
>                }
>                hotcpu_notifier(memcg_cpu_hotplug_callback, 0);
> +               mem_cgroup_account_move_lock_init();
>        } else {
>                parent = mem_cgroup_from_cont(cont->parent);
>                memcg->use_hierarchy = parent->use_hierarchy;
> --
> 1.7.4.1
>
>
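
To check my own reading of the two-level logic in the comment added to
memcontrol.c above (the per-cpu MEM_CGROUP_ON_MOVE check plus the hashed
rwlocks), here is a rough userspace model of the reader/writer split. All the
names are invented, and the per-cpu counters, the memory barriers from patch
2/7, and the IRQ handling of the real code are left out; it is only meant to
show who takes which side of the lock and when the lock is skipped entirely:

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NR_LOCKS 16                        /* the patch sizes this by NR_CPUS */

static pthread_rwlock_t locks[NR_LOCKS];
static atomic_int moving_account;          /* plays the role of MEM_CGROUP_ON_MOVE */
static long file_mapped_stat;              /* one example per-memcg counter */

static pthread_rwlock_t *lock_for(unsigned long pfn)
{
	return &locks[pfn % NR_LOCKS];
}

/* hot path: every FILE_MAPPED-style statistics update goes through here */
static void update_page_stat(unsigned long pfn, int delta)
{
	int locked = 0;

	if (atomic_load(&moving_account) > 0) {    /* the mem_cgroup_stealed() check */
		pthread_rwlock_rdlock(lock_for(pfn));
		locked = 1;
		/* the real code re-reads pc->mem_cgroup under the lock here */
	}
	file_mapped_stat += delta;                 /* per-cpu counters in the kernel */
	if (locked)
		pthread_rwlock_unlock(lock_for(pfn));
}

/* cold path: one page being moved between memcgs by task-move */
static void move_account(unsigned long pfn)
{
	atomic_fetch_add(&moving_account, 1);      /* "a move is in flight" */

	pthread_rwlock_wrlock(lock_for(pfn));
	/* move the charge and the stats owned by this page to the new memcg */
	pthread_rwlock_unlock(lock_for(pfn));

	atomic_fetch_sub(&moving_account, 1);
}

int main(void)
{
	for (int i = 0; i < NR_LOCKS; i++)
		pthread_rwlock_init(&locks[i], NULL);

	update_page_stat(42, 1);       /* no move in flight: no lock taken */
	move_account(42);              /* mover takes the write side */
	update_page_stat(42, -1);      /* would take the read side if racing */
	printf("file_mapped_stat = %ld\n", file_mapped_stat);
	return 0;
}

If task moves really are serialized system-wide (my question above), there is
at most one writer at a time, and the NR_CPUS-sized array mainly spreads out
the readers across buckets.
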

Thread overview: 50+ messages
2012-01-13  8:30 [RFC] [PATCH 0/7 v2] memcg: page_cgroup diet KAMEZAWA Hiroyuki
2012-01-13  8:32 ` [RFC] [PATCH 1/7 v2] memcg: remove unnecessary check in mem_cgroup_update_page_stat() KAMEZAWA Hiroyuki
2012-01-17 15:16   ` Michal Hocko
2012-01-17 23:55     ` KAMEZAWA Hiroyuki
2012-01-18 13:01       ` Michal Hocko
2012-01-19  2:18         ` KAMEZAWA Hiroyuki
2012-01-19 20:07         ` Ying Han
2012-01-20  0:48           ` KAMEZAWA Hiroyuki
2012-01-13  8:33 ` [RFC] [PATCH 2/7 v2] memcg: add memory barrier for checking account move KAMEZAWA Hiroyuki
2012-01-17 15:26   ` Michal Hocko
2012-01-18  0:06     ` KAMEZAWA Hiroyuki
2012-01-18 12:37       ` Michal Hocko
2012-01-19  2:17         ` KAMEZAWA Hiroyuki
2012-01-19  9:28           ` Michal Hocko
2012-01-19 23:57             ` KAMEZAWA Hiroyuki
2012-01-20 18:08           ` Ying Han
2012-01-23  9:04             ` Michal Hocko
2012-01-24  3:21               ` KAMEZAWA Hiroyuki
2012-01-24  8:49                 ` Michal Hocko
2012-01-24 19:04               ` Ying Han
2012-01-25 11:07                 ` Michal Hocko
2012-01-13  8:40 ` [RFC] [PATCH 3/7 v2] memcg: remove PCG_MOVE_LOCK flag from pc->flags KAMEZAWA Hiroyuki
2012-01-16 12:55   ` Kirill A. Shutemov
2012-01-17  0:22     ` KAMEZAWA Hiroyuki
2012-01-17 16:46   ` Michal Hocko
2012-01-18  0:12     ` KAMEZAWA Hiroyuki
2012-01-18 10:47       ` Michal Hocko
2012-01-18 23:53         ` KAMEZAWA Hiroyuki
2012-01-23 22:05           ` Ying Han
2012-01-24  4:59             ` KAMEZAWA Hiroyuki
2012-01-24  8:43             ` Michal Hocko
2012-01-25 23:07               ` Ying Han
2012-01-26  9:16                 ` Michal Hocko
2012-01-23 22:02   ` Ying Han [this message]
2012-01-24  4:47     ` KAMEZAWA Hiroyuki
2012-01-25 22:48       ` Ying Han
2012-01-13  8:41 ` [RFC] [PATCH 4/7 v2] memcg: new scheme to update per-memcg page stat accounting KAMEZAWA Hiroyuki
2012-01-18 16:45   ` Michal Hocko
2012-01-18 23:58     ` KAMEZAWA Hiroyuki
2012-01-26 19:01   ` Ying Han
2012-01-13  8:42 ` [RFC] [PATCH 5/7 v2] memcg: remove PCG_FILE_MAPPED KAMEZAWA Hiroyuki
2012-01-19 14:07   ` Michal Hocko
2012-01-26 19:10     ` Ying Han
2012-01-13  8:43 ` [RFC] [PATCH 6/7 v2] memcg: remove PCG_CACHE KAMEZAWA Hiroyuki
2012-01-13  8:45 ` [RFC] [PATCH 7/7 v2] memcg: make mem_cgroup_begin_update_stat to use global pcpu KAMEZAWA Hiroyuki
2012-01-19 14:47   ` Michal Hocko
2012-01-20  2:19     ` KAMEZAWA Hiroyuki
2012-01-20  8:38       ` Michal Hocko
2012-01-20  8:40   ` Greg Thelen
2012-01-24  3:18     ` KAMEZAWA Hiroyuki
