linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Michal Hocko <mhocko@suse.cz>
To: Hugh Dickins <hughd@google.com>
Cc: Li Zefan <lizefan@huawei.com>,
	Johannes Weiner <hannes@cmpxchg.org>, Tejun Heo <tj@kernel.org>,
	Andrew Morton <akpm@linux-foundation.org>,
	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>,
	linux-mm@kvack.org, cgroups@vger.kernel.org,
	linux-kernel@vger.kernel.org
Subject: Re: 3.13-rc breaks MEMCG_SWAP
Date: Mon, 16 Dec 2013 17:20:42 +0100	[thread overview]
Message-ID: <20131216162042.GC26797@dhcp22.suse.cz> (raw)
In-Reply-To: <alpine.LNX.2.00.1312160025200.2785@eggly.anvils>

On Mon 16-12-13 00:36:05, Hugh Dickins wrote:
[...]

OK, I went through the patch and it looks good except for suspicious
ctrl->lock handling in swap_cgroup_reassign (see below). I am just
suggesting to split it into 4 parts

* swap_cgroup_mutex -> swap_cgroup_lock
* swapon cleanup
* drop irqsave when taking ctrl->lock
* mem_cgroup_reparent_swap

but I will leave the split up to you. Just make sure that the fix is a
separate patch, please.

[...]
> --- 3.13-rc4/mm/page_cgroup.c	2013-02-18 15:58:34.000000000 -0800
> +++ linux/mm/page_cgroup.c	2013-12-15 14:34:36.312485960 -0800
> @@ -322,7 +322,8 @@ void __meminit pgdat_page_cgroup_init(st
>  
>  #ifdef CONFIG_MEMCG_SWAP
>  
> -static DEFINE_MUTEX(swap_cgroup_mutex);
> +static DEFINE_SPINLOCK(swap_cgroup_lock);
> +

This one is worth a separate patch IMO.

>  struct swap_cgroup_ctrl {
>  	struct page **map;
>  	unsigned long length;
> @@ -353,14 +354,11 @@ struct swap_cgroup {
>  /*
>   * allocate buffer for swap_cgroup.
>   */
> -static int swap_cgroup_prepare(int type)
> +static int swap_cgroup_prepare(struct swap_cgroup_ctrl *ctrl)
>  {
>  	struct page *page;
> -	struct swap_cgroup_ctrl *ctrl;
>  	unsigned long idx, max;
>  
> -	ctrl = &swap_cgroup_ctrl[type];
> -
>  	for (idx = 0; idx < ctrl->length; idx++) {
>  		page = alloc_page(GFP_KERNEL | __GFP_ZERO);
>  		if (!page)

This with swap_cgroup_swapon should be in a separate patch as a cleanup.

> @@ -407,18 +405,17 @@ unsigned short swap_cgroup_cmpxchg(swp_e
>  {
>  	struct swap_cgroup_ctrl *ctrl;
>  	struct swap_cgroup *sc;
> -	unsigned long flags;
>  	unsigned short retval;
>  
>  	sc = lookup_swap_cgroup(ent, &ctrl);
>  
> -	spin_lock_irqsave(&ctrl->lock, flags);
> +	spin_lock(&ctrl->lock);
>  	retval = sc->id;
>  	if (retval == old)
>  		sc->id = new;
>  	else
>  		retval = 0;
> -	spin_unlock_irqrestore(&ctrl->lock, flags);
> +	spin_unlock(&ctrl->lock);
>  	return retval;
>  }
>  
> @@ -435,14 +432,13 @@ unsigned short swap_cgroup_record(swp_en
>  	struct swap_cgroup_ctrl *ctrl;
>  	struct swap_cgroup *sc;
>  	unsigned short old;
> -	unsigned long flags;
>  
>  	sc = lookup_swap_cgroup(ent, &ctrl);
>  
> -	spin_lock_irqsave(&ctrl->lock, flags);
> +	spin_lock(&ctrl->lock);
>  	old = sc->id;
>  	sc->id = id;
> -	spin_unlock_irqrestore(&ctrl->lock, flags);
> +	spin_unlock(&ctrl->lock);
>  
>  	return old;
>  }

I would prefer these two in a separate patch as well. I have no idea why
these were IRQ aware as this was never needed AFAICS.
e9e58a4ec3b10 is not very specific...

> @@ -451,19 +447,60 @@ unsigned short swap_cgroup_record(swp_en
>   * lookup_swap_cgroup_id - lookup mem_cgroup id tied to swap entry
>   * @ent: swap entry to be looked up.
>   *
> - * Returns CSS ID of mem_cgroup at success. 0 at failure. (0 is invalid ID)
> + * Returns ID of mem_cgroup at success. 0 at failure. (0 is invalid ID)
>   */
>  unsigned short lookup_swap_cgroup_id(swp_entry_t ent)
>  {
>  	return lookup_swap_cgroup(ent, NULL)->id;
>  }
>  
> +/**
> + * swap_cgroup_reassign - assign all old entries to new (before old is freed).
> + * @old: id of emptied memcg whose entries are now to be reassigned
> + * @new: id of parent memcg to which those entries are to be assigned
> + *
> + * Returns number of entries reassigned, for debugging or for statistics.
> + */
> +long swap_cgroup_reassign(unsigned short old, unsigned short new)
> +{
> +	long reassigned = 0;
> +	int type;
> +
> +	for (type = 0; type < MAX_SWAPFILES; type++) {
> +		struct swap_cgroup_ctrl *ctrl = &swap_cgroup_ctrl[type];
> +		unsigned long idx;
> +
> +		for (idx = 0; idx < ACCESS_ONCE(ctrl->length); idx++) {
> +			struct swap_cgroup *sc, *scend;
> +
> +			spin_lock(&swap_cgroup_lock);
> +			if (idx >= ACCESS_ONCE(ctrl->length))
> +				goto unlock;
> +			sc = page_address(ctrl->map[idx]);
> +			for (scend = sc + SC_PER_PAGE; sc < scend; sc++) {
> +				if (sc->id != old)
> +					continue;

Is this safe? What prevents from race when id is set to old?

> +				spin_lock(&ctrl->lock);
> +				if (sc->id == old) {

Also it seems that compiler is free to optimize this test away, no?
You need ACCESS_ONCE here as well, I guess.

> +					sc->id = new;
> +					reassigned++;
> +				}
> +				spin_unlock(&ctrl->lock);
> +			}
> +unlock:
> +			spin_unlock(&swap_cgroup_lock);
> +			cond_resched();
> +		}
> +	}
> +	return reassigned;
> +}
> +
>  int swap_cgroup_swapon(int type, unsigned long max_pages)
>  {
>  	void *array;
>  	unsigned long array_size;
>  	unsigned long length;
> -	struct swap_cgroup_ctrl *ctrl;
> +	struct swap_cgroup_ctrl ctrl;
>  
>  	if (!do_swap_account)
>  		return 0;
[...]
-- 
Michal Hocko
SUSE Labs

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

  parent reply	other threads:[~2013-12-16 16:20 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-12-16  8:36 Hugh Dickins
2013-12-16  9:36 ` Li Zefan
2013-12-16  9:53   ` Michal Hocko
2013-12-16 10:40     ` Michal Hocko
2013-12-16 16:35       ` Tejun Heo
2013-12-16 17:19         ` Michal Hocko
2013-12-16 17:21           ` Tejun Heo
2013-12-17  1:41             ` Hugh Dickins
2013-12-17  3:13               ` Li Zefan
2013-12-17  7:09                 ` Hugh Dickins
2013-12-17 13:11                   ` Michal Hocko
2013-12-17 13:14                     ` Tejun Heo
2013-12-17 12:29                 ` Tejun Heo
2013-12-17 13:12                   ` Michal Hocko
2013-12-17 12:48                 ` Michal Hocko
2013-12-17 13:05                 ` Michal Hocko
2013-12-17 13:15                 ` [PATCH cgroup/for-3.13-fixes] cgroup: don't recycle cgroup id until all csses' have been destroyed Tejun Heo
2013-12-17 13:14               ` 3.13-rc breaks MEMCG_SWAP Michal Hocko
2013-12-16 16:41       ` Johannes Weiner
2013-12-16 17:15         ` Michal Hocko
2013-12-16 17:19           ` Tejun Heo
2013-12-16  9:49 ` Michal Hocko
2013-12-16 16:20 ` Michal Hocko [this message]
2013-12-17  2:26   ` Hugh Dickins
2013-12-17 10:25     ` Michal Hocko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20131216162042.GC26797@dhcp22.suse.cz \
    --to=mhocko@suse.cz \
    --cc=akpm@linux-foundation.org \
    --cc=cgroups@vger.kernel.org \
    --cc=hannes@cmpxchg.org \
    --cc=hughd@google.com \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=lizefan@huawei.com \
    --cc=tj@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox