linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed
From: Minchan Kim <minchan.kim@gmail.com>
To: Christoph Lameter <cl@linux-foundation.org>
Cc: npiggin@suse.de, linux-kernel@vger.kernel.org,
	linux-mm@kvack.org, Tejun Heo <tj@kernel.org>,
	Ingo Molnar <mingo@elte.hu>,
	KAMEZAWA Hiroyuki <kamezawa.hiroyu@jp.fujitsu.com>,
	"hugh.dickins@tiscali.co.uk" <hugh.dickins@tiscali.co.uk>
Subject: Re: Subject: [RFC MM] mmap_sem scaling: Use mutex and percpu counter instead
Date: Sat, 7 Nov 2009 00:41:40 +0900	[thread overview]
Message-ID: <28c262360911060741x3f7ab0a2k15be645e287e05ac@mail.gmail.com> (raw)
In-Reply-To: <alpine.DEB.1.10.0911051419320.24312@V090114053VZO-1>

Hi, Christoph.

How about changing 'mm_readers' to 'is_readers' to improve your
goal of scalability?
===
static inline int is_readers(struct mm_struct *mm)
{
       int cpu;
       int ret = 0;

       for_each_possible_cpu(cpu) {
               if (per_cpu(mm->rss->readers, cpu)) {
                      ret = 1;
                      break;
                 }
       }

       return ret;
}
===


On Fri, Nov 6, 2009 at 4:20 AM, Christoph Lameter
<cl@linux-foundation.org> wrote:
> From: Christoph Lameter <cl@linux-foundation.org>
> Subject: [RFC MM] mmap_sem scaling: Use mutex and percpu counter instead
>
> Instead of a rw semaphore use a mutex and a per cpu counter for the number
> of the current readers. read locking then becomes very cheap requiring only
> the increment of a per cpu counter.
>
> Write locking is more expensive since the writer must scan the percpu array
> and wait until all readers are complete. Since the readers are not holding
> semaphores we have no wait queue from which the writer could wakeup. In this
> draft we simply wait for one millisecond between scans of the percpu
> array. A different solution must be found there.
>
> Patch is on top of -next and the percpu counter patches that I posted
> yesterday. The patch adds another per cpu counter to the file and anon rss
> counters.
>
> Signed-off-by: Christoph Lameter <cl@linux-foundation.org>
>
> ---
>  include/linux/mm_types.h |   68 ++++++++++++++++++++++++++++++++++++++---------
>  mm/init-mm.c             |    2 -
>  2 files changed, 56 insertions(+), 14 deletions(-)
>
> Index: linux-2.6/include/linux/mm_types.h
> ===================================================================
> --- linux-2.6.orig/include/linux/mm_types.h     2009-11-05 13:03:11.000000000 -0600
> +++ linux-2.6/include/linux/mm_types.h  2009-11-05 13:06:31.000000000 -0600
> @@ -14,6 +14,7 @@
>  #include <linux/page-debug-flags.h>
>  #include <asm/page.h>
>  #include <asm/mmu.h>
> +#include <linux/percpu.h>
>
>  #ifndef AT_VECTOR_SIZE_ARCH
>  #define AT_VECTOR_SIZE_ARCH 0
> @@ -27,6 +28,7 @@ struct address_space;
>  struct mm_counter {
>        long file;
>        long anon;
> +       long readers;
>  };
>
>  /*
> @@ -214,7 +216,7 @@ struct mm_struct {
>        atomic_t mm_users;                      /* How many users with user space? */
>        atomic_t mm_count;                      /* How many references to "struct mm_struct" (users count as 1) */
>        int map_count;                          /* number of VMAs */
> -       struct rw_semaphore sem;
> +       struct mutex lock;
>        spinlock_t page_table_lock;             /* Protects page tables and some counters */
>
>        struct list_head mmlist;                /* List of maybe swapped mm's.  These are globally strung
> @@ -285,64 +287,104 @@ struct mm_struct {
>  #endif
>  };
>
> +static inline int mm_readers(struct mm_struct *mm)
> +{
> +       int cpu;
> +       int readers = 0;
> +
> +       for_each_possible_cpu(cpu)
> +               readers += per_cpu(mm->rss->readers, cpu);
> +
> +       return readers;
> +}
> +
>  static inline void mm_reader_lock(struct mm_struct *mm)
>  {
> -       down_read(&mm->sem);
> +redo:
> +       this_cpu_inc(mm->rss->readers);
> +       if (mutex_is_locked(&mm->lock)) {
> +               this_cpu_dec(mm->rss->readers);
> +               /* Need to wait till mutex is released */
> +               mutex_lock(&mm->lock);
> +               mutex_unlock(&mm->lock);
> +               goto redo;
> +       }
>  }
>
>  static inline void mm_reader_unlock(struct mm_struct *mm)
>  {
> -       up_read(&mm->sem);
> +       this_cpu_dec(mm->rss->readers);
>  }
>
>  static inline int mm_reader_trylock(struct mm_struct *mm)
>  {
> -       return down_read_trylock(&mm->sem);
> +       this_cpu_inc(mm->rss->readers);
> +       if (mutex_is_locked(&mm->lock)) {
> +               this_cpu_dec(mm->rss->readers);
> +               return 0;
> +       }
> +       return 1;
>  }
>
>  static inline void mm_writer_lock(struct mm_struct *mm)
>  {
> -       down_write(&mm->sem);
> +redo:
> +       mutex_lock(&mm->lock);
> +       if (mm_readers(mm) == 0)

We can change this.

if (!is_readers(mm))
         return;

> +               return;
> +
> +       mutex_unlock(&mm->lock);
> +       msleep(1);
> +       goto redo;
>  }
>
>  static inline void mm_writer_unlock(struct mm_struct *mm)
>  {
> -       up_write(&mm->sem);
> +       mutex_unlock(&mm->lock);
>  }
>
>  static inline int mm_writer_trylock(struct mm_struct *mm)
>  {
> -       return down_write_trylock(&mm->sem);
> +       if (!mutex_trylock(&mm->lock))
> +               goto fail;
> +
> +       if (mm_readers(mm) == 0)
> +               return 1;

if (!is_readers(mm))
        return 1;

> +
> +       mutex_unlock(&mm->lock);
> +fail:
> +       return 0;
>  }
>

-- 
Kind regards,
Minchan Kim

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href="mailto:dont@kvack.org">email@kvack.org</a>

  parent reply	other threads:[~2009-11-06 15:41 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-11-05 19:19 [RFC MM] Accessors for mm locking Christoph Lameter
2009-11-05 19:20 ` Subject: [RFC MM] mmap_sem scaling: Use mutex and percpu counter instead Christoph Lameter
2009-11-05 20:56   ` Andi Kleen
2009-11-05 21:03     ` Christoph Lameter
2009-11-06  7:39       ` Andi Kleen
2009-11-06 17:08         ` Christoph Lameter
2009-11-06 17:44           ` Andi Kleen
2009-11-06 17:54             ` Christoph Lameter
2009-11-10  6:21               ` KOSAKI Motohiro
2009-11-10  9:19                 ` Andi Kleen
2009-11-06 18:53         ` [RFC MM] mmap_sem scaling: only scan cpus used by an mm Christoph Lameter
2009-11-06 19:14           ` Andi Kleen
2009-11-06 19:45             ` Christoph Lameter
2009-11-05 22:05   ` [RFC MM] swap counters Christoph Lameter
2009-11-06  2:54     ` KAMEZAWA Hiroyuki
2009-11-06 15:41   ` Minchan Kim [this message]
2009-11-06 17:10     ` Subject: [RFC MM] mmap_sem scaling: Use mutex and percpu counter instead Christoph Lameter
2009-11-07  4:19       ` Minchan Kim
2009-11-10 20:20         ` Christoph Lameter
2009-11-05 20:52 ` [RFC MM] Accessors for mm locking Andi Kleen
2009-11-05 20:57   ` Christoph Lameter
2009-11-17  6:42 ` Zhang, Yanmin

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=28c262360911060741x3f7ab0a2k15be645e287e05ac@mail.gmail.com \
    --to=minchan.kim@gmail.com \
    --cc=cl@linux-foundation.org \
    --cc=hugh.dickins@tiscali.co.uk \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    --cc=mingo@elte.hu \
    --cc=npiggin@suse.de \
    --cc=tj@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox