Re: [RFC PATCH] Split mm_slot from ksm and huge_memory

linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed

From: Ni zhan Chen <nizhan.chen@gmail.com>
To: Bob Liu <lliubbo@gmail.com>
Cc: akpm@linux-foundation.org, linux-mm@kvack.org, mhocko@suse.cz,
	hughd@google.com, kamezawa.hiroyu@jp.fujitsu.com,
	aarcange@redhat.com, hannes@cmpxchg.org, rientjes@google.com
Subject: Re: [RFC PATCH] Split mm_slot from ksm and huge_memory
Date: Mon, 08 Oct 2012 16:51:44 +0800	[thread overview]
Message-ID: <50729420.5050001@gmail.com> (raw)
In-Reply-To: <1349685772-29359-1-git-send-email-lliubbo@gmail.com>

On 10/08/2012 04:42 PM, Bob Liu wrote:
> Both ksm and huge_memory do hash lookup from mm to mm_slot, but the
> mm_slot are mostly the same except ksm need a rmap_list.
>
> This patch split some duplicated part of mm_slot from ksm/huge_memory
> to a head file mm_slot.h, it make code cleaner and future work easier
> if someone need to lookup from mm to mm_slot also.
>
> To make things simple, they still have their own slab cache and
> mm_slots_hash table.

I also found this issue several months ago, looks reasonable to me.

> Not well tested, just see whether the way is right firstly.
>
> Signed-off-by: Bob Liu <lliubbo@gmail.com>
> ---
>   include/linux/mm_slot.h |   68 ++++++++++++++++++++++++++++++++
>   mm/huge_memory.c        |   98 ++++++++---------------------------------------
>   mm/ksm.c                |   86 +++++++++--------------------------------
>   3 files changed, 102 insertions(+), 150 deletions(-)
>   create mode 100644 include/linux/mm_slot.h
>
> diff --git a/include/linux/mm_slot.h b/include/linux/mm_slot.h
> new file mode 100644
> index 0000000..e1e3725
> --- /dev/null
> +++ b/include/linux/mm_slot.h
> @@ -0,0 +1,68 @@
> +#ifndef _LINUX_MM_SLOT_H
> +#define _LINUX_MM_SLOT_H
> +
> +#define MM_SLOTS_HASH_HEADS 1024
> +
> +/**
> + * struct mm_slot - hash lookup from mm to mm_slot
> + * @hash: hash collision list
> + * @mm_node: khugepaged scan list headed in khugepaged_scan.mm_head
> + * @mm: the mm that this information is valid for
> + * @private: rmaplist for ksm
> + */
> +struct mm_slot {
> +	struct hlist_node hash;
> +	struct list_head mm_list;
> +	struct mm_struct *mm;
> +	void *private;
> +};
> +
> +static inline struct mm_slot *alloc_mm_slot(struct kmem_cache *mm_slot_cache)
> +{
> +	if (!mm_slot_cache)	/* initialization failed */
> +		return NULL;
> +	return kmem_cache_zalloc(mm_slot_cache, GFP_KERNEL);
> +}
> +
> +static inline void free_mm_slot(struct mm_slot *mm_slot,
> +			struct kmem_cache *mm_slot_cache)
> +{
> +	kmem_cache_free(mm_slot_cache, mm_slot);
> +}
> +
> +static int __init mm_slots_hash_init(struct hlist_head **mm_slots_hash)
> +{
> +	*mm_slots_hash = kzalloc(MM_SLOTS_HASH_HEADS * sizeof(struct hlist_head),
> +			GFP_KERNEL);
> +	if (!(*mm_slots_hash))
> +		return -ENOMEM;
> +	return 0;
> +}
> +
> +static struct mm_slot *get_mm_slot(struct mm_struct *mm,
> +				struct hlist_head *mm_slots_hash)
> +{
> +	struct mm_slot *mm_slot;
> +	struct hlist_head *bucket;
> +	struct hlist_node *node;
> +
> +	bucket = &mm_slots_hash[((unsigned long)mm / sizeof(struct mm_struct))
> +				% MM_SLOTS_HASH_HEADS];
> +	hlist_for_each_entry(mm_slot, node, bucket, hash) {
> +		if (mm == mm_slot->mm)
> +			return mm_slot;
> +	}
> +	return NULL;
> +}
> +
> +static void insert_to_mm_slots_hash(struct mm_struct *mm,
> +		struct mm_slot *mm_slot, struct hlist_head *mm_slots_hash)
> +{
> +	struct hlist_head *bucket;
> +
> +	bucket = &mm_slots_hash[((unsigned long)mm / sizeof(struct mm_struct))
> +				% MM_SLOTS_HASH_HEADS];
> +	mm_slot->mm = mm;
> +	hlist_add_head(&mm_slot->hash, bucket);
> +}
> +#endif
> diff --git a/mm/huge_memory.c b/mm/huge_memory.c
> index 141dbb6..8ab58a0 100644
> --- a/mm/huge_memory.c
> +++ b/mm/huge_memory.c
> @@ -17,6 +17,7 @@
>   #include <linux/khugepaged.h>
>   #include <linux/freezer.h>
>   #include <linux/mman.h>
> +#include <linux/mm_slot.h>
>   #include <asm/tlb.h>
>   #include <asm/pgalloc.h>
>   #include "internal.h"
> @@ -57,27 +58,13 @@ static DECLARE_WAIT_QUEUE_HEAD(khugepaged_wait);
>   static unsigned int khugepaged_max_ptes_none __read_mostly = HPAGE_PMD_NR-1;
>   
>   static int khugepaged(void *none);
> -static int mm_slots_hash_init(void);
>   static int khugepaged_slab_init(void);
>   static void khugepaged_slab_free(void);
>   
> -#define MM_SLOTS_HASH_HEADS 1024
>   static struct hlist_head *mm_slots_hash __read_mostly;
>   static struct kmem_cache *mm_slot_cache __read_mostly;
>   
>   /**
> - * struct mm_slot - hash lookup from mm to mm_slot
> - * @hash: hash collision list
> - * @mm_node: khugepaged scan list headed in khugepaged_scan.mm_head
> - * @mm: the mm that this information is valid for
> - */
> -struct mm_slot {
> -	struct hlist_node hash;
> -	struct list_head mm_node;
> -	struct mm_struct *mm;
> -};
> -
> -/**
>    * struct khugepaged_scan - cursor for scanning
>    * @mm_head: the head of the mm list to scan
>    * @mm_slot: the current mm_slot we are scanning
> @@ -554,7 +541,7 @@ static int __init hugepage_init(void)
>   	if (err)
>   		goto out;
>   
> -	err = mm_slots_hash_init();
> +	err = mm_slots_hash_init(&mm_slots_hash);
>   	if (err) {
>   		khugepaged_slab_free();
>   		goto out;
> @@ -1550,61 +1537,6 @@ static void __init khugepaged_slab_free(void)
>   	mm_slot_cache = NULL;
>   }
>   
> -static inline struct mm_slot *alloc_mm_slot(void)
> -{
> -	if (!mm_slot_cache)	/* initialization failed */
> -		return NULL;
> -	return kmem_cache_zalloc(mm_slot_cache, GFP_KERNEL);
> -}
> -
> -static inline void free_mm_slot(struct mm_slot *mm_slot)
> -{
> -	kmem_cache_free(mm_slot_cache, mm_slot);
> -}
> -
> -static int __init mm_slots_hash_init(void)
> -{
> -	mm_slots_hash = kzalloc(MM_SLOTS_HASH_HEADS * sizeof(struct hlist_head),
> -				GFP_KERNEL);
> -	if (!mm_slots_hash)
> -		return -ENOMEM;
> -	return 0;
> -}
> -
> -#if 0
> -static void __init mm_slots_hash_free(void)
> -{
> -	kfree(mm_slots_hash);
> -	mm_slots_hash = NULL;
> -}
> -#endif
> -
> -static struct mm_slot *get_mm_slot(struct mm_struct *mm)
> -{
> -	struct mm_slot *mm_slot;
> -	struct hlist_head *bucket;
> -	struct hlist_node *node;
> -
> -	bucket = &mm_slots_hash[((unsigned long)mm / sizeof(struct mm_struct))
> -				% MM_SLOTS_HASH_HEADS];
> -	hlist_for_each_entry(mm_slot, node, bucket, hash) {
> -		if (mm == mm_slot->mm)
> -			return mm_slot;
> -	}
> -	return NULL;
> -}
> -
> -static void insert_to_mm_slots_hash(struct mm_struct *mm,
> -				    struct mm_slot *mm_slot)
> -{
> -	struct hlist_head *bucket;
> -
> -	bucket = &mm_slots_hash[((unsigned long)mm / sizeof(struct mm_struct))
> -				% MM_SLOTS_HASH_HEADS];
> -	mm_slot->mm = mm;
> -	hlist_add_head(&mm_slot->hash, bucket);
> -}
> -
>   static inline int khugepaged_test_exit(struct mm_struct *mm)
>   {
>   	return atomic_read(&mm->mm_users) == 0;
> @@ -1615,25 +1547,25 @@ int __khugepaged_enter(struct mm_struct *mm)
>   	struct mm_slot *mm_slot;
>   	int wakeup;
>   
> -	mm_slot = alloc_mm_slot();
> +	mm_slot = alloc_mm_slot(mm_slot_cache);
>   	if (!mm_slot)
>   		return -ENOMEM;
>   
>   	/* __khugepaged_exit() must not run from under us */
>   	VM_BUG_ON(khugepaged_test_exit(mm));
>   	if (unlikely(test_and_set_bit(MMF_VM_HUGEPAGE, &mm->flags))) {
> -		free_mm_slot(mm_slot);
> +		free_mm_slot(mm_slot, mm_slot_cache);
>   		return 0;
>   	}
>   
>   	spin_lock(&khugepaged_mm_lock);
> -	insert_to_mm_slots_hash(mm, mm_slot);
> +	insert_to_mm_slots_hash(mm, mm_slot, mm_slots_hash);
>   	/*
>   	 * Insert just behind the scanning cursor, to let the area settle
>   	 * down a little.
>   	 */
>   	wakeup = list_empty(&khugepaged_scan.mm_head);
> -	list_add_tail(&mm_slot->mm_node, &khugepaged_scan.mm_head);
> +	list_add_tail(&mm_slot->mm_list, &khugepaged_scan.mm_head);
>   	spin_unlock(&khugepaged_mm_lock);
>   
>   	atomic_inc(&mm->mm_count);
> @@ -1673,17 +1605,17 @@ void __khugepaged_exit(struct mm_struct *mm)
>   	int free = 0;
>   
>   	spin_lock(&khugepaged_mm_lock);
> -	mm_slot = get_mm_slot(mm);
> +	mm_slot = get_mm_slot(mm, mm_slots_hash);
>   	if (mm_slot && khugepaged_scan.mm_slot != mm_slot) {
>   		hlist_del(&mm_slot->hash);
> -		list_del(&mm_slot->mm_node);
> +		list_del(&mm_slot->mm_list);
>   		free = 1;
>   	}
>   	spin_unlock(&khugepaged_mm_lock);
>   
>   	if (free) {
>   		clear_bit(MMF_VM_HUGEPAGE, &mm->flags);
> -		free_mm_slot(mm_slot);
> +		free_mm_slot(mm_slot, mm_slot_cache);
>   		mmdrop(mm);
>   	} else if (mm_slot) {
>   		/*
> @@ -2089,7 +2021,7 @@ static void collect_mm_slot(struct mm_slot *mm_slot)
>   	if (khugepaged_test_exit(mm)) {
>   		/* free mm_slot */
>   		hlist_del(&mm_slot->hash);
> -		list_del(&mm_slot->mm_node);
> +		list_del(&mm_slot->mm_list);
>   
>   		/*
>   		 * Not strictly needed because the mm exited already.
> @@ -2098,7 +2030,7 @@ static void collect_mm_slot(struct mm_slot *mm_slot)
>   		 */
>   
>   		/* khugepaged_mm_lock actually not necessary for the below */
> -		free_mm_slot(mm_slot);
> +		free_mm_slot(mm_slot, mm_slot_cache);
>   		mmdrop(mm);
>   	}
>   }
> @@ -2120,7 +2052,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages,
>   		mm_slot = khugepaged_scan.mm_slot;
>   	else {
>   		mm_slot = list_entry(khugepaged_scan.mm_head.next,
> -				     struct mm_slot, mm_node);
> +				     struct mm_slot, mm_list);
>   		khugepaged_scan.address = 0;
>   		khugepaged_scan.mm_slot = mm_slot;
>   	}
> @@ -2209,10 +2141,10 @@ breakouterloop_mmap_sem:
>   		 * khugepaged runs here, khugepaged_exit will find
>   		 * mm_slot not pointing to the exiting mm.
>   		 */
> -		if (mm_slot->mm_node.next != &khugepaged_scan.mm_head) {
> +		if (mm_slot->mm_list.next != &khugepaged_scan.mm_head) {
>   			khugepaged_scan.mm_slot = list_entry(
> -				mm_slot->mm_node.next,
> -				struct mm_slot, mm_node);
> +				mm_slot->mm_list.next,
> +				struct mm_slot, mm_list);
>   			khugepaged_scan.address = 0;
>   		} else {
>   			khugepaged_scan.mm_slot = NULL;
> diff --git a/mm/ksm.c b/mm/ksm.c
> index 47c8853..37b73c6 100644
> --- a/mm/ksm.c
> +++ b/mm/ksm.c
> @@ -31,6 +31,7 @@
>   #include <linux/rbtree.h>
>   #include <linux/memory.h>
>   #include <linux/mmu_notifier.h>
> +#include <linux/mm_slot.h>
>   #include <linux/swap.h>
>   #include <linux/ksm.h>
>   #include <linux/hash.h>
> @@ -79,21 +80,6 @@
>    *    it is secured in the stable tree.  (When we scan a new page, we first
>    *    compare it against the stable tree, and then against the unstable tree.)
>    */
> -
> -/**
> - * struct mm_slot - ksm information per mm that is being scanned
> - * @link: link to the mm_slots hash list
> - * @mm_list: link into the mm_slots list, rooted in ksm_mm_head
> - * @rmap_list: head for this mm_slot's singly-linked list of rmap_items
> - * @mm: the mm that this information is valid for
> - */
> -struct mm_slot {
> -	struct hlist_node link;
> -	struct list_head mm_list;
> -	struct rmap_item *rmap_list;
> -	struct mm_struct *mm;
> -};
> -
>   /**
>    * struct ksm_scan - cursor for scanning
>    * @mm_slot: the current mm_slot we are scanning
> @@ -156,9 +142,7 @@ struct rmap_item {
>   static struct rb_root root_stable_tree = RB_ROOT;
>   static struct rb_root root_unstable_tree = RB_ROOT;
>   
> -#define MM_SLOTS_HASH_SHIFT 10
> -#define MM_SLOTS_HASH_HEADS (1 << MM_SLOTS_HASH_SHIFT)
> -static struct hlist_head mm_slots_hash[MM_SLOTS_HASH_HEADS];
> +static struct hlist_head *mm_slots_hash;
>   
>   static struct mm_slot ksm_mm_head = {
>   	.mm_list = LIST_HEAD_INIT(ksm_mm_head.mm_list),
> @@ -261,42 +245,6 @@ static inline void free_stable_node(struct stable_node *stable_node)
>   	kmem_cache_free(stable_node_cache, stable_node);
>   }
>   
> -static inline struct mm_slot *alloc_mm_slot(void)
> -{
> -	if (!mm_slot_cache)	/* initialization failed */
> -		return NULL;
> -	return kmem_cache_zalloc(mm_slot_cache, GFP_KERNEL);
> -}
> -
> -static inline void free_mm_slot(struct mm_slot *mm_slot)
> -{
> -	kmem_cache_free(mm_slot_cache, mm_slot);
> -}
> -
> -static struct mm_slot *get_mm_slot(struct mm_struct *mm)
> -{
> -	struct mm_slot *mm_slot;
> -	struct hlist_head *bucket;
> -	struct hlist_node *node;
> -
> -	bucket = &mm_slots_hash[hash_ptr(mm, MM_SLOTS_HASH_SHIFT)];
> -	hlist_for_each_entry(mm_slot, node, bucket, link) {
> -		if (mm == mm_slot->mm)
> -			return mm_slot;
> -	}
> -	return NULL;
> -}
> -
> -static void insert_to_mm_slots_hash(struct mm_struct *mm,
> -				    struct mm_slot *mm_slot)
> -{
> -	struct hlist_head *bucket;
> -
> -	bucket = &mm_slots_hash[hash_ptr(mm, MM_SLOTS_HASH_SHIFT)];
> -	mm_slot->mm = mm;
> -	hlist_add_head(&mm_slot->link, bucket);
> -}
> -
>   static inline int in_stable_tree(struct rmap_item *rmap_item)
>   {
>   	return rmap_item->address & STABLE_FLAG;
> @@ -641,17 +589,17 @@ static int unmerge_and_remove_all_rmap_items(void)
>   				goto error;
>   		}
>   
> -		remove_trailing_rmap_items(mm_slot, &mm_slot->rmap_list);
> +		remove_trailing_rmap_items(mm_slot, (struct rmap_item **)&mm_slot->private);
>   
>   		spin_lock(&ksm_mmlist_lock);
>   		ksm_scan.mm_slot = list_entry(mm_slot->mm_list.next,
>   						struct mm_slot, mm_list);
>   		if (ksm_test_exit(mm)) {
> -			hlist_del(&mm_slot->link);
> +			hlist_del(&mm_slot->hash);
>   			list_del(&mm_slot->mm_list);
>   			spin_unlock(&ksm_mmlist_lock);
>   
> -			free_mm_slot(mm_slot);
> +			free_mm_slot(mm_slot, mm_slot_cache);
>   			clear_bit(MMF_VM_MERGEABLE, &mm->flags);
>   			up_read(&mm->mmap_sem);
>   			mmdrop(mm);
> @@ -1314,7 +1262,7 @@ static struct rmap_item *scan_get_next_rmap_item(struct page **page)
>   			return NULL;
>   next_mm:
>   		ksm_scan.address = 0;
> -		ksm_scan.rmap_list = &slot->rmap_list;
> +		ksm_scan.rmap_list = (struct rmap_item **)&slot->private;
>   	}
>   
>   	mm = slot->mm;
> @@ -1364,7 +1312,7 @@ next_mm:
>   
>   	if (ksm_test_exit(mm)) {
>   		ksm_scan.address = 0;
> -		ksm_scan.rmap_list = &slot->rmap_list;
> +		ksm_scan.rmap_list = (struct rmap_item **)&slot->private;
>   	}
>   	/*
>   	 * Nuke all the rmap_items that are above this current rmap:
> @@ -1385,11 +1333,11 @@ next_mm:
>   		 * or when all VM_MERGEABLE areas have been unmapped (and
>   		 * mmap_sem then protects against race with MADV_MERGEABLE).
>   		 */
> -		hlist_del(&slot->link);
> +		hlist_del(&slot->hash);
>   		list_del(&slot->mm_list);
>   		spin_unlock(&ksm_mmlist_lock);
>   
> -		free_mm_slot(slot);
> +		free_mm_slot(slot, mm_slot_cache);
>   		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
>   		up_read(&mm->mmap_sem);
>   		mmdrop(mm);
> @@ -1504,7 +1452,7 @@ int __ksm_enter(struct mm_struct *mm)
>   	struct mm_slot *mm_slot;
>   	int needs_wakeup;
>   
> -	mm_slot = alloc_mm_slot();
> +	mm_slot = alloc_mm_slot(mm_slot_cache);
>   	if (!mm_slot)
>   		return -ENOMEM;
>   
> @@ -1512,7 +1460,7 @@ int __ksm_enter(struct mm_struct *mm)
>   	needs_wakeup = list_empty(&ksm_mm_head.mm_list);
>   
>   	spin_lock(&ksm_mmlist_lock);
> -	insert_to_mm_slots_hash(mm, mm_slot);
> +	insert_to_mm_slots_hash(mm, mm_slot, mm_slots_hash);
>   	/*
>   	 * Insert just behind the scanning cursor, to let the area settle
>   	 * down a little; when fork is followed by immediate exec, we don't
> @@ -1545,10 +1493,10 @@ void __ksm_exit(struct mm_struct *mm)
>   	 */
>   
>   	spin_lock(&ksm_mmlist_lock);
> -	mm_slot = get_mm_slot(mm);
> +	mm_slot = get_mm_slot(mm, mm_slots_hash);
>   	if (mm_slot && ksm_scan.mm_slot != mm_slot) {
> -		if (!mm_slot->rmap_list) {
> -			hlist_del(&mm_slot->link);
> +		if (!mm_slot->private) {
> +			hlist_del(&mm_slot->hash);
>   			list_del(&mm_slot->mm_list);
>   			easy_to_free = 1;
>   		} else {
> @@ -1559,7 +1507,7 @@ void __ksm_exit(struct mm_struct *mm)
>   	spin_unlock(&ksm_mmlist_lock);
>   
>   	if (easy_to_free) {
> -		free_mm_slot(mm_slot);
> +		free_mm_slot(mm_slot, mm_slot_cache);
>   		clear_bit(MMF_VM_MERGEABLE, &mm->flags);
>   		mmdrop(mm);
>   	} else if (mm_slot) {
> @@ -1998,6 +1946,10 @@ static int __init ksm_init(void)
>   	if (err)
>   		goto out;
>   
> +	err = mm_slots_hash_init(&mm_slots_hash);
> +	if (err)
> +		goto out_free;
> +
>   	ksm_thread = kthread_run(ksm_scan_thread, NULL, "ksmd");
>   	if (IS_ERR(ksm_thread)) {
>   		printk(KERN_ERR "ksm: creating kthread failed\n");

--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org.  For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>

next prev parent reply	other threads:[~2012-10-08  8:51 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-10-08  8:42 Bob Liu
2012-10-08  8:51 ` Ni zhan Chen [this message]
2012-10-09 20:48 ` Andrew Morton
2012-10-11  7:45   ` Bob Liu

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=50729420.5050001@gmail.com \
    --to=nizhan.chen@gmail.com \
    --cc=aarcange@redhat.com \
    --cc=akpm@linux-foundation.org \
    --cc=hannes@cmpxchg.org \
    --cc=hughd@google.com \
    --cc=kamezawa.hiroyu@jp.fujitsu.com \
    --cc=linux-mm@kvack.org \
    --cc=lliubbo@gmail.com \
    --cc=mhocko@suse.cz \
    --cc=rientjes@google.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox