Re: [PATCH v2 02/11] mm/hmm: use reference counting for HMM struct v2

linux-mm.kvack.org archive mirror
 help / color / mirror / Atom feed

From: Ira Weiny <ira.weiny@intel.com>
To: jglisse@redhat.com
Cc: linux-mm@kvack.org, linux-kernel@vger.kernel.org,
	John Hubbard <jhubbard@nvidia.com>,
	Andrew Morton <akpm@linux-foundation.org>,
	Dan Williams <dan.j.williams@intel.com>
Subject: Re: [PATCH v2 02/11] mm/hmm: use reference counting for HMM struct v2
Date: Thu, 28 Mar 2019 04:07:20 -0700	[thread overview]
Message-ID: <20190328110719.GA31324@iweiny-DESK2.sc.intel.com> (raw)
In-Reply-To: <20190325144011.10560-3-jglisse@redhat.com>

On Mon, Mar 25, 2019 at 10:40:02AM -0400, Jerome Glisse wrote:
> From: Jérôme Glisse <jglisse@redhat.com>
> 
> Every time i read the code to check that the HMM structure does not
> vanish before it should thanks to the many lock protecting its removal
> i get a headache. Switch to reference counting instead it is much
> easier to follow and harder to break. This also remove some code that
> is no longer needed with refcounting.
> 
> Changes since v1:
>     - removed bunch of useless check (if API is use with bogus argument
>       better to fail loudly so user fix their code)
>     - s/hmm_get/mm_get_hmm/
> 
> Signed-off-by: Jérôme Glisse <jglisse@redhat.com>
> Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>
> Cc: John Hubbard <jhubbard@nvidia.com>
> Cc: Andrew Morton <akpm@linux-foundation.org>
> Cc: Dan Williams <dan.j.williams@intel.com>
> ---
>  include/linux/hmm.h |   2 +
>  mm/hmm.c            | 170 ++++++++++++++++++++++++++++----------------
>  2 files changed, 112 insertions(+), 60 deletions(-)
> 
> diff --git a/include/linux/hmm.h b/include/linux/hmm.h
> index ad50b7b4f141..716fc61fa6d4 100644
> --- a/include/linux/hmm.h
> +++ b/include/linux/hmm.h
> @@ -131,6 +131,7 @@ enum hmm_pfn_value_e {
>  /*
>   * struct hmm_range - track invalidation lock on virtual address range
>   *
> + * @hmm: the core HMM structure this range is active against
>   * @vma: the vm area struct for the range
>   * @list: all range lock are on a list
>   * @start: range virtual start address (inclusive)
> @@ -142,6 +143,7 @@ enum hmm_pfn_value_e {
>   * @valid: pfns array did not change since it has been fill by an HMM function
>   */
>  struct hmm_range {
> +	struct hmm		*hmm;
>  	struct vm_area_struct	*vma;
>  	struct list_head	list;
>  	unsigned long		start;
> diff --git a/mm/hmm.c b/mm/hmm.c
> index fe1cd87e49ac..306e57f7cded 100644
> --- a/mm/hmm.c
> +++ b/mm/hmm.c
> @@ -50,6 +50,7 @@ static const struct mmu_notifier_ops hmm_mmu_notifier_ops;
>   */
>  struct hmm {
>  	struct mm_struct	*mm;
> +	struct kref		kref;
>  	spinlock_t		lock;
>  	struct list_head	ranges;
>  	struct list_head	mirrors;
> @@ -57,6 +58,16 @@ struct hmm {
>  	struct rw_semaphore	mirrors_sem;
>  };
>  
> +static inline struct hmm *mm_get_hmm(struct mm_struct *mm)
> +{
> +	struct hmm *hmm = READ_ONCE(mm->hmm);
> +
> +	if (hmm && kref_get_unless_zero(&hmm->kref))
> +		return hmm;
> +
> +	return NULL;
> +}
> +
>  /*
>   * hmm_register - register HMM against an mm (HMM internal)
>   *
> @@ -67,14 +78,9 @@ struct hmm {
>   */
>  static struct hmm *hmm_register(struct mm_struct *mm)
>  {
> -	struct hmm *hmm = READ_ONCE(mm->hmm);
> +	struct hmm *hmm = mm_get_hmm(mm);

FWIW: having hmm_register == "hmm get" is a bit confusing...

Ira

>  	bool cleanup = false;
>  
> -	/*
> -	 * The hmm struct can only be freed once the mm_struct goes away,
> -	 * hence we should always have pre-allocated an new hmm struct
> -	 * above.
> -	 */
>  	if (hmm)
>  		return hmm;
>  
> @@ -86,6 +92,7 @@ static struct hmm *hmm_register(struct mm_struct *mm)
>  	hmm->mmu_notifier.ops = NULL;
>  	INIT_LIST_HEAD(&hmm->ranges);
>  	spin_lock_init(&hmm->lock);
> +	kref_init(&hmm->kref);
>  	hmm->mm = mm;
>  
>  	spin_lock(&mm->page_table_lock);
> @@ -106,7 +113,7 @@ static struct hmm *hmm_register(struct mm_struct *mm)
>  	if (__mmu_notifier_register(&hmm->mmu_notifier, mm))
>  		goto error_mm;
>  
> -	return mm->hmm;
> +	return hmm;
>  
>  error_mm:
>  	spin_lock(&mm->page_table_lock);
> @@ -118,9 +125,41 @@ static struct hmm *hmm_register(struct mm_struct *mm)
>  	return NULL;
>  }
>  
> +static void hmm_free(struct kref *kref)
> +{
> +	struct hmm *hmm = container_of(kref, struct hmm, kref);
> +	struct mm_struct *mm = hmm->mm;
> +
> +	mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm);
> +
> +	spin_lock(&mm->page_table_lock);
> +	if (mm->hmm == hmm)
> +		mm->hmm = NULL;
> +	spin_unlock(&mm->page_table_lock);
> +
> +	kfree(hmm);
> +}
> +
> +static inline void hmm_put(struct hmm *hmm)
> +{
> +	kref_put(&hmm->kref, hmm_free);
> +}
> +
>  void hmm_mm_destroy(struct mm_struct *mm)
>  {
> -	kfree(mm->hmm);
> +	struct hmm *hmm;
> +
> +	spin_lock(&mm->page_table_lock);
> +	hmm = mm_get_hmm(mm);
> +	mm->hmm = NULL;
> +	if (hmm) {
> +		hmm->mm = NULL;
> +		spin_unlock(&mm->page_table_lock);
> +		hmm_put(hmm);
> +		return;
> +	}
> +
> +	spin_unlock(&mm->page_table_lock);
>  }
>  
>  static int hmm_invalidate_range(struct hmm *hmm, bool device,
> @@ -165,7 +204,7 @@ static int hmm_invalidate_range(struct hmm *hmm, bool device,
>  static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm)
>  {
>  	struct hmm_mirror *mirror;
> -	struct hmm *hmm = mm->hmm;
> +	struct hmm *hmm = mm_get_hmm(mm);
>  
>  	down_write(&hmm->mirrors_sem);
>  	mirror = list_first_entry_or_null(&hmm->mirrors, struct hmm_mirror,
> @@ -186,13 +225,16 @@ static void hmm_release(struct mmu_notifier *mn, struct mm_struct *mm)
>  						  struct hmm_mirror, list);
>  	}
>  	up_write(&hmm->mirrors_sem);
> +
> +	hmm_put(hmm);
>  }
>  
>  static int hmm_invalidate_range_start(struct mmu_notifier *mn,
>  			const struct mmu_notifier_range *range)
>  {
> +	struct hmm *hmm = mm_get_hmm(range->mm);
>  	struct hmm_update update;
> -	struct hmm *hmm = range->mm->hmm;
> +	int ret;
>  
>  	VM_BUG_ON(!hmm);
>  
> @@ -200,14 +242,16 @@ static int hmm_invalidate_range_start(struct mmu_notifier *mn,
>  	update.end = range->end;
>  	update.event = HMM_UPDATE_INVALIDATE;
>  	update.blockable = range->blockable;
> -	return hmm_invalidate_range(hmm, true, &update);
> +	ret = hmm_invalidate_range(hmm, true, &update);
> +	hmm_put(hmm);
> +	return ret;
>  }
>  
>  static void hmm_invalidate_range_end(struct mmu_notifier *mn,
>  			const struct mmu_notifier_range *range)
>  {
> +	struct hmm *hmm = mm_get_hmm(range->mm);
>  	struct hmm_update update;
> -	struct hmm *hmm = range->mm->hmm;
>  
>  	VM_BUG_ON(!hmm);
>  
> @@ -216,6 +260,7 @@ static void hmm_invalidate_range_end(struct mmu_notifier *mn,
>  	update.event = HMM_UPDATE_INVALIDATE;
>  	update.blockable = true;
>  	hmm_invalidate_range(hmm, false, &update);
> +	hmm_put(hmm);
>  }
>  
>  static const struct mmu_notifier_ops hmm_mmu_notifier_ops = {
> @@ -241,24 +286,13 @@ int hmm_mirror_register(struct hmm_mirror *mirror, struct mm_struct *mm)
>  	if (!mm || !mirror || !mirror->ops)
>  		return -EINVAL;
>  
> -again:
>  	mirror->hmm = hmm_register(mm);
>  	if (!mirror->hmm)
>  		return -ENOMEM;
>  
>  	down_write(&mirror->hmm->mirrors_sem);
> -	if (mirror->hmm->mm == NULL) {
> -		/*
> -		 * A racing hmm_mirror_unregister() is about to destroy the hmm
> -		 * struct. Try again to allocate a new one.
> -		 */
> -		up_write(&mirror->hmm->mirrors_sem);
> -		mirror->hmm = NULL;
> -		goto again;
> -	} else {
> -		list_add(&mirror->list, &mirror->hmm->mirrors);
> -		up_write(&mirror->hmm->mirrors_sem);
> -	}
> +	list_add(&mirror->list, &mirror->hmm->mirrors);
> +	up_write(&mirror->hmm->mirrors_sem);
>  
>  	return 0;
>  }
> @@ -273,33 +307,18 @@ EXPORT_SYMBOL(hmm_mirror_register);
>   */
>  void hmm_mirror_unregister(struct hmm_mirror *mirror)
>  {
> -	bool should_unregister = false;
> -	struct mm_struct *mm;
> -	struct hmm *hmm;
> +	struct hmm *hmm = READ_ONCE(mirror->hmm);
>  
> -	if (mirror->hmm == NULL)
> +	if (hmm == NULL)
>  		return;
>  
> -	hmm = mirror->hmm;
>  	down_write(&hmm->mirrors_sem);
>  	list_del_init(&mirror->list);
> -	should_unregister = list_empty(&hmm->mirrors);
> +	/* To protect us against double unregister ... */
>  	mirror->hmm = NULL;
> -	mm = hmm->mm;
> -	hmm->mm = NULL;
>  	up_write(&hmm->mirrors_sem);
>  
> -	if (!should_unregister || mm == NULL)
> -		return;
> -
> -	mmu_notifier_unregister_no_release(&hmm->mmu_notifier, mm);
> -
> -	spin_lock(&mm->page_table_lock);
> -	if (mm->hmm == hmm)
> -		mm->hmm = NULL;
> -	spin_unlock(&mm->page_table_lock);
> -
> -	kfree(hmm);
> +	hmm_put(hmm);
>  }
>  EXPORT_SYMBOL(hmm_mirror_unregister);
>  
> @@ -708,6 +727,8 @@ int hmm_vma_get_pfns(struct hmm_range *range)
>  	struct mm_walk mm_walk;
>  	struct hmm *hmm;
>  
> +	range->hmm = NULL;
> +
>  	/* Sanity check, this really should not happen ! */
>  	if (range->start < vma->vm_start || range->start >= vma->vm_end)
>  		return -EINVAL;
> @@ -717,14 +738,18 @@ int hmm_vma_get_pfns(struct hmm_range *range)
>  	hmm = hmm_register(vma->vm_mm);
>  	if (!hmm)
>  		return -ENOMEM;
> -	/* Caller must have registered a mirror, via hmm_mirror_register() ! */
> -	if (!hmm->mmu_notifier.ops)
> +
> +	/* Check if hmm_mm_destroy() was call. */
> +	if (hmm->mm == NULL) {
> +		hmm_put(hmm);
>  		return -EINVAL;
> +	}
>  
>  	/* FIXME support hugetlb fs */
>  	if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL) ||
>  			vma_is_dax(vma)) {
>  		hmm_pfns_special(range);
> +		hmm_put(hmm);
>  		return -EINVAL;
>  	}
>  
> @@ -736,6 +761,7 @@ int hmm_vma_get_pfns(struct hmm_range *range)
>  		 * operations such has atomic access would not work.
>  		 */
>  		hmm_pfns_clear(range, range->pfns, range->start, range->end);
> +		hmm_put(hmm);
>  		return -EPERM;
>  	}
>  
> @@ -758,6 +784,12 @@ int hmm_vma_get_pfns(struct hmm_range *range)
>  	mm_walk.pte_hole = hmm_vma_walk_hole;
>  
>  	walk_page_range(range->start, range->end, &mm_walk);
> +	/*
> +	 * Transfer hmm reference to the range struct it will be drop inside
> +	 * the hmm_vma_range_done() function (which _must_ be call if this
> +	 * function return 0).
> +	 */
> +	range->hmm = hmm;
>  	return 0;
>  }
>  EXPORT_SYMBOL(hmm_vma_get_pfns);
> @@ -802,25 +834,27 @@ EXPORT_SYMBOL(hmm_vma_get_pfns);
>   */
>  bool hmm_vma_range_done(struct hmm_range *range)
>  {
> -	unsigned long npages = (range->end - range->start) >> PAGE_SHIFT;
> -	struct hmm *hmm;
> +	bool ret = false;
>  
> -	if (range->end <= range->start) {
> +	/* Sanity check this really should not happen. */
> +	if (range->hmm == NULL || range->end <= range->start) {
>  		BUG();
>  		return false;
>  	}
>  
> -	hmm = hmm_register(range->vma->vm_mm);
> -	if (!hmm) {
> -		memset(range->pfns, 0, sizeof(*range->pfns) * npages);
> -		return false;
> -	}
> -
> -	spin_lock(&hmm->lock);
> +	spin_lock(&range->hmm->lock);
>  	list_del_rcu(&range->list);
> -	spin_unlock(&hmm->lock);
> +	ret = range->valid;
> +	spin_unlock(&range->hmm->lock);
>  
> -	return range->valid;
> +	/* Is the mm still alive ? */
> +	if (range->hmm->mm == NULL)
> +		ret = false;
> +
> +	/* Drop reference taken by hmm_vma_fault() or hmm_vma_get_pfns() */
> +	hmm_put(range->hmm);
> +	range->hmm = NULL;
> +	return ret;
>  }
>  EXPORT_SYMBOL(hmm_vma_range_done);
>  
> @@ -880,6 +914,8 @@ int hmm_vma_fault(struct hmm_range *range, bool block)
>  	struct hmm *hmm;
>  	int ret;
>  
> +	range->hmm = NULL;
> +
>  	/* Sanity check, this really should not happen ! */
>  	if (range->start < vma->vm_start || range->start >= vma->vm_end)
>  		return -EINVAL;
> @@ -891,14 +927,18 @@ int hmm_vma_fault(struct hmm_range *range, bool block)
>  		hmm_pfns_clear(range, range->pfns, range->start, range->end);
>  		return -ENOMEM;
>  	}
> -	/* Caller must have registered a mirror using hmm_mirror_register() */
> -	if (!hmm->mmu_notifier.ops)
> +
> +	/* Check if hmm_mm_destroy() was call. */
> +	if (hmm->mm == NULL) {
> +		hmm_put(hmm);
>  		return -EINVAL;
> +	}
>  
>  	/* FIXME support hugetlb fs */
>  	if (is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_SPECIAL) ||
>  			vma_is_dax(vma)) {
>  		hmm_pfns_special(range);
> +		hmm_put(hmm);
>  		return -EINVAL;
>  	}
>  
> @@ -910,6 +950,7 @@ int hmm_vma_fault(struct hmm_range *range, bool block)
>  		 * operations such has atomic access would not work.
>  		 */
>  		hmm_pfns_clear(range, range->pfns, range->start, range->end);
> +		hmm_put(hmm);
>  		return -EPERM;
>  	}
>  
> @@ -945,7 +986,16 @@ int hmm_vma_fault(struct hmm_range *range, bool block)
>  		hmm_pfns_clear(range, &range->pfns[i], hmm_vma_walk.last,
>  			       range->end);
>  		hmm_vma_range_done(range);
> +		hmm_put(hmm);
> +	} else {
> +		/*
> +		 * Transfer hmm reference to the range struct it will be drop
> +		 * inside the hmm_vma_range_done() function (which _must_ be
> +		 * call if this function return 0).
> +		 */
> +		range->hmm = hmm;
>  	}
> +
>  	return ret;
>  }
>  EXPORT_SYMBOL(hmm_vma_fault);
> -- 
> 2.17.2
>

next prev parent reply	other threads:[~2019-03-28 19:08 UTC|newest]

Thread overview: 68+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-03-25 14:40 [PATCH v2 00/11] Improve HMM driver API v2 jglisse
2019-03-25 14:40 ` [PATCH v2 01/11] mm/hmm: select mmu notifier when selecting HMM jglisse
2019-03-28 20:33   ` John Hubbard
2019-03-29 21:15     ` Jerome Glisse
2019-03-29 21:42       ` John Hubbard
2019-03-25 14:40 ` [PATCH v2 02/11] mm/hmm: use reference counting for HMM struct v2 jglisse
2019-03-28 11:07   ` Ira Weiny [this message]
2019-03-28 19:11     ` Jerome Glisse
2019-03-28 20:43       ` John Hubbard
2019-03-28 21:21         ` Jerome Glisse
2019-03-29  0:39           ` John Hubbard
2019-03-28 16:57             ` Ira Weiny
2019-03-29  1:00               ` Jerome Glisse
2019-03-29  1:18                 ` John Hubbard
2019-03-29  1:50                   ` Jerome Glisse
2019-03-28 18:21                     ` Ira Weiny
2019-03-29  2:25                       ` Jerome Glisse
2019-03-29 20:07                         ` John Hubbard
2019-03-29  2:11                     ` John Hubbard
2019-03-29  2:22                       ` Jerome Glisse
2019-03-25 14:40 ` [PATCH v2 03/11] mm/hmm: do not erase snapshot when a range is invalidated jglisse
2019-03-25 14:40 ` [PATCH v2 04/11] mm/hmm: improve and rename hmm_vma_get_pfns() to hmm_range_snapshot() v2 jglisse
2019-03-28 13:30   ` Ira Weiny
2019-03-25 14:40 ` [PATCH v2 05/11] mm/hmm: improve and rename hmm_vma_fault() to hmm_range_fault() v2 jglisse
2019-03-28 13:43   ` Ira Weiny
2019-03-28 22:03     ` Jerome Glisse
2019-03-25 14:40 ` [PATCH v2 06/11] mm/hmm: improve driver API to work and wait over a range v2 jglisse
2019-03-28 13:11   ` Ira Weiny
2019-03-28 21:39     ` Jerome Glisse
2019-03-28 16:12   ` Ira Weiny
2019-03-29  0:56     ` Jerome Glisse
2019-03-28 18:49       ` Ira Weiny
2019-03-25 14:40 ` [PATCH v2 07/11] mm/hmm: add default fault flags to avoid the need to pre-fill pfns arrays jglisse
2019-03-28 21:59   ` John Hubbard
2019-03-28 22:12     ` Jerome Glisse
2019-03-28 22:19       ` John Hubbard
2019-03-28 22:31         ` Jerome Glisse
2019-03-28 22:40           ` John Hubbard
2019-03-28 23:21             ` Jerome Glisse
2019-03-28 23:28               ` John Hubbard
2019-03-28 16:42                 ` Ira Weiny
2019-03-29  1:17                   ` Jerome Glisse
2019-03-29  1:30                     ` John Hubbard
2019-03-29  1:42                       ` Jerome Glisse
2019-03-29  1:59                         ` Jerome Glisse
2019-03-29  2:05                           ` John Hubbard
2019-03-29  2:12                             ` Jerome Glisse
2019-03-28 23:43                 ` Jerome Glisse
2019-03-25 14:40 ` [PATCH v2 08/11] mm/hmm: mirror hugetlbfs (snapshoting, faulting and DMA mapping) v2 jglisse
2019-03-28 16:53   ` Ira Weiny
2019-03-25 14:40 ` [PATCH v2 09/11] mm/hmm: allow to mirror vma of a file on a DAX backed filesystem v2 jglisse
2019-03-28 18:04   ` Ira Weiny
2019-03-29  2:17     ` Jerome Glisse
2019-03-25 14:40 ` [PATCH v2 10/11] mm/hmm: add helpers for driver to safely take the mmap_sem v2 jglisse
2019-03-28 20:54   ` John Hubbard
2019-03-28 21:30     ` Jerome Glisse
2019-03-28 21:41       ` John Hubbard
2019-03-28 22:08         ` Jerome Glisse
2019-03-28 22:25           ` John Hubbard
2019-03-28 22:40             ` Jerome Glisse
2019-03-28 22:43               ` John Hubbard
2019-03-28 23:05                 ` Jerome Glisse
2019-03-28 23:20                   ` John Hubbard
2019-03-28 23:24                     ` Jerome Glisse
2019-03-28 23:34                       ` John Hubbard
2019-03-28 18:44                         ` Ira Weiny
2019-03-25 14:40 ` [PATCH v2 11/11] mm/hmm: add an helper function that fault pages and map them to a device v2 jglisse
2019-04-01 11:59   ` Souptick Joarder

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190328110719.GA31324@iweiny-DESK2.sc.intel.com \
    --to=ira.weiny@intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=dan.j.williams@intel.com \
    --cc=jglisse@redhat.com \
    --cc=jhubbard@nvidia.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-mm@kvack.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox