From: Jan Kara <jack@suse.cz>
To: Dave Jiang <dave.jiang@intel.com>
Cc: akpm@linux-foundation.org, jack@suse.cz,
linux-nvdimm@lists.01.org, david@fromorbit.com, hch@lst.de,
linux-mm@kvack.org, tytso@mit.edu, ross.zwisler@linux.intel.com,
dan.j.williams@intel.com
Subject: Re: [PATCH v2 2/3] mm, dax: make pmd_fault() and friends to be the same as fault()
Date: Thu, 15 Dec 2016 14:51:36 +0100 [thread overview]
Message-ID: <20161215135136.GB13811@quack2.suse.cz> (raw)
In-Reply-To: <148174532962.194339.8045184040029240107.stgit@djiang5-desk3.ch.intel.com>
On Wed 14-12-16 12:55:29, Dave Jiang wrote:
> Instead of passing in multiple parameters in the pmd_fault() handler,
> a vmf can be passed in just like a fault() handler. This will simplify
> code and remove the need for the actual pmd fault handlers to allocate a
> vmf. Related functions are also modified to do the same.
>
> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
> Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Looks good to me now. You can add:
Reviewed-by: Jan Kara <jack@suse.cz>
Honza
> ---
> drivers/dax/dax.c | 16 ++++++---------
> fs/dax.c | 45 ++++++++++++++++++-----------------------
> fs/ext4/file.c | 14 ++++++++-----
> fs/xfs/xfs_file.c | 14 +++++++------
> include/linux/dax.h | 7 +++---
> include/linux/mm.h | 3 +--
> include/trace/events/fs_dax.h | 15 ++++++--------
> mm/memory.c | 6 ++---
> 8 files changed, 57 insertions(+), 63 deletions(-)
>
> diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c
> index c753a4c..947e49a 100644
> --- a/drivers/dax/dax.c
> +++ b/drivers/dax/dax.c
> @@ -379,10 +379,9 @@ static int dax_dev_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> }
>
> static int __dax_dev_pmd_fault(struct dax_dev *dax_dev,
> - struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd,
> - unsigned int flags)
> + struct vm_area_struct *vma, struct vm_fault *vmf)
> {
> - unsigned long pmd_addr = addr & PMD_MASK;
> + unsigned long pmd_addr = vmf->address & PMD_MASK;
> struct device *dev = &dax_dev->dev;
> struct dax_region *dax_region;
> phys_addr_t phys;
> @@ -414,23 +413,22 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev,
>
> pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
>
> - return vmf_insert_pfn_pmd(vma, addr, pmd, pfn,
> - flags & FAULT_FLAG_WRITE);
> + return vmf_insert_pfn_pmd(vma, vmf->address, vmf->pmd, pfn,
> + vmf->flags & FAULT_FLAG_WRITE);
> }
>
> -static int dax_dev_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
> - pmd_t *pmd, unsigned int flags)
> +static int dax_dev_pmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> {
> int rc;
> struct file *filp = vma->vm_file;
> struct dax_dev *dax_dev = filp->private_data;
>
> dev_dbg(&dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__,
> - current->comm, (flags & FAULT_FLAG_WRITE)
> + current->comm, (vmf->flags & FAULT_FLAG_WRITE)
> ? "write" : "read", vma->vm_start, vma->vm_end);
>
> rcu_read_lock();
> - rc = __dax_dev_pmd_fault(dax_dev, vma, addr, pmd, flags);
> + rc = __dax_dev_pmd_fault(dax_dev, vma, vmf);
> rcu_read_unlock();
>
> return rc;
> diff --git a/fs/dax.c b/fs/dax.c
> index 6395bc6..157f77f 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -1310,18 +1310,17 @@ static int dax_pmd_load_hole(struct vm_area_struct *vma, pmd_t *pmd,
> return VM_FAULT_FALLBACK;
> }
>
> -int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
> - pmd_t *pmd, unsigned int flags, struct iomap_ops *ops)
> +int dax_iomap_pmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
> + struct iomap_ops *ops)
> {
> struct address_space *mapping = vma->vm_file->f_mapping;
> - unsigned long pmd_addr = address & PMD_MASK;
> - bool write = flags & FAULT_FLAG_WRITE;
> + unsigned long pmd_addr = vmf->address & PMD_MASK;
> + bool write = vmf->flags & FAULT_FLAG_WRITE;
> unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT;
> struct inode *inode = mapping->host;
> int result = VM_FAULT_FALLBACK;
> struct iomap iomap = { 0 };
> - pgoff_t max_pgoff, pgoff;
> - struct vm_fault vmf;
> + pgoff_t max_pgoff, old_pgoff;
> void *entry;
> loff_t pos;
> int error;
> @@ -1331,10 +1330,11 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
> * supposed to hold locks serializing us with truncate / punch hole so
> * this is a reliable test.
> */
> - pgoff = linear_page_index(vma, pmd_addr);
> + old_pgoff = vmf->pgoff;
> + vmf->pgoff = linear_page_index(vma, pmd_addr);
> max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT;
>
> - trace_dax_pmd_fault(inode, vma, address, flags, pgoff, max_pgoff, 0);
> + trace_dax_pmd_fault(inode, vma, vmf, max_pgoff, 0);
>
> /* Fall back to PTEs if we're going to COW */
> if (write && !(vma->vm_flags & VM_SHARED))
> @@ -1346,13 +1346,13 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
> if ((pmd_addr + PMD_SIZE) > vma->vm_end)
> goto fallback;
>
> - if (pgoff > max_pgoff) {
> + if (vmf->pgoff > max_pgoff) {
> result = VM_FAULT_SIGBUS;
> goto out;
> }
>
> /* If the PMD would extend beyond the file size */
> - if ((pgoff | PG_PMD_COLOUR) > max_pgoff)
> + if ((vmf->pgoff | PG_PMD_COLOUR) > max_pgoff)
> goto fallback;
>
> /*
> @@ -1360,7 +1360,7 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
> * setting up a mapping, so really we're using iomap_begin() as a way
> * to look up our filesystem block.
> */
> - pos = (loff_t)pgoff << PAGE_SHIFT;
> + pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
> error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap);
> if (error)
> goto fallback;
> @@ -1370,29 +1370,24 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
> * the tree, for instance), it will return -EEXIST and we just fall
> * back to 4k entries.
> */
> - entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD);
> + entry = grab_mapping_entry(mapping, vmf->pgoff, RADIX_DAX_PMD);
> if (IS_ERR(entry))
> goto finish_iomap;
>
> if (iomap.offset + iomap.length < pos + PMD_SIZE)
> goto unlock_entry;
>
> - vmf.pgoff = pgoff;
> - vmf.flags = flags;
> - vmf.gfp_mask = mapping_gfp_mask(mapping) | __GFP_IO;
> - vmf.gfp_mask &= ~__GFP_FS;
> -
> switch (iomap.type) {
> case IOMAP_MAPPED:
> - result = dax_pmd_insert_mapping(vma, pmd, &vmf, address,
> - &iomap, pos, write, &entry);
> + result = dax_pmd_insert_mapping(vma, vmf->pmd, vmf,
> + vmf->address, &iomap, pos, write, &entry);
> break;
> case IOMAP_UNWRITTEN:
> case IOMAP_HOLE:
> if (WARN_ON_ONCE(write))
> goto unlock_entry;
> - result = dax_pmd_load_hole(vma, pmd, &vmf, address, &iomap,
> - &entry);
> + result = dax_pmd_load_hole(vma, vmf->pmd, vmf, vmf->address,
> + &iomap, &entry);
> break;
> default:
> WARN_ON_ONCE(1);
> @@ -1400,7 +1395,7 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
> }
>
> unlock_entry:
> - put_locked_mapping_entry(mapping, pgoff, entry);
> + put_locked_mapping_entry(mapping, vmf->pgoff, entry);
> finish_iomap:
> if (ops->iomap_end) {
> int copied = PMD_SIZE;
> @@ -1418,12 +1413,12 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
> }
> fallback:
> if (result == VM_FAULT_FALLBACK) {
> - split_huge_pmd(vma, pmd, address);
> + split_huge_pmd(vma, vmf->pmd, vmf->address);
> count_vm_event(THP_FAULT_FALLBACK);
> }
> out:
> - trace_dax_pmd_fault_done(inode, vma, address, flags, pgoff, max_pgoff,
> - result);
> + trace_dax_pmd_fault_done(inode, vma, vmf, max_pgoff, result);
> + vmf->pgoff = old_pgoff;
> return result;
> }
> EXPORT_SYMBOL_GPL(dax_iomap_pmd_fault);
> diff --git a/fs/ext4/file.c b/fs/ext4/file.c
> index a3f2bf0..e6cdb78 100644
> --- a/fs/ext4/file.c
> +++ b/fs/ext4/file.c
> @@ -278,22 +278,26 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> return result;
> }
>
> -static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
> - pmd_t *pmd, unsigned int flags)
> +static int
> +ext4_dax_pmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> {
> int result;
> struct inode *inode = file_inode(vma->vm_file);
> struct super_block *sb = inode->i_sb;
> - bool write = flags & FAULT_FLAG_WRITE;
> + bool write = vmf->flags & FAULT_FLAG_WRITE;
> + gfp_t old_mask;
>
> if (write) {
> sb_start_pagefault(sb);
> file_update_time(vma->vm_file);
> }
> +
> + old_mask = vmf->gfp_mask;
> + vmf->gfp_mask &= ~__GFP_FS;
> down_read(&EXT4_I(inode)->i_mmap_sem);
> - result = dax_iomap_pmd_fault(vma, addr, pmd, flags,
> - &ext4_iomap_ops);
> + result = dax_iomap_pmd_fault(vma, vmf, &ext4_iomap_ops);
> up_read(&EXT4_I(inode)->i_mmap_sem);
> + vmf->gfp_mask = old_mask;
> if (write)
> sb_end_pagefault(sb);
>
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 52202b4..b1b8524 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -1533,29 +1533,31 @@ xfs_filemap_fault(
> STATIC int
> xfs_filemap_pmd_fault(
> struct vm_area_struct *vma,
> - unsigned long addr,
> - pmd_t *pmd,
> - unsigned int flags)
> + struct vm_fault *vmf)
> {
> struct inode *inode = file_inode(vma->vm_file);
> struct xfs_inode *ip = XFS_I(inode);
> int ret;
> + gfp_t old_mask;
>
> if (!IS_DAX(inode))
> return VM_FAULT_FALLBACK;
>
> trace_xfs_filemap_pmd_fault(ip);
>
> - if (flags & FAULT_FLAG_WRITE) {
> + if (vmf->flags & FAULT_FLAG_WRITE) {
> sb_start_pagefault(inode->i_sb);
> file_update_time(vma->vm_file);
> }
>
> + old_mask = vmf->gfp_mask;
> + vmf->gfp_mask &= ~__GFP_FS;
> xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
> - ret = dax_iomap_pmd_fault(vma, addr, pmd, flags, &xfs_iomap_ops);
> + ret = dax_iomap_pmd_fault(vma, vmf, &xfs_iomap_ops);
> xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
> + vmf->gfp_mask = old_mask;
>
> - if (flags & FAULT_FLAG_WRITE)
> + if (vmf->flags & FAULT_FLAG_WRITE)
> sb_end_pagefault(inode->i_sb);
>
> return ret;
> diff --git a/include/linux/dax.h b/include/linux/dax.h
> index 6e36b11..9761c90 100644
> --- a/include/linux/dax.h
> +++ b/include/linux/dax.h
> @@ -71,16 +71,15 @@ static inline unsigned int dax_radix_order(void *entry)
> return PMD_SHIFT - PAGE_SHIFT;
> return 0;
> }
> -int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
> - pmd_t *pmd, unsigned int flags, struct iomap_ops *ops);
> +int dax_iomap_pmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
> + struct iomap_ops *ops);
> #else
> static inline unsigned int dax_radix_order(void *entry)
> {
> return 0;
> }
> static inline int dax_iomap_pmd_fault(struct vm_area_struct *vma,
> - unsigned long address, pmd_t *pmd, unsigned int flags,
> - struct iomap_ops *ops)
> + struct vm_fault *vmf, struct iomap_ops *ops)
> {
> return VM_FAULT_FALLBACK;
> }
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index 30f416a..aef645b 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -347,8 +347,7 @@ struct vm_operations_struct {
> void (*close)(struct vm_area_struct * area);
> int (*mremap)(struct vm_area_struct * area);
> int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
> - int (*pmd_fault)(struct vm_area_struct *, unsigned long address,
> - pmd_t *, unsigned int flags);
> + int (*pmd_fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
> void (*map_pages)(struct vm_fault *vmf,
> pgoff_t start_pgoff, pgoff_t end_pgoff);
>
> diff --git a/include/trace/events/fs_dax.h b/include/trace/events/fs_dax.h
> index c3b0aae..a98665b 100644
> --- a/include/trace/events/fs_dax.h
> +++ b/include/trace/events/fs_dax.h
> @@ -8,9 +8,8 @@
>
> DECLARE_EVENT_CLASS(dax_pmd_fault_class,
> TP_PROTO(struct inode *inode, struct vm_area_struct *vma,
> - unsigned long address, unsigned int flags, pgoff_t pgoff,
> - pgoff_t max_pgoff, int result),
> - TP_ARGS(inode, vma, address, flags, pgoff, max_pgoff, result),
> + struct vm_fault *vmf, pgoff_t max_pgoff, int result),
> + TP_ARGS(inode, vma, vmf, max_pgoff, result),
> TP_STRUCT__entry(
> __field(unsigned long, ino)
> __field(unsigned long, vm_start)
> @@ -29,9 +28,9 @@ DECLARE_EVENT_CLASS(dax_pmd_fault_class,
> __entry->vm_start = vma->vm_start;
> __entry->vm_end = vma->vm_end;
> __entry->vm_flags = vma->vm_flags;
> - __entry->address = address;
> - __entry->flags = flags;
> - __entry->pgoff = pgoff;
> + __entry->address = vmf->address;
> + __entry->flags = vmf->flags;
> + __entry->pgoff = vmf->pgoff;
> __entry->max_pgoff = max_pgoff;
> __entry->result = result;
> ),
> @@ -54,9 +53,9 @@ DECLARE_EVENT_CLASS(dax_pmd_fault_class,
> #define DEFINE_PMD_FAULT_EVENT(name) \
> DEFINE_EVENT(dax_pmd_fault_class, name, \
> TP_PROTO(struct inode *inode, struct vm_area_struct *vma, \
> - unsigned long address, unsigned int flags, pgoff_t pgoff, \
> + struct vm_fault *vmf, \
> pgoff_t max_pgoff, int result), \
> - TP_ARGS(inode, vma, address, flags, pgoff, max_pgoff, result))
> + TP_ARGS(inode, vma, vmf, max_pgoff, result))
>
> DEFINE_PMD_FAULT_EVENT(dax_pmd_fault);
> DEFINE_PMD_FAULT_EVENT(dax_pmd_fault_done);
> diff --git a/mm/memory.c b/mm/memory.c
> index e37250f..8ec36cf 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -3447,8 +3447,7 @@ static int create_huge_pmd(struct vm_fault *vmf)
> if (vma_is_anonymous(vma))
> return do_huge_pmd_anonymous_page(vmf);
> if (vma->vm_ops->pmd_fault)
> - return vma->vm_ops->pmd_fault(vma, vmf->address, vmf->pmd,
> - vmf->flags);
> + return vma->vm_ops->pmd_fault(vma, vmf);
> return VM_FAULT_FALLBACK;
> }
>
> @@ -3457,8 +3456,7 @@ static int wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd)
> if (vma_is_anonymous(vmf->vma))
> return do_huge_pmd_wp_page(vmf, orig_pmd);
> if (vmf->vma->vm_ops->pmd_fault)
> - return vmf->vma->vm_ops->pmd_fault(vmf->vma, vmf->address,
> - vmf->pmd, vmf->flags);
> + return vmf->vma->vm_ops->pmd_fault(vmf->vma, vmf);
>
> /* COW handled on pte level: split pmd */
> VM_BUG_ON_VMA(vmf->vma->vm_flags & VM_SHARED, vmf->vma);
>
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
next prev parent reply other threads:[~2016-12-15 13:51 UTC|newest]
Thread overview: 7+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-12-14 19:55 [PATCH v2 1/3] dax: masking off __GFP_FS in fs DAX handlers Dave Jiang
2016-12-14 19:55 ` [PATCH v2 2/3] mm, dax: make pmd_fault() and friends to be the same as fault() Dave Jiang
2016-12-15 13:51 ` Jan Kara [this message]
2016-12-14 19:55 ` [PATCH v2 3/3] mm, dax: move pmd_fault() to take only vmf parameter Dave Jiang
2016-12-15 14:04 ` Jan Kara
2016-12-14 20:41 ` [PATCH v2 1/3] dax: masking off __GFP_FS in fs DAX handlers Ross Zwisler
2016-12-15 13:46 ` Jan Kara
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20161215135136.GB13811@quack2.suse.cz \
--to=jack@suse.cz \
--cc=akpm@linux-foundation.org \
--cc=dan.j.williams@intel.com \
--cc=dave.jiang@intel.com \
--cc=david@fromorbit.com \
--cc=hch@lst.de \
--cc=linux-mm@kvack.org \
--cc=linux-nvdimm@lists.01.org \
--cc=ross.zwisler@linux.intel.com \
--cc=tytso@mit.edu \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox