* [PATCH v3 1/3] dax: masking off __GFP_FS in fs DAX handlers
From: Dave Jiang @ 2016-12-15 20:50 UTC
To: akpm
Cc: jack, linux-nvdimm, david, hch, linux-mm, tytso, ross.zwisler,
dan.j.williams
The caller into dax needs to clear the __GFP_FS bit from the fault gfp
mask, since it is responsible for acquiring the locks / transactions that
block __GFP_FS allocations. The caller restores the original mask when the
dax function returns.
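
Concretely, each handler ends up wrapping the dax call with the same
save / clear / restore of vmf->gfp_mask. A sketch of the pattern, using the
ext2 fault handler trimmed down to just the gfp_mask handling:

static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
        struct ext2_inode_info *ei = EXT2_I(file_inode(vma->vm_file));
        gfp_t old_gfp = vmf->gfp_mask;
        int ret;

        /* dax_sem is held across the call, so reclaim must not enter the fs */
        vmf->gfp_mask &= ~__GFP_FS;
        down_read(&ei->dax_sem);
        ret = dax_iomap_fault(vma, vmf, &ext2_iomap_ops);
        up_read(&ei->dax_sem);
        vmf->gfp_mask = old_gfp;

        return ret;
}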
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Jan Kara <jack@suse.cz>
---
fs/dax.c | 1 +
fs/ext2/file.c | 9 ++++++++-
fs/ext4/file.c | 10 +++++++++-
fs/xfs/xfs_file.c | 14 +++++++++++++-
4 files changed, 31 insertions(+), 3 deletions(-)
diff --git a/fs/dax.c b/fs/dax.c
index d3fe880..6395bc6 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1380,6 +1380,7 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
vmf.pgoff = pgoff;
vmf.flags = flags;
vmf.gfp_mask = mapping_gfp_mask(mapping) | __GFP_IO;
+ vmf.gfp_mask &= ~__GFP_FS;
switch (iomap.type) {
case IOMAP_MAPPED:
diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index b0f2415..8422d5f 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -92,16 +92,19 @@ static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
struct inode *inode = file_inode(vma->vm_file);
struct ext2_inode_info *ei = EXT2_I(inode);
int ret;
+ gfp_t old_gfp = vmf->gfp_mask;
if (vmf->flags & FAULT_FLAG_WRITE) {
sb_start_pagefault(inode->i_sb);
file_update_time(vma->vm_file);
}
+ vmf->gfp_mask &= ~__GFP_FS;
down_read(&ei->dax_sem);
ret = dax_iomap_fault(vma, vmf, &ext2_iomap_ops);
up_read(&ei->dax_sem);
+ vmf->gfp_mask = old_gfp;
if (vmf->flags & FAULT_FLAG_WRITE)
sb_end_pagefault(inode->i_sb);
return ret;
@@ -114,6 +117,7 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
struct ext2_inode_info *ei = EXT2_I(inode);
loff_t size;
int ret;
+ gfp_t old_gfp = vmf->gfp_mask;
sb_start_pagefault(inode->i_sb);
file_update_time(vma->vm_file);
@@ -123,8 +127,11 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (vmf->pgoff >= size)
ret = VM_FAULT_SIGBUS;
- else
+ else {
+ vmf->gfp_mask &= ~__GFP_FS;
ret = dax_pfn_mkwrite(vma, vmf);
+ vmf->gfp_mask = old_gfp;
+ }
up_read(&ei->dax_sem);
sb_end_pagefault(inode->i_sb);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index d663d3d..a3f2bf0 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -261,14 +261,17 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
struct inode *inode = file_inode(vma->vm_file);
struct super_block *sb = inode->i_sb;
bool write = vmf->flags & FAULT_FLAG_WRITE;
+ gfp_t old_gfp = vmf->gfp_mask;
if (write) {
sb_start_pagefault(sb);
file_update_time(vma->vm_file);
}
+ vmf->gfp_mask &= ~__GFP_FS;
down_read(&EXT4_I(inode)->i_mmap_sem);
result = dax_iomap_fault(vma, vmf, &ext4_iomap_ops);
up_read(&EXT4_I(inode)->i_mmap_sem);
+ vmf->gfp_mask = old_gfp;
if (write)
sb_end_pagefault(sb);
@@ -320,8 +323,13 @@ static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (vmf->pgoff >= size)
ret = VM_FAULT_SIGBUS;
- else
+ else {
+ gfp_t old_gfp = vmf->gfp_mask;
+
+ vmf->gfp_mask &= ~__GFP_FS;
ret = dax_pfn_mkwrite(vma, vmf);
+ vmf->gfp_mask = old_gfp;
+ }
up_read(&EXT4_I(inode)->i_mmap_sem);
sb_end_pagefault(sb);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index d818c16..52202b4 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1474,7 +1474,11 @@ xfs_filemap_page_mkwrite(
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
if (IS_DAX(inode)) {
+ gfp_t old_gfp = vmf->gfp_mask;
+
+ vmf->gfp_mask &= ~__GFP_FS;
ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
+ vmf->gfp_mask = old_gfp;
} else {
ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
ret = block_page_mkwrite_return(ret);
@@ -1502,13 +1506,16 @@ xfs_filemap_fault(
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
if (IS_DAX(inode)) {
+ gfp_t old_gfp = vmf->gfp_mask;
/*
* we do not want to trigger unwritten extent conversion on read
* faults - that is unnecessary overhead and would also require
* changes to xfs_get_blocks_direct() to map unwritten extent
* ioend for conversion on read-only mappings.
*/
+ vmf->gfp_mask &= ~__GFP_FS;
ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
+ vmf->gfp_mask = old_gfp;
} else
ret = filemap_fault(vma, vmf);
xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
@@ -1581,8 +1588,13 @@ xfs_filemap_pfn_mkwrite(
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (vmf->pgoff >= size)
ret = VM_FAULT_SIGBUS;
- else if (IS_DAX(inode))
+ else if (IS_DAX(inode)) {
+ gfp_t old_gfp = vmf->gfp_mask;
+
+ vmf->gfp_mask &= ~__GFP_FS;
ret = dax_pfn_mkwrite(vma, vmf);
+ vmf->gfp_mask = old_gfp;
+ }
xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
sb_end_pagefault(inode->i_sb);
return ret;
--
* [PATCH v3 2/3] mm, dax: make pmd_fault() and friends to be the same as fault()
From: Dave Jiang @ 2016-12-15 20:51 UTC
To: akpm
Cc: jack, linux-nvdimm, david, hch, linux-mm, tytso, ross.zwisler,
dan.j.williams
Instead of passing multiple parameters to the pmd_fault() handler,
a vmf can be passed in, just like the fault() handler. This simplifies
the code and removes the need for the actual pmd fault handlers to
allocate a vmf. Related functions are modified to do the same.
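
For reference, the vm_operations_struct hook changes shape as below; the
address, pmd pointer and fault flags are read from vmf->address, vmf->pmd
and vmf->flags instead of being passed separately:

        /* before */
        int (*pmd_fault)(struct vm_area_struct *vma, unsigned long address,
                         pmd_t *pmd, unsigned int flags);

        /* after */
        int (*pmd_fault)(struct vm_area_struct *vma, struct vm_fault *vmf);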
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
Reviewed-by: Jan Kara <jack@suse.cz>
---
drivers/dax/dax.c | 16 ++++++---------
fs/dax.c | 45 ++++++++++++++++++-----------------------
fs/ext4/file.c | 14 ++++++++-----
fs/xfs/xfs_file.c | 14 +++++++------
include/linux/dax.h | 7 +++---
include/linux/mm.h | 3 +--
include/trace/events/fs_dax.h | 15 ++++++--------
mm/memory.c | 6 ++---
8 files changed, 57 insertions(+), 63 deletions(-)
diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c
index c753a4c..947e49a 100644
--- a/drivers/dax/dax.c
+++ b/drivers/dax/dax.c
@@ -379,10 +379,9 @@ static int dax_dev_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
}
static int __dax_dev_pmd_fault(struct dax_dev *dax_dev,
- struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd,
- unsigned int flags)
+ struct vm_area_struct *vma, struct vm_fault *vmf)
{
- unsigned long pmd_addr = addr & PMD_MASK;
+ unsigned long pmd_addr = vmf->address & PMD_MASK;
struct device *dev = &dax_dev->dev;
struct dax_region *dax_region;
phys_addr_t phys;
@@ -414,23 +413,22 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev,
pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
- return vmf_insert_pfn_pmd(vma, addr, pmd, pfn,
- flags & FAULT_FLAG_WRITE);
+ return vmf_insert_pfn_pmd(vma, vmf->address, vmf->pmd, pfn,
+ vmf->flags & FAULT_FLAG_WRITE);
}
-static int dax_dev_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
- pmd_t *pmd, unsigned int flags)
+static int dax_dev_pmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
int rc;
struct file *filp = vma->vm_file;
struct dax_dev *dax_dev = filp->private_data;
dev_dbg(&dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__,
- current->comm, (flags & FAULT_FLAG_WRITE)
+ current->comm, (vmf->flags & FAULT_FLAG_WRITE)
? "write" : "read", vma->vm_start, vma->vm_end);
rcu_read_lock();
- rc = __dax_dev_pmd_fault(dax_dev, vma, addr, pmd, flags);
+ rc = __dax_dev_pmd_fault(dax_dev, vma, vmf);
rcu_read_unlock();
return rc;
diff --git a/fs/dax.c b/fs/dax.c
index 6395bc6..157f77f 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1310,18 +1310,17 @@ static int dax_pmd_load_hole(struct vm_area_struct *vma, pmd_t *pmd,
return VM_FAULT_FALLBACK;
}
-int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
- pmd_t *pmd, unsigned int flags, struct iomap_ops *ops)
+int dax_iomap_pmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
+ struct iomap_ops *ops)
{
struct address_space *mapping = vma->vm_file->f_mapping;
- unsigned long pmd_addr = address & PMD_MASK;
- bool write = flags & FAULT_FLAG_WRITE;
+ unsigned long pmd_addr = vmf->address & PMD_MASK;
+ bool write = vmf->flags & FAULT_FLAG_WRITE;
unsigned int iomap_flags = (write ? IOMAP_WRITE : 0) | IOMAP_FAULT;
struct inode *inode = mapping->host;
int result = VM_FAULT_FALLBACK;
struct iomap iomap = { 0 };
- pgoff_t max_pgoff, pgoff;
- struct vm_fault vmf;
+ pgoff_t max_pgoff, old_pgoff;
void *entry;
loff_t pos;
int error;
@@ -1331,10 +1330,11 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
* supposed to hold locks serializing us with truncate / punch hole so
* this is a reliable test.
*/
- pgoff = linear_page_index(vma, pmd_addr);
+ old_pgoff = vmf->pgoff;
+ vmf->pgoff = linear_page_index(vma, pmd_addr);
max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT;
- trace_dax_pmd_fault(inode, vma, address, flags, pgoff, max_pgoff, 0);
+ trace_dax_pmd_fault(inode, vma, vmf, max_pgoff, 0);
/* Fall back to PTEs if we're going to COW */
if (write && !(vma->vm_flags & VM_SHARED))
@@ -1346,13 +1346,13 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
if ((pmd_addr + PMD_SIZE) > vma->vm_end)
goto fallback;
- if (pgoff > max_pgoff) {
+ if (vmf->pgoff > max_pgoff) {
result = VM_FAULT_SIGBUS;
goto out;
}
/* If the PMD would extend beyond the file size */
- if ((pgoff | PG_PMD_COLOUR) > max_pgoff)
+ if ((vmf->pgoff | PG_PMD_COLOUR) > max_pgoff)
goto fallback;
/*
@@ -1360,7 +1360,7 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
* setting up a mapping, so really we're using iomap_begin() as a way
* to look up our filesystem block.
*/
- pos = (loff_t)pgoff << PAGE_SHIFT;
+ pos = (loff_t)vmf->pgoff << PAGE_SHIFT;
error = ops->iomap_begin(inode, pos, PMD_SIZE, iomap_flags, &iomap);
if (error)
goto fallback;
@@ -1370,29 +1370,24 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
* the tree, for instance), it will return -EEXIST and we just fall
* back to 4k entries.
*/
- entry = grab_mapping_entry(mapping, pgoff, RADIX_DAX_PMD);
+ entry = grab_mapping_entry(mapping, vmf->pgoff, RADIX_DAX_PMD);
if (IS_ERR(entry))
goto finish_iomap;
if (iomap.offset + iomap.length < pos + PMD_SIZE)
goto unlock_entry;
- vmf.pgoff = pgoff;
- vmf.flags = flags;
- vmf.gfp_mask = mapping_gfp_mask(mapping) | __GFP_IO;
- vmf.gfp_mask &= ~__GFP_FS;
-
switch (iomap.type) {
case IOMAP_MAPPED:
- result = dax_pmd_insert_mapping(vma, pmd, &vmf, address,
- &iomap, pos, write, &entry);
+ result = dax_pmd_insert_mapping(vma, vmf->pmd, vmf,
+ vmf->address, &iomap, pos, write, &entry);
break;
case IOMAP_UNWRITTEN:
case IOMAP_HOLE:
if (WARN_ON_ONCE(write))
goto unlock_entry;
- result = dax_pmd_load_hole(vma, pmd, &vmf, address, &iomap,
- &entry);
+ result = dax_pmd_load_hole(vma, vmf->pmd, vmf, vmf->address,
+ &iomap, &entry);
break;
default:
WARN_ON_ONCE(1);
@@ -1400,7 +1395,7 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
}
unlock_entry:
- put_locked_mapping_entry(mapping, pgoff, entry);
+ put_locked_mapping_entry(mapping, vmf->pgoff, entry);
finish_iomap:
if (ops->iomap_end) {
int copied = PMD_SIZE;
@@ -1418,12 +1413,12 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
}
fallback:
if (result == VM_FAULT_FALLBACK) {
- split_huge_pmd(vma, pmd, address);
+ split_huge_pmd(vma, vmf->pmd, vmf->address);
count_vm_event(THP_FAULT_FALLBACK);
}
out:
- trace_dax_pmd_fault_done(inode, vma, address, flags, pgoff, max_pgoff,
- result);
+ trace_dax_pmd_fault_done(inode, vma, vmf, max_pgoff, result);
+ vmf->pgoff = old_pgoff;
return result;
}
EXPORT_SYMBOL_GPL(dax_iomap_pmd_fault);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index a3f2bf0..e6cdb78 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -278,22 +278,26 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
return result;
}
-static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
- pmd_t *pmd, unsigned int flags)
+static int
+ext4_dax_pmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
int result;
struct inode *inode = file_inode(vma->vm_file);
struct super_block *sb = inode->i_sb;
- bool write = flags & FAULT_FLAG_WRITE;
+ bool write = vmf->flags & FAULT_FLAG_WRITE;
+ gfp_t old_mask;
if (write) {
sb_start_pagefault(sb);
file_update_time(vma->vm_file);
}
+
+ old_mask = vmf->gfp_mask;
+ vmf->gfp_mask &= ~__GFP_FS;
down_read(&EXT4_I(inode)->i_mmap_sem);
- result = dax_iomap_pmd_fault(vma, addr, pmd, flags,
- &ext4_iomap_ops);
+ result = dax_iomap_pmd_fault(vma, vmf, &ext4_iomap_ops);
up_read(&EXT4_I(inode)->i_mmap_sem);
+ vmf->gfp_mask = old_mask;
if (write)
sb_end_pagefault(sb);
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 52202b4..b1b8524 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1533,29 +1533,31 @@ xfs_filemap_fault(
STATIC int
xfs_filemap_pmd_fault(
struct vm_area_struct *vma,
- unsigned long addr,
- pmd_t *pmd,
- unsigned int flags)
+ struct vm_fault *vmf)
{
struct inode *inode = file_inode(vma->vm_file);
struct xfs_inode *ip = XFS_I(inode);
int ret;
+ gfp_t old_mask;
if (!IS_DAX(inode))
return VM_FAULT_FALLBACK;
trace_xfs_filemap_pmd_fault(ip);
- if (flags & FAULT_FLAG_WRITE) {
+ if (vmf->flags & FAULT_FLAG_WRITE) {
sb_start_pagefault(inode->i_sb);
file_update_time(vma->vm_file);
}
+ old_mask = vmf->gfp_mask;
+ vmf->gfp_mask &= ~__GFP_FS;
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
- ret = dax_iomap_pmd_fault(vma, addr, pmd, flags, &xfs_iomap_ops);
+ ret = dax_iomap_pmd_fault(vma, vmf, &xfs_iomap_ops);
xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
+ vmf->gfp_mask = old_mask;
- if (flags & FAULT_FLAG_WRITE)
+ if (vmf->flags & FAULT_FLAG_WRITE)
sb_end_pagefault(inode->i_sb);
return ret;
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 6e36b11..9761c90 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -71,16 +71,15 @@ static inline unsigned int dax_radix_order(void *entry)
return PMD_SHIFT - PAGE_SHIFT;
return 0;
}
-int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
- pmd_t *pmd, unsigned int flags, struct iomap_ops *ops);
+int dax_iomap_pmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
+ struct iomap_ops *ops);
#else
static inline unsigned int dax_radix_order(void *entry)
{
return 0;
}
static inline int dax_iomap_pmd_fault(struct vm_area_struct *vma,
- unsigned long address, pmd_t *pmd, unsigned int flags,
- struct iomap_ops *ops)
+ struct vm_fault *vmf, struct iomap_ops *ops)
{
return VM_FAULT_FALLBACK;
}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 30f416a..aef645b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -347,8 +347,7 @@ struct vm_operations_struct {
void (*close)(struct vm_area_struct * area);
int (*mremap)(struct vm_area_struct * area);
int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
- int (*pmd_fault)(struct vm_area_struct *, unsigned long address,
- pmd_t *, unsigned int flags);
+ int (*pmd_fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
void (*map_pages)(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff);
diff --git a/include/trace/events/fs_dax.h b/include/trace/events/fs_dax.h
index c3b0aae..a98665b 100644
--- a/include/trace/events/fs_dax.h
+++ b/include/trace/events/fs_dax.h
@@ -8,9 +8,8 @@
DECLARE_EVENT_CLASS(dax_pmd_fault_class,
TP_PROTO(struct inode *inode, struct vm_area_struct *vma,
- unsigned long address, unsigned int flags, pgoff_t pgoff,
- pgoff_t max_pgoff, int result),
- TP_ARGS(inode, vma, address, flags, pgoff, max_pgoff, result),
+ struct vm_fault *vmf, pgoff_t max_pgoff, int result),
+ TP_ARGS(inode, vma, vmf, max_pgoff, result),
TP_STRUCT__entry(
__field(unsigned long, ino)
__field(unsigned long, vm_start)
@@ -29,9 +28,9 @@ DECLARE_EVENT_CLASS(dax_pmd_fault_class,
__entry->vm_start = vma->vm_start;
__entry->vm_end = vma->vm_end;
__entry->vm_flags = vma->vm_flags;
- __entry->address = address;
- __entry->flags = flags;
- __entry->pgoff = pgoff;
+ __entry->address = vmf->address;
+ __entry->flags = vmf->flags;
+ __entry->pgoff = vmf->pgoff;
__entry->max_pgoff = max_pgoff;
__entry->result = result;
),
@@ -54,9 +53,9 @@ DECLARE_EVENT_CLASS(dax_pmd_fault_class,
#define DEFINE_PMD_FAULT_EVENT(name) \
DEFINE_EVENT(dax_pmd_fault_class, name, \
TP_PROTO(struct inode *inode, struct vm_area_struct *vma, \
- unsigned long address, unsigned int flags, pgoff_t pgoff, \
+ struct vm_fault *vmf, \
pgoff_t max_pgoff, int result), \
- TP_ARGS(inode, vma, address, flags, pgoff, max_pgoff, result))
+ TP_ARGS(inode, vma, vmf, max_pgoff, result))
DEFINE_PMD_FAULT_EVENT(dax_pmd_fault);
DEFINE_PMD_FAULT_EVENT(dax_pmd_fault_done);
diff --git a/mm/memory.c b/mm/memory.c
index e37250f..8ec36cf 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3447,8 +3447,7 @@ static int create_huge_pmd(struct vm_fault *vmf)
if (vma_is_anonymous(vma))
return do_huge_pmd_anonymous_page(vmf);
if (vma->vm_ops->pmd_fault)
- return vma->vm_ops->pmd_fault(vma, vmf->address, vmf->pmd,
- vmf->flags);
+ return vma->vm_ops->pmd_fault(vma, vmf);
return VM_FAULT_FALLBACK;
}
@@ -3457,8 +3456,7 @@ static int wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd)
if (vma_is_anonymous(vmf->vma))
return do_huge_pmd_wp_page(vmf, orig_pmd);
if (vmf->vma->vm_ops->pmd_fault)
- return vmf->vma->vm_ops->pmd_fault(vmf->vma, vmf->address,
- vmf->pmd, vmf->flags);
+ return vmf->vma->vm_ops->pmd_fault(vmf->vma, vmf);
/* COW handled on pte level: split pmd */
VM_BUG_ON_VMA(vmf->vma->vm_flags & VM_SHARED, vmf->vma);
--
* [PATCH v3 3/3] mm, dax: move pmd_fault() to take only vmf parameter
From: Dave Jiang @ 2016-12-15 20:51 UTC
To: akpm
Cc: jack, linux-nvdimm, david, hch, linux-mm, tytso, ross.zwisler,
dan.j.williams
pmd_fault() and related functions really only need the vmf parameter,
since the additional parameters are all included in the vmf struct.
Remove the additional parameters and simplify pmd_fault() and friends.
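
For reference, the hook then takes the same shape as the other vmf-based
paths, with the vma reachable as vmf->vma:

        /* before (after patch 2/3) */
        int (*pmd_fault)(struct vm_area_struct *vma, struct vm_fault *vmf);

        /* after */
        int (*pmd_fault)(struct vm_fault *vmf);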
Signed-off-by: Dave Jiang <dave.jiang@intel.com>
---
drivers/dax/dax.c | 18 +++++++-------
fs/dax.c | 54 +++++++++++++++++++----------------------
fs/ext4/file.c | 8 +++---
fs/xfs/xfs_file.c | 7 ++---
include/linux/dax.h | 7 ++---
include/linux/mm.h | 2 +-
include/trace/events/fs_dax.h | 54 +++++++++++++++++++----------------------
mm/memory.c | 9 +++----
8 files changed, 74 insertions(+), 85 deletions(-)
diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c
index 947e49a..55160f8 100644
--- a/drivers/dax/dax.c
+++ b/drivers/dax/dax.c
@@ -378,8 +378,7 @@ static int dax_dev_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
return rc;
}
-static int __dax_dev_pmd_fault(struct dax_dev *dax_dev,
- struct vm_area_struct *vma, struct vm_fault *vmf)
+static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
{
unsigned long pmd_addr = vmf->address & PMD_MASK;
struct device *dev = &dax_dev->dev;
@@ -388,7 +387,7 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev,
pgoff_t pgoff;
pfn_t pfn;
- if (check_vma(dax_dev, vma, __func__))
+ if (check_vma(dax_dev, vmf->vma, __func__))
return VM_FAULT_SIGBUS;
dax_region = dax_dev->region;
@@ -403,7 +402,7 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev,
return VM_FAULT_SIGBUS;
}
- pgoff = linear_page_index(vma, pmd_addr);
+ pgoff = linear_page_index(vmf->vma, pmd_addr);
phys = pgoff_to_phys(dax_dev, pgoff, PMD_SIZE);
if (phys == -1) {
dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__,
@@ -413,22 +412,23 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev,
pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
- return vmf_insert_pfn_pmd(vma, vmf->address, vmf->pmd, pfn,
+ return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, pfn,
vmf->flags & FAULT_FLAG_WRITE);
}
-static int dax_dev_pmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+static int dax_dev_pmd_fault(struct vm_fault *vmf)
{
int rc;
- struct file *filp = vma->vm_file;
+ struct file *filp = vmf->vma->vm_file;
struct dax_dev *dax_dev = filp->private_data;
dev_dbg(&dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__,
current->comm, (vmf->flags & FAULT_FLAG_WRITE)
- ? "write" : "read", vma->vm_start, vma->vm_end);
+ ? "write" : "read",
+ vmf->vma->vm_start, vmf->vma->vm_end);
rcu_read_lock();
- rc = __dax_dev_pmd_fault(dax_dev, vma, vmf);
+ rc = __dax_dev_pmd_fault(dax_dev, vmf);
rcu_read_unlock();
return rc;
diff --git a/fs/dax.c b/fs/dax.c
index 157f77f..bc39809 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -1226,11 +1226,10 @@ EXPORT_SYMBOL_GPL(dax_iomap_fault);
*/
#define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1)
-static int dax_pmd_insert_mapping(struct vm_area_struct *vma, pmd_t *pmd,
- struct vm_fault *vmf, unsigned long address,
- struct iomap *iomap, loff_t pos, bool write, void **entryp)
+static int dax_pmd_insert_mapping(struct vm_fault *vmf, struct iomap *iomap,
+ loff_t pos, void **entryp)
{
- struct address_space *mapping = vma->vm_file->f_mapping;
+ struct address_space *mapping = vmf->vma->vm_file->f_mapping;
struct block_device *bdev = iomap->bdev;
struct inode *inode = mapping->host;
struct blk_dax_ctl dax = {
@@ -1257,31 +1256,30 @@ static int dax_pmd_insert_mapping(struct vm_area_struct *vma, pmd_t *pmd,
goto fallback;
*entryp = ret;
- trace_dax_pmd_insert_mapping(inode, vma, address, write, length,
- dax.pfn, ret);
- return vmf_insert_pfn_pmd(vma, address, pmd, dax.pfn, write);
+ trace_dax_pmd_insert_mapping(inode, vmf, length, dax.pfn, ret);
+ return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd,
+ dax.pfn, vmf->flags & FAULT_FLAG_WRITE);
unmap_fallback:
dax_unmap_atomic(bdev, &dax);
fallback:
- trace_dax_pmd_insert_mapping_fallback(inode, vma, address, write,
- length, dax.pfn, ret);
+ trace_dax_pmd_insert_mapping_fallback(inode, vmf, length,
+ dax.pfn, ret);
return VM_FAULT_FALLBACK;
}
-static int dax_pmd_load_hole(struct vm_area_struct *vma, pmd_t *pmd,
- struct vm_fault *vmf, unsigned long address,
- struct iomap *iomap, void **entryp)
+static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
+ void **entryp)
{
- struct address_space *mapping = vma->vm_file->f_mapping;
- unsigned long pmd_addr = address & PMD_MASK;
+ struct address_space *mapping = vmf->vma->vm_file->f_mapping;
+ unsigned long pmd_addr = vmf->address & PMD_MASK;
struct inode *inode = mapping->host;
struct page *zero_page;
void *ret = NULL;
spinlock_t *ptl;
pmd_t pmd_entry;
- zero_page = mm_get_huge_zero_page(vma->vm_mm);
+ zero_page = mm_get_huge_zero_page(vmf->vma->vm_mm);
if (unlikely(!zero_page))
goto fallback;
@@ -1292,27 +1290,27 @@ static int dax_pmd_load_hole(struct vm_area_struct *vma, pmd_t *pmd,
goto fallback;
*entryp = ret;
- ptl = pmd_lock(vma->vm_mm, pmd);
- if (!pmd_none(*pmd)) {
+ ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
+ if (!pmd_none(*(vmf->pmd))) {
spin_unlock(ptl);
goto fallback;
}
- pmd_entry = mk_pmd(zero_page, vma->vm_page_prot);
+ pmd_entry = mk_pmd(zero_page, vmf->vma->vm_page_prot);
pmd_entry = pmd_mkhuge(pmd_entry);
- set_pmd_at(vma->vm_mm, pmd_addr, pmd, pmd_entry);
+ set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry);
spin_unlock(ptl);
- trace_dax_pmd_load_hole(inode, vma, address, zero_page, ret);
+ trace_dax_pmd_load_hole(inode, vmf, zero_page, ret);
return VM_FAULT_NOPAGE;
fallback:
- trace_dax_pmd_load_hole_fallback(inode, vma, address, zero_page, ret);
+ trace_dax_pmd_load_hole_fallback(inode, vmf, zero_page, ret);
return VM_FAULT_FALLBACK;
}
-int dax_iomap_pmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
- struct iomap_ops *ops)
+int dax_iomap_pmd_fault(struct vm_fault *vmf, struct iomap_ops *ops)
{
+ struct vm_area_struct *vma = vmf->vma;
struct address_space *mapping = vma->vm_file->f_mapping;
unsigned long pmd_addr = vmf->address & PMD_MASK;
bool write = vmf->flags & FAULT_FLAG_WRITE;
@@ -1334,7 +1332,7 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
vmf->pgoff = linear_page_index(vma, pmd_addr);
max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT;
- trace_dax_pmd_fault(inode, vma, vmf, max_pgoff, 0);
+ trace_dax_pmd_fault(inode, vmf, max_pgoff, 0);
/* Fall back to PTEs if we're going to COW */
if (write && !(vma->vm_flags & VM_SHARED))
@@ -1379,15 +1377,13 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
switch (iomap.type) {
case IOMAP_MAPPED:
- result = dax_pmd_insert_mapping(vma, vmf->pmd, vmf,
- vmf->address, &iomap, pos, write, &entry);
+ result = dax_pmd_insert_mapping(vmf, &iomap, pos, &entry);
break;
case IOMAP_UNWRITTEN:
case IOMAP_HOLE:
if (WARN_ON_ONCE(write))
goto unlock_entry;
- result = dax_pmd_load_hole(vma, vmf->pmd, vmf, vmf->address,
- &iomap, &entry);
+ result = dax_pmd_load_hole(vmf, &iomap, &entry);
break;
default:
WARN_ON_ONCE(1);
@@ -1417,7 +1413,7 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
count_vm_event(THP_FAULT_FALLBACK);
}
out:
- trace_dax_pmd_fault_done(inode, vma, vmf, max_pgoff, result);
+ trace_dax_pmd_fault_done(inode, vmf, max_pgoff, result);
vmf->pgoff = old_pgoff;
return result;
}
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index e6cdb78..2f4fd28 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -279,23 +279,23 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
}
static int
-ext4_dax_pmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
+ext4_dax_pmd_fault(struct vm_fault *vmf)
{
int result;
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
struct super_block *sb = inode->i_sb;
bool write = vmf->flags & FAULT_FLAG_WRITE;
gfp_t old_mask;
if (write) {
sb_start_pagefault(sb);
- file_update_time(vma->vm_file);
+ file_update_time(vmf->vma->vm_file);
}
old_mask = vmf->gfp_mask;
vmf->gfp_mask &= ~__GFP_FS;
down_read(&EXT4_I(inode)->i_mmap_sem);
- result = dax_iomap_pmd_fault(vma, vmf, &ext4_iomap_ops);
+ result = dax_iomap_pmd_fault(vmf, &ext4_iomap_ops);
up_read(&EXT4_I(inode)->i_mmap_sem);
vmf->gfp_mask = old_mask;
if (write)
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index b1b8524..b548fc5 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -1532,10 +1532,9 @@ xfs_filemap_fault(
*/
STATIC int
xfs_filemap_pmd_fault(
- struct vm_area_struct *vma,
struct vm_fault *vmf)
{
- struct inode *inode = file_inode(vma->vm_file);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
struct xfs_inode *ip = XFS_I(inode);
int ret;
gfp_t old_mask;
@@ -1547,13 +1546,13 @@ xfs_filemap_pmd_fault(
if (vmf->flags & FAULT_FLAG_WRITE) {
sb_start_pagefault(inode->i_sb);
- file_update_time(vma->vm_file);
+ file_update_time(vmf->vma->vm_file);
}
old_mask = vmf->gfp_mask;
vmf->gfp_mask &= ~__GFP_FS;
xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
- ret = dax_iomap_pmd_fault(vma, vmf, &xfs_iomap_ops);
+ ret = dax_iomap_pmd_fault(vmf, &xfs_iomap_ops);
xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
vmf->gfp_mask = old_mask;
diff --git a/include/linux/dax.h b/include/linux/dax.h
index 9761c90..1ffdb4d 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -71,15 +71,14 @@ static inline unsigned int dax_radix_order(void *entry)
return PMD_SHIFT - PAGE_SHIFT;
return 0;
}
-int dax_iomap_pmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
- struct iomap_ops *ops);
+int dax_iomap_pmd_fault(struct vm_fault *vmf, struct iomap_ops *ops);
#else
static inline unsigned int dax_radix_order(void *entry)
{
return 0;
}
-static inline int dax_iomap_pmd_fault(struct vm_area_struct *vma,
- struct vm_fault *vmf, struct iomap_ops *ops)
+static inline int dax_iomap_pmd_fault(struct vm_fault *vmf,
+ struct iomap_ops *ops)
{
return VM_FAULT_FALLBACK;
}
diff --git a/include/linux/mm.h b/include/linux/mm.h
index aef645b..795f03e 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -347,7 +347,7 @@ struct vm_operations_struct {
void (*close)(struct vm_area_struct * area);
int (*mremap)(struct vm_area_struct * area);
int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
- int (*pmd_fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
+ int (*pmd_fault)(struct vm_fault *vmf);
void (*map_pages)(struct vm_fault *vmf,
pgoff_t start_pgoff, pgoff_t end_pgoff);
diff --git a/include/trace/events/fs_dax.h b/include/trace/events/fs_dax.h
index a98665b..c566ddc 100644
--- a/include/trace/events/fs_dax.h
+++ b/include/trace/events/fs_dax.h
@@ -7,9 +7,9 @@
#include <linux/tracepoint.h>
DECLARE_EVENT_CLASS(dax_pmd_fault_class,
- TP_PROTO(struct inode *inode, struct vm_area_struct *vma,
- struct vm_fault *vmf, pgoff_t max_pgoff, int result),
- TP_ARGS(inode, vma, vmf, max_pgoff, result),
+ TP_PROTO(struct inode *inode, struct vm_fault *vmf,
+ pgoff_t max_pgoff, int result),
+ TP_ARGS(inode, vmf, max_pgoff, result),
TP_STRUCT__entry(
__field(unsigned long, ino)
__field(unsigned long, vm_start)
@@ -25,9 +25,9 @@ DECLARE_EVENT_CLASS(dax_pmd_fault_class,
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
- __entry->vm_start = vma->vm_start;
- __entry->vm_end = vma->vm_end;
- __entry->vm_flags = vma->vm_flags;
+ __entry->vm_start = vmf->vma->vm_start;
+ __entry->vm_end = vmf->vma->vm_end;
+ __entry->vm_flags = vmf->vma->vm_flags;
__entry->address = vmf->address;
__entry->flags = vmf->flags;
__entry->pgoff = vmf->pgoff;
@@ -52,19 +52,18 @@ DECLARE_EVENT_CLASS(dax_pmd_fault_class,
#define DEFINE_PMD_FAULT_EVENT(name) \
DEFINE_EVENT(dax_pmd_fault_class, name, \
- TP_PROTO(struct inode *inode, struct vm_area_struct *vma, \
- struct vm_fault *vmf, \
+ TP_PROTO(struct inode *inode, struct vm_fault *vmf, \
pgoff_t max_pgoff, int result), \
- TP_ARGS(inode, vma, vmf, max_pgoff, result))
+ TP_ARGS(inode, vmf, max_pgoff, result))
DEFINE_PMD_FAULT_EVENT(dax_pmd_fault);
DEFINE_PMD_FAULT_EVENT(dax_pmd_fault_done);
DECLARE_EVENT_CLASS(dax_pmd_load_hole_class,
- TP_PROTO(struct inode *inode, struct vm_area_struct *vma,
- unsigned long address, struct page *zero_page,
+ TP_PROTO(struct inode *inode, struct vm_fault *vmf,
+ struct page *zero_page,
void *radix_entry),
- TP_ARGS(inode, vma, address, zero_page, radix_entry),
+ TP_ARGS(inode, vmf, zero_page, radix_entry),
TP_STRUCT__entry(
__field(unsigned long, ino)
__field(unsigned long, vm_flags)
@@ -76,8 +75,8 @@ DECLARE_EVENT_CLASS(dax_pmd_load_hole_class,
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
- __entry->vm_flags = vma->vm_flags;
- __entry->address = address;
+ __entry->vm_flags = vmf->vma->vm_flags;
+ __entry->address = vmf->address;
__entry->zero_page = zero_page;
__entry->radix_entry = radix_entry;
),
@@ -95,19 +94,17 @@ DECLARE_EVENT_CLASS(dax_pmd_load_hole_class,
#define DEFINE_PMD_LOAD_HOLE_EVENT(name) \
DEFINE_EVENT(dax_pmd_load_hole_class, name, \
- TP_PROTO(struct inode *inode, struct vm_area_struct *vma, \
- unsigned long address, struct page *zero_page, \
- void *radix_entry), \
- TP_ARGS(inode, vma, address, zero_page, radix_entry))
+ TP_PROTO(struct inode *inode, struct vm_fault *vmf, \
+ struct page *zero_page, void *radix_entry), \
+ TP_ARGS(inode, vmf, zero_page, radix_entry))
DEFINE_PMD_LOAD_HOLE_EVENT(dax_pmd_load_hole);
DEFINE_PMD_LOAD_HOLE_EVENT(dax_pmd_load_hole_fallback);
DECLARE_EVENT_CLASS(dax_pmd_insert_mapping_class,
- TP_PROTO(struct inode *inode, struct vm_area_struct *vma,
- unsigned long address, int write, long length, pfn_t pfn,
- void *radix_entry),
- TP_ARGS(inode, vma, address, write, length, pfn, radix_entry),
+ TP_PROTO(struct inode *inode, struct vm_fault *vmf,
+ long length, pfn_t pfn, void *radix_entry),
+ TP_ARGS(inode, vmf, length, pfn, radix_entry),
TP_STRUCT__entry(
__field(unsigned long, ino)
__field(unsigned long, vm_flags)
@@ -121,9 +118,9 @@ DECLARE_EVENT_CLASS(dax_pmd_insert_mapping_class,
TP_fast_assign(
__entry->dev = inode->i_sb->s_dev;
__entry->ino = inode->i_ino;
- __entry->vm_flags = vma->vm_flags;
- __entry->address = address;
- __entry->write = write;
+ __entry->vm_flags = vmf->vma->vm_flags;
+ __entry->address = vmf->address;
+ __entry->write = vmf->flags & FAULT_FLAG_WRITE;
__entry->length = length;
__entry->pfn_val = pfn.val;
__entry->radix_entry = radix_entry;
@@ -146,10 +143,9 @@ DECLARE_EVENT_CLASS(dax_pmd_insert_mapping_class,
#define DEFINE_PMD_INSERT_MAPPING_EVENT(name) \
DEFINE_EVENT(dax_pmd_insert_mapping_class, name, \
- TP_PROTO(struct inode *inode, struct vm_area_struct *vma, \
- unsigned long address, int write, long length, pfn_t pfn, \
- void *radix_entry), \
- TP_ARGS(inode, vma, address, write, length, pfn, radix_entry))
+ TP_PROTO(struct inode *inode, struct vm_fault *vmf, \
+ long length, pfn_t pfn, void *radix_entry), \
+ TP_ARGS(inode, vmf, length, pfn, radix_entry))
DEFINE_PMD_INSERT_MAPPING_EVENT(dax_pmd_insert_mapping);
DEFINE_PMD_INSERT_MAPPING_EVENT(dax_pmd_insert_mapping_fallback);
diff --git a/mm/memory.c b/mm/memory.c
index 8ec36cf..e929c41 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -3443,11 +3443,10 @@ static int do_numa_page(struct vm_fault *vmf)
static int create_huge_pmd(struct vm_fault *vmf)
{
- struct vm_area_struct *vma = vmf->vma;
- if (vma_is_anonymous(vma))
+ if (vma_is_anonymous(vmf->vma))
return do_huge_pmd_anonymous_page(vmf);
- if (vma->vm_ops->pmd_fault)
- return vma->vm_ops->pmd_fault(vma, vmf);
+ if (vmf->vma->vm_ops->pmd_fault)
+ return vmf->vma->vm_ops->pmd_fault(vmf);
return VM_FAULT_FALLBACK;
}
@@ -3456,7 +3455,7 @@ static int wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd)
if (vma_is_anonymous(vmf->vma))
return do_huge_pmd_wp_page(vmf, orig_pmd);
if (vmf->vma->vm_ops->pmd_fault)
- return vmf->vma->vm_ops->pmd_fault(vmf->vma, vmf);
+ return vmf->vma->vm_ops->pmd_fault(vmf);
/* COW handled on pte level: split pmd */
VM_BUG_ON_VMA(vmf->vma->vm_flags & VM_SHARED, vmf->vma);
--
* Re: [PATCH v3 2/3] mm, dax: make pmd_fault() and friends to be the same as fault()
From: Ross Zwisler @ 2016-12-15 23:23 UTC
To: Dave Jiang
Cc: akpm, jack, linux-nvdimm, david, hch, linux-mm, tytso,
ross.zwisler, dan.j.williams
On Thu, Dec 15, 2016 at 01:51:05PM -0700, Dave Jiang wrote:
> Instead of passing multiple parameters to the pmd_fault() handler,
> a vmf can be passed in, just like the fault() handler. This simplifies
> the code and removes the need for the actual pmd fault handlers to
> allocate a vmf. Related functions are modified to do the same.
>
> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
> Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
> Reviewed-by: Jan Kara <jack@suse.cz>
> ---
> diff --git a/fs/ext4/file.c b/fs/ext4/file.c
> index a3f2bf0..e6cdb78 100644
> --- a/fs/ext4/file.c
> +++ b/fs/ext4/file.c
> @@ -278,22 +278,26 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> return result;
> }
>
> -static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
> - pmd_t *pmd, unsigned int flags)
> +static int
> +ext4_dax_pmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> {
> int result;
> struct inode *inode = file_inode(vma->vm_file);
> struct super_block *sb = inode->i_sb;
> - bool write = flags & FAULT_FLAG_WRITE;
> + bool write = vmf->flags & FAULT_FLAG_WRITE;
> + gfp_t old_mask;
>
> if (write) {
> sb_start_pagefault(sb);
> file_update_time(vma->vm_file);
> }
> +
> + old_mask = vmf->gfp_mask;
> + vmf->gfp_mask &= ~__GFP_FS;
> down_read(&EXT4_I(inode)->i_mmap_sem);
> - result = dax_iomap_pmd_fault(vma, addr, pmd, flags,
> - &ext4_iomap_ops);
> + result = dax_iomap_pmd_fault(vma, vmf, &ext4_iomap_ops);
> up_read(&EXT4_I(inode)->i_mmap_sem);
> + vmf->gfp_mask = old_mask;
> if (write)
> sb_end_pagefault(sb);
>
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index 52202b4..b1b8524 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -1533,29 +1533,31 @@ xfs_filemap_fault(
> STATIC int
> xfs_filemap_pmd_fault(
> struct vm_area_struct *vma,
> - unsigned long addr,
> - pmd_t *pmd,
> - unsigned int flags)
> + struct vm_fault *vmf)
> {
> struct inode *inode = file_inode(vma->vm_file);
> struct xfs_inode *ip = XFS_I(inode);
> int ret;
> + gfp_t old_mask;
>
> if (!IS_DAX(inode))
> return VM_FAULT_FALLBACK;
>
> trace_xfs_filemap_pmd_fault(ip);
>
> - if (flags & FAULT_FLAG_WRITE) {
> + if (vmf->flags & FAULT_FLAG_WRITE) {
> sb_start_pagefault(inode->i_sb);
> file_update_time(vma->vm_file);
> }
>
> + old_mask = vmf->gfp_mask;
One small nit for both xfs and ext4 - in patch 1 you named your local
variable 'old_gfp' and initialized it where it was defined, but in this
patch it's 'old_mask' and it's assigned later. It's probably best to keep
this patch consistent with the first one.
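
For the xfs hunk, for example, that would look something like (sketch):

        gfp_t old_gfp = vmf->gfp_mask;  /* named and set at the definition */

        vmf->gfp_mask &= ~__GFP_FS;
        xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
        ret = dax_iomap_pmd_fault(vma, vmf, &xfs_iomap_ops);
        xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
        vmf->gfp_mask = old_gfp;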
--
* Re: [PATCH v3 3/3] mm, dax: move pmd_fault() to take only vmf parameter
From: Ross Zwisler @ 2016-12-15 23:23 UTC
To: Dave Jiang
Cc: akpm, jack, linux-nvdimm, david, hch, linux-mm, tytso,
ross.zwisler, dan.j.williams
On Thu, Dec 15, 2016 at 01:51:11PM -0700, Dave Jiang wrote:
> pmd_fault() and related functions really only need the vmf parameter,
> since the additional parameters are all included in the vmf struct.
> Remove the additional parameters and simplify pmd_fault() and friends.
>
> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
This seems correct to me.
Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
--
* Re: [PATCH v3 3/3] mm, dax: move pmd_fault() to take only vmf parameter
From: Jan Kara @ 2016-12-16 7:17 UTC
To: Dave Jiang
Cc: akpm, jack, linux-nvdimm, david, hch, linux-mm, tytso,
ross.zwisler, dan.j.williams
On Thu 15-12-16 13:51:11, Dave Jiang wrote:
> pmd_fault() and related functions really only need the vmf parameter,
> since the additional parameters are all included in the vmf struct.
> Remove the additional parameters and simplify pmd_fault() and friends.
>
> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
Looks good. You can add:
Reviewed-by: Jan Kara <jack@suse.cz>
BTW, if you want to continue with this, it may be worthwhile to also
convert vmf_insert_pfn_pmd() to take only 'vmf' and 'pfn' arguments.
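
E.g. something like this (hypothetical, not part of this series):

        int vmf_insert_pfn_pmd(struct vm_fault *vmf, pfn_t pfn);

with the vma, address, pmd and the write flag all derived from the vmf, so
the call site in dax_pmd_insert_mapping() would shrink to:

        return vmf_insert_pfn_pmd(vmf, dax.pfn);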
Honza
> ---
> drivers/dax/dax.c | 18 +++++++-------
> fs/dax.c | 54 +++++++++++++++++++----------------------
> fs/ext4/file.c | 8 +++---
> fs/xfs/xfs_file.c | 7 ++---
> include/linux/dax.h | 7 ++---
> include/linux/mm.h | 2 +-
> include/trace/events/fs_dax.h | 54 +++++++++++++++++++----------------------
> mm/memory.c | 9 +++----
> 8 files changed, 74 insertions(+), 85 deletions(-)
>
> diff --git a/drivers/dax/dax.c b/drivers/dax/dax.c
> index 947e49a..55160f8 100644
> --- a/drivers/dax/dax.c
> +++ b/drivers/dax/dax.c
> @@ -378,8 +378,7 @@ static int dax_dev_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> return rc;
> }
>
> -static int __dax_dev_pmd_fault(struct dax_dev *dax_dev,
> - struct vm_area_struct *vma, struct vm_fault *vmf)
> +static int __dax_dev_pmd_fault(struct dax_dev *dax_dev, struct vm_fault *vmf)
> {
> unsigned long pmd_addr = vmf->address & PMD_MASK;
> struct device *dev = &dax_dev->dev;
> @@ -388,7 +387,7 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev,
> pgoff_t pgoff;
> pfn_t pfn;
>
> - if (check_vma(dax_dev, vma, __func__))
> + if (check_vma(dax_dev, vmf->vma, __func__))
> return VM_FAULT_SIGBUS;
>
> dax_region = dax_dev->region;
> @@ -403,7 +402,7 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev,
> return VM_FAULT_SIGBUS;
> }
>
> - pgoff = linear_page_index(vma, pmd_addr);
> + pgoff = linear_page_index(vmf->vma, pmd_addr);
> phys = pgoff_to_phys(dax_dev, pgoff, PMD_SIZE);
> if (phys == -1) {
> dev_dbg(dev, "%s: phys_to_pgoff(%#lx) failed\n", __func__,
> @@ -413,22 +412,23 @@ static int __dax_dev_pmd_fault(struct dax_dev *dax_dev,
>
> pfn = phys_to_pfn_t(phys, dax_region->pfn_flags);
>
> - return vmf_insert_pfn_pmd(vma, vmf->address, vmf->pmd, pfn,
> + return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, pfn,
> vmf->flags & FAULT_FLAG_WRITE);
> }
>
> -static int dax_dev_pmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> +static int dax_dev_pmd_fault(struct vm_fault *vmf)
> {
> int rc;
> - struct file *filp = vma->vm_file;
> + struct file *filp = vmf->vma->vm_file;
> struct dax_dev *dax_dev = filp->private_data;
>
> dev_dbg(&dax_dev->dev, "%s: %s: %s (%#lx - %#lx)\n", __func__,
> current->comm, (vmf->flags & FAULT_FLAG_WRITE)
> - ? "write" : "read", vma->vm_start, vma->vm_end);
> + ? "write" : "read",
> + vmf->vma->vm_start, vmf->vma->vm_end);
>
> rcu_read_lock();
> - rc = __dax_dev_pmd_fault(dax_dev, vma, vmf);
> + rc = __dax_dev_pmd_fault(dax_dev, vmf);
> rcu_read_unlock();
>
> return rc;
> diff --git a/fs/dax.c b/fs/dax.c
> index 157f77f..bc39809 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -1226,11 +1226,10 @@ EXPORT_SYMBOL_GPL(dax_iomap_fault);
> */
> #define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1)
>
> -static int dax_pmd_insert_mapping(struct vm_area_struct *vma, pmd_t *pmd,
> - struct vm_fault *vmf, unsigned long address,
> - struct iomap *iomap, loff_t pos, bool write, void **entryp)
> +static int dax_pmd_insert_mapping(struct vm_fault *vmf, struct iomap *iomap,
> + loff_t pos, void **entryp)
> {
> - struct address_space *mapping = vma->vm_file->f_mapping;
> + struct address_space *mapping = vmf->vma->vm_file->f_mapping;
> struct block_device *bdev = iomap->bdev;
> struct inode *inode = mapping->host;
> struct blk_dax_ctl dax = {
> @@ -1257,31 +1256,30 @@ static int dax_pmd_insert_mapping(struct vm_area_struct *vma, pmd_t *pmd,
> goto fallback;
> *entryp = ret;
>
> - trace_dax_pmd_insert_mapping(inode, vma, address, write, length,
> - dax.pfn, ret);
> - return vmf_insert_pfn_pmd(vma, address, pmd, dax.pfn, write);
> + trace_dax_pmd_insert_mapping(inode, vmf, length, dax.pfn, ret);
> + return vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd,
> + dax.pfn, vmf->flags & FAULT_FLAG_WRITE);
>
> unmap_fallback:
> dax_unmap_atomic(bdev, &dax);
> fallback:
> - trace_dax_pmd_insert_mapping_fallback(inode, vma, address, write,
> - length, dax.pfn, ret);
> + trace_dax_pmd_insert_mapping_fallback(inode, vmf, length,
> + dax.pfn, ret);
> return VM_FAULT_FALLBACK;
> }
>
> -static int dax_pmd_load_hole(struct vm_area_struct *vma, pmd_t *pmd,
> - struct vm_fault *vmf, unsigned long address,
> - struct iomap *iomap, void **entryp)
> +static int dax_pmd_load_hole(struct vm_fault *vmf, struct iomap *iomap,
> + void **entryp)
> {
> - struct address_space *mapping = vma->vm_file->f_mapping;
> - unsigned long pmd_addr = address & PMD_MASK;
> + struct address_space *mapping = vmf->vma->vm_file->f_mapping;
> + unsigned long pmd_addr = vmf->address & PMD_MASK;
> struct inode *inode = mapping->host;
> struct page *zero_page;
> void *ret = NULL;
> spinlock_t *ptl;
> pmd_t pmd_entry;
>
> - zero_page = mm_get_huge_zero_page(vma->vm_mm);
> + zero_page = mm_get_huge_zero_page(vmf->vma->vm_mm);
>
> if (unlikely(!zero_page))
> goto fallback;
> @@ -1292,27 +1290,27 @@ static int dax_pmd_load_hole(struct vm_area_struct *vma, pmd_t *pmd,
> goto fallback;
> *entryp = ret;
>
> - ptl = pmd_lock(vma->vm_mm, pmd);
> - if (!pmd_none(*pmd)) {
> + ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd);
> + if (!pmd_none(*(vmf->pmd))) {
> spin_unlock(ptl);
> goto fallback;
> }
>
> - pmd_entry = mk_pmd(zero_page, vma->vm_page_prot);
> + pmd_entry = mk_pmd(zero_page, vmf->vma->vm_page_prot);
> pmd_entry = pmd_mkhuge(pmd_entry);
> - set_pmd_at(vma->vm_mm, pmd_addr, pmd, pmd_entry);
> + set_pmd_at(vmf->vma->vm_mm, pmd_addr, vmf->pmd, pmd_entry);
> spin_unlock(ptl);
> - trace_dax_pmd_load_hole(inode, vma, address, zero_page, ret);
> + trace_dax_pmd_load_hole(inode, vmf, zero_page, ret);
> return VM_FAULT_NOPAGE;
>
> fallback:
> - trace_dax_pmd_load_hole_fallback(inode, vma, address, zero_page, ret);
> + trace_dax_pmd_load_hole_fallback(inode, vmf, zero_page, ret);
> return VM_FAULT_FALLBACK;
> }
>
> -int dax_iomap_pmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
> - struct iomap_ops *ops)
> +int dax_iomap_pmd_fault(struct vm_fault *vmf, struct iomap_ops *ops)
> {
> + struct vm_area_struct *vma = vmf->vma;
> struct address_space *mapping = vma->vm_file->f_mapping;
> unsigned long pmd_addr = vmf->address & PMD_MASK;
> bool write = vmf->flags & FAULT_FLAG_WRITE;
> @@ -1334,7 +1332,7 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
> vmf->pgoff = linear_page_index(vma, pmd_addr);
> max_pgoff = (i_size_read(inode) - 1) >> PAGE_SHIFT;
>
> - trace_dax_pmd_fault(inode, vma, vmf, max_pgoff, 0);
> + trace_dax_pmd_fault(inode, vmf, max_pgoff, 0);
>
> /* Fall back to PTEs if we're going to COW */
> if (write && !(vma->vm_flags & VM_SHARED))
> @@ -1379,15 +1377,13 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
>
> switch (iomap.type) {
> case IOMAP_MAPPED:
> - result = dax_pmd_insert_mapping(vma, vmf->pmd, vmf,
> - vmf->address, &iomap, pos, write, &entry);
> + result = dax_pmd_insert_mapping(vmf, &iomap, pos, &entry);
> break;
> case IOMAP_UNWRITTEN:
> case IOMAP_HOLE:
> if (WARN_ON_ONCE(write))
> goto unlock_entry;
> - result = dax_pmd_load_hole(vma, vmf->pmd, vmf, vmf->address,
> - &iomap, &entry);
> + result = dax_pmd_load_hole(vmf, &iomap, &entry);
> break;
> default:
> WARN_ON_ONCE(1);
> @@ -1417,7 +1413,7 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
> count_vm_event(THP_FAULT_FALLBACK);
> }
> out:
> - trace_dax_pmd_fault_done(inode, vma, vmf, max_pgoff, result);
> + trace_dax_pmd_fault_done(inode, vmf, max_pgoff, result);
> vmf->pgoff = old_pgoff;
> return result;
> }
> diff --git a/fs/ext4/file.c b/fs/ext4/file.c
> index e6cdb78..2f4fd28 100644
> --- a/fs/ext4/file.c
> +++ b/fs/ext4/file.c
> @@ -279,23 +279,23 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> }
>
> static int
> -ext4_dax_pmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> +ext4_dax_pmd_fault(struct vm_fault *vmf)
> {
> int result;
> - struct inode *inode = file_inode(vma->vm_file);
> + struct inode *inode = file_inode(vmf->vma->vm_file);
> struct super_block *sb = inode->i_sb;
> bool write = vmf->flags & FAULT_FLAG_WRITE;
> gfp_t old_mask;
>
> if (write) {
> sb_start_pagefault(sb);
> - file_update_time(vma->vm_file);
> + file_update_time(vmf->vma->vm_file);
> }
>
> old_mask = vmf->gfp_mask;
> vmf->gfp_mask &= ~__GFP_FS;
> down_read(&EXT4_I(inode)->i_mmap_sem);
> - result = dax_iomap_pmd_fault(vma, vmf, &ext4_iomap_ops);
> + result = dax_iomap_pmd_fault(vmf, &ext4_iomap_ops);
> up_read(&EXT4_I(inode)->i_mmap_sem);
> vmf->gfp_mask = old_mask;
> if (write)
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index b1b8524..b548fc5 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -1532,10 +1532,9 @@ xfs_filemap_fault(
> */
> STATIC int
> xfs_filemap_pmd_fault(
> - struct vm_area_struct *vma,
> struct vm_fault *vmf)
> {
> - struct inode *inode = file_inode(vma->vm_file);
> + struct inode *inode = file_inode(vmf->vma->vm_file);
> struct xfs_inode *ip = XFS_I(inode);
> int ret;
> gfp_t old_mask;
> @@ -1547,13 +1546,13 @@ xfs_filemap_pmd_fault(
>
> if (vmf->flags & FAULT_FLAG_WRITE) {
> sb_start_pagefault(inode->i_sb);
> - file_update_time(vma->vm_file);
> + file_update_time(vmf->vma->vm_file);
> }
>
> old_mask = vmf->gfp_mask;
> vmf->gfp_mask &= ~__GFP_FS;
> xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
> - ret = dax_iomap_pmd_fault(vma, vmf, &xfs_iomap_ops);
> + ret = dax_iomap_pmd_fault(vmf, &xfs_iomap_ops);
> xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
> vmf->gfp_mask = old_mask;
>
> diff --git a/include/linux/dax.h b/include/linux/dax.h
> index 9761c90..1ffdb4d 100644
> --- a/include/linux/dax.h
> +++ b/include/linux/dax.h
> @@ -71,15 +71,14 @@ static inline unsigned int dax_radix_order(void *entry)
> return PMD_SHIFT - PAGE_SHIFT;
> return 0;
> }
> -int dax_iomap_pmd_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
> - struct iomap_ops *ops);
> +int dax_iomap_pmd_fault(struct vm_fault *vmf, struct iomap_ops *ops);
> #else
> static inline unsigned int dax_radix_order(void *entry)
> {
> return 0;
> }
> -static inline int dax_iomap_pmd_fault(struct vm_area_struct *vma,
> - struct vm_fault *vmf, struct iomap_ops *ops)
> +static inline int dax_iomap_pmd_fault(struct vm_fault *vmf,
> + struct iomap_ops *ops)
> {
> return VM_FAULT_FALLBACK;
> }
> diff --git a/include/linux/mm.h b/include/linux/mm.h
> index aef645b..795f03e 100644
> --- a/include/linux/mm.h
> +++ b/include/linux/mm.h
> @@ -347,7 +347,7 @@ struct vm_operations_struct {
> void (*close)(struct vm_area_struct * area);
> int (*mremap)(struct vm_area_struct * area);
> int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
> - int (*pmd_fault)(struct vm_area_struct *vma, struct vm_fault *vmf);
> + int (*pmd_fault)(struct vm_fault *vmf);
> void (*map_pages)(struct vm_fault *vmf,
> pgoff_t start_pgoff, pgoff_t end_pgoff);
>
> diff --git a/include/trace/events/fs_dax.h b/include/trace/events/fs_dax.h
> index a98665b..c566ddc 100644
> --- a/include/trace/events/fs_dax.h
> +++ b/include/trace/events/fs_dax.h
> @@ -7,9 +7,9 @@
> #include <linux/tracepoint.h>
>
> DECLARE_EVENT_CLASS(dax_pmd_fault_class,
> - TP_PROTO(struct inode *inode, struct vm_area_struct *vma,
> - struct vm_fault *vmf, pgoff_t max_pgoff, int result),
> - TP_ARGS(inode, vma, vmf, max_pgoff, result),
> + TP_PROTO(struct inode *inode, struct vm_fault *vmf,
> + pgoff_t max_pgoff, int result),
> + TP_ARGS(inode, vmf, max_pgoff, result),
> TP_STRUCT__entry(
> __field(unsigned long, ino)
> __field(unsigned long, vm_start)
> @@ -25,9 +25,9 @@ DECLARE_EVENT_CLASS(dax_pmd_fault_class,
> TP_fast_assign(
> __entry->dev = inode->i_sb->s_dev;
> __entry->ino = inode->i_ino;
> - __entry->vm_start = vma->vm_start;
> - __entry->vm_end = vma->vm_end;
> - __entry->vm_flags = vma->vm_flags;
> + __entry->vm_start = vmf->vma->vm_start;
> + __entry->vm_end = vmf->vma->vm_end;
> + __entry->vm_flags = vmf->vma->vm_flags;
> __entry->address = vmf->address;
> __entry->flags = vmf->flags;
> __entry->pgoff = vmf->pgoff;
> @@ -52,19 +52,18 @@ DECLARE_EVENT_CLASS(dax_pmd_fault_class,
>
> #define DEFINE_PMD_FAULT_EVENT(name) \
> DEFINE_EVENT(dax_pmd_fault_class, name, \
> - TP_PROTO(struct inode *inode, struct vm_area_struct *vma, \
> - struct vm_fault *vmf, \
> + TP_PROTO(struct inode *inode, struct vm_fault *vmf, \
> pgoff_t max_pgoff, int result), \
> - TP_ARGS(inode, vma, vmf, max_pgoff, result))
> + TP_ARGS(inode, vmf, max_pgoff, result))
>
> DEFINE_PMD_FAULT_EVENT(dax_pmd_fault);
> DEFINE_PMD_FAULT_EVENT(dax_pmd_fault_done);
>
> DECLARE_EVENT_CLASS(dax_pmd_load_hole_class,
> - TP_PROTO(struct inode *inode, struct vm_area_struct *vma,
> - unsigned long address, struct page *zero_page,
> + TP_PROTO(struct inode *inode, struct vm_fault *vmf,
> + struct page *zero_page,
> void *radix_entry),
> - TP_ARGS(inode, vma, address, zero_page, radix_entry),
> + TP_ARGS(inode, vmf, zero_page, radix_entry),
> TP_STRUCT__entry(
> __field(unsigned long, ino)
> __field(unsigned long, vm_flags)
> @@ -76,8 +75,8 @@ DECLARE_EVENT_CLASS(dax_pmd_load_hole_class,
> TP_fast_assign(
> __entry->dev = inode->i_sb->s_dev;
> __entry->ino = inode->i_ino;
> - __entry->vm_flags = vma->vm_flags;
> - __entry->address = address;
> + __entry->vm_flags = vmf->vma->vm_flags;
> + __entry->address = vmf->address;
> __entry->zero_page = zero_page;
> __entry->radix_entry = radix_entry;
> ),
> @@ -95,19 +94,17 @@ DECLARE_EVENT_CLASS(dax_pmd_load_hole_class,
>
> #define DEFINE_PMD_LOAD_HOLE_EVENT(name) \
> DEFINE_EVENT(dax_pmd_load_hole_class, name, \
> - TP_PROTO(struct inode *inode, struct vm_area_struct *vma, \
> - unsigned long address, struct page *zero_page, \
> - void *radix_entry), \
> - TP_ARGS(inode, vma, address, zero_page, radix_entry))
> + TP_PROTO(struct inode *inode, struct vm_fault *vmf, \
> + struct page *zero_page, void *radix_entry), \
> + TP_ARGS(inode, vmf, zero_page, radix_entry))
>
> DEFINE_PMD_LOAD_HOLE_EVENT(dax_pmd_load_hole);
> DEFINE_PMD_LOAD_HOLE_EVENT(dax_pmd_load_hole_fallback);
>
> DECLARE_EVENT_CLASS(dax_pmd_insert_mapping_class,
> - TP_PROTO(struct inode *inode, struct vm_area_struct *vma,
> - unsigned long address, int write, long length, pfn_t pfn,
> - void *radix_entry),
> - TP_ARGS(inode, vma, address, write, length, pfn, radix_entry),
> + TP_PROTO(struct inode *inode, struct vm_fault *vmf,
> + long length, pfn_t pfn, void *radix_entry),
> + TP_ARGS(inode, vmf, length, pfn, radix_entry),
> TP_STRUCT__entry(
> __field(unsigned long, ino)
> __field(unsigned long, vm_flags)
> @@ -121,9 +118,9 @@ DECLARE_EVENT_CLASS(dax_pmd_insert_mapping_class,
> TP_fast_assign(
> __entry->dev = inode->i_sb->s_dev;
> __entry->ino = inode->i_ino;
> - __entry->vm_flags = vma->vm_flags;
> - __entry->address = address;
> - __entry->write = write;
> + __entry->vm_flags = vmf->vma->vm_flags;
> + __entry->address = vmf->address;
> + __entry->write = vmf->flags & FAULT_FLAG_WRITE;
> __entry->length = length;
> __entry->pfn_val = pfn.val;
> __entry->radix_entry = radix_entry;
> @@ -146,10 +143,9 @@ DECLARE_EVENT_CLASS(dax_pmd_insert_mapping_class,
>
> #define DEFINE_PMD_INSERT_MAPPING_EVENT(name) \
> DEFINE_EVENT(dax_pmd_insert_mapping_class, name, \
> - TP_PROTO(struct inode *inode, struct vm_area_struct *vma, \
> - unsigned long address, int write, long length, pfn_t pfn, \
> - void *radix_entry), \
> - TP_ARGS(inode, vma, address, write, length, pfn, radix_entry))
> + TP_PROTO(struct inode *inode, struct vm_fault *vmf, \
> + long length, pfn_t pfn, void *radix_entry), \
> + TP_ARGS(inode, vmf, length, pfn, radix_entry))
>
> DEFINE_PMD_INSERT_MAPPING_EVENT(dax_pmd_insert_mapping);
> DEFINE_PMD_INSERT_MAPPING_EVENT(dax_pmd_insert_mapping_fallback);
> diff --git a/mm/memory.c b/mm/memory.c
> index 8ec36cf..e929c41 100644
> --- a/mm/memory.c
> +++ b/mm/memory.c
> @@ -3443,11 +3443,10 @@ static int do_numa_page(struct vm_fault *vmf)
>
> static int create_huge_pmd(struct vm_fault *vmf)
> {
> - struct vm_area_struct *vma = vmf->vma;
> - if (vma_is_anonymous(vma))
> + if (vma_is_anonymous(vmf->vma))
> return do_huge_pmd_anonymous_page(vmf);
> - if (vma->vm_ops->pmd_fault)
> - return vma->vm_ops->pmd_fault(vma, vmf);
> + if (vmf->vma->vm_ops->pmd_fault)
> + return vmf->vma->vm_ops->pmd_fault(vmf);
> return VM_FAULT_FALLBACK;
> }
>
> @@ -3456,7 +3455,7 @@ static int wp_huge_pmd(struct vm_fault *vmf, pmd_t orig_pmd)
> if (vma_is_anonymous(vmf->vma))
> return do_huge_pmd_wp_page(vmf, orig_pmd);
> if (vmf->vma->vm_ops->pmd_fault)
> - return vmf->vma->vm_ops->pmd_fault(vmf->vma, vmf);
> + return vmf->vma->vm_ops->pmd_fault(vmf);
>
> /* COW handled on pte level: split pmd */
> VM_BUG_ON_VMA(vmf->vma->vm_flags & VM_SHARED, vmf->vma);
>
--
Jan Kara <jack@suse.com>
SUSE Labs, CR
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: dont@kvack.org
* Re: [PATCH v3 1/3] dax: masking off __GFP_FS in fs DAX handlers
2016-12-15 20:50 [PATCH v3 1/3] dax: masking off __GFP_FS in fs DAX handlers Dave Jiang
2016-12-15 20:51 ` [PATCH v3 2/3] mm, dax: make pmd_fault() and friends to be the same as fault() Dave Jiang
2016-12-15 20:51 ` [PATCH v3 3/3] mm, dax: move pmd_fault() to take only vmf parameter Dave Jiang
@ 2016-12-16 8:34 ` Michal Hocko
2 siblings, 0 replies; 7+ messages in thread
From: Michal Hocko @ 2016-12-16 8:34 UTC (permalink / raw)
To: Dave Jiang
Cc: akpm, jack, linux-nvdimm, david, hch, linux-mm, tytso,
ross.zwisler, dan.j.williams
On Thu 15-12-16 13:50:59, Dave Jiang wrote:
> The caller into dax needs to clear __GFP_FS mask bit since it's
> responsible for acquiring locks / transactions that blocks __GFP_FS
> allocation. The caller will restore the original mask when dax function
> returns.
Could you have a look at [1]? Does the new API look like a fit for your
use case here? It at least seems a bit simpler from the code POV to me.
[1] http://lkml.kernel.org/r/20161215140715.12732-1-mhocko@kernel.org
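
For illustration only, a rough and untested sketch of what e.g. the ext2
fault handler could look like with a scoped "no fs reclaim" API instead of
the per-handler gfp_mask save/restore below. It assumes the series in [1]
ends up exporting memalloc_nofs_save()/memalloc_nofs_restore() (names taken
from that posting and may still change):

static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct inode *inode = file_inode(vma->vm_file);
	struct ext2_inode_info *ei = EXT2_I(inode);
	unsigned int nofs_flags;
	int ret;

	if (vmf->flags & FAULT_FLAG_WRITE) {
		sb_start_pagefault(inode->i_sb);
		file_update_time(vma->vm_file);
	}

	/*
	 * Mark the whole locked section as "no fs reclaim" rather than
	 * clearing and restoring __GFP_FS in vmf->gfp_mask by hand.
	 */
	nofs_flags = memalloc_nofs_save();
	down_read(&ei->dax_sem);

	ret = dax_iomap_fault(vma, vmf, &ext2_iomap_ops);

	up_read(&ei->dax_sem);
	memalloc_nofs_restore(nofs_flags);

	if (vmf->flags & FAULT_FLAG_WRITE)
		sb_end_pagefault(inode->i_sb);
	return ret;
}

The same pattern would apply to the ext4 and xfs handlers, which would drop
their gfp_mask manipulation entirely.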
>
> Signed-off-by: Dave Jiang <dave.jiang@intel.com>
> Reviewed-by: Ross Zwisler <ross.zwisler@linux.intel.com>
> Reviewed-by: Jan Kara <jack@suse.cz>
> ---
> fs/dax.c | 1 +
> fs/ext2/file.c | 9 ++++++++-
> fs/ext4/file.c | 10 +++++++++-
> fs/xfs/xfs_file.c | 14 +++++++++++++-
> 4 files changed, 31 insertions(+), 3 deletions(-)
>
> diff --git a/fs/dax.c b/fs/dax.c
> index d3fe880..6395bc6 100644
> --- a/fs/dax.c
> +++ b/fs/dax.c
> @@ -1380,6 +1380,7 @@ int dax_iomap_pmd_fault(struct vm_area_struct *vma, unsigned long address,
> vmf.pgoff = pgoff;
> vmf.flags = flags;
> vmf.gfp_mask = mapping_gfp_mask(mapping) | __GFP_IO;
> + vmf.gfp_mask &= ~__GFP_FS;
>
> switch (iomap.type) {
> case IOMAP_MAPPED:
> diff --git a/fs/ext2/file.c b/fs/ext2/file.c
> index b0f2415..8422d5f 100644
> --- a/fs/ext2/file.c
> +++ b/fs/ext2/file.c
> @@ -92,16 +92,19 @@ static int ext2_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> struct inode *inode = file_inode(vma->vm_file);
> struct ext2_inode_info *ei = EXT2_I(inode);
> int ret;
> + gfp_t old_gfp = vmf->gfp_mask;
>
> if (vmf->flags & FAULT_FLAG_WRITE) {
> sb_start_pagefault(inode->i_sb);
> file_update_time(vma->vm_file);
> }
> + vmf->gfp_mask &= ~__GFP_FS;
> down_read(&ei->dax_sem);
>
> ret = dax_iomap_fault(vma, vmf, &ext2_iomap_ops);
>
> up_read(&ei->dax_sem);
> + vmf->gfp_mask = old_gfp;
> if (vmf->flags & FAULT_FLAG_WRITE)
> sb_end_pagefault(inode->i_sb);
> return ret;
> @@ -114,6 +117,7 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
> struct ext2_inode_info *ei = EXT2_I(inode);
> loff_t size;
> int ret;
> + gfp_t old_gfp = vmf->gfp_mask;
>
> sb_start_pagefault(inode->i_sb);
> file_update_time(vma->vm_file);
> @@ -123,8 +127,11 @@ static int ext2_dax_pfn_mkwrite(struct vm_area_struct *vma,
> size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
> if (vmf->pgoff >= size)
> ret = VM_FAULT_SIGBUS;
> - else
> + else {
> + vmf->gfp_mask &= ~__GFP_FS;
> ret = dax_pfn_mkwrite(vma, vmf);
> + vmf->gfp_mask = old_gfp;
> + }
>
> up_read(&ei->dax_sem);
> sb_end_pagefault(inode->i_sb);
> diff --git a/fs/ext4/file.c b/fs/ext4/file.c
> index d663d3d..a3f2bf0 100644
> --- a/fs/ext4/file.c
> +++ b/fs/ext4/file.c
> @@ -261,14 +261,17 @@ static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
> struct inode *inode = file_inode(vma->vm_file);
> struct super_block *sb = inode->i_sb;
> bool write = vmf->flags & FAULT_FLAG_WRITE;
> + gfp_t old_gfp = vmf->gfp_mask;
>
> if (write) {
> sb_start_pagefault(sb);
> file_update_time(vma->vm_file);
> }
> + vmf->gfp_mask &= ~__GFP_FS;
> down_read(&EXT4_I(inode)->i_mmap_sem);
> result = dax_iomap_fault(vma, vmf, &ext4_iomap_ops);
> up_read(&EXT4_I(inode)->i_mmap_sem);
> + vmf->gfp_mask = old_gfp;
> if (write)
> sb_end_pagefault(sb);
>
> @@ -320,8 +323,13 @@ static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
> size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
> if (vmf->pgoff >= size)
> ret = VM_FAULT_SIGBUS;
> - else
> + else {
> + gfp_t old_gfp = vmf->gfp_mask;
> +
> + vmf->gfp_mask &= ~__GFP_FS;
> ret = dax_pfn_mkwrite(vma, vmf);
> + vmf->gfp_mask = old_gfp;
> + }
> up_read(&EXT4_I(inode)->i_mmap_sem);
> sb_end_pagefault(sb);
>
> diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
> index d818c16..52202b4 100644
> --- a/fs/xfs/xfs_file.c
> +++ b/fs/xfs/xfs_file.c
> @@ -1474,7 +1474,11 @@ xfs_filemap_page_mkwrite(
> xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
>
> if (IS_DAX(inode)) {
> + gfp_t old_gfp = vmf->gfp_mask;
> +
> + vmf->gfp_mask &= ~__GFP_FS;
> ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
> + vmf->gfp_mask = old_gfp;
> } else {
> ret = iomap_page_mkwrite(vma, vmf, &xfs_iomap_ops);
> ret = block_page_mkwrite_return(ret);
> @@ -1502,13 +1506,16 @@ xfs_filemap_fault(
>
> xfs_ilock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
> if (IS_DAX(inode)) {
> + gfp_t old_gfp = vmf->gfp_mask;
> /*
> * we do not want to trigger unwritten extent conversion on read
> * faults - that is unnecessary overhead and would also require
> * changes to xfs_get_blocks_direct() to map unwritten extent
> * ioend for conversion on read-only mappings.
> */
> + vmf->gfp_mask &= ~__GFP_FS;
> ret = dax_iomap_fault(vma, vmf, &xfs_iomap_ops);
> + vmf->gfp_mask = old_gfp;
> } else
> ret = filemap_fault(vma, vmf);
> xfs_iunlock(XFS_I(inode), XFS_MMAPLOCK_SHARED);
> @@ -1581,8 +1588,13 @@ xfs_filemap_pfn_mkwrite(
> size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
> if (vmf->pgoff >= size)
> ret = VM_FAULT_SIGBUS;
> - else if (IS_DAX(inode))
> + else if (IS_DAX(inode)) {
> + gfp_t old_gfp = vmf->gfp_mask;
> +
> + vmf->gfp_mask &= ~__GFP_FS;
> ret = dax_pfn_mkwrite(vma, vmf);
> + vmf->gfp_mask = old_gfp;
> + }
> xfs_iunlock(ip, XFS_MMAPLOCK_SHARED);
> sb_end_pagefault(inode->i_sb);
> return ret;
>
--
Michal Hocko
SUSE Labs
end of thread
Thread overview: 7+ messages
2016-12-15 20:50 [PATCH v3 1/3] dax: masking off __GFP_FS in fs DAX handlers Dave Jiang
2016-12-15 20:51 ` [PATCH v3 2/3] mm, dax: make pmd_fault() and friends to be the same as fault() Dave Jiang
2016-12-15 23:23 ` Ross Zwisler
2016-12-15 20:51 ` [PATCH v3 3/3] mm, dax: move pmd_fault() to take only vmf parameter Dave Jiang
2016-12-15 23:23 ` Ross Zwisler
2016-12-16 7:17 ` Jan Kara
2016-12-16 8:34 ` [PATCH v3 1/3] dax: masking off __GFP_FS in fs DAX handlers Michal Hocko