From: Michel Lespinasse <walken@google.com>
To: Peter Zijlstra <peterz@infradead.org>,
Andrew Morton <akpm@linux-foundation.org>,
Laurent Dufour <ldufour@linux.ibm.com>,
Vlastimil Babka <vbabka@suse.cz>,
Matthew Wilcox <willy@infradead.org>,
"Liam R . Howlett" <Liam.Howlett@oracle.com>,
Jerome Glisse <jglisse@redhat.com>,
Davidlohr Bueso <dave@stgolabs.net>,
David Rientjes <rientjes@google.com>
Cc: linux-mm <linux-mm@kvack.org>, Michel Lespinasse <walken@google.com>
Subject: [RFC PATCH 22/24] do_mmap: implement locked argument
Date: Mon, 24 Feb 2020 12:30:55 -0800 [thread overview]
Message-ID: <20200224203057.162467-23-walken@google.com> (raw)
In-Reply-To: <20200224203057.162467-1-walken@google.com>
When locked is true, preserve the current behavior - the do_mmap()
caller is expected to already hold a coarse write lock on current->mmap_sem.
When locked is false, change do_mmap() to acquire the appropriate
MM locks. do_mmap() still acquires a coarse lock in this change, but can
now be locally changed to acquire a fine-grained lock in the future.
Signed-off-by: Michel Lespinasse <walken@google.com>
---
mm/mmap.c | 106 ++++++++++++++++++++++++++++++++++++-----------------
mm/nommu.c | 19 +++++++++-
2 files changed, 89 insertions(+), 36 deletions(-)
diff --git mm/mmap.c mm/mmap.c
index 2868e61927a1..75755f1cbd0b 100644
--- mm/mmap.c
+++ mm/mmap.c
@@ -1406,22 +1406,29 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
if ((pgoff + (len >> PAGE_SHIFT)) < pgoff)
return -EOVERFLOW;
+ if (!locked && mm_write_lock_killable(mm))
+ return -EINTR;
+
/* Too many mappings? */
- if (mm->map_count > sysctl_max_map_count)
- return -ENOMEM;
+ if (mm->map_count > sysctl_max_map_count) {
+ addr = -ENOMEM;
+ goto unlock;
+ }
/* Obtain the address to map to. we verify (or select) it and ensure
* that it represents a valid section of the address space.
*/
addr = get_unmapped_area(file, addr, len, pgoff, flags);
if (IS_ERR_VALUE(addr))
- return addr;
+ goto unlock;
if (flags & MAP_FIXED_NOREPLACE) {
struct vm_area_struct *vma = find_vma(mm, addr);
- if (vma && vma->vm_start < addr + len)
- return -EEXIST;
+ if (vma && vma->vm_start < addr + len) {
+ addr = -EEXIST;
+ goto unlock;
+ }
}
if (prot == PROT_EXEC) {
@@ -1437,19 +1444,24 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
vm_flags |= calc_vm_prot_bits(prot, pkey) | calc_vm_flag_bits(flags) |
mm->def_flags | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC;
- if (flags & MAP_LOCKED)
- if (!can_do_mlock())
- return -EPERM;
+ if ((flags & MAP_LOCKED) && !can_do_mlock()) {
+ addr = -EPERM;
+ goto unlock;
+ }
- if (mlock_future_check(mm, vm_flags, len))
- return -EAGAIN;
+ if (mlock_future_check(mm, vm_flags, len)) {
+ addr = -EAGAIN;
+ goto unlock;
+ }
if (file) {
struct inode *inode = file_inode(file);
unsigned long flags_mask;
- if (!file_mmap_ok(file, inode, pgoff, len))
- return -EOVERFLOW;
+ if (!file_mmap_ok(file, inode, pgoff, len)) {
+ addr = -EOVERFLOW;
+ goto unlock;
+ }
flags_mask = LEGACY_MAP_MASK | file->f_op->mmap_supported_flags;
@@ -1465,27 +1477,37 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
flags &= LEGACY_MAP_MASK;
/* fall through */
case MAP_SHARED_VALIDATE:
- if (flags & ~flags_mask)
- return -EOPNOTSUPP;
+ if (flags & ~flags_mask) {
+ addr = -EOPNOTSUPP;
+ goto unlock;
+ }
if (prot & PROT_WRITE) {
- if (!(file->f_mode & FMODE_WRITE))
- return -EACCES;
- if (IS_SWAPFILE(file->f_mapping->host))
- return -ETXTBSY;
+ if (!(file->f_mode & FMODE_WRITE)) {
+ addr = -EACCES;
+ goto unlock;
+ }
+ if (IS_SWAPFILE(file->f_mapping->host)) {
+ addr = -ETXTBSY;
+ goto unlock;
+ }
}
/*
* Make sure we don't allow writing to an append-only
* file..
*/
- if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE))
- return -EACCES;
+ if (IS_APPEND(inode) && (file->f_mode & FMODE_WRITE)) {
+ addr = -EACCES;
+ goto unlock;
+ }
/*
* Make sure there are no mandatory locks on the file.
*/
- if (locks_verify_locked(file))
- return -EAGAIN;
+ if (locks_verify_locked(file)) {
+ addr = -EAGAIN;
+ goto unlock;
+ }
vm_flags |= VM_SHARED | VM_MAYSHARE;
if (!(file->f_mode & FMODE_WRITE))
@@ -1493,28 +1515,39 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
/* fall through */
case MAP_PRIVATE:
- if (!(file->f_mode & FMODE_READ))
- return -EACCES;
+ if (!(file->f_mode & FMODE_READ)) {
+ addr = -EACCES;
+ goto unlock;
+ }
if (path_noexec(&file->f_path)) {
- if (vm_flags & VM_EXEC)
- return -EPERM;
+ if (vm_flags & VM_EXEC) {
+ addr = -EPERM;
+ goto unlock;
+ }
vm_flags &= ~VM_MAYEXEC;
}
- if (!file->f_op->mmap)
- return -ENODEV;
- if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
- return -EINVAL;
+ if (!file->f_op->mmap) {
+ addr = -ENODEV;
+ goto unlock;
+ }
+ if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP)) {
+ addr = -EINVAL;
+ goto unlock;
+ }
break;
default:
- return -EINVAL;
+ addr = -EINVAL;
+ goto unlock;
}
} else {
switch (flags & MAP_TYPE) {
case MAP_SHARED:
- if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP))
- return -EINVAL;
+ if (vm_flags & (VM_GROWSDOWN|VM_GROWSUP)) {
+ addr = -EINVAL;
+ goto unlock;
+ }
/*
* Ignore pgoff.
*/
@@ -1528,7 +1561,8 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
pgoff = addr >> PAGE_SHIFT;
break;
default:
- return -EINVAL;
+ addr = -EINVAL;
+ goto unlock;
}
}
@@ -1551,6 +1585,10 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
((vm_flags & VM_LOCKED) ||
(flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE))
*populate = len;
+
+unlock:
+ if (!locked)
+ mm_write_unlock(mm);
return addr;
}
diff --git mm/nommu.c mm/nommu.c
index a2c2bf8d7676..7fb1db89d4f8 100644
--- mm/nommu.c
+++ mm/nommu.c
@@ -1107,6 +1107,7 @@ unsigned long do_mmap(struct file *file,
unsigned long *populate,
struct list_head *uf)
{
+ struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
struct vm_region *region;
struct rb_node *rb;
@@ -1115,12 +1116,18 @@ unsigned long do_mmap(struct file *file,
*populate = 0;
+ if (!locked && mm_write_lock_killable(mm))
+ return -EINTR;
+
/* decide whether we should attempt the mapping, and if so what sort of
* mapping */
ret = validate_mmap_request(file, addr, len, prot, flags, pgoff,
&capabilities);
- if (ret < 0)
+ if (ret < 0) {
+ if (!locked)
+ mm_write_unlock(mm);
return ret;
+ }
/* we ignore the address hint */
addr = 0;
@@ -1135,7 +1142,7 @@ unsigned long do_mmap(struct file *file,
if (!region)
goto error_getting_region;
- vma = vm_area_alloc(current->mm);
+ vma = vm_area_alloc(mm);
if (!vma)
goto error_getting_vma;
@@ -1289,6 +1296,8 @@ unsigned long do_mmap(struct file *file,
}
up_write(&nommu_region_sem);
+ if (!locked)
+ mm_write_unlock(mm);
return result;
@@ -1301,6 +1310,8 @@ unsigned long do_mmap(struct file *file,
if (vma->vm_file)
fput(vma->vm_file);
vm_area_free(vma);
+ if (!locked)
+ mm_write_unlock(mm);
return ret;
sharing_violation:
@@ -1314,12 +1325,16 @@ unsigned long do_mmap(struct file *file,
pr_warn("Allocation of vma for %lu byte allocation from process %d failed\n",
len, current->pid);
show_free_areas(0, NULL);
+ if (!locked)
+ mm_write_unlock(mm);
return -ENOMEM;
error_getting_region:
pr_warn("Allocation of vm region for %lu byte allocation from process %d failed\n",
len, current->pid);
show_free_areas(0, NULL);
+ if (!locked)
+ mm_write_unlock(mm);
return -ENOMEM;
}
--
2.25.0.341.g760bfbb309-goog
next prev parent reply other threads:[~2020-02-24 20:32 UTC|newest]
Thread overview: 28+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-02-24 20:30 [RFC PATCH 00/24] Fine grained MM locking Michel Lespinasse
2020-02-24 20:30 ` [RFC PATCH 01/24] MM locking API: initial implementation as rwsem wrappers Michel Lespinasse
2020-02-24 20:30 ` [RFC PATCH 02/24] MM locking API: use coccinelle to convert mmap_sem rwsem call sites Michel Lespinasse
2020-02-24 20:30 ` [RFC PATCH 03/24] MM locking API: manual conversion of mmap_sem call sites missed by coccinelle Michel Lespinasse
2020-02-24 20:30 ` [RFC PATCH 04/24] MM locking API: add range arguments Michel Lespinasse
2020-02-24 20:30 ` [RFC PATCH 05/24] MM locking API: allow for sleeping during unlock Michel Lespinasse
2020-02-24 20:30 ` [RFC PATCH 06/24] MM locking API: implement fine grained range locks Michel Lespinasse
2020-02-24 20:30 ` [RFC PATCH 07/24] mm/memory: add range field to struct vm_fault Michel Lespinasse
2020-02-24 20:30 ` [RFC PATCH 08/24] mm/memory: allow specifying MM lock range to handle_mm_fault() Michel Lespinasse
2020-02-24 20:30 ` [RFC PATCH 09/24] do_swap_page: use the vmf->range field when dropping mmap_sem Michel Lespinasse
2020-02-24 20:30 ` [RFC PATCH 10/24] handle_userfault: " Michel Lespinasse
2020-02-24 20:30 ` [RFC PATCH 11/24] x86 fault handler: merge bad_area() functions Michel Lespinasse
2020-02-24 20:30 ` [RFC PATCH 12/24] x86 fault handler: use an explicit MM lock range Michel Lespinasse
2020-02-24 20:30 ` [RFC PATCH 13/24] mm/memory: add prepare_mm_fault() function Michel Lespinasse
2020-02-24 20:30 ` [RFC PATCH 14/24] mm/swap_state: disable swap vma readahead Michel Lespinasse
2020-02-24 20:30 ` [RFC PATCH 15/24] x86 fault handler: use a pseudo-vma when operating on anonymous vmas Michel Lespinasse
2020-02-24 20:30 ` [RFC PATCH 16/24] MM locking API: add vma locking API Michel Lespinasse
2020-02-24 20:30 ` [RFC PATCH 17/24] x86 fault handler: implement range locking Michel Lespinasse
2020-02-24 20:30 ` [RFC PATCH 18/24] shared file mappings: use the vmf->range field when dropping mmap_sem Michel Lespinasse
2020-02-24 20:30 ` [RFC PATCH 19/24] mm: add field to annotate vm_operations that support range locking Michel Lespinasse
2020-02-24 20:30 ` [RFC PATCH 20/24] x86 fault handler: extend range locking to supported file vmas Michel Lespinasse
2020-02-24 20:30 ` [RFC PATCH 21/24] do_mmap: add locked argument Michel Lespinasse
2020-02-24 20:30 ` Michel Lespinasse [this message]
2020-02-24 20:30 ` [RFC PATCH 23/24] do_mmap: use locked=false in vm_mmap_pgoff() and aio_setup_ring() Michel Lespinasse
2020-02-24 20:30 ` [RFC PATCH 24/24] do_mmap: implement easiest cases of fine grained locking Michel Lespinasse
2022-03-20 22:08 ` [RFC PATCH 00/24] Fine grained MM locking Barry Song
2022-03-20 23:14 ` Matthew Wilcox
2022-03-21 0:20 ` Barry Song
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200224203057.162467-23-walken@google.com \
--to=walken@google.com \
--cc=Liam.Howlett@oracle.com \
--cc=akpm@linux-foundation.org \
--cc=dave@stgolabs.net \
--cc=jglisse@redhat.com \
--cc=ldufour@linux.ibm.com \
--cc=linux-mm@kvack.org \
--cc=peterz@infradead.org \
--cc=rientjes@google.com \
--cc=vbabka@suse.cz \
--cc=willy@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox