* [PATCH 01/11] RFP: new bitmask_trans in <linux/bitops.h>
From: Paolo 'Blaisorblade' Giarrusso @ 2007-03-31 0:35 UTC (permalink / raw)
To: Andrew Morton
Cc: mingo, linux-mm, Jeff Dike, Paolo 'Blaisorblade' Giarrusso
Generalize the _calc_vm_trans() macro, renaming it to bitmask_trans() and moving
it to <linux/bitops.h>, for subsequent use in remap_file_pages protection support.
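For illustration only (not part of the patch), here is how the macro collapses
to a single constant multiplication or division when both bit arguments are
compile-time single bits; SRC_BIT and DST_BIT are made up for the example:

/* Sketch: translate one single-bit flag into another without a branch. */
#define bitmask_trans(x, bit1, bit2) \
	((bit1) <= (bit2) ? ((x) & (bit1)) * ((bit2) / (bit1)) \
			  : ((x) & (bit1)) / ((bit1) / (bit2)))

#define SRC_BIT 0x04	/* hypothetical source flag */
#define DST_BIT 0x40	/* hypothetical destination flag */

static unsigned long translate(unsigned long flags)
{
	/* Expands to (flags & 0x04) * (0x40 / 0x04): returns 0x40 when
	 * SRC_BIT is set in flags, 0 otherwise - no conditional needed. */
	return bitmask_trans(flags, SRC_BIT, DST_BIT);
}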
Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
---
include/linux/bitops.h | 10 ++++++++++
include/linux/mman.h | 25 ++++++++-----------------
2 files changed, 18 insertions(+), 17 deletions(-)
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index 638165f..d8cdfd0 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -3,6 +3,16 @@
#include <asm/types.h>
/*
+ * Optimisation macro. It is equivalent to:
+ * (x & bit1) ? bit2 : 0
+ * but this version is faster.
+ * ("bit1" and "bit2" must be single bits)
+ */
+#define bitmask_trans(x, bit1, bit2) \
+ ((bit1) <= (bit2) ? ((x) & (bit1)) * ((bit2) / (bit1)) \
+ : ((x) & (bit1)) / ((bit1) / (bit2)))
+
+/*
* Include this here because some architectures need generic_ffs/fls in
* scope
*/
diff --git a/include/linux/mman.h b/include/linux/mman.h
index 87920a0..6ac90be 100644
--- a/include/linux/mman.h
+++ b/include/linux/mman.h
@@ -14,6 +14,7 @@
#include <linux/mm.h>
#include <asm/atomic.h>
+#include <linux/bitops.h>
extern int sysctl_overcommit_memory;
extern int sysctl_overcommit_ratio;
@@ -34,24 +35,14 @@ static inline void vm_unacct_memory(long pages)
}
/*
- * Optimisation macro. It is equivalent to:
- * (x & bit1) ? bit2 : 0
- * but this version is faster.
- * ("bit1" and "bit2" must be single bits)
- */
-#define _calc_vm_trans(x, bit1, bit2) \
- ((bit1) <= (bit2) ? ((x) & (bit1)) * ((bit2) / (bit1)) \
- : ((x) & (bit1)) / ((bit1) / (bit2)))
-
-/*
* Combine the mmap "prot" argument into "vm_flags" used internally.
*/
static inline unsigned long
calc_vm_prot_bits(unsigned long prot)
{
- return _calc_vm_trans(prot, PROT_READ, VM_READ ) |
- _calc_vm_trans(prot, PROT_WRITE, VM_WRITE) |
- _calc_vm_trans(prot, PROT_EXEC, VM_EXEC );
+ return bitmask_trans(prot, PROT_READ, VM_READ ) |
+ bitmask_trans(prot, PROT_WRITE, VM_WRITE) |
+ bitmask_trans(prot, PROT_EXEC, VM_EXEC );
}
/*
@@ -60,10 +51,10 @@ calc_vm_prot_bits(unsigned long prot)
static inline unsigned long
calc_vm_flag_bits(unsigned long flags)
{
- return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) |
- _calc_vm_trans(flags, MAP_DENYWRITE, VM_DENYWRITE ) |
- _calc_vm_trans(flags, MAP_EXECUTABLE, VM_EXECUTABLE) |
- _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED );
+ return bitmask_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) |
+ bitmask_trans(flags, MAP_DENYWRITE, VM_DENYWRITE ) |
+ bitmask_trans(flags, MAP_EXECUTABLE, VM_EXECUTABLE) |
+ bitmask_trans(flags, MAP_LOCKED, VM_LOCKED );
}
#endif /* __KERNEL__ */
#endif /* _LINUX_MMAN_H */
--
* [PATCH 02/11] RFP prot support: add needed macros
From: Paolo 'Blaisorblade' Giarrusso @ 2007-03-31 0:35 UTC (permalink / raw)
To: Andrew Morton
Cc: mingo, linux-mm, Jeff Dike, Paolo 'Blaisorblade' Giarrusso
Add generic versions of the pte_to_pgprot() and pgoff_prot_to_pte() macros, so
that they can be used safely while keeping the kernel compiling on every
architecture. Real definitions of the macros are provided for some
architectures later in the series.
Also, add the MAP_CHGPROT flag to all arch headers (it was MAP_NOINHERIT,
renamed at Hugh Dickins' suggestion).
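A hedged sketch of the intended calling convention from userspace (it assumes
the rest of this series, which actually honours the flag, is applied; the
remap_file_pages() prototype itself is unchanged):

#define _GNU_SOURCE
#include <sys/mman.h>

/* Remap one page of an existing shared mapping to new_pgoff and, thanks to
 * MAP_CHGPROT, give just that page PROT_READ instead of inheriting the
 * vma's protection.  Illustrative only. */
static int remap_page_readonly(void *addr, size_t pagesize, size_t new_pgoff)
{
	return remap_file_pages(addr, pagesize, PROT_READ, new_pgoff,
				MAP_CHGPROT);
}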
Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
---
include/asm-alpha/mman.h | 3 +++
include/asm-arm/mman.h | 3 +++
include/asm-arm26/mman.h | 3 +++
include/asm-cris/mman.h | 3 +++
include/asm-frv/mman.h | 3 +++
include/asm-generic/pgtable.h | 13 +++++++++++++
include/asm-h8300/mman.h | 3 +++
include/asm-i386/mman.h | 3 +++
include/asm-ia64/mman.h | 3 +++
include/asm-m32r/mman.h | 3 +++
include/asm-m68k/mman.h | 3 +++
include/asm-mips/mman.h | 3 +++
include/asm-parisc/mman.h | 3 +++
include/asm-powerpc/mman.h | 3 +++
include/asm-s390/mman.h | 3 +++
include/asm-sh/mman.h | 3 +++
include/asm-sparc/mman.h | 3 +++
include/asm-sparc64/mman.h | 3 +++
include/asm-x86_64/mman.h | 3 +++
include/asm-xtensa/mman.h | 3 +++
20 files changed, 70 insertions(+), 0 deletions(-)
diff --git a/include/asm-alpha/mman.h b/include/asm-alpha/mman.h
index 90d7c35..71c1d06 100644
--- a/include/asm-alpha/mman.h
+++ b/include/asm-alpha/mman.h
@@ -28,6 +28,9 @@
#define MAP_NORESERVE 0x10000 /* don't check for reservations */
#define MAP_POPULATE 0x20000 /* populate (prefault) pagetables */
#define MAP_NONBLOCK 0x40000 /* do not block on IO */
+#define MAP_CHGPROT 0x80000 /* don't inherit the protection bits of
+ the underlying vma, to be passed to
+ remap_file_pages() only */
#define MS_ASYNC 1 /* sync memory asynchronously */
#define MS_SYNC 2 /* synchronous memory sync */
diff --git a/include/asm-arm/mman.h b/include/asm-arm/mman.h
index 54570d2..a5b3c37 100644
--- a/include/asm-arm/mman.h
+++ b/include/asm-arm/mman.h
@@ -10,6 +10,9 @@
#define MAP_NORESERVE 0x4000 /* don't check for reservations */
#define MAP_POPULATE 0x8000 /* populate (prefault) page tables */
#define MAP_NONBLOCK 0x10000 /* do not block on IO */
+#define MAP_CHGPROT 0x20000 /* don't inherit the protection bits of
+ the underlying vma, to be passed to
+ remap_file_pages() only */
#define MCL_CURRENT 1 /* lock all current mappings */
#define MCL_FUTURE 2 /* lock all future mappings */
diff --git a/include/asm-arm26/mman.h b/include/asm-arm26/mman.h
index 4000a6c..de73e1b 100644
--- a/include/asm-arm26/mman.h
+++ b/include/asm-arm26/mman.h
@@ -10,6 +10,9 @@
#define MAP_NORESERVE 0x4000 /* don't check for reservations */
#define MAP_POPULATE 0x8000 /* populate (prefault) page tables */
#define MAP_NONBLOCK 0x10000 /* do not block on IO */
+#define MAP_CHGPROT 0x20000 /* don't inherit the protection bits of
+ the underlying vma, to be passed to
+ remap_file_pages() only */
#define MCL_CURRENT 1 /* lock all current mappings */
#define MCL_FUTURE 2 /* lock all future mappings */
diff --git a/include/asm-cris/mman.h b/include/asm-cris/mman.h
index 1c35e1b..a75ee61 100644
--- a/include/asm-cris/mman.h
+++ b/include/asm-cris/mman.h
@@ -12,6 +12,9 @@
#define MAP_NORESERVE 0x4000 /* don't check for reservations */
#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */
#define MAP_NONBLOCK 0x10000 /* do not block on IO */
+#define MAP_CHGPROT 0x20000 /* don't inherit the protection bits of
+ the underlying vma, to be passed to
+ remap_file_pages() only */
#define MCL_CURRENT 1 /* lock all current mappings */
#define MCL_FUTURE 2 /* lock all future mappings */
diff --git a/include/asm-frv/mman.h b/include/asm-frv/mman.h
index b4371e9..320816d 100644
--- a/include/asm-frv/mman.h
+++ b/include/asm-frv/mman.h
@@ -10,6 +10,9 @@
#define MAP_NORESERVE 0x4000 /* don't check for reservations */
#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */
#define MAP_NONBLOCK 0x10000 /* do not block on IO */
+#define MAP_CHGPROT 0x20000 /* don't inherit the protection bits of
+ the underlying vma, to be passed to
+ remap_file_pages() only */
#define MCL_CURRENT 1 /* lock all current mappings */
#define MCL_FUTURE 2 /* lock all future mappings */
diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h
index 00c2343..a8ac660 100644
--- a/include/asm-generic/pgtable.h
+++ b/include/asm-generic/pgtable.h
@@ -263,4 +263,17 @@ static inline int pmd_none_or_clear_bad(pmd_t *pmd)
}
#endif /* !__ASSEMBLY__ */
+#ifndef __HAVE_ARCH_PTE_TO_PGPROT
+/* Wrappers for architectures which don't yet support page protections for
+ * remap_file_pages. */
+
+/* Dummy define - if the architecture has no special support, access is denied
+ * in VM_MANYPROTS vma's. */
+#define pte_to_pgprot(pte) __P000
+#define pte_file_to_pgprot(pte) __P000
+
+#define pgoff_prot_to_pte(off, prot) pgoff_to_pte(off)
+
+#endif
+
#endif /* _ASM_GENERIC_PGTABLE_H */
diff --git a/include/asm-h8300/mman.h b/include/asm-h8300/mman.h
index b9f104f..3ae27ca 100644
--- a/include/asm-h8300/mman.h
+++ b/include/asm-h8300/mman.h
@@ -10,6 +10,9 @@
#define MAP_NORESERVE 0x4000 /* don't check for reservations */
#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */
#define MAP_NONBLOCK 0x10000 /* do not block on IO */
+#define MAP_CHGPROT 0x20000 /* don't inherit the protection bits of
+ the underlying vma, to be passed to
+ remap_file_pages() only */
#define MCL_CURRENT 1 /* lock all current mappings */
#define MCL_FUTURE 2 /* lock all future mappings */
diff --git a/include/asm-i386/mman.h b/include/asm-i386/mman.h
index 8fd9d7a..182452b 100644
--- a/include/asm-i386/mman.h
+++ b/include/asm-i386/mman.h
@@ -10,6 +10,9 @@
#define MAP_NORESERVE 0x4000 /* don't check for reservations */
#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */
#define MAP_NONBLOCK 0x10000 /* do not block on IO */
+#define MAP_CHGPROT 0x20000 /* don't inherit the protection bits of
+ the underlying vma, to be passed to
+ remap_file_pages() only */
#define MCL_CURRENT 1 /* lock all current mappings */
#define MCL_FUTURE 2 /* lock all future mappings */
diff --git a/include/asm-ia64/mman.h b/include/asm-ia64/mman.h
index c73b878..81a9cff 100644
--- a/include/asm-ia64/mman.h
+++ b/include/asm-ia64/mman.h
@@ -18,6 +18,9 @@
#define MAP_NORESERVE 0x04000 /* don't check for reservations */
#define MAP_POPULATE 0x08000 /* populate (prefault) pagetables */
#define MAP_NONBLOCK 0x10000 /* do not block on IO */
+#define MAP_CHGPROT 0x20000 /* don't inherit the protection bits of
+ the underlying vma, to be passed to
+ remap_file_pages() only */
#define MCL_CURRENT 1 /* lock all current mappings */
#define MCL_FUTURE 2 /* lock all future mappings */
diff --git a/include/asm-m32r/mman.h b/include/asm-m32r/mman.h
index 516a897..3d3f4fd 100644
--- a/include/asm-m32r/mman.h
+++ b/include/asm-m32r/mman.h
@@ -10,6 +10,9 @@
#define MAP_NORESERVE 0x4000 /* don't check for reservations */
#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */
#define MAP_NONBLOCK 0x10000 /* do not block on IO */
+#define MAP_CHGPROT 0x20000 /* don't inherit the protection bits of
+ the underlying vma, to be passed to
+ remap_file_pages() only */
#define MCL_CURRENT 1 /* lock all current mappings */
#define MCL_FUTURE 2 /* lock all future mappings */
diff --git a/include/asm-m68k/mman.h b/include/asm-m68k/mman.h
index 1626d37..2ff6ae2 100644
--- a/include/asm-m68k/mman.h
+++ b/include/asm-m68k/mman.h
@@ -10,6 +10,9 @@
#define MAP_NORESERVE 0x4000 /* don't check for reservations */
#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */
#define MAP_NONBLOCK 0x10000 /* do not block on IO */
+#define MAP_CHGPROT 0x20000 /* don't inherit the protection bits of
+ the underlying vma, to be passed to
+ remap_file_pages() only */
#define MCL_CURRENT 1 /* lock all current mappings */
#define MCL_FUTURE 2 /* lock all future mappings */
diff --git a/include/asm-mips/mman.h b/include/asm-mips/mman.h
index e4d6f1f..a60e657 100644
--- a/include/asm-mips/mman.h
+++ b/include/asm-mips/mman.h
@@ -46,6 +46,9 @@
#define MAP_LOCKED 0x8000 /* pages are locked */
#define MAP_POPULATE 0x10000 /* populate (prefault) pagetables */
#define MAP_NONBLOCK 0x20000 /* do not block on IO */
+#define MAP_CHGPROT 0x40000 /* don't inherit the protection bits of
+ the underlying vma, to be passed to
+ remap_file_pages() only */
/*
* Flags for msync
diff --git a/include/asm-parisc/mman.h b/include/asm-parisc/mman.h
index defe752..8fb8080 100644
--- a/include/asm-parisc/mman.h
+++ b/include/asm-parisc/mman.h
@@ -22,6 +22,9 @@
#define MAP_GROWSDOWN 0x8000 /* stack-like segment */
#define MAP_POPULATE 0x10000 /* populate (prefault) pagetables */
#define MAP_NONBLOCK 0x20000 /* do not block on IO */
+#define MAP_CHGPROT 0x40000 /* don't inherit the protection bits of
+ the underlying vma, to be passed to
+ remap_file_pages() only */
#define MS_SYNC 1 /* synchronous memory sync */
#define MS_ASYNC 2 /* sync memory asynchronously */
diff --git a/include/asm-powerpc/mman.h b/include/asm-powerpc/mman.h
index 24cf664..56a76bb 100644
--- a/include/asm-powerpc/mman.h
+++ b/include/asm-powerpc/mman.h
@@ -23,5 +23,8 @@
#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */
#define MAP_NONBLOCK 0x10000 /* do not block on IO */
+#define MAP_CHGPROT 0x20000 /* don't inherit the protection bits of
+ the underlying vma, to be passed to
+ remap_file_pages() only */
#endif /* _ASM_POWERPC_MMAN_H */
diff --git a/include/asm-s390/mman.h b/include/asm-s390/mman.h
index 7839767..a8ff4f4 100644
--- a/include/asm-s390/mman.h
+++ b/include/asm-s390/mman.h
@@ -18,6 +18,9 @@
#define MAP_NORESERVE 0x4000 /* don't check for reservations */
#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */
#define MAP_NONBLOCK 0x10000 /* do not block on IO */
+#define MAP_CHGPROT 0x20000 /* don't inherit the protection bits of
+ the underlying vma, to be passed to
+ remap_file_pages() only */
#define MCL_CURRENT 1 /* lock all current mappings */
#define MCL_FUTURE 2 /* lock all future mappings */
diff --git a/include/asm-sh/mman.h b/include/asm-sh/mman.h
index 156eb02..29eb229 100644
--- a/include/asm-sh/mman.h
+++ b/include/asm-sh/mman.h
@@ -10,6 +10,9 @@
#define MAP_NORESERVE 0x4000 /* don't check for reservations */
#define MAP_POPULATE 0x8000 /* populate (prefault) page tables */
#define MAP_NONBLOCK 0x10000 /* do not block on IO */
+#define MAP_CHGPROT 0x20000 /* don't inherit the protection bits of
+ the underlying vma, to be passed to
+ remap_file_pages() only */
#define MCL_CURRENT 1 /* lock all current mappings */
#define MCL_FUTURE 2 /* lock all future mappings */
diff --git a/include/asm-sparc/mman.h b/include/asm-sparc/mman.h
index b7dc40b..2ac052e 100644
--- a/include/asm-sparc/mman.h
+++ b/include/asm-sparc/mman.h
@@ -21,6 +21,9 @@
#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */
#define MAP_NONBLOCK 0x10000 /* do not block on IO */
+#define MAP_CHGPROT 0x20000 /* don't inherit the protection bits of
+ the underlying vma, to be passed to
+ remap_file_pages() only */
/* XXX Need to add flags to SunOS's mctl, mlockall, and madvise system
* XXX calls.
diff --git a/include/asm-sparc64/mman.h b/include/asm-sparc64/mman.h
index 8cc1860..ae3e438 100644
--- a/include/asm-sparc64/mman.h
+++ b/include/asm-sparc64/mman.h
@@ -21,6 +21,9 @@
#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */
#define MAP_NONBLOCK 0x10000 /* do not block on IO */
+#define MAP_CHGPROT 0x20000 /* don't inherit the protection bits of
+ the underlying vma, to be passed to
+ remap_file_pages() only */
/* XXX Need to add flags to SunOS's mctl, mlockall, and madvise system
* XXX calls.
diff --git a/include/asm-x86_64/mman.h b/include/asm-x86_64/mman.h
index dd5cb05..ceffa4f 100644
--- a/include/asm-x86_64/mman.h
+++ b/include/asm-x86_64/mman.h
@@ -12,6 +12,9 @@
#define MAP_NORESERVE 0x4000 /* don't check for reservations */
#define MAP_POPULATE 0x8000 /* populate (prefault) pagetables */
#define MAP_NONBLOCK 0x10000 /* do not block on IO */
+#define MAP_CHGPROT 0x20000 /* don't inherit the protection bits of
+ the underlying vma, to be passed to
+ remap_file_pages() only */
#define MCL_CURRENT 1 /* lock all current mappings */
#define MCL_FUTURE 2 /* lock all future mappings */
diff --git a/include/asm-xtensa/mman.h b/include/asm-xtensa/mman.h
index 9b92620..a8435d7 100644
--- a/include/asm-xtensa/mman.h
+++ b/include/asm-xtensa/mman.h
@@ -53,6 +53,9 @@
#define MAP_LOCKED 0x8000 /* pages are locked */
#define MAP_POPULATE 0x10000 /* populate (prefault) pagetables */
#define MAP_NONBLOCK 0x20000 /* do not block on IO */
+#define MAP_CHGPROT 0x40000 /* don't inherit the protection bits of
+ the underlying vma, to be passed to
+ remap_file_pages() only */
/*
* Flags for msync
--
* [PATCH 03/11] RFP prot support: handle MANYPROTS VMAs
From: Paolo 'Blaisorblade' Giarrusso @ 2007-03-31 0:35 UTC (permalink / raw)
To: Andrew Morton
Cc: mingo, linux-mm, Jeff Dike, Paolo 'Blaisorblade' Giarrusso
Handle the possible existence of VM_MANYPROTS vmas, without actually creating
them.
* Replace old uses of pgoff_to_pte with pgoff_prot_to_pte.
* Introduce the flag, use it to read permissions from the PTE rather than from
the VMA flags.
* Replace the linear_page_index() check with save_nonlinear_pte(), which
encapsulates the check.
2.6.14+ updates:
* Add VM_MANYPROTS among cases needing copying of PTE at fork time rather than
faulting.
* check for VM_MANYPROTS in do_file_pte before complaining for pte_file PTE
* check for VM_MANYPROTS in *_populate, when we skip installing pte_file PTE's
for linear areas
Below is a long explanation of why I've added VM_MANYPROTS rather than simply
overloading VM_NONLINEAR. You can freely skip it if you have real work to
do :-).
However, this patch is sufficient only if VM_MANYPROTS vmas are also marked as
nonlinear; otherwise further changes are needed.
I've implemented both solutions, but I'm sending full support only for the easy
case; I may reintroduce the other changes afterwards, in particular because
they're needed to make this useful for general usage beyond UML.
*) remap_file_pages protection support: add VM_MANYPROTS to fix existing usage of mprotect()
Distinguish between "normal" VMA and VMA with variable protection, by
adding the VM_MANYPROTS flag. This is needed for various reasons:
* notify the arch fault handlers that they must not check VMA protection for
giving SIGSEGV
* fixing regression of mprotect() on !VM_MANYPROTS mappings (see below)
* (in next patches) giving a sensible behaviour to mprotect on VM_MANYPROTS
mappings
* (theoretical, rejected) avoid regression in max file offset with r_f_p() for
older mappings; we could use either the old offset encoding or the new
offset-prot encoding depending on this flag.
It's trivial to do; I just don't know whether existing apps would overflow
the new limits. They go down from 2TB to 1TB on i386 and to 512GB on PPC, and
from 256GB to 128GB on s390/31-bit. However, this was rejected by a comment on
an earlier iteration of this patch, on the grounds that such applications
should have moved to 64-bit anyway.
* (possible feature) on MAP_PRIVATE mappings, especially when they are
read-only, we can easily support VM_MANYPROTS. This has been explicitly
requested by Ulrich Drepper for DSO handling - creating a PROT_NONE VMA for
guard pages is bad, and it is worse when you have a binary with 100 DSOs, or a
program with a great many threads - Ulrich profiled a workload where the
RB-tree lookup function is a performance bottleneck.
In fact, without this flag we would indeed have a regression of
remap_file_pages vs. mprotect on uniform nonlinear VMAs:
mprotect alters the VMA prots and walks each present PTE, ignoring installed
ones even when pte_file() is set; their saved prots will be restored on faults,
ignoring the VMA ones and losing the mprotect() on them. So, in do_file_page(),
we must still restore the VMA prots when the VMA is uniform, as we used to do
before this series of patches.
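To summarise the rule this patch implements in do_nonlinear_fault(), a
simplified sketch (choose_fault_pgprot() is a made-up helper name; the real
code does this inline):

/* On a nonlinear fault, take the protection from the saved file pte when
 * the vma has per-page protections, from the vma itself otherwise. */
static pgprot_t choose_fault_pgprot(struct vm_area_struct *vma, pte_t orig_pte)
{
	if (vma->vm_flags & VM_MANYPROTS)
		return pte_file_to_pgprot(orig_pte);	/* per-page prots */
	return vma->vm_page_prot;			/* uniform vma prots */
}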
Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
---
include/linux/mm.h | 7 +++++++
include/linux/pagemap.h | 22 ++++++++++++++++++++++
mm/fremap.c | 4 ++--
mm/memory.c | 41 +++++++++++++++++++++++++++++------------
mm/rmap.c | 3 +--
5 files changed, 61 insertions(+), 16 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bcea993..1959d9b 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -168,7 +168,14 @@ extern int do_mprotect(unsigned long start, size_t len, unsigned long prot);
#define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */
#define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */
#define VM_NONLINEAR 0x00800000 /* Is non-linear (remap_file_pages) */
+
+#ifndef CONFIG_MMU
#define VM_MAPPED_COPY 0x01000000 /* T if mapped copy of data (nommu mmap) */
+#else
+#define VM_MANYPROTS 0x01000000 /* The VM individual pages have
+ different protections
+ (remap_file_pages)*/
+#endif
#define VM_INSERTPAGE 0x02000000 /* The vma has had "vm_insert_page()" done on it */
#define VM_ALWAYSDUMP 0x04000000 /* Always include in core dumps */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 10b96cc..acd10e8 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -155,6 +155,28 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
return pgoff >> (PAGE_CACHE_SHIFT - PAGE_SHIFT);
}
+/***
+ * Checks if the PTE is nonlinear, and if yes sets it.
+ * @vma: the VMA in which @addr is; we don't check if it's VM_NONLINEAR, just
+ * if this PTE is nonlinear.
+ * @addr: the addr which @pte refers to.
+ * @pte: the old PTE value (to read its protections).
+ * @ptep: the PTE pointer (for setting it).
+ * @mm: passed to set_pte_at.
+ * @page: the page which was installed (to read its ->index, i.e. the old
+ * offset inside the file).
+ */
+static inline void save_nonlinear_pte(pte_t pte, pte_t * ptep, struct
+ vm_area_struct *vma, struct mm_struct *mm, struct page* page,
+ unsigned long addr)
+{
+ pgprot_t pgprot = pte_to_pgprot(pte);
+ if (linear_page_index(vma, addr) != page->index ||
+ pgprot_val(pgprot) != pgprot_val(vma->vm_page_prot))
+ set_pte_at(mm, addr, ptep, pgoff_prot_to_pte(page->index,
+ pgprot));
+}
+
extern void FASTCALL(__lock_page(struct page *page));
extern void FASTCALL(__lock_page_nosync(struct page *page));
extern void FASTCALL(unlock_page(struct page *page));
diff --git a/mm/fremap.c b/mm/fremap.c
index 5f50d73..f571674 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -51,7 +51,7 @@ static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma,
* previously existing mapping.
*/
static int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long addr, unsigned long pgoff, pgprot_t prot)
+ unsigned long addr, unsigned long pgoff, pgprot_t pgprot)
{
int err = -ENOMEM;
pte_t *pte;
@@ -64,7 +64,7 @@ static int install_file_pte(struct mm_struct *mm, struct vm_area_struct *vma,
if (!pte_none(*pte))
zap_pte(mm, vma, addr, pte);
- set_pte_at(mm, addr, pte, pgoff_to_pte(pgoff));
+ set_pte_at(mm, addr, pte, pgoff_prot_to_pte(pgoff, pgprot));
/*
* We don't need to run update_mmu_cache() here because the "file pte"
* being installed by install_file_pte() is not a real pte - it's a
diff --git a/mm/memory.c b/mm/memory.c
index 57559a5..577b8bc 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -597,7 +597,8 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm,
* readonly mappings. The tradeoff is that copy_page_range is more
* efficient than faulting.
*/
- if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_PFNMAP|VM_INSERTPAGE))) {
+ if (!(vma->vm_flags & (VM_HUGETLB|VM_NONLINEAR|VM_MANYPROTS|
+ VM_PFNMAP|VM_INSERTPAGE))) {
if (!vma->anon_vma)
return 0;
}
@@ -667,11 +668,11 @@ static unsigned long zap_pte_range(struct mmu_gather *tlb,
tlb_remove_tlb_entry(tlb, pte, addr);
if (unlikely(!page))
continue;
- if (unlikely(details) && details->nonlinear_vma
- && linear_page_index(details->nonlinear_vma,
- addr) != page->index)
- set_pte_at(mm, addr, pte,
- pgoff_to_pte(page->index));
+ if (unlikely(details) && details->nonlinear_vma) {
+ save_nonlinear_pte(ptent, pte,
+ details->nonlinear_vma,
+ mm, page, addr);
+ }
if (PageAnon(page))
anon_rss--;
else {
@@ -2213,10 +2214,14 @@ oom:
* We enter with non-exclusive mmap_sem (to exclude vma changes,
* but allow concurrent faults), and pte mapped but not yet locked.
* We return with mmap_sem still held, but pte unmapped and unlocked.
+ *
+ * __do_fault_pgprot allows specifying also page protection for VM_MANYPROTS
+ * vmas.
*/
-static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+static int __do_fault_pgprot(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd,
- pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
+ pgoff_t pgoff, pgprot_t pgprot, unsigned int flags,
+ pte_t orig_pte)
{
spinlock_t *ptl;
struct page *page, *faulted_page;
@@ -2307,7 +2312,7 @@ static int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
/* Only go through if we didn't race with anybody else... */
if (likely(pte_same(*page_table, orig_pte))) {
flush_icache_page(vma, page);
- entry = mk_pte(page, vma->vm_page_prot);
+ entry = mk_pte(page, pgprot);
if (flags & FAULT_FLAG_WRITE)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
set_pte_at(mm, address, page_table, entry);
@@ -2348,6 +2353,15 @@ out:
return fdata.type;
}
+static inline int __do_fault(struct mm_struct *mm, struct vm_area_struct *vma,
+ unsigned long address, pte_t *page_table, pmd_t *pmd,
+ pgoff_t pgoff, unsigned int flags, pte_t orig_pte)
+{
+ return __do_fault_pgprot(mm, vma, address, page_table, pmd, pgoff,
+ vma->vm_page_prot, flags, orig_pte);
+
+}
+
static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd,
int write_access, pte_t orig_pte)
@@ -2377,11 +2391,12 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned int flags = FAULT_FLAG_NONLINEAR |
(write_access ? FAULT_FLAG_WRITE : 0);
pgoff_t pgoff;
+ pgprot_t pgprot;
if (!pte_unmap_same(mm, pmd, page_table, orig_pte))
return VM_FAULT_MINOR;
- if (unlikely(!(vma->vm_flags & VM_NONLINEAR) ||
+ if (unlikely(!(vma->vm_flags & (VM_NONLINEAR | VM_MANYPROTS)) ||
!(vma->vm_flags & VM_CAN_NONLINEAR))) {
/*
* Page table corrupted: show pte and kill process.
@@ -2391,9 +2406,11 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
}
pgoff = pte_to_pgoff(orig_pte);
+ pgprot = (vma->vm_flags & VM_MANYPROTS) ? pte_file_to_pgprot(orig_pte) :
+ vma->vm_page_prot;
- return __do_fault(mm, vma, address, page_table, pmd, pgoff,
- flags, orig_pte);
+ return __do_fault_pgprot(mm, vma, address, page_table, pmd, pgoff,
+ pgprot, flags, orig_pte);
}
/*
diff --git a/mm/rmap.c b/mm/rmap.c
index 31d758d..63cd875 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -831,8 +831,7 @@ static void try_to_unmap_cluster(unsigned long cursor,
pteval = ptep_clear_flush(vma, address, pte);
/* If nonlinear, store the file page offset in the pte. */
- if (page->index != linear_page_index(vma, address))
- set_pte_at(mm, address, pte, pgoff_to_pte(page->index));
+ save_nonlinear_pte(pteval, pte, vma, mm, page, address);
/* Move the dirty bit to the physical page now the pte is gone. */
if (pte_dirty(pteval))
--
* [PATCH 04/11] RFP prot support: disallow mprotect() on manyprots mappings
From: Paolo 'Blaisorblade' Giarrusso @ 2007-03-31 0:35 UTC (permalink / raw)
To: Andrew Morton
Cc: mingo, linux-mm, Jeff Dike, Paolo 'Blaisorblade' Giarrusso
For now we (Hugh and I) have found no agreement on which behaviour to
implement here, so, at least as a stop-gap, return an error.
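The user-visible effect, as a hedged sketch (it assumes a later patch in the
series actually marks a vma VM_MANYPROTS when MAP_CHGPROT is used):

#define _GNU_SOURCE
#include <sys/mman.h>
#include <errno.h>

/* Once part of a mapping has been remapped with MAP_CHGPROT, the vma is
 * non-uniform, and with this patch a later mprotect() on it is refused
 * with EACCES rather than silently fighting the per-page protections. */
static int demo(void *addr, size_t pagesize)
{
	if (remap_file_pages(addr, pagesize, PROT_READ, 0, MAP_CHGPROT))
		return -1;
	if (mprotect(addr, pagesize, PROT_READ | PROT_WRITE) == 0)
		return -1;	/* unexpected: should have been refused */
	return errno == EACCES ? 0 : -1;
}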
Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
---
mm/mprotect.c | 7 +++++++
1 files changed, 7 insertions(+), 0 deletions(-)
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 07f04fa..f372c20 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -251,6 +251,13 @@ int do_mprotect(unsigned long start, size_t len, unsigned long prot)
error = -ENOMEM;
if (!vma)
goto out;
+
+ /* If a need is felt, an appropriate behaviour may be implemented for
+ * this case. We haven't agreed yet on which behavior is appropriate. */
+ error = -EACCES;
+ if (vma->vm_flags & VM_MANYPROTS)
+ goto out;
+
if (unlikely(grows & PROT_GROWSDOWN)) {
if (vma->vm_start >= end)
goto out;
--
* [PATCH 05/11] RFP prot support: introduce FAULT_SIGSEGV for protection checking
From: Paolo 'Blaisorblade' Giarrusso, Ingo Molnar @ 2007-03-31 0:35 UTC (permalink / raw)
To: Andrew Morton
Cc: mingo, linux-mm, Jeff Dike, Paolo 'Blaisorblade' Giarrusso
This is the most intrusive patch, but it couldn't be reduced much, not even by
limiting the protection support to the bare minimum for UML (and thus I left
the interface generic).
The arch handler used to check the protection flags itself. But when the VMA
found is non-uniform, the vma->vm_flags protection flags do not matter (except
for pages not yet faulted in), so this case is handled by do_file_page() by
checking the page tables.
So, we change the prototype of __handle_mm_fault() to inform it of the access
kind (read/write/exec).
handle_mm_fault() keeps its API, but gains the new VM_FAULT_SIGSEGV return value.
=== Issue (trivial changes to do in every arch):
This value should be handled in every arch-specific fault handler. An arch
which doesn't handle it can get spurious BUG/oom killings, but _only_ when the
new functionality is used.
=== Implementation and tradeoff notes:
FIXME:
* I've made do_no_page() fault in pages with their *exact* permissions
for non-uniform VMAs. The change was here, in do_no_page():
- if (write_access)
+ if (write_access || (vma->vm_flags & VM_MANYPROTS))
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
Actually, the code already behaves this way for shared vmas, since vma->vm_page_prot
is (supposed to be) already writable when the VMA is. I hope this holds across
all arches.
NOTE: I've just discovered this does not hold when vma_wants_writenotify() is
true, i.e. on file mappings (at least on my system; since backing_device_info
is involved, I'm not sure it holds everywhere).
However: this does not matter for my uses because the default protection is
MAP_NONE for UML, and because we only need this for tmpfs.
It doesn't matter for Oracle, because when VM_MANYPROTS is not set,
maybe_mkwrite_file() will still set the page r/w.
So, currently, the above change is not applied.
However, for future possible handling of private mappings, this may be
needed again.
* For checking, we simply reuse the standard protection_map, by creating a
pte_t value with the vma->vm_page_prot protection and testing directly
pte_{read,write,exec} on it.
I use the physical frame number "0" to create the PTE. I assume that pfn_pte()
and the access macros will work anyway. If this is invalid for any arch, let
me know.
Changes are included for the i386, x86_64 and UML handlers.
This breaks get_user_pages(force = 1) (i.e. PTRACE_POKETEXT, access_process_vm())
on write-protected VM_MANYPROTS areas. The next patch fixes that.
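For reference, a hedged sketch of the pattern every arch fault handler now has
to follow (the local names here are made up; the i386, x86_64 and UML hunks
below are the real examples): keep calling handle_mm_fault() with the write
flag, but handle the new return value.

/* Simplified arch-handler skeleton: VM_FAULT_SIGSEGV now means the access
 * violated the per-page protections of a VM_MANYPROTS vma, so the task
 * must get a SIGSEGV instead of a BUG() or an OOM kill. */
static int fault_sketch(struct mm_struct *mm, struct vm_area_struct *vma,
			unsigned long address, int is_write)
{
	switch (handle_mm_fault(mm, vma, address, is_write)) {
	case VM_FAULT_MINOR:
	case VM_FAULT_MAJOR:
		return 0;		/* fault serviced */
	case VM_FAULT_SIGSEGV:
		return -EFAULT;		/* deliver SIGSEGV */
	case VM_FAULT_SIGBUS:
		return -EACCES;		/* deliver SIGBUS */
	case VM_FAULT_OOM:
	default:
		return -ENOMEM;		/* go to the OOM path */
	}
}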
Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
---
arch/i386/mm/fault.c | 10 +++++++
arch/um/kernel/trap.c | 10 ++++++-
arch/x86_64/mm/fault.c | 13 ++++++++-
include/linux/mm.h | 36 ++++++++++++++++++++----
mm/memory.c | 71 +++++++++++++++++++++++++++++++++++++++++++++---
5 files changed, 127 insertions(+), 13 deletions(-)
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
index 2368a77..8c02945 100644
--- a/arch/i386/mm/fault.c
+++ b/arch/i386/mm/fault.c
@@ -400,6 +400,14 @@ fastcall void __kprobes do_page_fault(struct pt_regs *regs,
good_area:
si_code = SEGV_ACCERR;
write = 0;
+
+ /* If the PTE is not present, the vma protections are not accurate if
+ * VM_MANYPROTS; present PTEs are correct for VM_MANYPROTS. */
+ if (unlikely(vma->vm_flags & VM_MANYPROTS)) {
+ write = error_code & 2;
+ goto survive;
+ }
+
switch (error_code & 3) {
default: /* 3: write, present */
/* fall through */
@@ -432,6 +440,8 @@ good_area:
goto do_sigbus;
case VM_FAULT_OOM:
goto out_of_memory;
+ case VM_FAULT_SIGSEGV:
+ goto bad_area;
default:
BUG();
}
diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c
index 2de81d4..cb7eb33 100644
--- a/arch/um/kernel/trap.c
+++ b/arch/um/kernel/trap.c
@@ -68,6 +68,11 @@ int handle_page_fault(unsigned long address, unsigned long ip,
good_area:
*code_out = SEGV_ACCERR;
+ /* If the PTE is not present, the vma protections are not accurate if
+ * VM_MANYPROTS; present PTEs are correct for VM_MANYPROTS. */
+ if (unlikely(vma->vm_flags & VM_MANYPROTS))
+ goto survive;
+
if(is_write && !(vma->vm_flags & VM_WRITE))
goto out;
@@ -77,7 +82,7 @@ good_area:
do {
survive:
- switch (handle_mm_fault(mm, vma, address, is_write)){
+ switch (handle_mm_fault(mm, vma, address, is_write)) {
case VM_FAULT_MINOR:
current->min_flt++;
break;
@@ -87,6 +92,9 @@ survive:
case VM_FAULT_SIGBUS:
err = -EACCES;
goto out;
+ case VM_FAULT_SIGSEGV:
+ err = -EFAULT;
+ goto out;
case VM_FAULT_OOM:
err = -ENOMEM;
goto out_of_memory;
diff --git a/arch/x86_64/mm/fault.c b/arch/x86_64/mm/fault.c
index 2728a50..e3a0906 100644
--- a/arch/x86_64/mm/fault.c
+++ b/arch/x86_64/mm/fault.c
@@ -429,6 +429,12 @@ asmlinkage void __kprobes do_page_fault(struct pt_regs *regs,
good_area:
info.si_code = SEGV_ACCERR;
write = 0;
+
+ if (unlikely(vma->vm_flags & VM_MANYPROTS)) {
+ write = error_code & PF_WRITE;
+ goto handle_fault;
+ }
+
switch (error_code & (PF_PROT|PF_WRITE)) {
default: /* 3: write, present */
/* fall through */
@@ -444,6 +450,7 @@ good_area:
goto bad_area;
}
+handle_fault:
/*
* If for any reason at all we couldn't handle the fault,
* make sure we exit gracefully rather than endlessly redo
@@ -458,8 +465,12 @@ good_area:
break;
case VM_FAULT_SIGBUS:
goto do_sigbus;
- default:
+ case VM_FAULT_OOM:
goto out_of_memory;
+ case VM_FAULT_SIGSEGV:
+ goto bad_area;
+ default:
+ BUG();
}
up_read(&mm->mmap_sem);
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 1959d9b..53a7793 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -673,10 +673,11 @@ static inline int page_mapped(struct page *page)
* Used to decide whether a process gets delivered SIGBUS or
* just gets major/minor fault counters bumped up.
*/
-#define VM_FAULT_OOM 0x00
-#define VM_FAULT_SIGBUS 0x01
-#define VM_FAULT_MINOR 0x02
-#define VM_FAULT_MAJOR 0x03
+#define VM_FAULT_OOM 0x00
+#define VM_FAULT_SIGBUS 0x01
+#define VM_FAULT_MINOR 0x02
+#define VM_FAULT_MAJOR 0x03
+#define VM_FAULT_SIGSEGV 0x04
/*
* Special case for get_user_pages.
@@ -774,15 +775,38 @@ static inline void unmap_shared_mapping_range(struct address_space *mapping,
extern int vmtruncate(struct inode * inode, loff_t offset);
extern int vmtruncate_range(struct inode * inode, loff_t offset, loff_t end);
+/* Fault Types: give information on the needed protection. */
+#define FT_READ 1
+#define FT_WRITE 2
+#define FT_EXEC 4
+#define FT_FORCE 8
+#define FT_MASK (FT_READ|FT_WRITE|FT_EXEC|FT_FORCE)
+
#ifdef CONFIG_MMU
+
+/* We use FT_READ, FT_WRITE and (optionally) FT_EXEC for the @access_mask, to
+ * report the kind of access we request for permission checking, in case the VMA
+ * is VM_MANYPROTS.
+ *
+ * get_user_pages( force == 1 ) is a special case. It's allowed to override
+ * protection checks, even on VM_MANYPROTS vma.
+ *
+ * To express that, you must add FT_FORCE to the FT_READ / FT_WRITE flags.
+ * You (get_user_pages) are expected to check yourself for the presence of
+ * VM_MAYREAD/VM_MAYWRITE flags on the vma itself.
+ *
+ * This allows to force copying COW pages to break sharing even on read-only
+ * page table entries.
+ */
+
extern int __handle_mm_fault(struct mm_struct *mm,struct vm_area_struct *vma,
- unsigned long address, int write_access);
+ unsigned long address, unsigned int access_mask);
static inline int handle_mm_fault(struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long address,
int write_access)
{
- return __handle_mm_fault(mm, vma, address, write_access) &
+ return __handle_mm_fault(mm, vma, address, write_access ? FT_WRITE : FT_READ) &
(~VM_FAULT_WRITE);
}
#else
diff --git a/mm/memory.c b/mm/memory.c
index 577b8bc..d66c8ca 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -977,6 +977,7 @@ no_page_table:
return page;
}
+/* Return number of faulted-in pages. */
int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
unsigned long start, int len, int write, int force,
struct page **pages, struct vm_area_struct **vmas)
@@ -1080,6 +1081,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
case VM_FAULT_MAJOR:
tsk->maj_flt++;
break;
+ case VM_FAULT_SIGSEGV:
case VM_FAULT_SIGBUS:
return i ? i : -EFAULT;
case VM_FAULT_OOM:
@@ -2312,6 +2314,8 @@ static int __do_fault_pgprot(struct mm_struct *mm, struct vm_area_struct *vma,
/* Only go through if we didn't race with anybody else... */
if (likely(pte_same(*page_table, orig_pte))) {
flush_icache_page(vma, page);
+ /* This already sets the PTE to be rw if appropriate, except for
+ * private COW pages. */
entry = mk_pte(page, pgprot);
if (flags & FAULT_FLAG_WRITE)
entry = maybe_mkwrite(pte_mkdirty(entry), vma);
@@ -2374,7 +2378,6 @@ static int do_linear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
flags, orig_pte);
}
-
/*
* Fault of a previously existing named mapping. Repopulate the pte
* from the encoded file_pte if possible. This enables swappable
@@ -2413,6 +2416,40 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
pgprot, flags, orig_pte);
}
+/* Are the permissions of this PTE insufficient to satisfy the fault described
+ * in access_mask? */
+static inline int insufficient_perms(pte_t pte, int access_mask)
+{
+ if (unlikely(access_mask & FT_FORCE))
+ return 0;
+
+ if ((access_mask & FT_WRITE) && !pte_write(pte))
+ goto err;
+ if ((access_mask & FT_READ) && !pte_read(pte))
+ goto err;
+ if ((access_mask & FT_EXEC) && !pte_exec(pte))
+ goto err;
+ return 0;
+err:
+ return 1;
+}
+
+static inline int insufficient_vma_perms(struct vm_area_struct * vma, int access_mask)
+{
+ if (unlikely(vma->vm_flags & VM_MANYPROTS)) {
+ /*
+ * we used to check protections in arch handler, but with
+ * VM_MANYPROTS, and only with it, the check is skipped.
+ * access_mask contains the type of the access, vm_flags are the
+ * declared protections, pte has the protection which will be
+ * given to the PTE's in that area.
+ */
+ pte_t pte = pfn_pte(0UL, vma->vm_page_prot);
+ return insufficient_perms(pte, access_mask);
+ }
+ return 0;
+}
+
/*
* These routines also need to handle stuff like marking pages dirty
* and/or accessed for architectures that don't do it in hardware (most
@@ -2428,14 +2465,21 @@ static int do_nonlinear_fault(struct mm_struct *mm, struct vm_area_struct *vma,
*/
static inline int handle_pte_fault(struct mm_struct *mm,
struct vm_area_struct *vma, unsigned long address,
- pte_t *pte, pmd_t *pmd, int write_access)
+ pte_t *pte, pmd_t *pmd, int access_mask)
{
pte_t entry;
pte_t old_entry;
spinlock_t *ptl;
+ int write_access = access_mask & FT_WRITE;
old_entry = entry = *pte;
if (!pte_present(entry)) {
+ /* when pte_file(), the VMA protections are useless. Otherwise,
+ * we need to check VM_MANYPROTS, because in that case the arch
+ * fault handler skips the VMA protection check. */
+ if (!pte_file(entry) && unlikely(insufficient_vma_perms(vma, access_mask)))
+ goto segv;
+
if (pte_none(entry)) {
if (vma->vm_ops) {
if (vma->vm_ops->fault || vma->vm_ops->nopage)
@@ -2456,6 +2500,16 @@ static inline int handle_pte_fault(struct mm_struct *mm,
spin_lock(ptl);
if (unlikely(!pte_same(*pte, entry)))
goto unlock;
+
+ /* VM_MANYPROTS vma's have PTE's always installed with the correct
+ * protection, so if we got a fault on a present PTE we're in trouble.
+ * However, the pte_present() may simply be the result of a race
+ * condition with another thread having already fixed the fault. So go
+ * the slow way. */
+ if (unlikely(vma->vm_flags & VM_MANYPROTS) &&
+ unlikely(insufficient_perms(entry, access_mask)))
+ goto segv_unlock;
+
if (write_access) {
if (!pte_write(entry))
return do_wp_page(mm, vma, address,
@@ -2480,13 +2534,18 @@ static inline int handle_pte_fault(struct mm_struct *mm,
unlock:
pte_unmap_unlock(pte, ptl);
return VM_FAULT_MINOR;
+
+segv_unlock:
+ pte_unmap_unlock(pte, ptl);
+segv:
+ return VM_FAULT_SIGSEGV;
}
/*
* By the time we get here, we already hold the mm semaphore
*/
int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long address, int write_access)
+ unsigned long address, unsigned int access_mask)
{
pgd_t *pgd;
pud_t *pud;
@@ -2497,8 +2556,10 @@ int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
count_vm_event(PGFAULT);
+ WARN_ON(access_mask & ~FT_MASK);
+
if (unlikely(is_vm_hugetlb_page(vma)))
- return hugetlb_fault(mm, vma, address, write_access);
+ return hugetlb_fault(mm, vma, address, access_mask & FT_WRITE);
if (unlikely(vma->vm_flags & VM_REVOKED))
return VM_FAULT_SIGBUS;
@@ -2514,7 +2575,7 @@ int __handle_mm_fault(struct mm_struct *mm, struct vm_area_struct *vma,
if (!pte)
return VM_FAULT_OOM;
- return handle_pte_fault(mm, vma, address, pte, pmd, write_access);
+ return handle_pte_fault(mm, vma, address, pte, pmd, access_mask);
}
EXPORT_SYMBOL_GPL(__handle_mm_fault);
--
* [PATCH 06/11] RFP prot support: fix get_user_pages() on VM_MANYPROTS vmas
From: Paolo 'Blaisorblade' Giarrusso @ 2007-03-31 0:35 UTC (permalink / raw)
To: Andrew Morton; +Cc: mingo, linux-mm, Jeff Dike
*Non unit-tested patch* - I've not written a test case to verify the
functionality of ptrace on a VM_MANYPROTS area.
get_user_pages may well call __handle_mm_fault() wanting to override protections,
so in this case __handle_mm_fault() should still avoid checking VM access rights.
Also, get_user_pages() may give write faults on present readonly PTEs in
VM_MANYPROTS areas (think of PTRACE_POKETEXT), so we must still do do_wp_page
even on VM_MANYPROTS areas.
So, possibly use VM_MAYWRITE and/or VM_MAYREAD in the access_mask and check
VM_MANYPROTS in maybe_mkwrite_file (new variant of maybe_mkwrite).
API note: many flag combinations can be constructed which make no sense, yet
the code interprets them quite freely too.
For instance VM_MAYREAD|VM_WRITE is interpreted as VM_MAYWRITE|VM_WRITE.
This is fixed in the next patch (to be merged here).
====
pte_to_pgprot is to be used only with encoded PTEs.
This is needed since pte_to_pgprot now makes heavy changes to the pte: it looks
for _PAGE_FILE_PROTNONE and translates it to _PAGE_PROTNONE.
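A minimal sketch of the kind of test case mentioned above (hypothetical, not
part of the patch): a tracer poking a word into a read-only page of the tracee
goes through access_process_vm() -> get_user_pages(force = 1), which with this
patch passes FT_FORCE and is therefore still allowed to break COW even on a
write-protected VM_MANYPROTS page.

#include <sys/types.h>
#include <sys/ptrace.h>

/* Patch one word of the (read-only) text of a stopped tracee. */
static int poke_text(pid_t pid, void *addr, long word)
{
	return ptrace(PTRACE_POKETEXT, pid, addr, (void *)word) ? -1 : 0;
}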
---
mm/memory.c | 36 +++++++++++++++++++++++++++++-------
1 files changed, 29 insertions(+), 7 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index d66c8ca..8572033 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -984,6 +984,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
{
int i;
unsigned int vm_flags;
+ int ft_flags;
/*
* Require read or write permissions.
@@ -991,6 +992,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
*/
vm_flags = write ? (VM_WRITE | VM_MAYWRITE) : (VM_READ | VM_MAYREAD);
vm_flags &= force ? (VM_MAYREAD | VM_MAYWRITE) : (VM_READ | VM_WRITE);
+ ft_flags = (write ? FT_WRITE : FT_READ) | (force ? FT_FORCE : 0);
i = 0;
do {
@@ -1057,22 +1059,25 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,
do {
struct page *page;
- if (write)
+ if (write) {
foll_flags |= FOLL_WRITE;
+ ft_flags |= FT_WRITE;
+ }
cond_resched();
while (!(page = follow_page(vma, start, foll_flags))) {
int ret;
- ret = __handle_mm_fault(mm, vma, start,
- foll_flags & FOLL_WRITE);
+ ret = __handle_mm_fault(mm, vma, start, ft_flags);
/*
* The VM_FAULT_WRITE bit tells us that do_wp_page has
* broken COW when necessary, even if maybe_mkwrite
* decided not to set pte_write. We can thus safely do
* subsequent page lookups as if they were reads.
*/
- if (ret & VM_FAULT_WRITE)
+ if (ret & VM_FAULT_WRITE) {
foll_flags &= ~FOLL_WRITE;
+ ft_flags &= ~FT_WRITE;
+ }
switch (ret & ~VM_FAULT_WRITE) {
case VM_FAULT_MINOR:
@@ -1486,7 +1491,20 @@ static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd,
* servicing faults for write access. In the normal case, do always want
* pte_mkwrite. But get_user_pages can cause write faults for mappings
* that do not have writing enabled, when used by access_process_vm.
+ *
+ * Also, we must never change protections on VM_MANYPROTS pages; that's only
+ * allowed in do_no_page(), so test only VMA protections there. For other cases
+ * we *know* that VM_MANYPROTS is clear, such as anonymous/swap pages, and in
+ * that case using plain maybe_mkwrite() is an optimization.
+ * Instead, when we may be mapping a file, we must use maybe_mkwrite_file.
*/
+static inline pte_t maybe_mkwrite_file(pte_t pte, struct vm_area_struct *vma)
+{
+ if (likely((vma->vm_flags & (VM_WRITE | VM_MANYPROTS)) == VM_WRITE))
+ pte = pte_mkwrite(pte);
+ return pte;
+}
+
static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma)
{
if (likely(vma->vm_flags & VM_WRITE))
@@ -1539,6 +1557,9 @@ static inline void cow_user_page(struct page *dst, struct page *src, unsigned lo
* We enter with non-exclusive mmap_sem (to exclude vma changes,
* but allow concurrent faults), with pte both mapped and locked.
* We return with mmap_sem still held, but pte unmapped and unlocked.
+ *
+ * Note that a page here can be a shared readonly page where
+ * get_user_pages() (for instance for ptrace()) wants to write to it!
*/
static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
unsigned long address, pte_t *page_table, pmd_t *pmd,
@@ -1604,7 +1625,8 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
if (reuse) {
flush_cache_page(vma, address, pte_pfn(orig_pte));
entry = pte_mkyoung(orig_pte);
- entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+ /* Since it can be shared, it can be VM_MANYPROTS! */
+ entry = maybe_mkwrite_file(pte_mkdirty(entry), vma);
ptep_set_access_flags(vma, address, page_table, entry, 1);
update_mmu_cache(vma, address, entry);
lazy_mmu_prot_update(entry);
@@ -1647,7 +1669,7 @@ gotten:
inc_mm_counter(mm, anon_rss);
flush_cache_page(vma, address, pte_pfn(orig_pte));
entry = mk_pte(new_page, vma->vm_page_prot);
- entry = maybe_mkwrite(pte_mkdirty(entry), vma);
+ entry = maybe_mkwrite_file(pte_mkdirty(entry), vma);
lazy_mmu_prot_update(entry);
/*
* Clear the pte entry and flush it first, before updating the
@@ -2109,7 +2131,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
inc_mm_counter(mm, anon_rss);
pte = mk_pte(page, vma->vm_page_prot);
if (write_access && can_share_swap_page(page)) {
- pte = maybe_mkwrite(pte_mkdirty(pte), vma);
+ pte = maybe_mkwrite_file(pte_mkdirty(pte), vma);
write_access = 0;
}
--
* [PATCH 07/11] RFP prot support: uml, i386, x64 bits
From: Paolo 'Blaisorblade' Giarrusso, Ingo Molnar @ 2007-03-31 0:35 UTC (permalink / raw)
To: Andrew Morton
Cc: mingo, linux-mm, Jeff Dike, Paolo 'Blaisorblade' Giarrusso
Various boilerplate stuff.
Update pte encoding macros for UML, i386 and x86-64.
*) remap_file_pages protection support: improvement for UML bits
Recover one bit by additionally using _PAGE_NEWPROT. Since I wasn't sure this
would work, I had split this out, but it has worked well: we rely on the fact
that pte_newprot() always checks first whether the PTE is marked present. It
is folded in here because, beyond making sense, it worked well during the unit
testing I performed.
========
RFP: Avoid using _PAGE_PROTNONE
For i386, x86_64, UML:
To encode a pte_file PROTNONE pte we need to use another bit for our purposes,
since _PAGE_PROTNONE makes pte_present() return true and such a pte actually
still references a page. Implement this.
* Add _PAGE_FILE_PROTNONE, the bit describe above.
* Add pgprot_access_bits() to each arch, to extract the value of the
protection bits (i.e. _PAGE_RW and _PAGE_PROTNONE), encode them (translating
_PAGE_PROTNONE to _PAGE_FILE_PROTNONE), and use it in pgoff_prot_to_pte() (see
the sketch after this list).
* Modify pte_to_pgprot() to do the inverse translation.
* Modify pte_to_pgoff() and pgoff_prot_to_pte() to leave alone the newly used
bit (for 32-bit PTEs).
* Join for UML and x86 pte_to_pgprot() for 2level and 3level page tables, since
they were identical.
* Decrease by 1 PTE_FILE_MAX_BITS where appropriate.
* Also replace in bit operations + with | where appropriate.
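As a sanity check of the i386 2-level layout, an illustrative userspace-style
sketch (the _PAGE_* values are re-declared here only for the example, matching
the hunks below):

#define _PAGE_RW		0x002
#define _PAGE_FILE		0x040
#define _PAGE_PROTNONE		0x080
#define _PAGE_FILE_PROTNONE	0x100

/* Mirror of pgoff_prot_to_pte()/pte_to_pgoff() on 2-level i386: the offset
 * is split around the reserved low bits (0, 1, 6, 7 and 8). */
static unsigned long encode(unsigned long off, unsigned long access_bits)
{
	return ((off & 0xf) << 2) | ((off >> 4) << 9) | access_bits | _PAGE_FILE;
}

static unsigned long decode_off(unsigned long pte_low)
{
	return ((pte_low >> 2) & 0xf) | ((pte_low >> 9) << 4);
}

/* For any 27-bit off, decode_off(encode(off, _PAGE_RW)) == off; the access
 * bits (_PAGE_RW, _PAGE_FILE_PROTNONE) survive without clobbering it. */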
Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
---
include/asm-i386/pgtable-2level.h | 11 ++++++-----
include/asm-i386/pgtable-3level.h | 7 +++++--
include/asm-i386/pgtable.h | 24 ++++++++++++++++++++++++
include/asm-um/pgtable-2level.h | 16 ++++++++++++----
include/asm-um/pgtable-3level.h | 21 ++++++++++++++-------
include/asm-um/pgtable.h | 21 +++++++++++++++++++++
include/asm-x86_64/pgtable.h | 29 +++++++++++++++++++++++++++--
7 files changed, 109 insertions(+), 20 deletions(-)
diff --git a/include/asm-i386/pgtable-2level.h b/include/asm-i386/pgtable-2level.h
index 38c3fcc..31f1d3b 100644
--- a/include/asm-i386/pgtable-2level.h
+++ b/include/asm-i386/pgtable-2level.h
@@ -48,16 +48,17 @@ static inline int pte_exec_kernel(pte_t pte)
}
/*
- * Bits 0, 6 and 7 are taken, split up the 29 bits of offset
+ * Bits 0, 1, 6, 7 and 8 are taken, split up the 27 bits of offset
* into this range:
*/
-#define PTE_FILE_MAX_BITS 29
+#define PTE_FILE_MAX_BITS 27
#define pte_to_pgoff(pte) \
- ((((pte).pte_low >> 1) & 0x1f ) + (((pte).pte_low >> 8) << 5 ))
+ ((((pte).pte_low >> 2) & 0xf ) | (((pte).pte_low >> 9) << 4 ))
-#define pgoff_to_pte(off) \
- ((pte_t) { (((off) & 0x1f) << 1) + (((off) >> 5) << 8) + _PAGE_FILE })
+#define pgoff_prot_to_pte(off, prot) \
+ ((pte_t) { (((off) & 0xf) << 2) | (((off) >> 4) << 9) | \
+ pgprot_access_bits(prot) | _PAGE_FILE })
/* Encode and de-code a swap entry */
#define __swp_type(x) (((x).val >> 1) & 0x1f)
diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h
index 7a2318f..aa4ba07 100644
--- a/include/asm-i386/pgtable-3level.h
+++ b/include/asm-i386/pgtable-3level.h
@@ -171,11 +171,14 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
}
/*
- * Bits 0, 6 and 7 are taken in the low part of the pte,
+ * Bits 0, 1, 6, 7 and 8 are taken in the low part of the pte,
* put the 32 bits of offset into the high part.
*/
#define pte_to_pgoff(pte) ((pte).pte_high)
-#define pgoff_to_pte(off) ((pte_t) { _PAGE_FILE, (off) })
+
+#define pgoff_prot_to_pte(off, prot) \
+ ((pte_t) { _PAGE_FILE | pgprot_access_bits(prot), (off) })
+
#define PTE_FILE_MAX_BITS 32
/* Encode and de-code a swap entry */
diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h
index d36b241..ed10cf4 100644
--- a/include/asm-i386/pgtable.h
+++ b/include/asm-i386/pgtable.h
@@ -14,6 +14,7 @@
#ifndef __ASSEMBLY__
#include <asm/processor.h>
#include <asm/fixmap.h>
+#include <linux/bitops.h>
#include <linux/threads.h>
#include <asm/paravirt.h>
@@ -100,8 +101,10 @@ void paging_init(void);
#define _PAGE_BIT_PCD 4
#define _PAGE_BIT_ACCESSED 5
#define _PAGE_BIT_DIRTY 6
+#define _PAGE_BIT_PROTNONE 6
#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page, Pentium+, if present.. */
#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */
+#define _PAGE_BIT_FILE_PROTNONE 8
#define _PAGE_BIT_UNUSED1 9 /* available for programmer */
#define _PAGE_BIT_UNUSED2 10
#define _PAGE_BIT_UNUSED3 11
@@ -124,6 +127,27 @@ void paging_init(void);
#define _PAGE_FILE 0x040 /* nonlinear file mapping, saved PTE; unset:swap */
#define _PAGE_PROTNONE 0x080 /* if the user mapped it with PROT_NONE;
pte_present gives true */
+#define _PAGE_FILE_PROTNONE 0x100 /* indicate that the page is remapped
+ with PROT_NONE - this is different
+ from _PAGE_PROTNONE as no page is
+ held here, so pte_present() is false
+ */
+
+/* Extracts _PAGE_RW and _PAGE_PROTNONE and replace the latter with
+ * _PAGE_FILE_PROTNONE. */
+#define pgprot_access_bits(prot) \
+ ((pgprot_val(prot) & _PAGE_RW) | \
+ bitmask_trans(pgprot_val(prot), _PAGE_PROTNONE, _PAGE_FILE_PROTNONE))
+
+#define __HAVE_ARCH_PTE_TO_PGPROT
+#define pte_to_pgprot(pte) \
+ __pgprot(((pte).pte_low & (_PAGE_RW|_PAGE_PROTNONE)))
+
+#define pte_file_to_pgprot(pte) \
+ __pgprot(((pte).pte_low & _PAGE_RW) | _PAGE_ACCESSED | \
+ (((pte).pte_low & _PAGE_FILE_PROTNONE) ? _PAGE_PROTNONE : \
+ (_PAGE_USER | _PAGE_PRESENT)))
+
#ifdef CONFIG_X86_PAE
#define _PAGE_NX (1ULL<<_PAGE_BIT_NX)
#else
diff --git a/include/asm-um/pgtable-2level.h b/include/asm-um/pgtable-2level.h
index 172a75f..23e1750 100644
--- a/include/asm-um/pgtable-2level.h
+++ b/include/asm-um/pgtable-2level.h
@@ -45,12 +45,20 @@ static inline void pgd_mkuptodate(pgd_t pgd) { }
((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
/*
- * Bits 0 through 4 are taken
+ * Bits 0, 1, 3 to 5 and 8 are taken, split up the 26 bits of offset
+ * into this range:
*/
-#define PTE_FILE_MAX_BITS 27
+#define PTE_FILE_MAX_BITS 26
-#define pte_to_pgoff(pte) (pte_val(pte) >> 5)
+#define pte_to_pgoff(pte) (((pte_val(pte) >> 2) & 0x1) | \
+ (((pte_val(pte) >> 6) & 0x3) << 1) | \
+ ((pte_val(pte) >> 9) << 3))
-#define pgoff_to_pte(off) ((pte_t) { ((off) << 5) + _PAGE_FILE })
+#define pgoff_prot_to_pte(off, prot) \
+ __pte((((off) & 0x1) << 2) | ((((off) & 0x7) >> 1) << 6) | \
+ ((off >> 3) << 9) | pgprot_access_bits(prot) | _PAGE_FILE)
+
+/* For pte_file_to_pgprot definition only */
+#define __pte_low(pte) pte_val(pte)
#endif
diff --git a/include/asm-um/pgtable-3level.h b/include/asm-um/pgtable-3level.h
index ca0c2a9..0444dc4 100644
--- a/include/asm-um/pgtable-3level.h
+++ b/include/asm-um/pgtable-3level.h
@@ -102,25 +102,32 @@ static inline pmd_t pfn_pmd(pfn_t page_nr, pgprot_t pgprot)
}
/*
- * Bits 0 through 3 are taken in the low part of the pte,
+ * Bits 0 through 5 are taken in the low part of the pte,
* put the 32 bits of offset into the high part.
*/
#define PTE_FILE_MAX_BITS 32
-#ifdef CONFIG_64BIT
-#define pte_to_pgoff(p) ((p).pte >> 32)
+#ifdef CONFIG_64BIT
-#define pgoff_to_pte(off) ((pte_t) { ((off) << 32) | _PAGE_FILE })
+/* For pte_file_to_pgprot definition only */
+#define __pte_low(pte) pte_val(pte)
+#define __pte_high(pte) (pte_val(pte) >> 32)
+#define __build_pte(low, high) ((pte_t) { (high) << 32 | (low)})
#else
-#define pte_to_pgoff(pte) ((pte).pte_high)
-
-#define pgoff_to_pte(off) ((pte_t) { _PAGE_FILE, (off) })
+/* Don't use pte_val below, useless to join the two halves */
+#define __pte_low(pte) ((pte).pte_low)
+#define __pte_high(pte) ((pte).pte_high)
+#define __build_pte(low, high) ((pte_t) {(low), (high)})
#endif
+#define pte_to_pgoff(pte) __pte_high(pte)
+#define pgoff_prot_to_pte(off, prot) \
+ __build_pte(_PAGE_FILE | pgprot_access_bits(prot), (off))
+
#endif
/*
diff --git a/include/asm-um/pgtable.h b/include/asm-um/pgtable.h
index 1b1090a..9ff1ca7 100644
--- a/include/asm-um/pgtable.h
+++ b/include/asm-um/pgtable.h
@@ -10,6 +10,7 @@
#include "linux/sched.h"
#include "linux/linkage.h"
+#include "linux/bitops.h"
#include "asm/processor.h"
#include "asm/page.h"
#include "asm/fixmap.h"
@@ -25,6 +26,17 @@
#define _PAGE_FILE 0x008 /* nonlinear file mapping, saved PTE; unset:swap */
#define _PAGE_PROTNONE 0x010 /* if the user mapped it with PROT_NONE;
pte_present gives true */
+#define _PAGE_FILE_PROTNONE 0x100 /* indicate that the page is remapped
+ with PROT_NONE - this is different
+ from _PAGE_PROTNONE as no page is
+ held here, so pte_present() is false
+ */
+
+/* Extracts _PAGE_RW and _PAGE_PROTNONE and replaces the latter with
+ * _PAGE_FILE_PROTNONE. */
+#define pgprot_access_bits(prot) \
+ ((pgprot_val(prot) & _PAGE_RW) | \
+ bitmask_trans(pgprot_val(prot), _PAGE_PROTNONE, _PAGE_FILE_PROTNONE))
#ifdef CONFIG_3_LEVEL_PGTABLES
#include "asm/pgtable-3level.h"
@@ -32,6 +44,14 @@
#include "asm/pgtable-2level.h"
#endif
+#define pte_to_pgprot(pte) \
+ __pgprot((__pte_low(pte) & (_PAGE_RW|_PAGE_PROTNONE)))
+
+#define pte_file_to_pgprot(pte) \
+ __pgprot((__pte_low(pte) & _PAGE_RW) | _PAGE_ACCESSED | \
+ ((__pte_low(pte) & _PAGE_FILE_PROTNONE) ? _PAGE_PROTNONE : \
+ (_PAGE_USER | _PAGE_PRESENT)))
+
extern pgd_t swapper_pg_dir[PTRS_PER_PGD];
extern void *um_virt_to_phys(struct task_struct *task, unsigned long virt,
@@ -404,6 +424,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
#define kern_addr_valid(addr) (1)
+#define __HAVE_ARCH_PTE_TO_PGPROT
#include <asm-generic/pgtable.h>
#include <asm-generic/pgtable-nopud.h>
diff --git a/include/asm-x86_64/pgtable.h b/include/asm-x86_64/pgtable.h
index 599993f..9dad0dd 100644
--- a/include/asm-x86_64/pgtable.h
+++ b/include/asm-x86_64/pgtable.h
@@ -9,7 +9,7 @@
* the x86-64 page table tree.
*/
#include <asm/processor.h>
-#include <asm/bitops.h>
+#include <linux/bitops.h>
#include <linux/threads.h>
#include <asm/pda.h>
@@ -150,6 +150,7 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long
#define _PAGE_BIT_DIRTY 6
#define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */
#define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */
+#define _PAGE_BIT_FILE_PROTNONE 8
#define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */
#define _PAGE_PRESENT 0x001
@@ -164,6 +165,12 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long
#define _PAGE_GLOBAL 0x100 /* Global TLB entry */
#define _PAGE_PROTNONE 0x080 /* If not present */
+#define _PAGE_FILE_PROTNONE 0x100 /* indicate that the page is remapped
+ with PROT_NONE - this is different
+ from _PAGE_PROTNONE as no page is
+ held here, so pte_present() is false
+ */
+
#define _PAGE_NX (_AC(1,UL)<<_PAGE_BIT_NX)
#define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | _PAGE_ACCESSED | _PAGE_DIRTY)
@@ -357,9 +364,26 @@ static inline int pmd_large(pmd_t pte) {
#define pmd_pfn(x) ((pmd_val(x) & __PHYSICAL_MASK) >> PAGE_SHIFT)
#define pte_to_pgoff(pte) ((pte_val(pte) & PHYSICAL_PAGE_MASK) >> PAGE_SHIFT)
-#define pgoff_to_pte(off) ((pte_t) { ((off) << PAGE_SHIFT) | _PAGE_FILE })
#define PTE_FILE_MAX_BITS __PHYSICAL_MASK_SHIFT
+#define pte_to_pgprot(pte) \
+ __pgprot((pte_val(pte) & (_PAGE_RW|_PAGE_PROTNONE)))
+
+#define pte_file_to_pgprot(pte) \
+ __pgprot((pte_val(pte) & _PAGE_RW) | _PAGE_ACCESSED | \
+ ((pte_val(pte) & _PAGE_FILE_PROTNONE) ? _PAGE_PROTNONE : \
+ (_PAGE_USER | _PAGE_PRESENT)))
+
+/* Extracts _PAGE_RW and _PAGE_PROTNONE and replaces the latter with
+ * _PAGE_FILE_PROTNONE. */
+#define pgprot_access_bits(prot) \
+ ((pgprot_val(prot) & _PAGE_RW) | \
+ bitmask_trans(pgprot_val(prot), _PAGE_PROTNONE, _PAGE_FILE_PROTNONE))
+
+#define pgoff_prot_to_pte(off, prot) \
+ ((pte_t) { _PAGE_FILE | pgprot_access_bits(prot) | ((off) << PAGE_SHIFT) })
+
+
/* PTE - Level 1 access. */
/* page, protection -> pte */
@@ -441,6 +465,7 @@ extern int kern_addr_valid(unsigned long addr);
#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
#define __HAVE_ARCH_PTEP_SET_WRPROTECT
#define __HAVE_ARCH_PTE_SAME
+#define __HAVE_ARCH_PTE_TO_PGPROT
#include <asm-generic/pgtable.h>
#endif /* !__ASSEMBLY__ */
--
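As an aside, the new 2-level i386 encoding above (offset bits [3:0] in pte bits 5..2, the remaining 23 bits in pte bits 31..9, with bits 0, 1, 6, 7 and 8 left free for _PAGE_PRESENT/_PAGE_RW/_PAGE_FILE/_PAGE_PROTNONE/_PAGE_FILE_PROTNONE) can be modelled in userspace as a quick sanity check. This is only an illustrative sketch: the helper names are invented, and only the constants mirror the patch.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

/* Constants mirroring the 2-level i386 values used in this patch. */
#define _PAGE_RW            0x002
#define _PAGE_FILE          0x040
#define _PAGE_FILE_PROTNONE 0x100

/* Model of pgoff_prot_to_pte(): offset bits [3:0] go to pte bits [5:2],
 * offset bits [26:4] go to pte bits [31:9]. */
static uint32_t encode(uint32_t off, uint32_t access_bits)
{
	return ((off & 0xf) << 2) | ((off >> 4) << 9) | access_bits | _PAGE_FILE;
}

/* Model of pte_to_pgoff(). */
static uint32_t decode(uint32_t pte_low)
{
	return ((pte_low >> 2) & 0xf) | ((pte_low >> 9) << 4);
}

int main(void)
{
	uint32_t off = (1u << 27) - 1;	/* largest offset PTE_FILE_MAX_BITS allows */
	uint32_t pte = encode(off, _PAGE_RW | _PAGE_FILE_PROTNONE);

	assert(decode(pte) == off);
	assert(pte & _PAGE_FILE);
	printf("off=%#x pte=%#x: round-trips\n", (unsigned)off, (unsigned)pte);
	return 0;
}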
* [PATCH 08/11] Fix comment about remap_file_pages
2007-03-31 0:35 [PATCH 00/11] remap_file_pages protection support Paolo 'Blaisorblade' Giarrusso
` (6 preceding siblings ...)
2007-03-31 0:35 ` [PATCH 07/11] RFP prot support: uml, i386, x64 bits Paolo 'Blaisorblade' Giarrusso, Paolo 'Blaisorblade' Giarrusso, Ingo Molnar
@ 2007-03-31 0:35 ` Paolo 'Blaisorblade' Giarrusso
2007-03-31 0:35 ` [PATCH 09/11] RFP prot support: enhance syscall interface Paolo 'Blaisorblade' Giarrusso, Ingo Molnar, Paolo 'Blaisorblade' Giarrusso
` (2 subsequent siblings)
10 siblings, 0 replies; 12+ messages in thread
From: Paolo 'Blaisorblade' Giarrusso @ 2007-03-31 0:35 UTC (permalink / raw)
To: Andrew Morton
Cc: mingo, linux-mm, Jeff Dike, Paolo 'Blaisorblade' Giarrusso
This comment is unclear and also stale, so fix it. Thanks to Hugh Dickins
for explaining to me what it really referred to, and for correcting my first fix.
Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
---
mm/fremap.c | 7 ++++---
1 files changed, 4 insertions(+), 3 deletions(-)
diff --git a/mm/fremap.c b/mm/fremap.c
index f571674..6cb2cc5 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -200,9 +200,10 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
}
/*
- * We can't clear VM_NONLINEAR because we'd have to do
- * it after ->populate completes, and that would prevent
- * downgrading the lock. (Locks can't be upgraded).
+ * We would like to clear VM_NONLINEAR, in the case when
+ * sys_remap_file_pages covers the whole vma, so making
+ * it linear again. But cannot do so until after a
+ * successful populate, and have no way to upgrade sem.
*/
out:
--
* [PATCH 09/11] RFP prot support: enhance syscall interface
2007-03-31 0:35 [PATCH 00/11] remap_file_pages protection support Paolo 'Blaisorblade' Giarrusso
` (7 preceding siblings ...)
2007-03-31 0:35 ` [PATCH 08/11] Fix comment about remap_file_pages Paolo 'Blaisorblade' Giarrusso
@ 2007-03-31 0:35 ` Paolo 'Blaisorblade' Giarrusso, Ingo Molnar, Paolo 'Blaisorblade' Giarrusso
2007-03-31 0:36 ` [PATCH 10/11] RFP prot support: support private vma for MAP_POPULATE Paolo 'Blaisorblade' Giarrusso, Ingo Molnar
2007-03-31 0:36 ` [PATCH 11/11] RFP prot support: also set VM_NONLINEAR on nonuniform VMAs Paolo 'Blaisorblade' Giarrusso
10 siblings, 0 replies; 12+ messages in thread
From: Paolo 'Blaisorblade' Giarrusso, Ingo Molnar, Paolo 'Blaisorblade' Giarrusso @ 2007-03-31 0:35 UTC (permalink / raw)
To: Andrew Morton
Cc: mingo, linux-mm, Jeff Dike, Paolo 'Blaisorblade' Giarrusso
Enable the 'prot' parameter for shared-writable mappings (the primary target
of remap_file_pages), without breaking up the vma.
This contains only the changes to the syscall code, based on Ingo's patch.
Unlike his version, I have *not* added a new syscall; instead I add a new
flag (MAP_CHGPROT) which the application must pass to get the new behavior
(prot != 0 is then accepted, and prot == 0 means PROT_NONE).
Following Hugh's suggestion, simplify the permission checking on the VMA by
reusing mprotect()'s trick.
RFP prot support: cleanup syscall checks
From: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
*) remap_file_pages protection support: use EOVERFLOW ret code
Use -EOVERFLOW ("Value too large for defined data type") rather than -EINVAL
when we cannot store the file offset in the PTE.
Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
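For illustration only, a userspace caller of the extended interface might look like the sketch below; MAP_CHGPROT is assumed to be exported to userspace by the earlier patches in this series, and the function name and minimal error handling are made up:

#define _GNU_SOURCE
#include <sys/mman.h>
#include <unistd.h>

/* Sketch: map a file shared-writable, then drop the first page to
 * read-only via remap_file_pages() without splitting the vma.
 * prot is only honoured together with MAP_CHGPROT; passing prot == 0
 * with MAP_CHGPROT would mean PROT_NONE. */
static int make_first_page_readonly(int fd, size_t len)
{
	char *base = mmap(NULL, len, PROT_READ | PROT_WRITE,
			  MAP_SHARED, fd, 0);
	if (base == MAP_FAILED)
		return -1;

	return remap_file_pages(base, getpagesize(), PROT_READ,
				0 /* pgoff: keep the page where it is */,
				MAP_CHGPROT);
}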
---
mm/fremap.c | 52 ++++++++++++++++++++++++++++++++++++++++------------
1 files changed, 40 insertions(+), 12 deletions(-)
diff --git a/mm/fremap.c b/mm/fremap.c
index 6cb2cc5..b1a4c34 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -4,6 +4,10 @@
* Explicit pagetable population and nonlinear (random) mappings support.
*
* started by Ingo Molnar, Copyright (C) 2002, 2003
+ *
+ * support of nonuniform remappings:
+ * Copyright (C) 2004 Ingo Molnar
+ * Copyright (C) 2005 Paolo 'Blaisorblade' Giarrusso
*/
#include <linux/mm.h>
@@ -79,12 +83,13 @@ out:
}
static int populate_range(struct mm_struct *mm, struct vm_area_struct *vma,
- unsigned long addr, unsigned long size, pgoff_t pgoff)
+ unsigned long addr, unsigned long size, pgoff_t pgoff,
+ pgprot_t pgprot)
{
int err;
do {
- err = install_file_pte(mm, vma, addr, pgoff, vma->vm_page_prot);
+ err = install_file_pte(mm, vma, addr, pgoff, pgprot);
if (err)
return err;
@@ -102,21 +107,17 @@ static int populate_range(struct mm_struct *mm, struct vm_area_struct *vma,
* file within an existing vma.
* @start: start of the remapped virtual memory range
* @size: size of the remapped virtual memory range
- * @prot: new protection bits of the range
+ * @prot: new protection bits of the range, must be 0 if not using MAP_CHGPROT
* @pgoff: to be mapped page of the backing store file
- * @flags: 0 or MAP_NONBLOCKED - the later will cause no IO.
+ * @flags: bits MAP_CHGPROT or MAP_NONBLOCK - the latter will cause no IO.
*
* this syscall works purely via pagetables, so it's the most efficient
* way to map the same (large) file into a given virtual window. Unlike
* mmap()/mremap() it does not create any new vmas. The new mappings are
* also safe across swapout.
- *
- * NOTE: the 'prot' parameter right now is ignored, and the vma's default
- * protection is used. Arbitrary protections might be implemented in the
- * future.
*/
asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
- unsigned long __prot, unsigned long pgoff, unsigned long flags)
+ unsigned long prot, unsigned long pgoff, unsigned long flags)
{
struct mm_struct *mm = current->mm;
struct address_space *mapping;
@@ -124,8 +125,9 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
struct vm_area_struct *vma;
int err = -EINVAL;
int has_write_lock = 0;
+ pgprot_t pgprot;
- if (__prot)
+ if (prot && !(flags & MAP_CHGPROT))
return err;
/*
* Sanitize the syscall parameters:
@@ -139,8 +141,10 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
/* Can we represent this offset inside this architecture's pte's? */
#if PTE_FILE_MAX_BITS < BITS_PER_LONG
- if (pgoff + (size >> PAGE_SHIFT) >= (1UL << PTE_FILE_MAX_BITS))
+ if (pgoff + (size >> PAGE_SHIFT) >= (1UL << PTE_FILE_MAX_BITS)) {
+ err = -EOVERFLOW;
return err;
+ }
#endif
/* We need down_write() to change vma->vm_flags. */
@@ -190,7 +194,31 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
spin_unlock(&mapping->i_mmap_lock);
}
- err = populate_range(mm, vma, start, size, pgoff);
+ if (flags & MAP_CHGPROT && !(vma->vm_flags & VM_MANYPROTS)) {
+ if (!has_write_lock) {
+ up_read(&mm->mmap_sem);
+ down_write(&mm->mmap_sem);
+ has_write_lock = 1;
+ goto retry;
+ }
+ vma->vm_flags |= VM_MANYPROTS;
+ }
+
+ if (flags & MAP_CHGPROT) {
+ unsigned long vm_prots = calc_vm_prot_bits(prot);
+
+ /* vma->vm_flags >> 4 shifts VM_MAY% in place of VM_% */
+ if ((vm_prots & ~(vma->vm_flags >> 4)) &
+ (VM_READ | VM_WRITE | VM_EXEC)) {
+ err = -EPERM;
+ goto out;
+ }
+
+ pgprot = protection_map[vm_prots | VM_SHARED];
+ } else
+ pgprot = vma->vm_page_prot;
+
+ err = populate_range(mm, vma, start, size, pgoff, pgprot);
if (!err && !(flags & MAP_NONBLOCK)) {
if (unlikely(has_write_lock)) {
downgrade_write(&mm->mmap_sem);
--
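The permission check added above relies on the VM_MAY* flags sitting exactly four bits above their VM_* counterparts (VM_MAYREAD == VM_READ << 4, and so on), which is the same trick mprotect() uses. Pulled out of the syscall for illustration (flag values as in mainline; the helper name is invented):

#define VM_READ		0x00000001UL
#define VM_WRITE	0x00000002UL
#define VM_EXEC		0x00000004UL
#define VM_MAYREAD	0x00000010UL
#define VM_MAYWRITE	0x00000020UL
#define VM_MAYEXEC	0x00000040UL

/* Nonzero if the requested VM_READ/VM_WRITE/VM_EXEC bits exceed what the
 * vma's VM_MAY* bits permit: shifting vm_flags right by 4 lines the
 * VM_MAY* bits up with the VM_* bits being asked for. */
static inline int prot_exceeds_vma(unsigned long vm_prots,
				   unsigned long vm_flags)
{
	return (vm_prots & ~(vm_flags >> 4)) & (VM_READ | VM_WRITE | VM_EXEC);
}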
* [PATCH 10/11] RFP prot support: support private vma for MAP_POPULATE
2007-03-31 0:35 [PATCH 00/11] remap_file_pages protection support Paolo 'Blaisorblade' Giarrusso
` (8 preceding siblings ...)
2007-03-31 0:35 ` [PATCH 09/11] RFP prot support: enhance syscall interface Paolo 'Blaisorblade' Giarrusso, Ingo Molnar, Paolo 'Blaisorblade' Giarrusso
@ 2007-03-31 0:36 ` Paolo 'Blaisorblade' Giarrusso, Ingo Molnar
2007-03-31 0:36 ` [PATCH 11/11] RFP prot support: also set VM_NONLINEAR on nonuniform VMAs Paolo 'Blaisorblade' Giarrusso
10 siblings, 0 replies; 12+ messages in thread
From: Paolo 'Blaisorblade' Giarrusso, Ingo Molnar @ 2007-03-31 0:36 UTC (permalink / raw)
To: Andrew Morton
Cc: mingo, linux-mm, Jeff Dike, Paolo 'Blaisorblade' Giarrusso
Fix mmap(MAP_POPULATE | MAP_PRIVATE). We don't need the VMA to be shared if
we aren't rearranging pages, and supporting this is trivial.
Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
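For illustration, the simplest beneficiary is a prefaulted private mapping: since mmap() of this era forwards MAP_POPULATE to sys_remap_file_pages() internally, the old up-front VM_SHARED check effectively made MAP_POPULATE a no-op for private mappings. A sketch (invented function name) of the now-working case:

#define _GNU_SOURCE
#include <sys/mman.h>
#include <stddef.h>

/* Sketch: prefault a private, read-only file mapping.  With this patch
 * the VM_SHARED test no longer rejects the populate-only case, so
 * MAP_POPULATE can take effect on MAP_PRIVATE mappings too. */
static void *map_private_populated(int fd, size_t len)
{
	return mmap(NULL, len, PROT_READ,
		    MAP_PRIVATE | MAP_POPULATE, fd, 0);
}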
---
mm/fremap.c | 8 +++++++-
1 files changed, 7 insertions(+), 1 deletions(-)
diff --git a/mm/fremap.c b/mm/fremap.c
index b1a4c34..f4536e9 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -158,7 +158,7 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
* the single existing vma. vm_private_data is used as a
* swapout cursor in a VM_NONLINEAR vma.
*/
- if (!vma || !(vma->vm_flags & VM_SHARED))
+ if (!vma)
goto out;
if (vma->vm_private_data && !(vma->vm_flags & VM_NONLINEAR))
@@ -178,6 +178,9 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
goto out;
}
+ if (!(vma->vm_flags & VM_SHARED))
+ goto out;
+
if (!has_write_lock) {
up_read(&mm->mmap_sem);
down_write(&mm->mmap_sem);
@@ -195,6 +198,9 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
}
if (flags & MAP_CHGPROT && !(vma->vm_flags & VM_MANYPROTS)) {
+ if (!(vma->vm_flags & VM_SHARED))
+ goto out;
+
if (!has_write_lock) {
up_read(&mm->mmap_sem);
down_write(&mm->mmap_sem);
--
* [PATCH 11/11] RFP prot support: also set VM_NONLINEAR on nonuniform VMAs
2007-03-31 0:35 [PATCH 00/11] remap_file_pages protection support Paolo 'Blaisorblade' Giarrusso
` (9 preceding siblings ...)
2007-03-31 0:36 ` [PATCH 10/11] RFP prot support: support private vma for MAP_POPULATE Paolo 'Blaisorblade' Giarrusso, Ingo Molnar
@ 2007-03-31 0:36 ` Paolo 'Blaisorblade' Giarrusso
10 siblings, 0 replies; 12+ messages in thread
From: Paolo 'Blaisorblade' Giarrusso @ 2007-03-31 0:36 UTC (permalink / raw)
To: Andrew Morton
Cc: mingo, linux-mm, Jeff Dike, Paolo 'Blaisorblade' Giarrusso
To simplify the VM code, and to reflect expected application usage, we also
set VM_NONLINEAR whenever we set VM_MANYPROTS. Otherwise, we would have to
save nonlinear PTEs even on paths which only cope with linear VMAs; that is
possible, but intrusive (it is done in one of the next patches).
Obviously, this has a performance cost, since a linear VMA may end up being
handled by the nonlinear code. But I don't know of any application that would
be affected by this.
XXX update: glibc wants to replace mprotect() with linear VM_MANYPROTS areas,
to handle guard pages and data mappings of shared objects.
Signed-off-by: Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
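As a rough sketch of the glibc idea mentioned in the XXX note above (entirely hypothetical: the function name, the layout and the availability of MAP_CHGPROT in userspace are all assumptions), a loader could turn one page of a linear file mapping into a guard page without splitting the vma; with this patch the vma then simply becomes VM_NONLINEAR | VM_MANYPROTS even though its layout stays linear:

#define _GNU_SOURCE
#include <sys/mman.h>
#include <stddef.h>
#include <unistd.h>

/* Hypothetical: 'base' maps the file linearly from offset 0; make page
 * number 'page' a PROT_NONE guard page in place.  prot == 0 together
 * with MAP_CHGPROT means PROT_NONE, and keeping pgoff == page preserves
 * the linear layout. */
static int punch_guard_page(char *base, size_t page)
{
	return remap_file_pages(base + page * getpagesize(),
				getpagesize(), 0, page, MAP_CHGPROT);
}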
---
mm/fremap.c | 22 ++++++++++------------
1 files changed, 10 insertions(+), 12 deletions(-)
diff --git a/mm/fremap.c b/mm/fremap.c
index f4536e9..83aaa8c 100644
--- a/mm/fremap.c
+++ b/mm/fremap.c
@@ -173,7 +173,8 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
/* Must set VM_NONLINEAR before any pages are populated. */
if (!(vma->vm_flags & VM_NONLINEAR)) {
/* Don't need a nonlinear mapping, exit success */
- if (pgoff == linear_page_index(vma, start)) {
+ if (pgoff == linear_page_index(vma, start) &&
+ !(flags & MAP_CHGPROT)) {
err = 0;
goto out;
}
@@ -195,19 +196,16 @@ asmlinkage long sys_remap_file_pages(unsigned long start, unsigned long size,
vma_nonlinear_insert(vma, &mapping->i_mmap_nonlinear);
flush_dcache_mmap_unlock(mapping);
spin_unlock(&mapping->i_mmap_lock);
- }
- if (flags & MAP_CHGPROT && !(vma->vm_flags & VM_MANYPROTS)) {
- if (!(vma->vm_flags & VM_SHARED))
- goto out;
-
- if (!has_write_lock) {
- up_read(&mm->mmap_sem);
- down_write(&mm->mmap_sem);
- has_write_lock = 1;
- goto retry;
+ if (flags & MAP_CHGPROT && !(vma->vm_flags & VM_MANYPROTS)) {
+ if (!has_write_lock) {
+ up_read(&mm->mmap_sem);
+ down_write(&mm->mmap_sem);
+ has_write_lock = 1;
+ goto retry;
+ }
+ vma->vm_flags |= VM_MANYPROTS;
}
- vma->vm_flags |= VM_MANYPROTS;
}
if (flags & MAP_CHGPROT) {
--