* [PATCH 5.10.y 1/2] mm: add remap_pfn_range_notrack
2024-11-05 15:50 [PATCH 5.10.y 0/2] Backport fix of CVE-2024-47674 to 5.10 Harshvardhan Jha
@ 2024-11-05 15:50 ` Harshvardhan Jha
2024-11-05 15:50 ` [PATCH 5.10.y 2/2] mm: avoid leaving partial pfn mappings around in error case Harshvardhan Jha
1 sibling, 0 replies; 3+ messages in thread
From: Harshvardhan Jha @ 2024-11-05 15:50 UTC
To: akpm; +Cc: harshvardhan.j.jha, linux-mm, stable
From: Christoph Hellwig <hch@lst.de>
commit 74ffa5a3e68504dd289135b1cf0422c19ffb3f2e upstream.
Patch series "add remap_pfn_range_notrack instead of reinventing it in i915", v2.
i915 has some reason to want to avoid the track_pfn_remap overhead in
remap_pfn_range. Add a function to the core VM to do just that rather
than reinventing the functionality poorly in the driver.
Note that the remap_io_sg path does get exercised when using Xorg on my
Thinkpad X1, so this should be considered lightly tested; I've not managed
to hit the remap_io_mapping path at all.
This patch (of 4):
Add a version of remap_pfn_range that does not call track_pfn_range. This
will be used to fix horrible abuses of VM internals in the i915 driver.
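For illustration only (not part of this patch): a minimal sketch of how a
driver's ->mmap handler might call the new helper once it has chosen its own
caching attributes. The device structure and its fields are hypothetical, and
note that this patch adds no EXPORT_SYMBOL for remap_pfn_range_notrack, so
modular users are not implied here.

/*
 * Hypothetical ->mmap sketch: the driver has already decided on the
 * caching attributes itself, so it skips the track_pfn_remap
 * bookkeeping by calling remap_pfn_range_notrack() directly.
 */
static int example_dev_mmap(struct file *file, struct vm_area_struct *vma)
{
        struct example_dev *dev = file->private_data;   /* hypothetical */
        unsigned long size = vma->vm_end - vma->vm_start;

        if (size > dev->region_size)
                return -EINVAL;

        /* Caller-chosen caching bits; "notrack" means no PAT tracking. */
        vma->vm_page_prot = pgprot_writecombine(vma->vm_page_prot);

        return remap_pfn_range_notrack(vma, vma->vm_start, dev->region_pfn,
                                       size, vma->vm_page_prot);
}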
Link: https://lkml.kernel.org/r/20210326055505.1424432-1-hch@lst.de
Link: https://lkml.kernel.org/r/20210326055505.1424432-2-hch@lst.de
Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Cc: Jani Nikula <jani.nikula@linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen@linux.intel.com>
Cc: Rodrigo Vivi <rodrigo.vivi@intel.com>
Cc: Chris Wilson <chris@chris-wilson.co.uk>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
(cherry picked from commit 74ffa5a3e68504dd289135b1cf0422c19ffb3f2e)
Signed-off-by: Harshvardhan Jha <harshvardhan.j.jha@oracle.com>
---
include/linux/mm.h | 2 ++
mm/memory.c | 51 ++++++++++++++++++++++++++++------------------
2 files changed, 33 insertions(+), 20 deletions(-)
diff --git a/include/linux/mm.h b/include/linux/mm.h
index b8b677f47a8da..94e630862d58c 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2749,6 +2749,8 @@ unsigned long change_prot_numa(struct vm_area_struct *vma,
struct vm_area_struct *find_extend_vma(struct mm_struct *, unsigned long addr);
int remap_pfn_range(struct vm_area_struct *, unsigned long addr,
unsigned long pfn, unsigned long size, pgprot_t);
+int remap_pfn_range_notrack(struct vm_area_struct *vma, unsigned long addr,
+ unsigned long pfn, unsigned long size, pgprot_t prot);
int vm_insert_page(struct vm_area_struct *, unsigned long addr, struct page *);
int vm_insert_pages(struct vm_area_struct *vma, unsigned long addr,
struct page **pages, unsigned long *num);
diff --git a/mm/memory.c b/mm/memory.c
index 2183003687cec..40a6cc6df9003 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2290,26 +2290,17 @@ static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd,
return 0;
}
-/**
- * remap_pfn_range - remap kernel memory to userspace
- * @vma: user vma to map to
- * @addr: target page aligned user address to start at
- * @pfn: page frame number of kernel physical memory address
- * @size: size of mapping area
- * @prot: page protection flags for this mapping
- *
- * Note: this is only safe if the mm semaphore is held when called.
- *
- * Return: %0 on success, negative error code otherwise.
+/*
+ * Variant of remap_pfn_range that does not call track_pfn_remap. The caller
+ * must have pre-validated the caching bits of the pgprot_t.
*/
-int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
- unsigned long pfn, unsigned long size, pgprot_t prot)
+int remap_pfn_range_notrack(struct vm_area_struct *vma, unsigned long addr,
+ unsigned long pfn, unsigned long size, pgprot_t prot)
{
pgd_t *pgd;
unsigned long next;
unsigned long end = addr + PAGE_ALIGN(size);
struct mm_struct *mm = vma->vm_mm;
- unsigned long remap_pfn = pfn;
int err;
if (WARN_ON_ONCE(!PAGE_ALIGNED(addr)))
@@ -2339,10 +2330,6 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
vma->vm_pgoff = pfn;
}
- err = track_pfn_remap(vma, &prot, remap_pfn, addr, PAGE_ALIGN(size));
- if (err)
- return -EINVAL;
-
vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
BUG_ON(addr >= end);
@@ -2354,12 +2341,36 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
err = remap_p4d_range(mm, pgd, addr, next,
pfn + (addr >> PAGE_SHIFT), prot);
if (err)
- break;
+ return err;
} while (pgd++, addr = next, addr != end);
+ return 0;
+}
+
+/**
+ * remap_pfn_range - remap kernel memory to userspace
+ * @vma: user vma to map to
+ * @addr: target page aligned user address to start at
+ * @pfn: page frame number of kernel physical memory address
+ * @size: size of mapping area
+ * @prot: page protection flags for this mapping
+ *
+ * Note: this is only safe if the mm semaphore is held when called.
+ *
+ * Return: %0 on success, negative error code otherwise.
+ */
+int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr,
+ unsigned long pfn, unsigned long size, pgprot_t prot)
+{
+ int err;
+
+ err = track_pfn_remap(vma, &prot, pfn, addr, PAGE_ALIGN(size));
if (err)
- untrack_pfn(vma, remap_pfn, PAGE_ALIGN(size));
+ return -EINVAL;
+ err = remap_pfn_range_notrack(vma, addr, pfn, size, prot);
+ if (err)
+ untrack_pfn(vma, pfn, PAGE_ALIGN(size));
return err;
}
EXPORT_SYMBOL(remap_pfn_range);
--
2.46.0
* [PATCH 5.10.y 2/2] mm: avoid leaving partial pfn mappings around in error case
2024-11-05 15:50 [PATCH 5.10.y 0/2] Backport fix of CVE-2024-47674 to 5.10 Harshvardhan Jha
2024-11-05 15:50 ` [PATCH 5.10.y 1/2] mm: add remap_pfn_range_notrack Harshvardhan Jha
@ 2024-11-05 15:50 ` Harshvardhan Jha
1 sibling, 0 replies; 3+ messages in thread
From: Harshvardhan Jha @ 2024-11-05 15:50 UTC
To: akpm; +Cc: harshvardhan.j.jha, linux-mm, stable
From: Linus Torvalds <torvalds@linux-foundation.org>
commit 79a61cc3fc0466ad2b7b89618a6157785f0293b3 upstream.
As Jann points out, PFN mappings are special, because unlike normal
memory mappings, there is no lifetime information associated with the
mapping - it is just a raw mapping of PFNs with no reference counting of
a 'struct page'.
That's all very much intentional, but it does mean that it's easy to
mess up the cleanup in case of errors. Yes, a failed mmap() will always
eventually clean up any partial mappings, but without any explicit
lifetime in the page table mapping itself, it's very easy to do the
error handling in the wrong order.
In particular, it's easy to mistakenly free the physical backing store
before the page tables are actually cleaned up and (temporarily) have
stale dangling PTE entries.
To make this situation less error-prone, just make sure that any partial
pfn mapping is torn down early, before any other error handling.
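For illustration only (not from this patch): a hypothetical caller showing the
hazard being fixed. If remap_pfn_range() fails partway and the caller
immediately frees its physically contiguous backing buffer, any PTEs left over
from the partial mapping would briefly dangle; with this change the partial
mapping is zapped before the error is returned, so the naive error path below
becomes safe. The example_* helpers are made up.

/*
 * Hypothetical mmap error path: free the backing store as soon as the
 * remap fails.  After this fix, the partial pfn mapping has already
 * been torn down by the time the error is returned, so the freed
 * pages are no longer reachable through stale page table entries.
 */
static int example_mmap(struct file *file, struct vm_area_struct *vma)
{
        unsigned long size = vma->vm_end - vma->vm_start;
        void *buf = example_alloc_region(size);         /* hypothetical, physically contiguous */
        int err;

        if (!buf)
                return -ENOMEM;

        err = remap_pfn_range(vma, vma->vm_start,
                              virt_to_phys(buf) >> PAGE_SHIFT,
                              size, vma->vm_page_prot);
        if (err) {
                example_free_region(buf, size);         /* safe: mapping already zapped */
                return err;
        }
        return 0;
}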
Reported-and-tested-by: Jann Horn <jannh@google.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Simona Vetter <simona.vetter@ffwll.ch>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
(cherry picked from commit 5b2c8b34f6d76bfbd1dd4936eb8a0fbfb9af3959)
Signed-off-by: Harshvardhan Jha <harshvardhan.j.jha@oracle.com>
---
mm/memory.c | 27 ++++++++++++++++++++++-----
1 file changed, 22 insertions(+), 5 deletions(-)
diff --git a/mm/memory.c b/mm/memory.c
index 40a6cc6df9003..29cce8aadb618 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2290,11 +2290,7 @@ static inline int remap_p4d_range(struct mm_struct *mm, pgd_t *pgd,
return 0;
}
-/*
- * Variant of remap_pfn_range that does not call track_pfn_remap. The caller
- * must have pre-validated the caching bits of the pgprot_t.
- */
-int remap_pfn_range_notrack(struct vm_area_struct *vma, unsigned long addr,
+static int remap_pfn_range_internal(struct vm_area_struct *vma, unsigned long addr,
unsigned long pfn, unsigned long size, pgprot_t prot)
{
pgd_t *pgd;
@@ -2347,6 +2343,27 @@ int remap_pfn_range_notrack(struct vm_area_struct *vma, unsigned long addr,
return 0;
}
+/*
+ * Variant of remap_pfn_range that does not call track_pfn_remap. The caller
+ * must have pre-validated the caching bits of the pgprot_t.
+ */
+int remap_pfn_range_notrack(struct vm_area_struct *vma, unsigned long addr,
+ unsigned long pfn, unsigned long size, pgprot_t prot)
+{
+ int error = remap_pfn_range_internal(vma, addr, pfn, size, prot);
+
+ if (!error)
+ return 0;
+
+ /*
+ * A partial pfn range mapping is dangerous: it does not
+ * maintain page reference counts, and callers may free
+ * pages due to the error. So zap it early.
+ */
+ zap_page_range_single(vma, addr, size, NULL);
+ return error;
+}
+
/**
* remap_pfn_range - remap kernel memory to userspace
* @vma: user vma to map to
--
2.46.0