* [RFC PATCH v1] mm/vmalloc: fix incorrect __vmap_pages_range_noflush() if vm_area_alloc_pages() from high order fallback to order0
@ 2024-07-24 18:19 hailong.liu
2024-07-24 18:28 ` Hailong.Liu
2024-07-24 22:23 ` Barry Song
0 siblings, 2 replies; 6+ messages in thread
From: hailong.liu @ 2024-07-24 18:19 UTC (permalink / raw)
To: Andrew Morton, Uladzislau Rezki, Christoph Hellwig,
Lorenzo Stoakes, Vlastimil Babka, Michal Hocko
Cc: Hailong.Liu, Barry Song, Tangquan . Zheng, linux-mm, linux-kernel
From: "Hailong.Liu" <hailong.liu@oppo.com>
The scenario where the issue occurs is as follows:
CONFIG: vmap_allow_huge = true && 2M is for PMD_SIZE
kvmalloc(2M)
    __vmalloc_node_range(vm_flags=VM_ALLOW_HUGE_VMAP)
        vm_area_alloc_pages(order=9) ---> order-9 allocation fails and falls back to order-0,
                                          and phys_addr is aligned with PMD_SIZE
    vmap_pages_range
        vmap_pages_range_noflush
            __vmap_pages_range_noflush(page_shift = 21) ----> incorrect *huge* vmap here

Fix it by setting VM_AREA_ALLOC_PAGES_FALLBACK in page->private of the first page
when the high-order allocation falls back to order-0.
Fixes: e9c3cda4d86e ("mm, vmalloc: fix high order __GFP_NOFAIL allocations")
CC: Barry Song <21cnbao@gmail.com>
Reported-by: Tangquan.Zheng <zhengtangquan@oppo.com>
Signed-off-by: Hailong.Liu <hailong.liu@oppo.com>
---
mm/vmalloc.c | 14 ++++++++++++--
1 file changed, 12 insertions(+), 2 deletions(-)
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 03c78fae06f3..b35dfd3eeee3 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -75,6 +75,8 @@ early_param("nohugevmalloc", set_nohugevmalloc);
static const bool vmap_allow_huge = false;
#endif /* CONFIG_HAVE_ARCH_HUGE_VMALLOC */
+#define VM_AREA_ALLOC_PAGES_FALLBACK 0x1
+
bool is_vmalloc_addr(const void *x)
{
unsigned long addr = (unsigned long)kasan_reset_tag(x);
@@ -604,8 +606,13 @@ int __vmap_pages_range_noflush(unsigned long addr, unsigned long end,
WARN_ON(page_shift < PAGE_SHIFT);
if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMALLOC) ||
- page_shift == PAGE_SHIFT)
- return vmap_small_pages_range_noflush(addr, end, prot, pages);
+ page_shift == PAGE_SHIFT ||
+ page_private(pages[0]) == VM_AREA_ALLOC_PAGES_FALLBACK) {
+ int ret = vmap_small_pages_range_noflush(addr, end, prot, pages);
+
+ set_page_private(pages[0], 0);
+ return ret;
+ }
for (i = 0; i < nr; i += 1U << (page_shift - PAGE_SHIFT)) {
int err;
@@ -3583,6 +3590,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
/* fall back to the zero order allocations */
alloc_gfp |= __GFP_NOFAIL;
+ fallback = true;
order = 0;
continue;
}
@@ -3608,6 +3616,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
cond_resched();
nr_allocated += 1U << order;
}
+ if (nr_allocated && fallback)
+ set_page_private(pages[0], VM_AREA_ALLOC_PAGES_FALLBACK);
return nr_allocated;
}
--
2.34.1
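For context on why the fallback breaks the huge mapping: with page_shift = PMD_SHIFT, __vmap_pages_range_noflush() maps 1UL << page_shift bytes at a time starting at page_to_phys(pages[i]), so it silently assumes that every group of 512 order-0 entries in pages[] (with 4K pages) is one physically contiguous 2M block. A successful order-9 allocation satisfies that; the order-0 fallback does not, even when pages[0] happens to be PMD-aligned. A minimal userspace sketch of that invariant follows -- it is not kernel code, and the helper name and pfn values are made up purely for illustration:

/* contig_check.c - illustrative only, not kernel code */
#include <stdbool.h>
#include <stdio.h>

#define PAGES_PER_PMD 512	/* 2M / 4K */

/* The invariant the huge-mapping loop relies on: every group of
 * PAGES_PER_PMD entries in pages[] must be physically contiguous. */
static bool groups_are_contiguous(const unsigned long *pfns, unsigned long nr)
{
	for (unsigned long i = 0; i + PAGES_PER_PMD <= nr; i += PAGES_PER_PMD)
		for (unsigned long j = 1; j < PAGES_PER_PMD; j++)
			if (pfns[i + j] != pfns[i] + j)
				return false;
	return true;
}

int main(void)
{
	unsigned long order9[PAGES_PER_PMD], order0[PAGES_PER_PMD];
	int i;

	/* order-9 success: one contiguous 2M block starting at pfn 0x80000 */
	for (i = 0; i < PAGES_PER_PMD; i++)
		order9[i] = 0x80000 + i;

	/* order-0 fallback: scattered pfns even though pfn[0] is 2M-aligned */
	for (i = 0; i < PAGES_PER_PMD; i++)
		order0[i] = i ? 0x12345 + 2 * i : 0x80000;

	printf("order-9 pages contiguous: %d -> PMD mapping is safe\n",
	       groups_are_contiguous(order9, PAGES_PER_PMD));
	printf("order-0 fallback contiguous: %d -> must map as PTEs\n",
	       groups_are_contiguous(order0, PAGES_PER_PMD));
	return 0;
}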
* Re: [RFC PATCH v1] mm/vmalloc: fix incorrect __vmap_pages_range_noflush() if vm_area_alloc_pages() from high order fallback to order0
2024-07-24 18:19 [RFC PATCH v1] mm/vmalloc: fix incorrect __vmap_pages_range_noflush() if vm_area_alloc_pages() from high order fallback to order0 hailong.liu
@ 2024-07-24 18:28 ` Hailong.Liu
2024-07-24 20:02 ` Matthew Wilcox
2024-07-24 22:23 ` Barry Song
1 sibling, 1 reply; 6+ messages in thread
From: Hailong.Liu @ 2024-07-24 18:28 UTC (permalink / raw)
To: Andrew Morton, Uladzislau Rezki, Christoph Hellwig,
Lorenzo Stoakes, Vlastimil Babka, Michal Hocko
Cc: Barry Song, Tangquan . Zheng, linux-mm, linux-kernel

On Thu, 25. Jul 02:19, hailong.liu@oppo.com wrote:
> From: "Hailong.Liu" <hailong.liu@oppo.com>
>
> The scenario where the issue occurs is as follows:
> CONFIG: vmap_allow_huge = true && 2M is for PMD_SIZE
> kvmalloc(2M)
>     __vmalloc_node_range(vm_flags=VM_ALLOW_HUGE_VMAP)
>         vm_area_alloc_pages(order=9) ---> order-9 allocation fails and falls back to order-0,
>                                           and phys_addr is aligned with PMD_SIZE
>     vmap_pages_range
>         vmap_pages_range_noflush
>             __vmap_pages_range_noflush(page_shift = 21) ----> incorrect *huge* vmap here
>
> Fix it by setting VM_AREA_ALLOC_PAGES_FALLBACK in page->private of the first page
> when the high-order allocation falls back to order-0.
> Fixes: e9c3cda4d86e ("mm, vmalloc: fix high order __GFP_NOFAIL allocations")
>
> CC: Barry Song <21cnbao@gmail.com>
> Reported-by: Tangquan.Zheng <zhengtangquan@oppo.com>
> Signed-off-by: Hailong.Liu <hailong.liu@oppo.com>
> ---
> mm/vmalloc.c | 14 ++++++++++++--
> 1 file changed, 12 insertions(+), 2 deletions(-)
>
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index 03c78fae06f3..b35dfd3eeee3 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -75,6 +75,8 @@ early_param("nohugevmalloc", set_nohugevmalloc);
> static const bool vmap_allow_huge = false;
> #endif /* CONFIG_HAVE_ARCH_HUGE_VMALLOC */
>
> +#define VM_AREA_ALLOC_PAGES_FALLBACK 0x1
> +
> bool is_vmalloc_addr(const void *x)
> {
> unsigned long addr = (unsigned long)kasan_reset_tag(x);
> @@ -604,8 +606,13 @@ int __vmap_pages_range_noflush(unsigned long addr, unsigned long end,
> WARN_ON(page_shift < PAGE_SHIFT);
>
> if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMALLOC) ||
> - page_shift == PAGE_SHIFT)
> - return vmap_small_pages_range_noflush(addr, end, prot, pages);
> + page_shift == PAGE_SHIFT ||
> + page_private(pages[0]) == VM_AREA_ALLOC_PAGES_FALLBACK) {
> + int ret = vmap_small_pages_range_noflush(addr, end, prot, pages);
> +
> + set_page_private(pages[0], 0);
> + return ret;
> + }
>
> for (i = 0; i < nr; i += 1U << (page_shift - PAGE_SHIFT)) {
> int err;
> @@ -3583,6 +3590,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
>
> /* fall back to the zero order allocations */
> alloc_gfp |= __GFP_NOFAIL;
> + fallback = true;
Sry for my mistake, I forget define fallback here.
BTW, This is not the optimal solution. Does anyone have a better idea? Glad to
hear:)

> order = 0;
> continue;
> }
> @@ -3608,6 +3616,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
> cond_resched();
> nr_allocated += 1U << order;
> }
> + if (nr_allocated && fallback)
> + set_page_private(pages[0], VM_AREA_ALLOC_PAGES_FALLBACK);
>
> return nr_allocated;
> }
> --
> 2.34.1
>

--
help you, help me,
Hailong.
* Re: [RFC PATCH v1] mm/vmalloc: fix incorrect __vmap_pages_range_noflush() if vm_area_alloc_pages() from high order fallback to order0
2024-07-24 18:28 ` Hailong.Liu
@ 2024-07-24 20:02 ` Matthew Wilcox
2024-07-24 22:11 ` Barry Song
2024-07-25 6:15 ` Hailong.Liu
0 siblings, 2 replies; 6+ messages in thread
From: Matthew Wilcox @ 2024-07-24 20:02 UTC (permalink / raw)
To: Hailong.Liu
Cc: Andrew Morton, Uladzislau Rezki, Christoph Hellwig,
Lorenzo Stoakes, Vlastimil Babka, Michal Hocko, Barry Song,
Tangquan . Zheng, linux-mm, linux-kernel

On Thu, Jul 25, 2024 at 02:28:27AM +0800, Hailong.Liu wrote:
> > if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMALLOC) ||
> > - page_shift == PAGE_SHIFT)
> > - return vmap_small_pages_range_noflush(addr, end, prot, pages);
> > + page_shift == PAGE_SHIFT ||
> > + page_private(pages[0]) == VM_AREA_ALLOC_PAGES_FALLBACK) {
> > + int ret = vmap_small_pages_range_noflush(addr, end, prot, pages);
> > +
> > + set_page_private(pages[0], 0);
> > + return ret;
> > + }
> >
> > for (i = 0; i < nr; i += 1U << (page_shift - PAGE_SHIFT)) {
> > int err;
> > @@ -3583,6 +3590,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
> >
> > /* fall back to the zero order allocations */
> > alloc_gfp |= __GFP_NOFAIL;
> > + fallback = true;
> Sry for my mistake, I forget define fallback here.
> BTW, This is not the optimal solution. Does anyone have a better idea? Glad to
> hear:)

Yeah, I really don't like this approach. You could return a small
struct indicating both nr_allocated and whether you had to fall back.
Or you could pass a bool * parameter. They're both pretty nasty.
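To make the "pass a bool * parameter" alternative concrete, here is a toy userspace model of the idea: the allocator reports whether it had to fall back, and the caller derives the mapping granularity from that flag rather than from page alignment. All names and the fake allocator below are illustrative only; this is neither the actual kernel API nor the rfc-v2 implementation:

/* bool_out_param.c - toy model of the "pass a bool *" idea, illustrative only */
#include <stdbool.h>
#include <stdio.h>

#define PTE_ORDER 0
#define PMD_ORDER 9

/* Toy stand-in for vm_area_alloc_pages(): pretend the order-9 attempt
 * fails and everything is retried as order-0, reported via *fell_back. */
static unsigned int toy_alloc_pages(unsigned int order, unsigned int nr_pages,
				    bool *fell_back)
{
	if (order == PMD_ORDER)
		*fell_back = true;	/* simulated high-order failure */

	return nr_pages;	/* pretend the order-0 retries all succeed */
}

int main(void)
{
	bool fell_back = false;
	unsigned int page_order = PMD_ORDER;
	unsigned int nr = toy_alloc_pages(page_order, 512, &fell_back);

	/* The caller derives the mapping granularity from the flag, so a
	 * fallback can never be mapped as a huge page by accident. */
	if (fell_back)
		page_order = PTE_ORDER;

	printf("allocated %u pages, mapping with page order %u\n",
	       nr, page_order);
	return 0;
}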
* Re: [RFC PATCH v1] mm/vmalloc: fix incorrect __vmap_pages_range_noflush() if vm_area_alloc_pages() from high order fallback to order0
2024-07-24 20:02 ` Matthew Wilcox
@ 2024-07-24 22:11 ` Barry Song
0 siblings, 0 replies; 6+ messages in thread
From: Barry Song @ 2024-07-24 22:11 UTC (permalink / raw)
To: Matthew Wilcox
Cc: Hailong.Liu, Andrew Morton, Uladzislau Rezki, Christoph Hellwig,
Lorenzo Stoakes, Vlastimil Babka, Michal Hocko,
Tangquan . Zheng, linux-mm, linux-kernel

On Thu, Jul 25, 2024 at 8:02 AM Matthew Wilcox <willy@infradead.org> wrote:
>
> On Thu, Jul 25, 2024 at 02:28:27AM +0800, Hailong.Liu wrote:
> > > if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMALLOC) ||
> > > - page_shift == PAGE_SHIFT)
> > > - return vmap_small_pages_range_noflush(addr, end, prot, pages);
> > > + page_shift == PAGE_SHIFT ||
> > > + page_private(pages[0]) == VM_AREA_ALLOC_PAGES_FALLBACK) {
> > > + int ret = vmap_small_pages_range_noflush(addr, end, prot, pages);
> > > +
> > > + set_page_private(pages[0], 0);
> > > + return ret;
> > > + }
> > >
> > > for (i = 0; i < nr; i += 1U << (page_shift - PAGE_SHIFT)) {
> > > int err;
> > > @@ -3583,6 +3590,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
> > >
> > > /* fall back to the zero order allocations */
> > > alloc_gfp |= __GFP_NOFAIL;
> > > + fallback = true;
> > Sry for my mistake, I forget define fallback here.
> > BTW, This is not the optimal solution. Does anyone have a better idea? Glad to
> > hear:)
>
> Yeah, I really don't like this approach. You could return a small
> struct indicating both nr_allocated and whether you had to fall back.
> Or you could pass a bool * parameter. They're both pretty nasty.

Yes, I feel returning a bool won't work very well. The result could be a
mixture of PMD and PTE if the allocated pages are larger than a PMD. For
example, if we allocate 8MB, it might result in the first 4MB being 2 * PMD,
and the remaining 4MB being PTE order-0 pages.

I am also curious what will happen if we allocate 3MB (1 PMD + some PTEs),
is the below doing the correct mapping?

do {
	ret = vmap_pages_range(addr, addr + size, prot, area->pages,
		page_shift);
	if (nofail && (ret < 0))
		schedule_timeout_uninterruptible(1);
} while (nofail && (ret < 0));

Is it possible we have only mapped the first 2MB if page_shift is PMD?

Thanks
Barry
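One way to look at the 3MB question is to simulate just the loop bounds of the huge-mapping path in __vmap_pages_range_noflush() (the full function is pasted later in this thread). The sketch below only mimics the address stepping; it ignores what vmap_range_noflush() does and whether vmalloc would ever hand such an unaligned range to this function in practice:

/* loop_bounds.c - mimics only the address stepping of the huge-mapping loop */
#include <stdio.h>

#define PAGE_SHIFT 12
#define PMD_SHIFT  21

int main(void)
{
	unsigned long start = 0, end = 3UL << 20;	/* a 3MB range */
	unsigned int page_shift = PMD_SHIFT;
	unsigned long addr = start;
	unsigned int nr = (end - addr) >> PAGE_SHIFT;	/* 768 small pages */
	unsigned int i;

	/* same iteration pattern as __vmap_pages_range_noflush() */
	for (i = 0; i < nr; i += 1U << (page_shift - PAGE_SHIFT)) {
		printf("would map [%lu MB, %lu MB) starting from pages[%u]\n",
		       addr >> 20, (addr + (1UL << page_shift)) >> 20, i);
		addr += 1UL << page_shift;
	}
	printf("loop stops at addr = %lu MB, but end = %lu MB\n",
	       addr >> 20, end >> 20);
	return 0;
}

For a 3MB range the second iteration already steps addr to 4MB, i.e. past end, which is essentially the "(end - start) % PMD_SIZE != 0" case raised again in the 22:23 reply below; whether such a range can actually reach this function is a separate question.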
* Re: [RFC PATCH v1] mm/vmalloc: fix incorrect __vmap_pages_range_noflush() if vm_area_alloc_pages() from high order fallback to order0
2024-07-24 20:02 ` Matthew Wilcox
2024-07-24 22:11 ` Barry Song
@ 2024-07-25 6:15 ` Hailong.Liu
1 sibling, 0 replies; 6+ messages in thread
From: Hailong.Liu @ 2024-07-25 6:15 UTC (permalink / raw)
To: Matthew Wilcox
Cc: Andrew Morton, Uladzislau Rezki, Christoph Hellwig,
Lorenzo Stoakes, Vlastimil Babka, Michal Hocko, Barry Song,
Tangquan . Zheng, linux-mm, linux-kernel

On Wed, 24. Jul 21:02, Matthew Wilcox wrote:
> On Thu, Jul 25, 2024 at 02:28:27AM +0800, Hailong.Liu wrote:
> > > if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMALLOC) ||
> > > - page_shift == PAGE_SHIFT)
> > > - return vmap_small_pages_range_noflush(addr, end, prot, pages);
> > > + page_shift == PAGE_SHIFT ||
> > > + page_private(pages[0]) == VM_AREA_ALLOC_PAGES_FALLBACK) {
> > > + int ret = vmap_small_pages_range_noflush(addr, end, prot, pages);
> > > +
> > > + set_page_private(pages[0], 0);
> > > + return ret;
> > > + }
> > >
> > > for (i = 0; i < nr; i += 1U << (page_shift - PAGE_SHIFT)) {
> > > int err;
> > > @@ -3583,6 +3590,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
> > >
> > > /* fall back to the zero order allocations */
> > > alloc_gfp |= __GFP_NOFAIL;
> > > + fallback = true;
> > Sry for my mistake, I forget define fallback here.
> > BTW, This is not the optimal solution. Does anyone have a better idea? Glad to
> > hear:)
>
> Yeah, I really don't like this approach. You could return a small
> struct indicating both nr_allocated and whether you had to fall back.
> Or you could pass a bool * parameter. They're both pretty nasty.

Agree. Thanks for pointing out. I send a rfc-v2 patch with a different
solution. pls help review.
https://lore.kernel.org/all/20240725035318.471-1-hailong.liu@oppo.com/T/#u

--
help you, help me,
Hailong.
* Re: [RFC PATCH v1] mm/vmalloc: fix incorrect __vmap_pages_range_noflush() if vm_area_alloc_pages() from high order fallback to order0
2024-07-24 18:19 [RFC PATCH v1] mm/vmalloc: fix incorrect __vmap_pages_range_noflush() if vm_area_alloc_pages() from high order fallback to order0 hailong.liu
2024-07-24 18:28 ` Hailong.Liu
@ 2024-07-24 22:23 ` Barry Song
1 sibling, 0 replies; 6+ messages in thread
From: Barry Song @ 2024-07-24 22:23 UTC (permalink / raw)
To: hailong.liu
Cc: Andrew Morton, Uladzislau Rezki, Christoph Hellwig,
Lorenzo Stoakes, Vlastimil Babka, Michal Hocko,
Tangquan . Zheng, linux-mm, linux-kernel

On Thu, Jul 25, 2024 at 6:19 AM <hailong.liu@oppo.com> wrote:
>
> From: "Hailong.Liu" <hailong.liu@oppo.com>
>
> The scenario where the issue occurs is as follows:
> CONFIG: vmap_allow_huge = true && 2M is for PMD_SIZE
> kvmalloc(2M)
>     __vmalloc_node_range(vm_flags=VM_ALLOW_HUGE_VMAP)
>         vm_area_alloc_pages(order=9) ---> order-9 allocation fails and falls back to order-0,
>                                           and phys_addr is aligned with PMD_SIZE
>     vmap_pages_range
>         vmap_pages_range_noflush
>             __vmap_pages_range_noflush(page_shift = 21) ----> incorrect *huge* vmap here
>
> Fix it by setting VM_AREA_ALLOC_PAGES_FALLBACK in page->private of the first page
> when the high-order allocation falls back to order-0.
> Fixes: e9c3cda4d86e ("mm, vmalloc: fix high order __GFP_NOFAIL allocations")
>
> CC: Barry Song <21cnbao@gmail.com>
> Reported-by: Tangquan.Zheng <zhengtangquan@oppo.com>
> Signed-off-by: Hailong.Liu <hailong.liu@oppo.com>
> ---
> mm/vmalloc.c | 14 ++++++++++++--
> 1 file changed, 12 insertions(+), 2 deletions(-)
>
> diff --git a/mm/vmalloc.c b/mm/vmalloc.c
> index 03c78fae06f3..b35dfd3eeee3 100644
> --- a/mm/vmalloc.c
> +++ b/mm/vmalloc.c
> @@ -75,6 +75,8 @@ early_param("nohugevmalloc", set_nohugevmalloc);
> static const bool vmap_allow_huge = false;
> #endif /* CONFIG_HAVE_ARCH_HUGE_VMALLOC */
>
> +#define VM_AREA_ALLOC_PAGES_FALLBACK 0x1
> +
> bool is_vmalloc_addr(const void *x)
> {
> unsigned long addr = (unsigned long)kasan_reset_tag(x);
> @@ -604,8 +606,13 @@ int __vmap_pages_range_noflush(unsigned long addr, unsigned long end,
> WARN_ON(page_shift < PAGE_SHIFT);
>
> if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMALLOC) ||
> - page_shift == PAGE_SHIFT)
> - return vmap_small_pages_range_noflush(addr, end, prot, pages);
> + page_shift == PAGE_SHIFT ||
> + page_private(pages[0]) == VM_AREA_ALLOC_PAGES_FALLBACK) {
> + int ret = vmap_small_pages_range_noflush(addr, end, prot, pages);
> +
> + set_page_private(pages[0], 0);
> + return ret;
> + }

we could have more than one *serious* bug here?
do we also need the below if ((end - start) % PMD_SIZE) != 0) ? no ?

int __vmap_pages_range_noflush(unsigned long addr, unsigned long end,
		pgprot_t prot, struct page **pages, unsigned int page_shift)
{
	unsigned int i, nr = (end - addr) >> PAGE_SHIFT;

	WARN_ON(page_shift < PAGE_SHIFT);

	if (!IS_ENABLED(CONFIG_HAVE_ARCH_HUGE_VMALLOC) ||
			page_shift == PAGE_SHIFT)
		return vmap_small_pages_range_noflush(addr, end, prot, pages);

	for (i = 0; i < nr; i += 1U << (page_shift - PAGE_SHIFT)) {
		int err;

		err = vmap_range_noflush(addr, addr + (1UL << page_shift),
					page_to_phys(pages[i]), prot,
					page_shift);
		if (err)
			return err;

		addr += 1UL << page_shift;
	}

+	if (addr < end)
+		return vmap_small_pages_range_noflush(addr, end, prot, pages + i);

	return 0;
}

>
> for (i = 0; i < nr; i += 1U << (page_shift - PAGE_SHIFT)) {
> int err;
> @@ -3583,6 +3590,7 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
>
> /* fall back to the zero order allocations */
> alloc_gfp |= __GFP_NOFAIL;
> + fallback = true;
> order = 0;
> continue;
> }
> @@ -3608,6 +3616,8 @@ vm_area_alloc_pages(gfp_t gfp, int nid,
> cond_resched();
> nr_allocated += 1U << order;
> }
> + if (nr_allocated && fallback)
> + set_page_private(pages[0], VM_AREA_ALLOC_PAGES_FALLBACK);
>
> return nr_allocated;
> }
> --
> 2.34.1
>
end of thread, other threads:[~2024-07-25 6:15 UTC | newest]

Thread overview: 6+ messages
2024-07-24 18:19 [RFC PATCH v1] mm/vmalloc: fix incorrect __vmap_pages_range_noflush() if vm_area_alloc_pages() from high order fallback to order0 hailong.liu
2024-07-24 18:28 ` Hailong.Liu
2024-07-24 20:02 ` Matthew Wilcox
2024-07-24 22:11 ` Barry Song
2024-07-25 6:15 ` Hailong.Liu
2024-07-24 22:23 ` Barry Song