* [PATCH 01/14] mm: move get_dev_pagemap out of line
2017-12-07 15:08 revamp vmem_altmap / dev_pagemap handling Christoph Hellwig
@ 2017-12-07 15:08 ` Christoph Hellwig
2017-12-07 18:34 ` Logan Gunthorpe
2017-12-07 15:08 ` [PATCH 02/14] mm: optimize dev_pagemap reference counting around get_dev_pagemap Christoph Hellwig
` (13 subsequent siblings)
14 siblings, 1 reply; 29+ messages in thread
From: Christoph Hellwig @ 2017-12-07 15:08 UTC (permalink / raw)
To: Dan Williams
Cc: Jérôme Glisse, Logan Gunthorpe, linux-nvdimm, linux-mm
This is a pretty big function, which should be out of line in general,
and a no-op stub if CONFIG_ZONE_DEVICE is not set.
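A hedged illustration of the caller-visible result (this snippet is not
part of the patch): callers keep the exact same pattern, and with
CONFIG_ZONE_DEVICE disabled the stub simply returns NULL:

    struct dev_pagemap *pgmap;

    pgmap = get_dev_pagemap(pfn, NULL);
    if (!pgmap)
        return;    /* no ZONE_DEVICE backing, or stub config */
    /* ... operate on the device pages ... */
    put_dev_pagemap(pgmap);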
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
include/linux/memremap.h | 42 +++++-------------------------------------
kernel/memremap.c | 36 ++++++++++++++++++++++++++++++++++--
2 files changed, 39 insertions(+), 39 deletions(-)
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 10d23c367048..f24e0c71d6a6 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -136,8 +136,8 @@ struct dev_pagemap {
#ifdef CONFIG_ZONE_DEVICE
void *devm_memremap_pages(struct device *dev, struct resource *res,
struct percpu_ref *ref, struct vmem_altmap *altmap);
-struct dev_pagemap *find_dev_pagemap(resource_size_t phys);
-
+struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
+ struct dev_pagemap *pgmap);
static inline bool is_zone_device_page(const struct page *page);
#else
static inline void *devm_memremap_pages(struct device *dev,
@@ -153,11 +153,12 @@ static inline void *devm_memremap_pages(struct device *dev,
return ERR_PTR(-ENXIO);
}
-static inline struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
+static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
+ struct dev_pagemap *pgmap)
{
return NULL;
}
-#endif
+#endif /* CONFIG_ZONE_DEVICE */
#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC)
static inline bool is_device_private_page(const struct page *page)
@@ -173,39 +174,6 @@ static inline bool is_device_public_page(const struct page *page)
}
#endif /* CONFIG_DEVICE_PRIVATE || CONFIG_DEVICE_PUBLIC */
-/**
- * get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn
- * @pfn: page frame number to lookup page_map
- * @pgmap: optional known pgmap that already has a reference
- *
- * @pgmap allows the overhead of a lookup to be bypassed when @pfn lands in the
- * same mapping.
- */
-static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
- struct dev_pagemap *pgmap)
-{
- const struct resource *res = pgmap ? pgmap->res : NULL;
- resource_size_t phys = PFN_PHYS(pfn);
-
- /*
- * In the cached case we're already holding a live reference so
- * we can simply do a blind increment
- */
- if (res && phys >= res->start && phys <= res->end) {
- percpu_ref_get(pgmap->ref);
- return pgmap;
- }
-
- /* fall back to slow path lookup */
- rcu_read_lock();
- pgmap = find_dev_pagemap(phys);
- if (pgmap && !percpu_ref_tryget_live(pgmap->ref))
- pgmap = NULL;
- rcu_read_unlock();
-
- return pgmap;
-}
-
static inline void put_dev_pagemap(struct dev_pagemap *pgmap)
{
if (pgmap)
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 403ab9cdb949..f0b54eca85b0 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -314,7 +314,7 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
}
/* assumes rcu_read_lock() held at entry */
-struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
+static struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
{
struct page_map *page_map;
@@ -500,8 +500,40 @@ struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
return pgmap ? pgmap->altmap : NULL;
}
-#endif /* CONFIG_ZONE_DEVICE */
+/**
+ * get_dev_pagemap() - take a new live reference on the dev_pagemap for @pfn
+ * @pfn: page frame number to lookup page_map
+ * @pgmap: optional known pgmap that already has a reference
+ *
+ * @pgmap allows the overhead of a lookup to be bypassed when @pfn lands in the
+ * same mapping.
+ */
+struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
+ struct dev_pagemap *pgmap)
+{
+ const struct resource *res = pgmap ? pgmap->res : NULL;
+ resource_size_t phys = PFN_PHYS(pfn);
+
+ /*
+ * In the cached case we're already holding a live reference so
+ * we can simply do a blind increment
+ */
+ if (res && phys >= res->start && phys <= res->end) {
+ percpu_ref_get(pgmap->ref);
+ return pgmap;
+ }
+
+ /* fall back to slow path lookup */
+ rcu_read_lock();
+ pgmap = find_dev_pagemap(phys);
+ if (pgmap && !percpu_ref_tryget_live(pgmap->ref))
+ pgmap = NULL;
+ rcu_read_unlock();
+
+ return pgmap;
+}
+#endif /* CONFIG_ZONE_DEVICE */
#if IS_ENABLED(CONFIG_DEVICE_PRIVATE) || IS_ENABLED(CONFIG_DEVICE_PUBLIC)
void put_zone_device_private_or_public_page(struct page *page)
--
2.14.2
* Re: [PATCH 01/14] mm: move get_dev_pagemap out of line
2017-12-07 15:08 ` [PATCH 01/14] mm: move get_dev_pagemap out of line Christoph Hellwig
@ 2017-12-07 18:34 ` Logan Gunthorpe
0 siblings, 0 replies; 29+ messages in thread
From: Logan Gunthorpe @ 2017-12-07 18:34 UTC (permalink / raw)
To: Christoph Hellwig, Dan Williams
Cc: Jérôme Glisse, linux-nvdimm, linux-mm
On 07/12/17 08:08 AM, Christoph Hellwig wrote:
> This is a pretty big function, which should be out of line in general,
> and a no-op stub if CONFIG_ZONE_DEVICE is not set.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Logan Gunthorpe <logang@deltatee.com>
* [PATCH 02/14] mm: optimize dev_pagemap reference counting around get_dev_pagemap
2017-12-07 15:08 revamp vmem_altmap / dev_pagemap handling Christoph Hellwig
2017-12-07 15:08 ` [PATCH 01/14] mm: move get_dev_pagemap out of line Christoph Hellwig
@ 2017-12-07 15:08 ` Christoph Hellwig
2017-12-07 18:46 ` Logan Gunthorpe
2017-12-07 15:08 ` [PATCH 03/14] mm: better abstract out dev_pagemap freeing Christoph Hellwig
` (12 subsequent siblings)
14 siblings, 1 reply; 29+ messages in thread
From: Christoph Hellwig @ 2017-12-07 15:08 UTC (permalink / raw)
To: Dan Williams
Cc: Jérôme Glisse, Logan Gunthorpe, linux-nvdimm, linux-mm
Change the calling convention so that get_dev_pagemap always consumes the
previous reference instead of relying on an explicit earlier call to
put_dev_pagemap in the callers.
The callers will still need to put the final reference after finishing the
loop over the pages.
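A hedged sketch of the resulting caller pattern (it mirrors the gup.c
hunks below, but is not itself part of the patch):

    struct dev_pagemap *pgmap = NULL;

    for (pfn = start; pfn < end; pfn++) {
        /* drops the old reference when @pfn is in a different pgmap */
        pgmap = get_dev_pagemap(pfn, pgmap);
        if (!pgmap)
            break;
        /* ... record the page ... */
    }
    if (pgmap)
        put_dev_pagemap(pgmap);    /* single put after the loop */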
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
kernel/memremap.c | 17 +++++++++--------
mm/gup.c | 7 +++++--
2 files changed, 14 insertions(+), 10 deletions(-)
diff --git a/kernel/memremap.c b/kernel/memremap.c
index f0b54eca85b0..502fa107a585 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -506,22 +506,23 @@ struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
* @pfn: page frame number to lookup page_map
* @pgmap: optional known pgmap that already has a reference
*
- * @pgmap allows the overhead of a lookup to be bypassed when @pfn lands in the
- * same mapping.
+ * If @pgmap is non-NULL and covers @pfn it will be returned as-is. If @pgmap
+ * is non-NULL but does not cover @pfn the reference to it will be released.
*/
struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
struct dev_pagemap *pgmap)
{
- const struct resource *res = pgmap ? pgmap->res : NULL;
resource_size_t phys = PFN_PHYS(pfn);
/*
- * In the cached case we're already holding a live reference so
- * we can simply do a blind increment
+ * In the cached case we're already holding a live reference.
*/
- if (res && phys >= res->start && phys <= res->end) {
- percpu_ref_get(pgmap->ref);
- return pgmap;
+ if (pgmap) {
+ const struct resource *res = pgmap ? pgmap->res : NULL;
+
+ if (res && phys >= res->start && phys <= res->end)
+ return pgmap;
+ put_dev_pagemap(pgmap);
}
/* fall back to slow path lookup */
diff --git a/mm/gup.c b/mm/gup.c
index d3fb60e5bfac..9d142eb9e2e9 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1410,7 +1410,6 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
VM_BUG_ON_PAGE(compound_head(page) != head, page);
- put_dev_pagemap(pgmap);
SetPageReferenced(page);
pages[*nr] = page;
(*nr)++;
@@ -1420,6 +1419,8 @@ static int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end,
ret = 1;
pte_unmap:
+ if (pgmap)
+ put_dev_pagemap(pgmap);
pte_unmap(ptem);
return ret;
}
@@ -1459,10 +1460,12 @@ static int __gup_device_huge(unsigned long pfn, unsigned long addr,
SetPageReferenced(page);
pages[*nr] = page;
get_page(page);
- put_dev_pagemap(pgmap);
(*nr)++;
pfn++;
} while (addr += PAGE_SIZE, addr != end);
+
+ if (pgmap)
+ put_dev_pagemap(pgmap);
return 1;
}
--
2.14.2
* Re: [PATCH 02/14] mm: optimize dev_pagemap reference counting around get_dev_pagemap
2017-12-07 15:08 ` [PATCH 02/14] mm: optimize dev_pagemap reference counting around get_dev_pagemap Christoph Hellwig
@ 2017-12-07 18:46 ` Logan Gunthorpe
0 siblings, 0 replies; 29+ messages in thread
From: Logan Gunthorpe @ 2017-12-07 18:46 UTC (permalink / raw)
To: Christoph Hellwig, Dan Williams
Cc: Jérôme Glisse, linux-nvdimm, linux-mm
On 07/12/17 08:08 AM, Christoph Hellwig wrote:
> Change the calling convention so that get_dev_pagemap always consumes the
> previous reference instead of relying on an explicit earlier call to
> put_dev_pagemap in the callers.
>
> The callers will still need to put the final reference after finishing the
> loop over the pages.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Logan Gunthorpe <logang@deltatee.com>
* [PATCH 03/14] mm: better abstract out dev_pagemap freeing
2017-12-07 15:08 revamp vmem_altmap / dev_pagemap handling Christoph Hellwig
2017-12-07 15:08 ` [PATCH 01/14] mm: move get_dev_pagemap out of line Christoph Hellwig
2017-12-07 15:08 ` [PATCH 02/14] mm: optimize dev_pagemap reference counting around get_dev_pagemap Christoph Hellwig
@ 2017-12-07 15:08 ` Christoph Hellwig
2017-12-07 18:49 ` Logan Gunthorpe
2017-12-07 15:08 ` [PATCH 04/14] mm: better abstract out dev_pagemap alloc Christoph Hellwig
` (11 subsequent siblings)
14 siblings, 1 reply; 29+ messages in thread
From: Christoph Hellwig @ 2017-12-07 15:08 UTC (permalink / raw)
To: Dan Williams
Cc: Jérôme Glisse, Logan Gunthorpe, linux-nvdimm, linux-mm
Add a new helper that both looks up the pagemap and updates the alloc
counter.
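A hedged sketch of the intended arch-side call pattern (it matches the
x86 free_pagetable() hunk below):

    if (dev_pagemap_free_pages(page, nr_pages))
        return;    /* accounted against the device page map */
    /* otherwise fall through to bootmem / regular freeing */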
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
arch/powerpc/mm/init_64.c | 6 ++----
arch/x86/mm/init_64.c | 5 +----
include/linux/memremap.h | 7 ++++++-
kernel/memremap.c | 9 +++++++--
4 files changed, 16 insertions(+), 11 deletions(-)
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index a07722531b32..d6a040198edf 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -268,7 +268,6 @@ void __ref vmemmap_free(unsigned long start, unsigned long end)
for (; start < end; start += page_size) {
unsigned long nr_pages, addr;
- struct vmem_altmap *altmap;
struct page *section_base;
struct page *page;
@@ -288,9 +287,8 @@ void __ref vmemmap_free(unsigned long start, unsigned long end)
section_base = pfn_to_page(vmemmap_section_start(start));
nr_pages = 1 << page_order;
- altmap = to_vmem_altmap((unsigned long) section_base);
- if (altmap) {
- vmem_altmap_free(altmap, nr_pages);
+ if (dev_pagemap_free_pages(section_base, nr_pages)) {
+ ;
} else if (PageReserved(page)) {
/* allocated from bootmem */
if (page_size < PAGE_SIZE) {
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 4a837289f2ad..f5e51b941d19 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -804,12 +804,9 @@ static void __meminit free_pagetable(struct page *page, int order)
{
unsigned long magic;
unsigned int nr_pages = 1 << order;
- struct vmem_altmap *altmap = to_vmem_altmap((unsigned long) page);
- if (altmap) {
- vmem_altmap_free(altmap, nr_pages);
+ if (dev_pagemap_free_pages(page, nr_pages))
return;
- }
/* bootmem page has reserved flag */
if (PageReserved(page)) {
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index f24e0c71d6a6..8f4d96f0e265 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -27,7 +27,6 @@ struct vmem_altmap {
};
unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
-void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
#ifdef CONFIG_ZONE_DEVICE
struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start);
@@ -139,6 +138,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
struct dev_pagemap *pgmap);
static inline bool is_zone_device_page(const struct page *page);
+bool dev_pagemap_free_pages(struct page *page, unsigned nr_pages);
#else
static inline void *devm_memremap_pages(struct device *dev,
struct resource *res, struct percpu_ref *ref,
@@ -158,6 +158,11 @@ static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
{
return NULL;
}
+
+static inline bool dev_pagemap_free_pages(struct page *page, unsigned nr_pages)
+{
+ return false;
+}
#endif /* CONFIG_ZONE_DEVICE */
#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC)
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 502fa107a585..1b7c5bc93162 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -470,9 +470,14 @@ unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
return altmap->reserve + altmap->free;
}
-void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns)
+bool dev_pagemap_free_pages(struct page *page, unsigned nr_pages)
{
- altmap->alloc -= nr_pfns;
+ struct vmem_altmap *pgmap = to_vmem_altmap((uintptr_t)page);
+
+ if (!pgmap)
+ return false;
+ pgmap->alloc -= nr_pages;
+ return true;
}
struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
--
2.14.2
* [PATCH 04/14] mm: better abstract out dev_pagemap alloc
2017-12-07 15:08 revamp vmem_altmap / dev_pagemap handling Christoph Hellwig
` (2 preceding siblings ...)
2017-12-07 15:08 ` [PATCH 03/14] mm: better abstract out dev_pagemap freeing Christoph Hellwig
@ 2017-12-07 15:08 ` Christoph Hellwig
2017-12-07 18:52 ` Logan Gunthorpe
2017-12-07 15:08 ` [PATCH 05/14] mm: better abstract out dev_pagemap offset calculation Christoph Hellwig
` (10 subsequent siblings)
14 siblings, 1 reply; 29+ messages in thread
From: Christoph Hellwig @ 2017-12-07 15:08 UTC (permalink / raw)
To: Dan Williams
Cc: Jérôme Glisse, Logan Gunthorpe, linux-nvdimm, linux-mm
Add a new helper that both looks up the pagemap and initializes the
alloc counter.
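A hedged sketch of the resulting hotplug call site (it mirrors the
mm/memory_hotplug.c hunk below):

    err = dev_pagemap_add_pages(phys_start_pfn, nr_pages);
    if (err)
        return err;    /* device page map doesn't cover the range */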
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
include/linux/memremap.h | 7 +++++++
kernel/memremap.c | 18 ++++++++++++++++++
mm/memory_hotplug.c | 23 +++++------------------
3 files changed, 30 insertions(+), 18 deletions(-)
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 8f4d96f0e265..054397a9414f 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -138,6 +138,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
struct dev_pagemap *pgmap);
static inline bool is_zone_device_page(const struct page *page);
+int dev_pagemap_add_pages(unsigned long phys_start_pfn, unsigned nr_pages);
bool dev_pagemap_free_pages(struct page *page, unsigned nr_pages);
#else
static inline void *devm_memremap_pages(struct device *dev,
@@ -159,6 +160,12 @@ static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
return NULL;
}
+static inline int dev_pagemap_add_pages(unsigned long phys_start_pfn,
+ unsigned nr_pages)
+{
+ return 0;
+}
+
static inline bool dev_pagemap_free_pages(struct page *page, unsigned nr_pages)
{
return false;
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 1b7c5bc93162..c86bcd63e2cd 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -470,6 +470,24 @@ unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
return altmap->reserve + altmap->free;
}
+int dev_pagemap_add_pages(unsigned long phys_start_pfn, unsigned nr_pages)
+{
+ struct vmem_altmap *pgmap;
+
+ pgmap = to_vmem_altmap((unsigned long) pfn_to_page(phys_start_pfn));
+ if (!pgmap)
+ return 0;
+
+ if (pgmap->base_pfn != phys_start_pfn ||
+ vmem_altmap_offset(pgmap) > nr_pages) {
+ pr_warn_once("memory add fail, invalid map\n");
+ return -EINVAL;
+ }
+
+ pgmap->alloc = 0;
+ return 0;
+}
+
bool dev_pagemap_free_pages(struct page *page, unsigned nr_pages)
{
struct vmem_altmap *pgmap = to_vmem_altmap((uintptr_t)page);
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index c52aa05b106c..3e7c728f97e3 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -297,25 +297,14 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn,
unsigned long i;
int err = 0;
int start_sec, end_sec;
- struct vmem_altmap *altmap;
/* during initialize mem_map, align hot-added range to section */
start_sec = pfn_to_section_nr(phys_start_pfn);
end_sec = pfn_to_section_nr(phys_start_pfn + nr_pages - 1);
- altmap = to_vmem_altmap((unsigned long) pfn_to_page(phys_start_pfn));
- if (altmap) {
- /*
- * Validate altmap is within bounds of the total request
- */
- if (altmap->base_pfn != phys_start_pfn
- || vmem_altmap_offset(altmap) > nr_pages) {
- pr_warn_once("memory add fail, invalid altmap\n");
- err = -EINVAL;
- goto out;
- }
- altmap->alloc = 0;
- }
+ err = dev_pagemap_add_pages(phys_start_pfn, nr_pages);
+ if (err)
+ return err;
for (i = start_sec; i <= end_sec; i++) {
err = __add_section(nid, section_nr_to_pfn(i), want_memblock);
@@ -326,13 +315,11 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn,
* Warning will be printed if there is collision.
*/
if (err && (err != -EEXIST))
- break;
- err = 0;
+ return err;
cond_resched();
}
vmemmap_populate_print_last();
-out:
- return err;
+ return 0;
}
EXPORT_SYMBOL_GPL(__add_pages);
--
2.14.2
* [PATCH 05/14] mm: better abstract out dev_pagemap offset calculation
2017-12-07 15:08 revamp vmem_altmap / dev_pagemap handling Christoph Hellwig
` (3 preceding siblings ...)
2017-12-07 15:08 ` [PATCH 04/14] mm: better abstract out dev_pagemap alloc Christoph Hellwig
@ 2017-12-07 15:08 ` Christoph Hellwig
2017-12-07 18:54 ` Logan Gunthorpe
2017-12-07 15:08 ` [PATCH 06/14] mm: better abstract out dev_pagemap start_pfn Christoph Hellwig
` (9 subsequent siblings)
14 siblings, 1 reply; 29+ messages in thread
From: Christoph Hellwig @ 2017-12-07 15:08 UTC (permalink / raw)
To: Dan Williams
Cc: Jérôme Glisse, Logan Gunthorpe, linux-nvdimm, linux-mm
Add a helper that looks up the altmap (or later dev_pagemap) and returns
the offset.
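A hedged sketch of a converted caller (matching the arch_remove_memory()
hunks below); dev_pagemap_offset() returns 0 when no device page map
covers the page, so the branch disappears:

    struct page *page = pfn_to_page(start_pfn);

    page += dev_pagemap_offset(page);
    zone = page_zone(page);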
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
arch/powerpc/mm/mem.c | 9 +++------
arch/x86/mm/init_64.c | 6 +-----
include/linux/memremap.h | 8 ++++++--
kernel/memremap.c | 21 +++++++++++++--------
mm/memory_hotplug.c | 7 +------
5 files changed, 24 insertions(+), 27 deletions(-)
diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
index 4362b86ef84c..c7cf396fdabc 100644
--- a/arch/powerpc/mm/mem.c
+++ b/arch/powerpc/mm/mem.c
@@ -152,18 +152,15 @@ int arch_remove_memory(u64 start, u64 size)
{
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
- struct vmem_altmap *altmap;
struct page *page;
int ret;
/*
- * If we have an altmap then we need to skip over any reserved PFNs
- * when querying the zone.
+ * If we have a device page map then we need to skip over any reserved
+ * PFNs when querying the zone.
*/
page = pfn_to_page(start_pfn);
- altmap = to_vmem_altmap((unsigned long) page);
- if (altmap)
- page += vmem_altmap_offset(altmap);
+ page += dev_pagemap_offset(page);
ret = __remove_pages(page_zone(page), start_pfn, nr_pages);
if (ret)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index f5e51b941d19..4f79ee1ef501 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1134,14 +1134,10 @@ int __ref arch_remove_memory(u64 start, u64 size)
unsigned long start_pfn = start >> PAGE_SHIFT;
unsigned long nr_pages = size >> PAGE_SHIFT;
struct page *page = pfn_to_page(start_pfn);
- struct vmem_altmap *altmap;
struct zone *zone;
int ret;
- /* With altmap the first mapped page is offset from @start */
- altmap = to_vmem_altmap((unsigned long) page);
- if (altmap)
- page += vmem_altmap_offset(altmap);
+ page += dev_pagemap_offset(page);
zone = page_zone(page);
ret = __remove_pages(zone, start_pfn, nr_pages);
WARN_ON_ONCE(ret);
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 054397a9414f..d221f4c0ccac 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -26,8 +26,6 @@ struct vmem_altmap {
unsigned long alloc;
};
-unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
-
#ifdef CONFIG_ZONE_DEVICE
struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start);
#else
@@ -140,6 +138,7 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
static inline bool is_zone_device_page(const struct page *page);
int dev_pagemap_add_pages(unsigned long phys_start_pfn, unsigned nr_pages);
bool dev_pagemap_free_pages(struct page *page, unsigned nr_pages);
+unsigned long dev_pagemap_offset(struct page *page);
#else
static inline void *devm_memremap_pages(struct device *dev,
struct resource *res, struct percpu_ref *ref,
@@ -170,6 +169,11 @@ static inline bool dev_pagemap_free_pages(struct page *page, unsigned nr_pages)
{
return false;
}
+
+static inline unsigned long dev_pagemap_offset(struct page *page)
+{
+ return 0;
+}
#endif /* CONFIG_ZONE_DEVICE */
#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC)
diff --git a/kernel/memremap.c b/kernel/memremap.c
index c86bcd63e2cd..91a5fc1146b5 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -248,6 +248,17 @@ int device_private_entry_fault(struct vm_area_struct *vma,
EXPORT_SYMBOL(device_private_entry_fault);
#endif /* CONFIG_DEVICE_PRIVATE */
+static unsigned long __dev_pagemap_offset(struct vmem_altmap *pgmap)
+{
+ /* number of pfns from base where pfn_to_page() is valid */
+ return pgmap ? (pgmap->reserve + pgmap->free) : 0;
+}
+
+unsigned long dev_pagemap_offset(struct page *page)
+{
+ return __dev_pagemap_offset(to_vmem_altmap((uintptr_t)page));
+}
+
static void pgmap_radix_release(struct resource *res)
{
unsigned long pgoff, order;
@@ -269,7 +280,7 @@ static unsigned long pfn_first(struct page_map *page_map)
pfn = res->start >> PAGE_SHIFT;
if (altmap)
- pfn += vmem_altmap_offset(altmap);
+ pfn += __dev_pagemap_offset(altmap);
return pfn;
}
@@ -464,12 +475,6 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
}
EXPORT_SYMBOL(devm_memremap_pages);
-unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
-{
- /* number of pfns from base where pfn_to_page() is valid */
- return altmap->reserve + altmap->free;
-}
-
int dev_pagemap_add_pages(unsigned long phys_start_pfn, unsigned nr_pages)
{
struct vmem_altmap *pgmap;
@@ -479,7 +484,7 @@ int dev_pagemap_add_pages(unsigned long phys_start_pfn, unsigned nr_pages)
return 0;
if (pgmap->base_pfn != phys_start_pfn ||
- vmem_altmap_offset(pgmap) > nr_pages) {
+ __dev_pagemap_offset(pgmap) > nr_pages) {
pr_warn_once("memory add fail, invalid map\n");
return -EINVAL;
}
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 3e7c728f97e3..a7a719f057dc 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -565,12 +565,7 @@ int __remove_pages(struct zone *zone, unsigned long phys_start_pfn,
/* In the ZONE_DEVICE case device driver owns the memory region */
if (is_dev_zone(zone)) {
- struct page *page = pfn_to_page(phys_start_pfn);
- struct vmem_altmap *altmap;
-
- altmap = to_vmem_altmap((unsigned long) page);
- if (altmap)
- map_offset = vmem_altmap_offset(altmap);
+ map_offset = dev_pagemap_offset(pfn_to_page(phys_start_pfn));
} else {
resource_size_t start, size;
--
2.14.2
* [PATCH 06/14] mm: better abstract out dev_pagemap start_pfn
2017-12-07 15:08 revamp vmem_altmap / dev_pagemap handling Christoph Hellwig
` (4 preceding siblings ...)
2017-12-07 15:08 ` [PATCH 05/14] mm: better abstract out dev_pagemap offset calculation Christoph Hellwig
@ 2017-12-07 15:08 ` Christoph Hellwig
2017-12-07 18:57 ` Logan Gunthorpe
2017-12-07 15:08 ` [PATCH 07/14] mm: split dev_pagemap memory map allocation from normal case Christoph Hellwig
` (8 subsequent siblings)
14 siblings, 1 reply; 29+ messages in thread
From: Christoph Hellwig @ 2017-12-07 15:08 UTC (permalink / raw)
To: Dan Williams
Cc: Jérôme Glisse, Logan Gunthorpe, linux-nvdimm, linux-mm
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
include/linux/memremap.h | 6 ++++++
kernel/memremap.c | 9 +++++++++
mm/page_alloc.c | 4 +---
3 files changed, 16 insertions(+), 3 deletions(-)
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index d221f4c0ccac..fe60b4895f56 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -139,6 +139,7 @@ static inline bool is_zone_device_page(const struct page *page);
int dev_pagemap_add_pages(unsigned long phys_start_pfn, unsigned nr_pages);
bool dev_pagemap_free_pages(struct page *page, unsigned nr_pages);
unsigned long dev_pagemap_offset(struct page *page);
+unsigned long dev_pagemap_start_pfn(unsigned long start_pfn);
#else
static inline void *devm_memremap_pages(struct device *dev,
struct resource *res, struct percpu_ref *ref,
@@ -174,6 +175,11 @@ static inline unsigned long dev_pagemap_offset(struct page *page)
{
return 0;
}
+
+static inline unsigned long dev_pagemap_start_pfn(unsigned long start_pfn)
+{
+ return 0;
+}
#endif /* CONFIG_ZONE_DEVICE */
#if defined(CONFIG_DEVICE_PRIVATE) || defined(CONFIG_DEVICE_PUBLIC)
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 91a5fc1146b5..01025c5f3037 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -493,6 +493,15 @@ int dev_pagemap_add_pages(unsigned long phys_start_pfn, unsigned nr_pages)
return 0;
}
+unsigned long dev_pagemap_start_pfn(unsigned long start_pfn)
+{
+ struct vmem_altmap *pgmap = to_vmem_altmap(__pfn_to_phys(start_pfn));
+
+ if (pgmap && start_pfn == pgmap->base_pfn)
+ return pgmap->reserve;
+ return 0;
+}
+
bool dev_pagemap_free_pages(struct page *page, unsigned nr_pages)
{
struct vmem_altmap *pgmap = to_vmem_altmap((uintptr_t)page);
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 73f5d4556b3d..cf6a702222c3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5305,7 +5305,6 @@ void __ref build_all_zonelists(pg_data_t *pgdat)
void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
unsigned long start_pfn, enum memmap_context context)
{
- struct vmem_altmap *altmap = to_vmem_altmap(__pfn_to_phys(start_pfn));
unsigned long end_pfn = start_pfn + size;
pg_data_t *pgdat = NODE_DATA(nid);
unsigned long pfn;
@@ -5321,8 +5320,7 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
* Honor reservation requested by the driver for this ZONE_DEVICE
* memory
*/
- if (altmap && start_pfn == altmap->base_pfn)
- start_pfn += altmap->reserve;
+ start_pfn += dev_pagemap_start_pfn(start_pfn);
for (pfn = start_pfn; pfn < end_pfn; pfn++) {
/*
--
2.14.2
--
To unsubscribe, send a message with 'unsubscribe linux-mm' in
the body to majordomo@kvack.org. For more info on Linux MM,
see: http://www.linux-mm.org/ .
Don't email: <a href=mailto:"dont@kvack.org"> email@kvack.org </a>
^ permalink raw reply [flat|nested] 29+ messages in thread* Re: [PATCH 06/14] mm: better abstract out dev_pagemap start_pfn
2017-12-07 15:08 ` [PATCH 06/14] mm: better abstract out dev_pagemap start_pfn Christoph Hellwig
@ 2017-12-07 18:57 ` Logan Gunthorpe
0 siblings, 0 replies; 29+ messages in thread
From: Logan Gunthorpe @ 2017-12-07 18:57 UTC (permalink / raw)
To: Christoph Hellwig, Dan Williams
Cc: Jérôme Glisse, linux-nvdimm, linux-mm
Someone usually would yell at me if I sent a patch with no commit message ;)
But the changes look good and it's pretty clear even without the message.
On 07/12/17 08:08 AM, Christoph Hellwig wrote:
> Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Logan Gunthorpe <logang@deltatee.com>
* [PATCH 07/14] mm: split dev_pagemap memory map allocation from normal case
2017-12-07 15:08 revamp vmem_altmap / dev_pagemap handling Christoph Hellwig
` (5 preceding siblings ...)
2017-12-07 15:08 ` [PATCH 06/14] mm: better abstract out dev_pagemap start_pfn Christoph Hellwig
@ 2017-12-07 15:08 ` Christoph Hellwig
2017-12-07 19:08 ` Logan Gunthorpe
2017-12-07 15:08 ` [PATCH 08/14] mm: merge vmem_altmap_alloc into dev_pagemap_alloc_block_buf Christoph Hellwig
` (7 subsequent siblings)
14 siblings, 1 reply; 29+ messages in thread
From: Christoph Hellwig @ 2017-12-07 15:08 UTC (permalink / raw)
To: Dan Williams
Cc: Jérôme Glisse, Logan Gunthorpe, linux-nvdimm, linux-mm
No functional changes, just untangling the call chain.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
arch/powerpc/mm/init_64.c | 6 ++++--
arch/x86/mm/init_64.c | 5 ++++-
include/linux/mm.h | 8 ++------
mm/sparse-vmemmap.c | 15 +++------------
4 files changed, 13 insertions(+), 21 deletions(-)
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index d6a040198edf..3a39a644e96c 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -202,8 +202,10 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
/* altmap lookups only work at section boundaries */
altmap = to_vmem_altmap(SECTION_ALIGN_DOWN(start));
-
- p = __vmemmap_alloc_block_buf(page_size, node, altmap);
+ if (altmap)
+ p = dev_pagemap_alloc_block_buf(page_size, altmap);
+ else
+ p = vmemmap_alloc_block_buf(page_size, node);
if (!p)
return -ENOMEM;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 4f79ee1ef501..9e1b489aa826 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1371,7 +1371,10 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
if (pmd_none(*pmd)) {
void *p;
- p = __vmemmap_alloc_block_buf(PMD_SIZE, node, altmap);
+ if (altmap)
+ p = dev_pagemap_alloc_block_buf(PMD_SIZE, altmap);
+ else
+ p = vmemmap_alloc_block_buf(PMD_SIZE, node);
if (p) {
pte_t entry;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index ea818ff739cd..856869e2c119 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2546,13 +2546,9 @@ pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node);
pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node);
void *vmemmap_alloc_block(unsigned long size, int node);
struct vmem_altmap;
-void *__vmemmap_alloc_block_buf(unsigned long size, int node,
+void *vmemmap_alloc_block_buf(unsigned long size, int node);
+void *dev_pagemap_alloc_block_buf(unsigned long size,
struct vmem_altmap *altmap);
-static inline void *vmemmap_alloc_block_buf(unsigned long size, int node)
-{
- return __vmemmap_alloc_block_buf(size, node, NULL);
-}
-
void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);
int vmemmap_populate_basepages(unsigned long start, unsigned long end,
int node);
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 17acf01791fa..268b6c7dfdf4 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -74,7 +74,7 @@ void * __meminit vmemmap_alloc_block(unsigned long size, int node)
}
/* need to make sure size is all the same during early stage */
-static void * __meminit alloc_block_buf(unsigned long size, int node)
+void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node)
{
void *ptr;
@@ -129,7 +129,7 @@ static unsigned long __meminit vmem_altmap_alloc(struct vmem_altmap *altmap,
return pfn + nr_align;
}
-static void * __meminit altmap_alloc_block_buf(unsigned long size,
+void * __meminit dev_pagemap_alloc_block_buf(unsigned long size,
struct vmem_altmap *altmap)
{
unsigned long pfn, nr_pfns;
@@ -153,15 +153,6 @@ static void * __meminit altmap_alloc_block_buf(unsigned long size,
return ptr;
}
-/* need to make sure size is all the same during early stage */
-void * __meminit __vmemmap_alloc_block_buf(unsigned long size, int node,
- struct vmem_altmap *altmap)
-{
- if (altmap)
- return altmap_alloc_block_buf(size, altmap);
- return alloc_block_buf(size, node);
-}
-
void __meminit vmemmap_verify(pte_t *pte, int node,
unsigned long start, unsigned long end)
{
@@ -178,7 +169,7 @@ pte_t * __meminit vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node)
pte_t *pte = pte_offset_kernel(pmd, addr);
if (pte_none(*pte)) {
pte_t entry;
- void *p = alloc_block_buf(PAGE_SIZE, node);
+ void *p = vmemmap_alloc_block_buf(PAGE_SIZE, node);
if (!p)
return NULL;
entry = pfn_pte(__pa(p) >> PAGE_SHIFT, PAGE_KERNEL);
--
2.14.2
* [PATCH 07/14] mm: split dev_pagemap memory map allocation from normal case
2017-12-07 15:08 revamp vmem_altmap / dev_pagemap handling Christoph Hellwig
` (6 preceding siblings ...)
2017-12-07 15:08 ` [PATCH 07/14] mm: split dev_pagemap memory map allocation from normal case Christoph Hellwig
@ 2017-12-07 15:08 ` Christoph Hellwig
2017-12-07 19:14 ` Logan Gunthorpe
2017-12-07 15:08 ` [PATCH 09/14] memremap: drop private struct page_map Christoph Hellwig
` (6 subsequent siblings)
14 siblings, 1 reply; 29+ messages in thread
From: Christoph Hellwig @ 2017-12-07 15:08 UTC (permalink / raw)
To: Dan Williams
Cc: Jérôme Glisse, Logan Gunthorpe, linux-nvdimm, linux-mm
There is no clear separation between the two, so merge them. Also move
the device page map argument to the first position, which is the more
natural calling convention.
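As a hedged worked example of the self-alignment logic that the merged
function keeps (assuming PAGE_SHIFT == 12): a 2 MB request gives
nr_pfns = 512, find_first_bit() then yields a 512-pfn alignment, and the
allocation is padded up to the next 2 MB boundary:

    nr_pfns  = size >> PAGE_SHIFT;          /* 0x200000 >> 12 = 512 */
    nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);
                                            /* lowest set bit 9 -> 512 */
    nr_align = ALIGN(pfn, nr_align) - pfn;  /* pfns of padding needed */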
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
arch/powerpc/mm/init_64.c | 2 +-
arch/x86/mm/init_64.c | 2 +-
include/linux/mm.h | 4 ++--
mm/sparse-vmemmap.c | 51 ++++++++++++++++++-----------------------------
4 files changed, 23 insertions(+), 36 deletions(-)
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 3a39a644e96c..ec706857bdd6 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -203,7 +203,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
/* altmap lookups only work at section boundaries */
altmap = to_vmem_altmap(SECTION_ALIGN_DOWN(start));
if (altmap)
- p = dev_pagemap_alloc_block_buf(page_size, altmap);
+ p = dev_pagemap_alloc_block_buf(altmap, page_size);
else
p = vmemmap_alloc_block_buf(page_size, node);
if (!p)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 9e1b489aa826..131749080874 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1372,7 +1372,7 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
void *p;
if (altmap)
- p = dev_pagemap_alloc_block_buf(PMD_SIZE, altmap);
+ p = dev_pagemap_alloc_block_buf(altmap, PMD_SIZE);
else
p = vmemmap_alloc_block_buf(PMD_SIZE, node);
if (p) {
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 856869e2c119..cd3d1c00f6a3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2547,8 +2547,8 @@ pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node);
void *vmemmap_alloc_block(unsigned long size, int node);
struct vmem_altmap;
void *vmemmap_alloc_block_buf(unsigned long size, int node);
-void *dev_pagemap_alloc_block_buf(unsigned long size,
- struct vmem_altmap *altmap);
+void *dev_pagemap_alloc_block_buf(struct vmem_altmap *pgmap,
+ unsigned long size);
void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);
int vmemmap_populate_basepages(unsigned long start, unsigned long end,
int node);
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index 268b6c7dfdf4..fef41a6a9f64 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -107,33 +107,16 @@ static unsigned long __meminit vmem_altmap_nr_free(struct vmem_altmap *altmap)
}
/**
- * vmem_altmap_alloc - allocate pages from the vmem_altmap reservation
- * @altmap - reserved page pool for the allocation
- * @nr_pfns - size (in pages) of the allocation
+ * dev_pagemap_alloc_block_buf - allocate pages from the device page map
+ * @pgmap: device page map
+ * @size: size (in bytes) of the allocation
*
- * Allocations are aligned to the size of the request
+ * Allocations are aligned to the size of the request.
*/
-static unsigned long __meminit vmem_altmap_alloc(struct vmem_altmap *altmap,
- unsigned long nr_pfns)
+void * __meminit dev_pagemap_alloc_block_buf(struct vmem_altmap *pgmap,
+ unsigned long size)
{
- unsigned long pfn = vmem_altmap_next_pfn(altmap);
- unsigned long nr_align;
-
- nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);
- nr_align = ALIGN(pfn, nr_align) - pfn;
-
- if (nr_pfns + nr_align > vmem_altmap_nr_free(altmap))
- return ULONG_MAX;
- altmap->alloc += nr_pfns;
- altmap->align += nr_align;
- return pfn + nr_align;
-}
-
-void * __meminit dev_pagemap_alloc_block_buf(unsigned long size,
- struct vmem_altmap *altmap)
-{
- unsigned long pfn, nr_pfns;
- void *ptr;
+ unsigned long pfn, nr_pfns, nr_align;
if (size & ~PAGE_MASK) {
pr_warn_once("%s: allocations must be multiple of PAGE_SIZE (%ld)\n",
@@ -141,16 +124,20 @@ void * __meminit dev_pagemap_alloc_block_buf(unsigned long size,
return NULL;
}
+ pfn = vmem_altmap_next_pfn(pgmap);
nr_pfns = size >> PAGE_SHIFT;
- pfn = vmem_altmap_alloc(altmap, nr_pfns);
- if (pfn < ULONG_MAX)
- ptr = __va(__pfn_to_phys(pfn));
- else
- ptr = NULL;
- pr_debug("%s: pfn: %#lx alloc: %ld align: %ld nr: %#lx\n",
- __func__, pfn, altmap->alloc, altmap->align, nr_pfns);
+ nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);
+ nr_align = ALIGN(pfn, nr_align) - pfn;
+ if (nr_pfns + nr_align > vmem_altmap_nr_free(pgmap))
+ return NULL;
- return ptr;
+ pgmap->alloc += nr_pfns;
+ pgmap->align += nr_align;
+ pfn += nr_align;
+
+ pr_debug("%s: pfn: %#lx alloc: %ld align: %ld nr: %#lx\n",
+ __func__, pfn, pgmap->alloc, pgmap->align, nr_pfns);
+ return __va(__pfn_to_phys(pfn));
}
void __meminit vmemmap_verify(pte_t *pte, int node,
--
2.14.2
* [PATCH 08/14] mm: merge vmem_altmap_alloc into dev_pagemap_alloc_block_buf
2017-12-07 15:08 revamp vmem_altmap / dev_pagemap handling Christoph Hellwig
` (7 preceding siblings ...)
2017-12-07 15:08 ` [PATCH 08/14] mm: merge vmem_altmap_alloc into dev_pagemap_alloc_block_buf Christoph Hellwig
@ 2017-12-07 15:08 ` Christoph Hellwig
2017-12-07 15:08 ` [PATCH 10/14] memremap: change devm_memremap_pages interface to use struct dev_pagemap Christoph Hellwig
` (5 subsequent siblings)
14 siblings, 0 replies; 29+ messages in thread
From: Christoph Hellwig @ 2017-12-07 15:08 UTC (permalink / raw)
To: Dan Williams
Cc: Jérôme Glisse, Logan Gunthorpe, linux-nvdimm, linux-mm
From: Logan Gunthorpe <logang@deltatee.com>
'struct page_map' is a private structure of 'struct dev_pagemap' but the
latter replicates all the same fields as the former so there isn't much
value in it. Thus drop it in favour of a completely public struct.
This is a clean-up in preparation for a more generally useful
'devm_memremap_pages' interface.
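For reference, a side-by-side of the structures involved (taken from the
hunks below, abridged):

    /* before: private wrapper in kernel/memremap.c */
    struct page_map {
        struct resource res;
        struct percpu_ref *ref;
        struct dev_pagemap pgmap;
        struct vmem_altmap altmap;
    };

    /* after: dev_pagemap carries the data by value */
    struct dev_pagemap {
        dev_page_fault_t page_fault;
        dev_page_free_t page_free;
        struct vmem_altmap altmap;
        bool altmap_valid;
        struct resource res;
        struct percpu_ref *ref;
        struct device *dev;
        void *data;
    };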
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
include/linux/memremap.h | 5 ++--
kernel/memremap.c | 67 +++++++++++++++++++-----------------------------
mm/hmm.c | 2 +-
3 files changed, 30 insertions(+), 44 deletions(-)
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index fe60b4895f56..316effd61f16 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -122,8 +122,9 @@ typedef void (*dev_page_free_t)(struct page *page, void *data);
struct dev_pagemap {
dev_page_fault_t page_fault;
dev_page_free_t page_free;
- struct vmem_altmap *altmap;
- const struct resource *res;
+ struct vmem_altmap altmap;
+ bool altmap_valid;
+ struct resource res;
struct percpu_ref *ref;
struct device *dev;
void *data;
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 01025c5f3037..31bcfeea4a73 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -188,13 +188,6 @@ static RADIX_TREE(pgmap_radix, GFP_KERNEL);
#define SECTION_MASK ~((1UL << PA_SECTION_SHIFT) - 1)
#define SECTION_SIZE (1UL << PA_SECTION_SHIFT)
-struct page_map {
- struct resource res;
- struct percpu_ref *ref;
- struct dev_pagemap pgmap;
- struct vmem_altmap altmap;
-};
-
static unsigned long order_at(struct resource *res, unsigned long pgoff)
{
unsigned long phys_pgoff = PHYS_PFN(res->start) + pgoff;
@@ -271,22 +264,21 @@ static void pgmap_radix_release(struct resource *res)
synchronize_rcu();
}
-static unsigned long pfn_first(struct page_map *page_map)
+static unsigned long pfn_first(struct dev_pagemap *pgmap)
{
- struct dev_pagemap *pgmap = &page_map->pgmap;
- const struct resource *res = &page_map->res;
- struct vmem_altmap *altmap = pgmap->altmap;
+ const struct resource *res = &pgmap->res;
+ struct vmem_altmap *altmap = &pgmap->altmap;
unsigned long pfn;
pfn = res->start >> PAGE_SHIFT;
- if (altmap)
+ if (pgmap->altmap_valid)
pfn += __dev_pagemap_offset(altmap);
return pfn;
}
-static unsigned long pfn_end(struct page_map *page_map)
+static unsigned long pfn_end(struct dev_pagemap *pgmap)
{
- const struct resource *res = &page_map->res;
+ const struct resource *res = &pgmap->res;
return (res->start + resource_size(res)) >> PAGE_SHIFT;
}
@@ -296,13 +288,12 @@ static unsigned long pfn_end(struct page_map *page_map)
static void devm_memremap_pages_release(struct device *dev, void *data)
{
- struct page_map *page_map = data;
- struct resource *res = &page_map->res;
+ struct dev_pagemap *pgmap = data;
+ struct resource *res = &pgmap->res;
resource_size_t align_start, align_size;
- struct dev_pagemap *pgmap = &page_map->pgmap;
unsigned long pfn;
- for_each_device_pfn(pfn, page_map)
+ for_each_device_pfn(pfn, pgmap)
put_page(pfn_to_page(pfn));
if (percpu_ref_tryget_live(pgmap->ref)) {
@@ -320,19 +311,16 @@ static void devm_memremap_pages_release(struct device *dev, void *data)
untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
pgmap_radix_release(res);
- dev_WARN_ONCE(dev, pgmap->altmap && pgmap->altmap->alloc,
- "%s: failed to free all reserved pages\n", __func__);
+ dev_WARN_ONCE(dev, pgmap->altmap.alloc,
+ "%s: failed to free all reserved pages\n", __func__);
}
/* assumes rcu_read_lock() held at entry */
static struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
{
- struct page_map *page_map;
-
WARN_ON_ONCE(!rcu_read_lock_held());
- page_map = radix_tree_lookup(&pgmap_radix, PHYS_PFN(phys));
- return page_map ? &page_map->pgmap : NULL;
+ return radix_tree_lookup(&pgmap_radix, PHYS_PFN(phys));
}
/**
@@ -360,7 +348,6 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
unsigned long pfn, pgoff, order;
pgprot_t pgprot = PAGE_KERNEL;
struct dev_pagemap *pgmap;
- struct page_map *page_map;
int error, nid, is_ram, i = 0;
align_start = res->start & ~(SECTION_SIZE - 1);
@@ -381,21 +368,19 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
if (!ref)
return ERR_PTR(-EINVAL);
- page_map = devres_alloc_node(devm_memremap_pages_release,
- sizeof(*page_map), GFP_KERNEL, dev_to_node(dev));
- if (!page_map)
+ pgmap = devres_alloc_node(devm_memremap_pages_release,
+ sizeof(*pgmap), GFP_KERNEL, dev_to_node(dev));
+ if (!pgmap)
return ERR_PTR(-ENOMEM);
- pgmap = &page_map->pgmap;
- memcpy(&page_map->res, res, sizeof(*res));
+ memcpy(&pgmap->res, res, sizeof(*res));
pgmap->dev = dev;
if (altmap) {
- memcpy(&page_map->altmap, altmap, sizeof(*altmap));
- pgmap->altmap = &page_map->altmap;
+ memcpy(&pgmap->altmap, altmap, sizeof(*altmap));
+ pgmap->altmap_valid = true;
}
pgmap->ref = ref;
- pgmap->res = &page_map->res;
pgmap->type = MEMORY_DEVICE_HOST;
pgmap->page_fault = NULL;
pgmap->page_free = NULL;
@@ -418,7 +403,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
break;
}
error = __radix_tree_insert(&pgmap_radix,
- PHYS_PFN(res->start) + pgoff, order, page_map);
+ PHYS_PFN(res->start) + pgoff, order, pgmap);
if (error) {
dev_err(dev, "%s: failed: %d\n", __func__, error);
break;
@@ -447,7 +432,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
if (error)
goto err_add_memory;
- for_each_device_pfn(pfn, page_map) {
+ for_each_device_pfn(pfn, pgmap) {
struct page *page = pfn_to_page(pfn);
/*
@@ -462,7 +447,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
if (!(++i % 1024))
cond_resched();
}
- devres_add(dev, page_map);
+ devres_add(dev, pgmap);
return __va(res->start);
err_add_memory:
@@ -470,7 +455,7 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
err_pfn_remap:
err_radix:
pgmap_radix_release(res);
- devres_free(page_map);
+ devres_free(pgmap);
return ERR_PTR(error);
}
EXPORT_SYMBOL(devm_memremap_pages);
@@ -535,7 +520,9 @@ struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
pgmap = find_dev_pagemap(__pfn_to_phys(page_to_pfn(page)));
rcu_read_unlock();
- return pgmap ? pgmap->altmap : NULL;
+ if (!pgmap || !pgmap->altmap_valid)
+ return NULL;
+ return &pgmap->altmap;
}
/**
@@ -555,9 +542,7 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
* In the cached case we're already holding a live reference.
*/
if (pgmap) {
- const struct resource *res = pgmap ? pgmap->res : NULL;
-
- if (res && phys >= res->start && phys <= res->end)
+ if (phys >= pgmap->res.start && phys <= pgmap->res.end)
return pgmap;
put_dev_pagemap(pgmap);
}
diff --git a/mm/hmm.c b/mm/hmm.c
index 3a5c172af560..6d45c499c761 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -882,7 +882,7 @@ static int hmm_devmem_pages_create(struct hmm_devmem *devmem)
else
devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
- devmem->pagemap.res = devmem->resource;
+ devmem->pagemap.res = *devmem->resource;
devmem->pagemap.page_fault = hmm_devmem_fault;
devmem->pagemap.page_free = hmm_devmem_free;
devmem->pagemap.dev = devmem->device;
--
2.14.2
* [PATCH 10/14] memremap: change devm_memremap_pages interface to use struct dev_pagemap
2017-12-07 15:08 revamp vmem_altmap / dev_pagemap handling Christoph Hellwig
` (8 preceding siblings ...)
2017-12-07 15:08 ` [PATCH 09/14] memremap: drop private struct page_map Christoph Hellwig
@ 2017-12-07 15:08 ` Christoph Hellwig
2017-12-08 4:03 ` Dan Williams
2017-12-07 15:08 ` [PATCH 11/14] memremap: simplify duplicate region handling in devm_memremap_pages Christoph Hellwig
` (4 subsequent siblings)
14 siblings, 1 reply; 29+ messages in thread
From: Christoph Hellwig @ 2017-12-07 15:08 UTC (permalink / raw)
To: Dan Williams
Cc: Jérôme Glisse, Logan Gunthorpe, linux-nvdimm, linux-mm
From: Logan Gunthorpe <logang@deltatee.com>
This new interface is similar to how struct device (and many others)
work. The caller initializes a 'struct dev_pagemap' as required
and calls 'devm_memremap_pages'. This allows the pagemap structure to
be embedded in another structure and thus container_of can be used. In
this way application specific members can be stored in a containing
struct.
This will be used by the P2P infrastructure and HMM could probably
be cleaned up to use it as well (instead of having its own, similar
'hmm_devmem_pages_create' function).
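A hedged sketch of the embedding pattern this enables (all names here
are hypothetical; the flow loosely follows the pmem.c conversion below):

    struct my_dev {
        struct dev_pagemap pgmap;    /* embedded by the caller */
        struct percpu_ref ref;
    };

    static int my_probe(struct device *dev, struct my_dev *md,
            struct resource *res)
    {
        void *addr;

        memcpy(&md->pgmap.res, res, sizeof(*res));
        md->pgmap.ref = &md->ref;
        md->pgmap.altmap_valid = false;

        addr = devm_memremap_pages(dev, &md->pgmap);
        if (IS_ERR(addr))
            return PTR_ERR(addr);

        /* any pgmap pointer now recovers the container: */
        /* md = container_of(pgmap, struct my_dev, pgmap); */
        return 0;
    }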
Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/dax/pmem.c | 20 ++++++++--------
drivers/nvdimm/nd.h | 9 ++++----
drivers/nvdimm/pfn_devs.c | 25 ++++++++++----------
drivers/nvdimm/pmem.c | 37 ++++++++++++++++--------------
drivers/nvdimm/pmem.h | 1 +
include/linux/memremap.h | 6 ++---
kernel/memremap.c | 48 ++++++++++++++++-----------------------
tools/testing/nvdimm/test/iomap.c | 7 +++---
8 files changed, 73 insertions(+), 80 deletions(-)
diff --git a/drivers/dax/pmem.c b/drivers/dax/pmem.c
index 8d8c852ba8f2..31b6ecce4c64 100644
--- a/drivers/dax/pmem.c
+++ b/drivers/dax/pmem.c
@@ -21,6 +21,7 @@
struct dax_pmem {
struct device *dev;
struct percpu_ref ref;
+ struct dev_pagemap pgmap;
struct completion cmp;
};
@@ -69,20 +70,23 @@ static int dax_pmem_probe(struct device *dev)
struct nd_namespace_common *ndns;
struct nd_dax *nd_dax = to_nd_dax(dev);
struct nd_pfn *nd_pfn = &nd_dax->nd_pfn;
- struct vmem_altmap __altmap, *altmap = NULL;
ndns = nvdimm_namespace_common_probe(dev);
if (IS_ERR(ndns))
return PTR_ERR(ndns);
nsio = to_nd_namespace_io(&ndns->dev);
+ dax_pmem = devm_kzalloc(dev, sizeof(*dax_pmem), GFP_KERNEL);
+ if (!dax_pmem)
+ return -ENOMEM;
+
/* parse the 'pfn' info block via ->rw_bytes */
rc = devm_nsio_enable(dev, nsio);
if (rc)
return rc;
- altmap = nvdimm_setup_pfn(nd_pfn, &res, &__altmap);
- if (IS_ERR(altmap))
- return PTR_ERR(altmap);
+ rc = nvdimm_setup_pfn(nd_pfn, &dax_pmem->pgmap);
+ if (rc)
+ return rc;
devm_nsio_disable(dev, nsio);
pfn_sb = nd_pfn->pfn_sb;
@@ -94,10 +98,6 @@ static int dax_pmem_probe(struct device *dev)
return -EBUSY;
}
- dax_pmem = devm_kzalloc(dev, sizeof(*dax_pmem), GFP_KERNEL);
- if (!dax_pmem)
- return -ENOMEM;
-
dax_pmem->dev = dev;
init_completion(&dax_pmem->cmp);
rc = percpu_ref_init(&dax_pmem->ref, dax_pmem_percpu_release, 0,
@@ -110,7 +110,8 @@ static int dax_pmem_probe(struct device *dev)
if (rc)
return rc;
- addr = devm_memremap_pages(dev, &res, &dax_pmem->ref, altmap);
+ dax_pmem->pgmap.ref = &dax_pmem->ref;
+ addr = devm_memremap_pages(dev, &dax_pmem->pgmap);
if (IS_ERR(addr))
return PTR_ERR(addr);
@@ -120,6 +121,7 @@ static int dax_pmem_probe(struct device *dev)
return rc;
/* adjust the dax_region resource to the start of data */
+ memcpy(&res, &dax_pmem->pgmap.res, sizeof(res));
res.start += le64_to_cpu(pfn_sb->dataoff);
rc = sscanf(dev_name(&ndns->dev), "namespace%d.%d", &region_id, &id);
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index e958f3724c41..8d6375ee0fda 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -368,15 +368,14 @@ unsigned int pmem_sector_size(struct nd_namespace_common *ndns);
void nvdimm_badblocks_populate(struct nd_region *nd_region,
struct badblocks *bb, const struct resource *res);
#if IS_ENABLED(CONFIG_ND_CLAIM)
-struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
- struct resource *res, struct vmem_altmap *altmap);
+int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap);
int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio);
void devm_nsio_disable(struct device *dev, struct nd_namespace_io *nsio);
#else
-static inline struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
- struct resource *res, struct vmem_altmap *altmap)
+static inline int nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
+ struct dev_pagemap *pgmap)
{
- return ERR_PTR(-ENXIO);
+ return -ENXIO;
}
static inline int devm_nsio_enable(struct device *dev,
struct nd_namespace_io *nsio)
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 65cc171c721d..6f58615ddb85 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -541,9 +541,10 @@ static unsigned long init_altmap_reserve(resource_size_t base)
return reserve;
}
-static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
- struct resource *res, struct vmem_altmap *altmap)
+static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
{
+ struct resource *res = &pgmap->res;
+ struct vmem_altmap *altmap = &pgmap->altmap;
struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
u64 offset = le64_to_cpu(pfn_sb->dataoff);
u32 start_pad = __le32_to_cpu(pfn_sb->start_pad);
@@ -562,9 +563,9 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
if (nd_pfn->mode == PFN_MODE_RAM) {
if (offset < SZ_8K)
- return ERR_PTR(-EINVAL);
+ return -EINVAL;
nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns);
- altmap = NULL;
+ pgmap->altmap_valid = false;
} else if (nd_pfn->mode == PFN_MODE_PMEM) {
nd_pfn->npfns = PFN_SECTION_ALIGN_UP((resource_size(res)
- offset) / PAGE_SIZE);
@@ -576,10 +577,11 @@ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
memcpy(altmap, &__altmap, sizeof(*altmap));
altmap->free = PHYS_PFN(offset - SZ_8K);
altmap->alloc = 0;
+ pgmap->altmap_valid = true;
} else
- return ERR_PTR(-ENXIO);
+ return -ENXIO;
- return altmap;
+ return 0;
}
static int nd_pfn_init(struct nd_pfn *nd_pfn)
@@ -698,19 +700,18 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
* Determine the effective resource range and vmem_altmap from an nd_pfn
* instance.
*/
-struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
- struct resource *res, struct vmem_altmap *altmap)
+int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
{
int rc;
if (!nd_pfn->uuid || !nd_pfn->ndns)
- return ERR_PTR(-ENODEV);
+ return -ENODEV;
rc = nd_pfn_init(nd_pfn);
if (rc)
- return ERR_PTR(rc);
+ return rc;
- /* we need a valid pfn_sb before we can init a vmem_altmap */
- return __nvdimm_setup_pfn(nd_pfn, res, altmap);
+ /* we need a valid pfn_sb before we can init a dev_pagemap */
+ return __nvdimm_setup_pfn(nd_pfn, pgmap);
}
EXPORT_SYMBOL_GPL(nvdimm_setup_pfn);
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 7fbc5c5dc8e1..cf074b1ce219 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -298,34 +298,34 @@ static int pmem_attach_disk(struct device *dev,
{
struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
struct nd_region *nd_region = to_nd_region(dev->parent);
- struct vmem_altmap __altmap, *altmap = NULL;
int nid = dev_to_node(dev), fua, wbc;
struct resource *res = &nsio->res;
+ struct resource bb_res;
struct nd_pfn *nd_pfn = NULL;
struct dax_device *dax_dev;
struct nd_pfn_sb *pfn_sb;
struct pmem_device *pmem;
- struct resource pfn_res;
struct request_queue *q;
struct device *gendev;
struct gendisk *disk;
void *addr;
+ int rc;
+
+ pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL);
+ if (!pmem)
+ return -ENOMEM;
/* while nsio_rw_bytes is active, parse a pfn info block if present */
if (is_nd_pfn(dev)) {
nd_pfn = to_nd_pfn(dev);
- altmap = nvdimm_setup_pfn(nd_pfn, &pfn_res, &__altmap);
- if (IS_ERR(altmap))
- return PTR_ERR(altmap);
+ rc = nvdimm_setup_pfn(nd_pfn, &pmem->pgmap);
+ if (rc)
+ return rc;
}
/* we're attaching a block device, disable raw namespace access */
devm_nsio_disable(dev, nsio);
- pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL);
- if (!pmem)
- return -ENOMEM;
-
dev_set_drvdata(dev, pmem);
pmem->phys_addr = res->start;
pmem->size = resource_size(res);
@@ -350,19 +350,22 @@ static int pmem_attach_disk(struct device *dev,
return -ENOMEM;
pmem->pfn_flags = PFN_DEV;
+ pmem->pgmap.ref = &q->q_usage_counter;
if (is_nd_pfn(dev)) {
- addr = devm_memremap_pages(dev, &pfn_res, &q->q_usage_counter,
- altmap);
+ addr = devm_memremap_pages(dev, &pmem->pgmap);
pfn_sb = nd_pfn->pfn_sb;
pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
- pmem->pfn_pad = resource_size(res) - resource_size(&pfn_res);
+ pmem->pfn_pad = resource_size(res) -
+ resource_size(&pmem->pgmap.res);
pmem->pfn_flags |= PFN_MAP;
- res = &pfn_res; /* for badblocks populate */
- res->start += pmem->data_offset;
+ memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
+ bb_res.start += pmem->data_offset;
} else if (pmem_should_map_pages(dev)) {
- addr = devm_memremap_pages(dev, &nsio->res,
- &q->q_usage_counter, NULL);
+ memcpy(&pmem->pgmap.res, &nsio->res, sizeof(pmem->pgmap.res));
+ pmem->pgmap.altmap_valid = false;
+ addr = devm_memremap_pages(dev, &pmem->pgmap);
pmem->pfn_flags |= PFN_MAP;
+ memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
} else
addr = devm_memremap(dev, pmem->phys_addr,
pmem->size, ARCH_MEMREMAP_PMEM);
@@ -401,7 +404,7 @@ static int pmem_attach_disk(struct device *dev,
/ 512);
if (devm_init_badblocks(dev, &pmem->bb))
return -ENOMEM;
- nvdimm_badblocks_populate(nd_region, &pmem->bb, res);
+ nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_res);
disk->bb = &pmem->bb;
dax_dev = alloc_dax(pmem, disk->disk_name, &pmem_dax_ops);
diff --git a/drivers/nvdimm/pmem.h b/drivers/nvdimm/pmem.h
index 6a3cd2a10db6..a64ebc78b5df 100644
--- a/drivers/nvdimm/pmem.h
+++ b/drivers/nvdimm/pmem.h
@@ -22,6 +22,7 @@ struct pmem_device {
struct badblocks bb;
struct dax_device *dax_dev;
struct gendisk *disk;
+ struct dev_pagemap pgmap;
};
long __pmem_direct_access(struct pmem_device *pmem, pgoff_t pgoff,
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 316effd61f16..e973a069646c 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -132,8 +132,7 @@ struct dev_pagemap {
};
#ifdef CONFIG_ZONE_DEVICE
-void *devm_memremap_pages(struct device *dev, struct resource *res,
- struct percpu_ref *ref, struct vmem_altmap *altmap);
+void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
struct dev_pagemap *pgmap);
static inline bool is_zone_device_page(const struct page *page);
@@ -143,8 +142,7 @@ unsigned long dev_pagemap_offset(struct page *page);
unsigned long dev_pagemap_start_pfn(unsigned long start_pfn);
#else
static inline void *devm_memremap_pages(struct device *dev,
- struct resource *res, struct percpu_ref *ref,
- struct vmem_altmap *altmap)
+ struct dev_pagemap *pgmap)
{
/*
* Fail attempts to call devm_memremap_pages() without
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 31bcfeea4a73..f4b198304e3b 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -286,9 +286,10 @@ static unsigned long pfn_end(struct dev_pagemap *pgmap)
#define for_each_device_pfn(pfn, map) \
for (pfn = pfn_first(map); pfn < pfn_end(map); pfn++)
-static void devm_memremap_pages_release(struct device *dev, void *data)
+static void devm_memremap_pages_release(void *data)
{
struct dev_pagemap *pgmap = data;
+ struct device *dev = pgmap->dev;
struct resource *res = &pgmap->res;
resource_size_t align_start, align_size;
unsigned long pfn;
@@ -326,29 +327,32 @@ static struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
/**
* devm_memremap_pages - remap and provide memmap backing for the given resource
* @dev: hosting device for @res
- * @res: "host memory" address range
- * @ref: a live per-cpu reference count
- * @altmap: optional descriptor for allocating the memmap from @res
+ * @pgmap: pointer to a struct dev_pagemap
*
* Notes:
- * 1/ @ref must be 'live' on entry and 'dead' before devm_memunmap_pages() time
- * (or devm release event). The expected order of events is that @ref has
+ * 1/ At a minimum the res, ref and type members of @pgmap must be initialized
+ * by the caller before passing it to this function
+ *
+ * 2/ The altmap field may optionally be initialized, in which case altmap_valid
+ * must be set to true
+ *
+ * 3/ pgmap.ref must be 'live' on entry and 'dead' before devm_memunmap_pages()
+ * time (or devm release event). The expected order of events is that ref has
* been through percpu_ref_kill() before devm_memremap_pages_release(). The
* wait for the completion of all references being dropped and
* percpu_ref_exit() must occur after devm_memremap_pages_release().
*
- * 2/ @res is expected to be a host memory range that could feasibly be
+ * 4/ res is expected to be a host memory range that could feasibly be
* treated as a "System RAM" range, i.e. not a device mmio range, but
* this is not enforced.
*/
-void *devm_memremap_pages(struct device *dev, struct resource *res,
- struct percpu_ref *ref, struct vmem_altmap *altmap)
+void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
{
resource_size_t align_start, align_size, align_end;
unsigned long pfn, pgoff, order;
pgprot_t pgprot = PAGE_KERNEL;
- struct dev_pagemap *pgmap;
int error, nid, is_ram, i = 0;
+ struct resource *res = &pgmap->res;
align_start = res->start & ~(SECTION_SIZE - 1);
align_size = ALIGN(res->start + resource_size(res), SECTION_SIZE)
@@ -365,26 +369,10 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
if (is_ram == REGION_INTERSECTS)
return __va(res->start);
- if (!ref)
+ if (!pgmap->ref)
return ERR_PTR(-EINVAL);
- pgmap = devres_alloc_node(devm_memremap_pages_release,
- sizeof(*pgmap), GFP_KERNEL, dev_to_node(dev));
- if (!pgmap)
- return ERR_PTR(-ENOMEM);
-
- memcpy(&pgmap->res, res, sizeof(*res));
-
pgmap->dev = dev;
- if (altmap) {
- memcpy(&pgmap->altmap, altmap, sizeof(*altmap));
- pgmap->altmap_valid = true;
- }
- pgmap->ref = ref;
- pgmap->type = MEMORY_DEVICE_HOST;
- pgmap->page_fault = NULL;
- pgmap->page_free = NULL;
- pgmap->data = NULL;
mutex_lock(&pgmap_lock);
error = 0;
@@ -443,11 +431,13 @@ void *devm_memremap_pages(struct device *dev, struct resource *res,
*/
list_del(&page->lru);
page->pgmap = pgmap;
- percpu_ref_get(ref);
+ percpu_ref_get(pgmap->ref);
if (!(++i % 1024))
cond_resched();
}
- devres_add(dev, pgmap);
+
+ devm_add_action(dev, devm_memremap_pages_release, pgmap);
+
return __va(res->start);
err_add_memory:
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index e1f75a1914a1..9631993b6ee0 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -104,15 +104,14 @@ void *__wrap_devm_memremap(struct device *dev, resource_size_t offset,
}
EXPORT_SYMBOL(__wrap_devm_memremap);
-void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res,
- struct percpu_ref *ref, struct vmem_altmap *altmap)
+void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
{
- resource_size_t offset = res->start;
+ resource_size_t offset = pgmap->res.start;
struct nfit_test_resource *nfit_res = get_nfit_res(offset);
if (nfit_res)
return nfit_res->buf + offset - nfit_res->res.start;
- return devm_memremap_pages(dev, res, ref, altmap);
+ return devm_memremap_pages(dev, pgmap)
}
EXPORT_SYMBOL(__wrap_devm_memremap_pages);
--
2.14.2
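To make the new calling convention concrete, here is a minimal sketch of a
driver embedding the pagemap (not from the patch; struct my_driver,
to_my_driver and my_attach are hypothetical names):

	struct my_driver {
		struct dev_pagemap pgmap;	/* embedded, not separately allocated */
		void *private;			/* application-specific members */
	};

	/* recover the containing structure, e.g. starting from page->pgmap */
	static struct my_driver *to_my_driver(struct dev_pagemap *pgmap)
	{
		return container_of(pgmap, struct my_driver, pgmap);
	}

	static int my_attach(struct device *dev, struct my_driver *drv,
			     struct resource *res, struct percpu_ref *ref)
	{
		void *addr;

		/*
		 * drv is assumed to be zero-allocated (e.g. devm_kzalloc());
		 * per the notes above, the res, ref and type members must be
		 * filled in by the caller before the call.
		 */
		memcpy(&drv->pgmap.res, res, sizeof(*res));
		drv->pgmap.ref = ref;
		drv->pgmap.type = MEMORY_DEVICE_HOST;

		addr = devm_memremap_pages(dev, &drv->pgmap);
		return IS_ERR(addr) ? PTR_ERR(addr) : 0;
	}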
* Re: [PATCH 10/14] memremap: change devm_memremap_pages interface to use struct dev_pagemap
2017-12-07 15:08 ` [PATCH 10/14] memremap: change devm_memremap_pages interface to use struct dev_pagemap Christoph Hellwig
@ 2017-12-08 4:03 ` Dan Williams
0 siblings, 0 replies; 29+ messages in thread
From: Dan Williams @ 2017-12-08 4:03 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Jérôme Glisse, Logan Gunthorpe, linux-nvdimm, Linux MM,
kbuild test robot
On Thu, Dec 7, 2017 at 7:08 AM, Christoph Hellwig <hch@lst.de> wrote:
> From: Logan Gunthorpe <logang@deltatee.com>
>
> This new interface is similar to how struct device (and many others)
> work. The caller initializes a 'struct dev_pagemap' as required
> and calls 'devm_memremap_pages'. This allows the pagemap structure to
> be embedded in another structure and thus container_of can be used. In
> this way application specific members can be stored in a containing
> struct.
>
> This will be used by the P2P infrastructure and HMM could probably
> be cleaned up to use it as well (instead of having its own, similar
> 'hmm_devmem_pages_create' function).
>
> Signed-off-by: Logan Gunthorpe <logang@deltatee.com>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
[..]
> diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
> index e1f75a1914a1..9631993b6ee0 100644
> --- a/tools/testing/nvdimm/test/iomap.c
> +++ b/tools/testing/nvdimm/test/iomap.c
> @@ -104,15 +104,14 @@ void *__wrap_devm_memremap(struct device *dev, resource_size_t offset,
> }
> EXPORT_SYMBOL(__wrap_devm_memremap);
>
> -void *__wrap_devm_memremap_pages(struct device *dev, struct resource *res,
> - struct percpu_ref *ref, struct vmem_altmap *altmap)
> +void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
> {
> - resource_size_t offset = res->start;
> + resource_size_t offset = pgmap->res.start;
> struct nfit_test_resource *nfit_res = get_nfit_res(offset);
>
> if (nfit_res)
> return nfit_res->buf + offset - nfit_res->res.start;
> - return devm_memremap_pages(dev, res, ref, altmap);
> + return devm_memremap_pages(dev, pgmap)
Missed semicolon...
I need to follow up with the kbuild robot about including
tools/testing/nvdimm in its build tests.
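For the record, the corrected return in the wrapper reads:

	return devm_memremap_pages(dev, pgmap);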
* [PATCH 11/14] memremap: simplify duplicate region handling in devm_memremap_pages
2017-12-07 15:08 revamp vmem_altmap / dev_pagemap handling Christoph Hellwig
` (9 preceding siblings ...)
2017-12-07 15:08 ` [PATCH 10/14] memremap: change devm_memremap_pages interface to use struct dev_pagemap Christoph Hellwig
@ 2017-12-07 15:08 ` Christoph Hellwig
2017-12-07 19:34 ` Logan Gunthorpe
2017-12-07 15:08 ` [PATCH 12/14] memremap: remove find_dev_pagemap Christoph Hellwig
` (3 subsequent siblings)
14 siblings, 1 reply; 29+ messages in thread
From: Christoph Hellwig @ 2017-12-07 15:08 UTC (permalink / raw)
To: Dan Williams
Cc: Jérôme Glisse, Logan Gunthorpe, linux-nvdimm, linux-mm
__radix_tree_insert already checks for duplicates and returns -EEXIST in
that case, so remove the separate (and racy) duplicate check.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
kernel/memremap.c | 11 -----------
1 file changed, 11 deletions(-)
diff --git a/kernel/memremap.c b/kernel/memremap.c
index f4b198304e3b..4301fb657150 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -379,17 +379,6 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
align_end = align_start + align_size - 1;
foreach_order_pgoff(res, order, pgoff) {
- struct dev_pagemap *dup;
-
- rcu_read_lock();
- dup = find_dev_pagemap(res->start + PFN_PHYS(pgoff));
- rcu_read_unlock();
- if (dup) {
- dev_err(dev, "%s: %pr collides with mapping for %s\n",
- __func__, res, dev_name(dup->dev));
- error = -EBUSY;
- break;
- }
error = __radix_tree_insert(&pgmap_radix,
PHYS_PFN(res->start) + pgoff, order, pgmap);
if (error) {
--
2.14.2
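Reconstructed from the context lines, the insertion loop after this patch
reduces to the radix-tree insert plus its error handling (the body of the
error branch is elided in the diff):

	foreach_order_pgoff(res, order, pgoff) {
		error = __radix_tree_insert(&pgmap_radix,
				PHYS_PFN(res->start) + pgoff, order, pgmap);
		if (error) {
			/* -EEXIST now reports the duplicate mapping */
			...
		}
	}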
* [PATCH 12/14] memremap: remove find_dev_pagemap
2017-12-07 15:08 revamp vmem_altmap / dev_pagemap handling Christoph Hellwig
` (10 preceding siblings ...)
2017-12-07 15:08 ` [PATCH 11/14] memremap: simplify duplicate region handling in devm_memremap_pages Christoph Hellwig
@ 2017-12-07 15:08 ` Christoph Hellwig
2017-12-07 19:35 ` Logan Gunthorpe
2017-12-07 15:08 ` [PATCH 13/14] memremap: remove struct vmem_altmap Christoph Hellwig
` (2 subsequent siblings)
14 siblings, 1 reply; 29+ messages in thread
From: Christoph Hellwig @ 2017-12-07 15:08 UTC (permalink / raw)
To: Dan Williams
Cc: Jérôme Glisse, Logan Gunthorpe, linux-nvdimm, linux-mm
We already have the proper pfn value in both callers, so just open code
the function there.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
kernel/memremap.c | 16 ++++------------
1 file changed, 4 insertions(+), 12 deletions(-)
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 4301fb657150..ba5068b9ce07 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -316,14 +316,6 @@ static void devm_memremap_pages_release(void *data)
"%s: failed to free all reserved pages\n", __func__);
}
-/* assumes rcu_read_lock() held at entry */
-static struct dev_pagemap *find_dev_pagemap(resource_size_t phys)
-{
- WARN_ON_ONCE(!rcu_read_lock_held());
-
- return radix_tree_lookup(&pgmap_radix, PHYS_PFN(phys));
-}
-
/**
* devm_memremap_pages - remap and provide memmap backing for the given resource
* @dev: hosting device for @res
@@ -496,7 +488,7 @@ struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
* the memmap.
*/
rcu_read_lock();
- pgmap = find_dev_pagemap(__pfn_to_phys(page_to_pfn(page)));
+ pgmap = radix_tree_lookup(&pgmap_radix, page_to_pfn(page));
rcu_read_unlock();
if (!pgmap || !pgmap->altmap_valid)
@@ -515,12 +507,12 @@ struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
struct dev_pagemap *pgmap)
{
- resource_size_t phys = PFN_PHYS(pfn);
-
/*
* In the cached case we're already holding a live reference.
*/
if (pgmap) {
+ resource_size_t phys = PFN_PHYS(pfn);
+
if (phys >= pgmap->res.start && phys <= pgmap->res.end)
return pgmap;
put_dev_pagemap(pgmap);
@@ -528,7 +520,7 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
/* fall back to slow path lookup */
rcu_read_lock();
- pgmap = find_dev_pagemap(phys);
+ pgmap = radix_tree_lookup(&pgmap_radix, pfn);
if (pgmap && !percpu_ref_tryget_live(pgmap->ref))
pgmap = NULL;
rcu_read_unlock();
--
2.14.2
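The open-coded lookups keep the caching contract of get_dev_pagemap()
intact. A sketch of the intended caller pattern (the loop and its bounds
are hypothetical, not from the patch):

	struct dev_pagemap *pgmap = NULL;
	unsigned long pfn;

	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
		/* fast path reuses the held reference while pfn stays in range */
		pgmap = get_dev_pagemap(pfn, pgmap);
		if (!pgmap)
			break;	/* pfn is not ZONE_DEVICE memory */
		/* ... operate on pfn_to_page(pfn) ... */
	}
	put_dev_pagemap(pgmap);	/* NULL-safe */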
* [PATCH 13/14] memremap: remove struct vmem_altmap
2017-12-07 15:08 revamp vmem_altmap / dev_pagemap handling Christoph Hellwig
` (11 preceding siblings ...)
2017-12-07 15:08 ` [PATCH 12/14] memremap: remove find_dev_pagemap Christoph Hellwig
@ 2017-12-07 15:08 ` Christoph Hellwig
2017-12-07 19:40 ` Logan Gunthorpe
2017-12-07 15:08 ` [PATCH 14/14] memremap: RCU protect data returned from dev_pagemap lookups Christoph Hellwig
2017-12-08 4:14 ` revamp vmem_altmap / dev_pagemap handling Williams, Dan J
14 siblings, 1 reply; 29+ messages in thread
From: Christoph Hellwig @ 2017-12-07 15:08 UTC (permalink / raw)
To: Dan Williams
Cc: Jérôme Glisse, Logan Gunthorpe, linux-nvdimm, linux-mm
There is no value in a separate vmem_altmap vs just embedding it into
struct dev_pagemap, so merge the two.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
arch/powerpc/mm/init_64.c | 10 +++++-----
arch/x86/mm/init_64.c | 16 ++++++++--------
drivers/nvdimm/pfn_devs.c | 22 ++++++++--------------
drivers/nvdimm/pmem.c | 1 -
include/linux/memremap.h | 33 ++++++++++++---------------------
include/linux/mm.h | 4 ++--
kernel/memremap.c | 28 ++++++++++++----------------
mm/sparse-vmemmap.c | 19 +++++++++----------
8 files changed, 56 insertions(+), 77 deletions(-)
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index ec706857bdd6..872eed5a0867 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -193,17 +193,17 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node);
for (; start < end; start += page_size) {
- struct vmem_altmap *altmap;
+ struct dev_pagemap *pgmap;
void *p;
int rc;
if (vmemmap_populated(start, page_size))
continue;
- /* altmap lookups only work at section boundaries */
- altmap = to_vmem_altmap(SECTION_ALIGN_DOWN(start));
- if (altmap)
- p = dev_pagemap_alloc_block_buf(altmap, page_size);
+ /* pgmap lookups only work at section boundaries */
+ pgmap = to_vmem_altmap(SECTION_ALIGN_DOWN(start));
+ if (pgmap)
+ p = dev_pagemap_alloc_block_buf(pgmap, page_size);
else
p = vmemmap_alloc_block_buf(page_size, node);
if (!p)
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 131749080874..bc01dc3b07a5 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1343,7 +1343,7 @@ static void __meminitdata *p_start, *p_end;
static int __meminitdata node_start;
static int __meminit vmemmap_populate_hugepages(unsigned long start,
- unsigned long end, int node, struct vmem_altmap *altmap)
+ unsigned long end, int node, struct dev_pagemap *pgmap)
{
unsigned long addr;
unsigned long next;
@@ -1371,8 +1371,8 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
if (pmd_none(*pmd)) {
void *p;
- if (altmap)
- p = dev_pagemap_alloc_block_buf(altmap, PMD_SIZE);
+ if (pgmap)
+ p = dev_pagemap_alloc_block_buf(pgmap, PMD_SIZE);
else
p = vmemmap_alloc_block_buf(PMD_SIZE, node);
if (p) {
@@ -1395,7 +1395,7 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
addr_end = addr + PMD_SIZE;
p_end = p + PMD_SIZE;
continue;
- } else if (altmap)
+ } else if (pgmap)
return -ENOMEM; /* no fallback */
} else if (pmd_large(*pmd)) {
vmemmap_verify((pte_t *)pmd, node, addr, next);
@@ -1409,13 +1409,13 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
{
- struct vmem_altmap *altmap = to_vmem_altmap(start);
+ struct dev_pagemap *pgmap = to_vmem_altmap(start);
int err;
if (boot_cpu_has(X86_FEATURE_PSE))
- err = vmemmap_populate_hugepages(start, end, node, altmap);
- else if (altmap) {
- pr_err_once("%s: no cpu support for altmap allocations\n",
+ err = vmemmap_populate_hugepages(start, end, node, pgmap);
+ else if (pgmap) {
+ pr_err_once("%s: no cpu support for device page map allocations\n",
__func__);
err = -ENOMEM;
} else
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 6f58615ddb85..8367cf7bef99 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -525,14 +525,14 @@ EXPORT_SYMBOL(nd_pfn_probe);
* We hotplug memory at section granularity, pad the reserved area from
* the previous section base to the namespace base address.
*/
-static unsigned long init_altmap_base(resource_size_t base)
+static unsigned long init_pgmap_base(resource_size_t base)
{
unsigned long base_pfn = PHYS_PFN(base);
return PFN_SECTION_ALIGN_DOWN(base_pfn);
}
-static unsigned long init_altmap_reserve(resource_size_t base)
+static unsigned long init_pgmap_reserve(resource_size_t base)
{
unsigned long reserve = PHYS_PFN(SZ_8K);
unsigned long base_pfn = PHYS_PFN(base);
@@ -544,7 +544,6 @@ static unsigned long init_altmap_reserve(resource_size_t base)
static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
{
struct resource *res = &pgmap->res;
- struct vmem_altmap *altmap = &pgmap->altmap;
struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
u64 offset = le64_to_cpu(pfn_sb->dataoff);
u32 start_pad = __le32_to_cpu(pfn_sb->start_pad);
@@ -552,10 +551,6 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
struct nd_namespace_common *ndns = nd_pfn->ndns;
struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
resource_size_t base = nsio->res.start + start_pad;
- struct vmem_altmap __altmap = {
- .base_pfn = init_altmap_base(base),
- .reserve = init_altmap_reserve(base),
- };
memcpy(res, &nsio->res, sizeof(*res));
res->start += start_pad;
@@ -565,7 +560,6 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
if (offset < SZ_8K)
return -EINVAL;
nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns);
- pgmap->altmap_valid = false;
} else if (nd_pfn->mode == PFN_MODE_PMEM) {
nd_pfn->npfns = PFN_SECTION_ALIGN_UP((resource_size(res)
- offset) / PAGE_SIZE);
@@ -574,10 +568,10 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
"number of pfns truncated from %lld to %ld\n",
le64_to_cpu(nd_pfn->pfn_sb->npfns),
nd_pfn->npfns);
- memcpy(altmap, &__altmap, sizeof(*altmap));
- altmap->free = PHYS_PFN(offset - SZ_8K);
- altmap->alloc = 0;
- pgmap->altmap_valid = true;
+ pgmap->base_pfn = init_pgmap_base(base);
+ pgmap->reserve = init_pgmap_reserve(base);
+ pgmap->free = PHYS_PFN(offset - SZ_8K);
+ pgmap->alloc = 0;
} else
return -ENXIO;
@@ -660,7 +654,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
/ PAGE_SIZE);
if (nd_pfn->mode == PFN_MODE_PMEM) {
/*
- * The altmap should be padded out to the block size used
+ * The page map should be padded out to the block size used
* when populating the vmemmap. This *should* be equal to
* PMD_SIZE for most architectures.
*/
@@ -697,7 +691,7 @@ static int nd_pfn_init(struct nd_pfn *nd_pfn)
}
/*
- * Determine the effective resource range and vmem_altmap from an nd_pfn
+ * Determine the effective resource range and page map from an nd_pfn
* instance.
*/
int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index cf074b1ce219..9e77a557a9af 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -362,7 +362,6 @@ static int pmem_attach_disk(struct device *dev,
bb_res.start += pmem->data_offset;
} else if (pmem_should_map_pages(dev)) {
memcpy(&pmem->pgmap.res, &nsio->res, sizeof(pmem->pgmap.res));
- pmem->pgmap.altmap_valid = false;
addr = devm_memremap_pages(dev, &pmem->pgmap);
pmem->pfn_flags |= PFN_MAP;
memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index e973a069646c..7bea9a1b75f7 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -10,26 +10,10 @@
struct resource;
struct device;
-/**
- * struct vmem_altmap - pre-allocated storage for vmemmap_populate
- * @base_pfn: base of the entire dev_pagemap mapping
- * @reserve: pages mapped, but reserved for driver use (relative to @base)
- * @free: free pages set aside in the mapping for memmap storage
- * @align: pages reserved to meet allocation alignments
- * @alloc: track pages consumed, private to vmemmap_populate()
- */
-struct vmem_altmap {
- const unsigned long base_pfn;
- const unsigned long reserve;
- unsigned long free;
- unsigned long align;
- unsigned long alloc;
-};
-
#ifdef CONFIG_ZONE_DEVICE
-struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start);
+struct dev_pagemap *to_vmem_altmap(unsigned long memmap_start);
#else
-static inline struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
+static inline struct dev_pagemap *to_vmem_altmap(unsigned long memmap_start)
{
return NULL;
}
@@ -112,7 +96,11 @@ typedef void (*dev_page_free_t)(struct page *page, void *data);
* struct dev_pagemap - metadata for ZONE_DEVICE mappings
* @page_fault: callback when CPU fault on an unaddressable device page
* @page_free: free page callback when page refcount reaches 1
- * @altmap: pre-allocated/reserved memory for vmemmap allocations
+ * @base_pfn: base of the entire dev_pagemap mapping
+ * @reserve: pages mapped, but reserved for driver use (relative to @base)
+ * @free: free pages set aside in the mapping for memmap storage
+ * @align: pages reserved to meet allocation alignments
+ * @alloc: track pages consumed, private to vmemmap_populate()
* @res: physical address range covered by @ref
* @ref: reference count that pins the devm_memremap_pages() mapping
* @dev: host device of the mapping for debug
@@ -122,8 +110,11 @@ typedef void (*dev_page_free_t)(struct page *page, void *data);
struct dev_pagemap {
dev_page_fault_t page_fault;
dev_page_free_t page_free;
- struct vmem_altmap altmap;
- bool altmap_valid;
+ unsigned long base_pfn;
+ unsigned long reserve;
+ unsigned long free;
+ unsigned long align;
+ unsigned long alloc;
struct resource res;
struct percpu_ref *ref;
struct device *dev;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index cd3d1c00f6a3..b718c06a79ba 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -33,6 +33,7 @@ struct file_ra_state;
struct user_struct;
struct writeback_control;
struct bdi_writeback;
+struct dev_pagemap;
void init_mm_internals(void);
@@ -2545,9 +2546,8 @@ pud_t *vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node);
pmd_t *vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node);
pte_t *vmemmap_pte_populate(pmd_t *pmd, unsigned long addr, int node);
void *vmemmap_alloc_block(unsigned long size, int node);
-struct vmem_altmap;
void *vmemmap_alloc_block_buf(unsigned long size, int node);
-void *dev_pagemap_alloc_block_buf(struct vmem_altmap *pgmap,
+void *dev_pagemap_alloc_block_buf(struct dev_pagemap *pgmap,
unsigned long size);
void vmemmap_verify(pte_t *, int, unsigned long, unsigned long);
int vmemmap_populate_basepages(unsigned long start, unsigned long end,
diff --git a/kernel/memremap.c b/kernel/memremap.c
index ba5068b9ce07..01529eeb06ad 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -241,7 +241,7 @@ int device_private_entry_fault(struct vm_area_struct *vma,
EXPORT_SYMBOL(device_private_entry_fault);
#endif /* CONFIG_DEVICE_PRIVATE */
-static unsigned long __dev_pagemap_offset(struct vmem_altmap *pgmap)
+static unsigned long __dev_pagemap_offset(struct dev_pagemap *pgmap)
{
/* number of pfns from base where pfn_to_page() is valid */
return pgmap ? (pgmap->reserve + pgmap->free) : 0;
@@ -267,12 +267,11 @@ static void pgmap_radix_release(struct resource *res)
static unsigned long pfn_first(struct dev_pagemap *pgmap)
{
const struct resource *res = &pgmap->res;
- struct vmem_altmap *altmap = &pgmap->altmap;
unsigned long pfn;
pfn = res->start >> PAGE_SHIFT;
- if (pgmap->altmap_valid)
- pfn += __dev_pagemap_offset(altmap);
+ if (pgmap->base_pfn)
+ pfn += __dev_pagemap_offset(pgmap);
return pfn;
}
@@ -312,7 +311,7 @@ static void devm_memremap_pages_release(void *data)
untrack_pfn(NULL, PHYS_PFN(align_start), align_size);
pgmap_radix_release(res);
- dev_WARN_ONCE(dev, pgmap->altmap.alloc,
+ dev_WARN_ONCE(dev, pgmap->alloc,
"%s: failed to free all reserved pages\n", __func__);
}
@@ -325,16 +324,13 @@ static void devm_memremap_pages_release(void *data)
* 1/ At a minimum the res, ref and type members of @pgmap must be initialized
* by the caller before passing it to this function
*
- * 2/ The altmap field may optionally be initialized, in which case altmap_valid
- * must be set to true
- *
- * 3/ pgmap.ref must be 'live' on entry and 'dead' before devm_memunmap_pages()
+ * 2/ pgmap.ref must be 'live' on entry and 'dead' before devm_memunmap_pages()
* time (or devm release event). The expected order of events is that ref has
* been through percpu_ref_kill() before devm_memremap_pages_release(). The
* wait for the completion of all references being dropped and
* percpu_ref_exit() must occur after devm_memremap_pages_release().
*
- * 4/ res is expected to be a host memory range that could feasibly be
+ * 3/ res is expected to be a host memory range that could feasibly be
* treated as a "System RAM" range, i.e. not a device mmio range, but
* this is not enforced.
*/
@@ -433,7 +429,7 @@ EXPORT_SYMBOL(devm_memremap_pages);
int dev_pagemap_add_pages(unsigned long phys_start_pfn, unsigned nr_pages)
{
- struct vmem_altmap *pgmap;
+ struct dev_pagemap *pgmap;
pgmap = to_vmem_altmap((unsigned long) pfn_to_page(phys_start_pfn));
if (!pgmap)
@@ -451,7 +447,7 @@ int dev_pagemap_add_pages(unsigned long phys_start_pfn, unsigned nr_pages)
unsigned long dev_pagemap_start_pfn(unsigned long start_pfn)
{
- struct vmem_altmap *pgmap = to_vmem_altmap(__pfn_to_phys(start_pfn));
+ struct dev_pagemap *pgmap = to_vmem_altmap(__pfn_to_phys(start_pfn));
if (pgmap && start_pfn == pgmap->base_pfn)
return pgmap->reserve;
@@ -460,7 +456,7 @@ unsigned long dev_pagemap_start_pfn(unsigned long start_pfn)
bool dev_pagemap_free_pages(struct page *page, unsigned nr_pages)
{
- struct vmem_altmap *pgmap = to_vmem_altmap((uintptr_t)page);
+ struct dev_pagemap *pgmap = to_vmem_altmap((uintptr_t)page);
if (!pgmap)
return false;
@@ -468,7 +464,7 @@ bool dev_pagemap_free_pages(struct page *page, unsigned nr_pages)
return true;
}
-struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
+struct dev_pagemap *to_vmem_altmap(unsigned long memmap_start)
{
/*
* 'memmap_start' is the virtual address for the first "struct
@@ -491,9 +487,9 @@ struct vmem_altmap *to_vmem_altmap(unsigned long memmap_start)
pgmap = radix_tree_lookup(&pgmap_radix, page_to_pfn(page));
rcu_read_unlock();
- if (!pgmap || !pgmap->altmap_valid)
+ if (!pgmap || !pgmap->base_pfn)
return NULL;
- return &pgmap->altmap;
+ return pgmap;
}
/**
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index fef41a6a9f64..541d87c2a2c1 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -91,18 +91,17 @@ void * __meminit vmemmap_alloc_block_buf(unsigned long size, int node)
return ptr;
}
-static unsigned long __meminit vmem_altmap_next_pfn(struct vmem_altmap *altmap)
+static unsigned long __meminit dev_pagemap_next_pfn(struct dev_pagemap *pgmap)
{
- return altmap->base_pfn + altmap->reserve + altmap->alloc
- + altmap->align;
+ return pgmap->base_pfn + pgmap->reserve + pgmap->alloc + pgmap->align;
}
-static unsigned long __meminit vmem_altmap_nr_free(struct vmem_altmap *altmap)
+static unsigned long __meminit dev_pagemap_nr_free(struct dev_pagemap *pgmap)
{
- unsigned long allocated = altmap->alloc + altmap->align;
+ unsigned long allocated = pgmap->alloc + pgmap->align;
- if (altmap->free > allocated)
- return altmap->free - allocated;
+ if (pgmap->free > allocated)
+ return pgmap->free - allocated;
return 0;
}
@@ -113,7 +112,7 @@ static unsigned long __meminit vmem_altmap_nr_free(struct vmem_altmap *altmap)
*
* Allocations are aligned to the size of the request.
*/
-void * __meminit dev_pagemap_alloc_block_buf(struct vmem_altmap *pgmap,
+void * __meminit dev_pagemap_alloc_block_buf(struct dev_pagemap *pgmap,
unsigned long size)
{
unsigned long pfn, nr_pfns, nr_align;
@@ -124,11 +123,11 @@ void * __meminit dev_pagemap_alloc_block_buf(struct vmem_altmap *pgmap,
return NULL;
}
- pfn = vmem_altmap_next_pfn(pgmap);
+ pfn = dev_pagemap_next_pfn(pgmap);
nr_pfns = size >> PAGE_SHIFT;
nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);
nr_align = ALIGN(pfn, nr_align) - pfn;
- if (nr_pfns + nr_align > vmem_altmap_nr_free(pgmap))
+ if (nr_pfns + nr_align > dev_pagemap_nr_free(pgmap))
return NULL;
pgmap->alloc += nr_pfns;
--
2.14.2
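As a worked example of the alignment math in dev_pagemap_alloc_block_buf(),
assume 4 KiB pages and a PMD_SIZE (2 MiB) request (illustrative numbers,
not from the patch):

	unsigned long nr_pfns, nr_align, pfn;

	nr_pfns = SZ_2M >> PAGE_SHIFT;		/* 512 */
	nr_align = 1UL << find_first_bit(&nr_pfns, BITS_PER_LONG);
						/* lowest set bit of 512 is
						   bit 9, so nr_align = 512 */
	pfn = dev_pagemap_next_pfn(pgmap);	/* current allocation cursor */
	nr_align = ALIGN(pfn, nr_align) - pfn;	/* pfns of padding up to the
						   next 512-pfn boundary */

So a 2 MiB block comes out naturally aligned, with any padding counted
against the align field ("pages reserved to meet allocation alignments").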
* Re: [PATCH 13/14] memremap: remove struct vmem_altmap
2017-12-07 15:08 ` [PATCH 13/14] memremap: remove struct vmem_altmap Christoph Hellwig
@ 2017-12-07 19:40 ` Logan Gunthorpe
0 siblings, 0 replies; 29+ messages in thread
From: Logan Gunthorpe @ 2017-12-07 19:40 UTC (permalink / raw)
To: Christoph Hellwig, Dan Williams
Cc: Jérôme Glisse, linux-nvdimm, linux-mm
On 07/12/17 08:08 AM, Christoph Hellwig wrote:
> There is no value in a separate vmem_altmap vs just embedding it into
> struct dev_pagemap, so merge the two.
>
> Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Logan Gunthorpe <logang@deltatee.com>
* [PATCH 14/14] memremap: RCU protect data returned from dev_pagemap lookups
2017-12-07 15:08 revamp vmem_altmap / dev_pagemap handling Christoph Hellwig
` (12 preceding siblings ...)
2017-12-07 15:08 ` [PATCH 13/14] memremap: remove struct vmem_altmap Christoph Hellwig
@ 2017-12-07 15:08 ` Christoph Hellwig
2017-12-07 19:53 ` Logan Gunthorpe
2017-12-08 4:14 ` revamp vmem_altmap / dev_pagemap handling Williams, Dan J
14 siblings, 1 reply; 29+ messages in thread
From: Christoph Hellwig @ 2017-12-07 15:08 UTC (permalink / raw)
To: Dan Williams
Cc: Jérôme Glisse, Logan Gunthorpe, linux-nvdimm, linux-mm
Move the RCU critical sections into the callers of to_vmem_altmap so that
we can read the page map inside the critical section. Also rename the
remaining helper to __lookup_dev_pagemap to fit into the current naming
scheme.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
arch/powerpc/mm/init_64.c | 5 ++-
arch/x86/mm/init_64.c | 5 ++-
include/linux/memremap.h | 15 ++++----
kernel/memremap.c | 90 +++++++++++++++++++++++++----------------------
4 files changed, 61 insertions(+), 54 deletions(-)
diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c
index 872eed5a0867..7a78e432813f 100644
--- a/arch/powerpc/mm/init_64.c
+++ b/arch/powerpc/mm/init_64.c
@@ -201,11 +201,14 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
continue;
/* pgmap lookups only work at section boundaries */
- pgmap = to_vmem_altmap(SECTION_ALIGN_DOWN(start));
+ rcu_read_lock();
+ pgmap = __lookup_dev_pagemap((struct page *)
+ SECTION_ALIGN_DOWN(start));
if (pgmap)
p = dev_pagemap_alloc_block_buf(pgmap, page_size);
else
p = vmemmap_alloc_block_buf(page_size, node);
+ rcu_read_unlock();
if (!p)
return -ENOMEM;
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index bc01dc3b07a5..d07b173d277c 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -1409,9 +1409,11 @@ static int __meminit vmemmap_populate_hugepages(unsigned long start,
int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
{
- struct dev_pagemap *pgmap = to_vmem_altmap(start);
+ struct dev_pagemap *pgmap;
int err;
+ rcu_read_lock();
+ pgmap = __lookup_dev_pagemap((struct page *)start);
if (boot_cpu_has(X86_FEATURE_PSE))
err = vmemmap_populate_hugepages(start, end, node, pgmap);
else if (pgmap) {
@@ -1420,6 +1422,7 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
err = -ENOMEM;
} else
err = vmemmap_populate_basepages(start, end, node);
+ rcu_read_unlock();
if (!err)
sync_global_pgds(start, end - 1);
return err;
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index 7bea9a1b75f7..a7faf9174977 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -10,15 +10,6 @@
struct resource;
struct device;
-#ifdef CONFIG_ZONE_DEVICE
-struct dev_pagemap *to_vmem_altmap(unsigned long memmap_start);
-#else
-static inline struct dev_pagemap *to_vmem_altmap(unsigned long memmap_start)
-{
- return NULL;
-}
-#endif
-
/*
* Specialize ZONE_DEVICE memory into multiple types each having differents
* usage.
@@ -124,6 +115,7 @@ struct dev_pagemap {
#ifdef CONFIG_ZONE_DEVICE
void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
+struct dev_pagemap *__lookup_dev_pagemap(struct page *start_page);
struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
struct dev_pagemap *pgmap);
static inline bool is_zone_device_page(const struct page *page);
@@ -144,6 +136,11 @@ static inline void *devm_memremap_pages(struct device *dev,
return ERR_PTR(-ENXIO);
}
+static inline struct dev_pagemap *__lookup_dev_pagemap(struct page *start_page)
+{
+ return NULL;
+}
+
static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
struct dev_pagemap *pgmap)
{
diff --git a/kernel/memremap.c b/kernel/memremap.c
index 01529eeb06ad..b3e8b5028bec 100644
--- a/kernel/memremap.c
+++ b/kernel/memremap.c
@@ -241,6 +241,16 @@ int device_private_entry_fault(struct vm_area_struct *vma,
EXPORT_SYMBOL(device_private_entry_fault);
#endif /* CONFIG_DEVICE_PRIVATE */
+struct dev_pagemap *__lookup_dev_pagemap(struct page *start_page)
+{
+ struct dev_pagemap *pgmap;
+
+ pgmap = radix_tree_lookup(&pgmap_radix, page_to_pfn(start_page));
+ if (!pgmap || !pgmap->base_pfn)
+ return NULL;
+ return pgmap;
+}
+
static unsigned long __dev_pagemap_offset(struct dev_pagemap *pgmap)
{
/* number of pfns from base where pfn_to_page() is valid */
@@ -249,7 +259,16 @@ static unsigned long __dev_pagemap_offset(struct dev_pagemap *pgmap)
unsigned long dev_pagemap_offset(struct page *page)
{
- return __dev_pagemap_offset(to_vmem_altmap((uintptr_t)page));
+ struct dev_pagemap *pgmap;
+ unsigned long ret = 0;
+
+ rcu_read_lock();
+ pgmap = __lookup_dev_pagemap(page);
+ if (pgmap)
+ ret = __dev_pagemap_offset(pgmap);
+ rcu_read_unlock();
+
+ return ret;
}
static void pgmap_radix_release(struct resource *res)
@@ -430,66 +449,51 @@ EXPORT_SYMBOL(devm_memremap_pages);
int dev_pagemap_add_pages(unsigned long phys_start_pfn, unsigned nr_pages)
{
struct dev_pagemap *pgmap;
+ int ret = 0;
- pgmap = to_vmem_altmap((unsigned long) pfn_to_page(phys_start_pfn));
- if (!pgmap)
- return 0;
+ rcu_read_lock();
+ pgmap = __lookup_dev_pagemap(pfn_to_page(phys_start_pfn));
+ if (pgmap) {
+ if (pgmap->base_pfn != phys_start_pfn ||
+ __dev_pagemap_offset(pgmap) > nr_pages) {
+ pr_warn_once("memory add fail, invalid map\n");
+ ret = -EINVAL;
+ }
- if (pgmap->base_pfn != phys_start_pfn ||
- __dev_pagemap_offset(pgmap) > nr_pages) {
- pr_warn_once("memory add fail, invalid map\n");
- return -EINVAL;
+ pgmap->alloc = 0;
}
-
- pgmap->alloc = 0;
- return 0;
+ rcu_read_unlock();
+ return ret;
}
unsigned long dev_pagemap_start_pfn(unsigned long start_pfn)
{
- struct dev_pagemap *pgmap = to_vmem_altmap(__pfn_to_phys(start_pfn));
+ struct page *page = (struct page *)__pfn_to_phys(start_pfn);
+ struct dev_pagemap *pgmap;
+ unsigned long ret = 0;
+ rcu_read_lock();
+ pgmap = __lookup_dev_pagemap(page);
if (pgmap && start_pfn == pgmap->base_pfn)
- return pgmap->reserve;
- return 0;
+ ret = pgmap->reserve;
+ rcu_read_unlock();
+ return ret;
}
bool dev_pagemap_free_pages(struct page *page, unsigned nr_pages)
{
- struct dev_pagemap *pgmap = to_vmem_altmap((uintptr_t)page);
-
- if (!pgmap)
- return false;
- pgmap->alloc -= nr_pages;
- return true;
-}
-
-struct dev_pagemap *to_vmem_altmap(unsigned long memmap_start)
-{
- /*
- * 'memmap_start' is the virtual address for the first "struct
- * page" in this range of the vmemmap array. In the case of
- * CONFIG_SPARSEMEM_VMEMMAP a page_to_pfn conversion is simple
- * pointer arithmetic, so we can perform this to_vmem_altmap()
- * conversion without concern for the initialization state of
- * the struct page fields.
- */
- struct page *page = (struct page *) memmap_start;
struct dev_pagemap *pgmap;
+ bool ret = false;
- /*
- * Unconditionally retrieve a dev_pagemap associated with the
- * given physical address, this is only for use in the
- * arch_{add|remove}_memory() for setting up and tearing down
- * the memmap.
- */
rcu_read_lock();
- pgmap = radix_tree_lookup(&pgmap_radix, page_to_pfn(page));
+ pgmap = __lookup_dev_pagemap(page);
+ if (pgmap) {
+ pgmap->alloc -= nr_pages;
+ ret = true;
+ }
rcu_read_unlock();
- if (!pgmap || !pgmap->base_pfn)
- return NULL;
- return pgmap;
+ return ret;
}
/**
--
2.14.2
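Every caller now follows the same shape; a minimal sketch (use_pgmap() is a
placeholder for the caller's work):

	struct dev_pagemap *pgmap;

	rcu_read_lock();
	pgmap = __lookup_dev_pagemap(page);
	if (pgmap)
		use_pgmap(pgmap);	/* only valid inside the RCU section */
	rcu_read_unlock();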
* Re: [PATCH 14/14] memremap: RCU protect data returned from dev_pagemap lookups
2017-12-07 15:08 ` [PATCH 14/14] memremap: RCU protect data returned from dev_pagemap lookups Christoph Hellwig
@ 2017-12-07 19:53 ` Logan Gunthorpe
0 siblings, 0 replies; 29+ messages in thread
From: Logan Gunthorpe @ 2017-12-07 19:53 UTC (permalink / raw)
To: Christoph Hellwig, Dan Williams
Cc: Jérôme Glisse, linux-nvdimm, linux-mm
On 07/12/17 08:08 AM, Christoph Hellwig wrote:
> Move the RCU critical sections into the callers of to_vmem_altmap so that
> we can read the page map inside the critical section. Also rename the
> remaining helper to __lookup_dev_pagemap to fit into the current naming
> scheme.
I'm not saying I disagree, but what's the reasoning behind the double
underscore prefix to the function?
> +struct dev_pagemap *__lookup_dev_pagemap(struct page *start_page)
> +{
> + struct dev_pagemap *pgmap;
> +
> + pgmap = radix_tree_lookup(&pgmap_radix, page_to_pfn(start_page));
> + if (!pgmap || !pgmap->base_pfn)
> + return NULL;
> + return pgmap;
> +}
I'm also wondering why we are still looking up the dev_pagemap via the
radix tree when struct page already has a pointer to it (page->pgmap).
Thanks,
Logan
* Re: revamp vmem_altmap / dev_pagemap handling
2017-12-07 15:08 revamp vmem_altmap / dev_pagemap handling Christoph Hellwig
` (13 preceding siblings ...)
2017-12-07 15:08 ` [PATCH 14/14] memremap: RCU protect data returned from dev_pagemap lookups Christoph Hellwig
@ 2017-12-08 4:14 ` Williams, Dan J
14 siblings, 0 replies; 29+ messages in thread
From: Williams, Dan J @ 2017-12-08 4:14 UTC (permalink / raw)
To: hch; +Cc: jglisse, logang, linux-nvdimm, linux-mm
On Thu, 2017-12-07 at 07:08 -0800, Christoph Hellwig wrote:
> Hi all,
>
> this series started with two patches from Logan that now are in the
> middle of the series to kill the memremap-internal pgmap structure
> and to redo the devm_memremap_pages interface to be better suited
> for future PCI P2P uses. I reviewed them and noticed that there
> isn't really any good reason to keep struct vmem_altmap either,
> and that a lot of these alternative device page map access should
> be better abstracted out instead of being sprinkled all over the
> mm code.
>
> Please review carefully; this has only been tested with my legacy
> e820 NVDIMM system.
I get this lockdep report booting it on my test-VM. I'll take a closer
look next week... the fsdax-vs-hole-punch-vs-dma fix is at the top of
my queue.
[ 7.631431] =============================
[ 7.632668] WARNING: suspicious RCU usage
[ 7.633494] 4.15.0-rc2+ #942 Tainted: G O
[ 7.635262] -----------------------------
[ 7.636764] ./include/linux/rcupdate.h:302 Illegal context switch in RCU read-side critical section!
[ 7.640139]
[ 7.640139] other info that might help us debug this:
[ 7.640139]
[ 7.643382]
[ 7.643382] rcu_scheduler_active = 2, debug_locks = 1
[ 7.645814] 5 locks held by systemd-udevd/835:
[ 7.647546] #0: (&dev->mutex){....}, at: [<0000000064217991>] __driver_attach+0x58/0xe0
[ 7.650171] #1: (&dev->mutex){....}, at: [<00000000527f6e1a>] __driver_attach+0x66/0xe0
[ 7.652779] #2: (cpu_hotplug_lock.rw_sem){++++}, at: [<00000000a8b47692>] mem_hotplug_begin+0xa/0x20
[ 7.655677] #3: (mem_hotplug_lock.rw_sem){++++}, at: [<000000003d83cb2a>] percpu_down_write+0x27/0x120
[ 7.658649] #4: (rcu_read_lock){....}, at: [<00000000bcd32a45>] vmemmap_populate+0x0/0x373
[ 7.661133]
[ 7.661133] stack backtrace:
[ 7.662650] CPU: 22 PID: 835 Comm: systemd-udevd Tainted: G O 4.15.0-rc2+ #942
[ 7.665264] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org 04/01/2014
[ 7.668873] Call Trace:
[ 7.668879] dump_stack+0x7d/0xbe
[ 7.668885] ___might_sleep+0xe2/0x250
[ 7.668890] __alloc_pages_nodemask+0x107/0x3b0
[ 7.668901] vmemmap_alloc_block+0x5a/0xc1
[ 7.668904] vmemmap_populate+0x16c/0x373
[ 7.668915] sparse_mem_map_populate+0x23/0x33
[ 7.668917] sparse_add_one_section+0x45/0x179
[ 7.668924] __add_pages+0xc4/0x1f0
[ 7.668935] add_pages+0x15/0x70
[ 7.668939] devm_memremap_pages+0x293/0x440
[ 7.668954] pmem_attach_disk+0x4f4/0x620 [nd_pmem]
[ 7.668966] ? nd_dax_probe+0x105/0x140 [libnvdimm]
[ 7.668971] ? nd_dax_probe+0x105/0x140 [libnvdimm]
[ 7.668981] nvdimm_bus_probe+0x63/0x100 [libnvdimm]
[ 7.668988] driver_probe_device+0x2a8/0x490
[ 7.668993] __driver_attach+0xde/0xe0
[ 7.668997] ? driver_probe_device+0x490/0x490
[ 7.668998] bus_for_each_dev+0x6a/0xb0
[ 7.669002] bus_add_driver+0x16d/0x260
[ 7.669005] driver_register+0x57/0xc0
[ 7.669007] ? 0xffffffffa0083000
[ 7.669009] do_one_initcall+0x4e/0x18f
[ 7.669012] ? rcu_read_lock_sched_held+0x3f/0x70
[ 7.669014] ? kmem_cache_alloc_trace+0x2a0/0x310
[ 7.669020] do_init_module+0x5b/0x213
[ 7.669023] load_module+0x1873/0x1f10
[ 7.669029] ? show_coresize+0x30/0x30
[ 7.669035] ? vfs_read+0x131/0x150
[ 7.669052] ? SYSC_finit_module+0xd2/0x100
[ 7.669053] SYSC_finit_module+0xd2/0x100
[ 7.669067] do_syscall_64+0x66/0x230
[ 7.669070] entry_SYSCALL64_slow_path+0x25/0x25
[ 7.669072] RIP: 0033:0x7fc493dd8229
[ 7.669073] RSP: 002b:00007ffcaab453d8 EFLAGS: 00000246 ORIG_RAX: 0000000000000139
[ 7.669074] RAX: ffffffffffffffda RBX: 00005643cb407bb0 RCX: 00007fc493dd8229
[ 7.669075] RDX: 0000000000000000 RSI: 00007fc4949189c5 RDI: 000000000000000f
[ 7.669076] RBP: 00007fc4949189c5 R08: 0000000000000000 R09: 00007ffcaab454f0
[ 7.669076] R10: 000000000000000f R11: 0000000000000246 R12: 0000000000000000
[ 7.669077] R13: 00005643cb408010 R14: 0000000000020000 R15: 00005643c97c8dec
[ 7.669112] BUG: sleeping function called from invalid context at mm/page_alloc.c:4174
[ 7.669113] in_atomic(): 1, irqs_disabled(): 0, pid: 835, name: systemd-udevd
[ 7.669115] 5 locks held by systemd-udevd/835:
[ 7.669115] #0: (&dev->mutex){....}, at: [<0000000064217991>] __driver_attach+0x58/0xe0
[ 7.669120] #1: (&dev->mutex){....}, at: [<00000000527f6e1a>] __driver_attach+0x66/0xe0
[ 7.669123] #2: (cpu_hotplug_lock.rw_sem){++++}, at: [<00000000a8b47692>] mem_hotplug_begin+0xa/0x20
[ 7.669126] #3: (mem_hotplug_lock.rw_sem){++++}, at: [<000000003d83cb2a>] percpu_down_write+0x27/0x120
[ 7.669130] #4: (rcu_read_lock){....}, at: [<00000000bcd32a45>] vmemmap_populate+0x0/0x373
[ 7.669135] CPU: 22 PID: 835 Comm: systemd-udevd Tainted: G O 4.15.0-rc2+ #942
[ 7.669136] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS rel-1.9.3-0-ge2fc41e-prebuilt.qemu-project.org 04/01/2014
[ 7.669136] Call Trace:
[ 7.669139] dump_stack+0x7d/0xbe
[ 7.669142] ___might_sleep+0x21e/0x250
[ 7.669146] __alloc_pages_nodemask+0x107/0x3b0
[ 7.669154] vmemmap_alloc_block+0x5a/0xc1
[ 7.669157] vmemmap_populate+0x16c/0x373
[ 7.669167] sparse_mem_map_populate+0x23/0x33
[ 7.669170] sparse_add_one_section+0x45/0x179
[ 7.669176] __add_pages+0xc4/0x1f0
[ 7.669187] add_pages+0x15/0x70
[ 7.669189] devm_memremap_pages+0x293/0x440
[ 7.669199] pmem_attach_disk+0x4f4/0x620 [nd_pmem]
[ 7.669210] ? nd_dax_probe+0x105/0x140 [libnvdimm]
[ 7.669215] ? nd_dax_probe+0x105/0x140 [libnvdimm]
[ 7.669226] nvdimm_bus_probe+0x63/0x100 [libnvdimm]
[ 7.669232] driver_probe_device+0x2a8/0x490
[ 7.669237] __driver_attach+0xde/0xe0
[ 7.669240] ? driver_probe_device+0x490/0x490
[ 7.669242] bus_for_each_dev+0x6a/0xb0
[ 7.669247] bus_add_driver+0x16d/0x260
[ 7.669251] driver_register+0x57/0xc0
[ 7.669253] ? 0xffffffffa0083000
[ 7.669255] do_one_initcall+0x4e/0x18f
[ 7.669257] ? rcu_read_lock_sched_held+0x3f/0x70
[ 7.669259] ? kmem_cache_alloc_trace+0x2a0/0x310
[ 7.669267] do_init_module+0x5b/0x213
[ 7.669271] load_module+0x1873/0x1f10
[ 7.669276] ? show_coresize+0x30/0x30
[ 7.669283] ? vfs_read+0x131/0x150
[ 7.669309] ? SYSC_finit_module+0xd2/0x100
[ 7.669312] SYSC_finit_module+0xd2/0x100
[ 7.669332] do_syscall_64+0x66/0x230
[ 7.669336] entry_SYSCALL64_slow_path+0x25/0x25
[ 7.669337] RIP: 0033:0x7fc493dd8229
[ 7.669338] RSP: 002b:00007ffcaab453d8 EFLAGS: 00000246 ORIG_RAX: 0000000000000139
[ 7.669340] RAX: ffffffffffffffda RBX: 00005643cb407bb0 RCX: 00007fc493dd8229
[ 7.669341] RDX: 0000000000000000 RSI: 00007fc4949189c5 RDI: 000000000000000f
[ 7.669342] RBP: 00007fc4949189c5 R08: 0000000000000000 R09: 00007ffcaab454f0
[ 7.669344] R10: 000000000000000f R11: 0000000000000246 R12: 0000000000000000
[ 7.669345] R13: 00005643cb408010 R14: 0000000000020000 R15: 00005643c97c8dec
[ 7.680772] pmem2: detected capacity change from 0 to 33285996544
[ 7.834748] pmem0: detected capacity change from 0 to 4294967296
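The two reports line up with the RCU sections patch 14 adds to
vmemmap_populate(): the memmap allocation now runs inside rcu_read_lock(),
but vmemmap_alloc_block() performs a GFP_KERNEL allocation that may sleep.
Schematically (a reading of the backtrace above, not a proposed fix):

	rcu_read_lock();
	pgmap = __lookup_dev_pagemap(...);
	if (pgmap)
		p = dev_pagemap_alloc_block_buf(pgmap, page_size);
	else
		p = vmemmap_alloc_block_buf(page_size, node);
		/* -> vmemmap_alloc_block() -> __alloc_pages_nodemask(),
		 * which may sleep while the RCU read lock is held */
	rcu_read_unlock();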