* [PATCH v3 1/4] drivers/iommu: Add page_ext for IOMMU_DEBUG_PAGEALLOC
2025-11-24 20:08 [PATCH v3 0/4] iommu: Add IOMMU_DEBUG_PAGEALLOC sanitizer Mostafa Saleh
@ 2025-11-24 20:08 ` Mostafa Saleh
2025-11-24 23:13 ` Randy Dunlap
2025-11-25 7:17 ` Baolu Lu
2025-11-24 20:08 ` [PATCH v3 2/4] drivers/iommu: Add calls " Mostafa Saleh
` (2 subsequent siblings)
3 siblings, 2 replies; 14+ messages in thread
From: Mostafa Saleh @ 2025-11-24 20:08 UTC (permalink / raw)
To: linux-mm, iommu, linux-kernel, linux-doc
Cc: corbet, joro, will, robin.murphy, akpm, vbabka, surenb, mhocko,
jackmanb, hannes, ziy, david, lorenzo.stoakes, Liam.Howlett,
rppt, xiaqinxin, Mostafa Saleh
Add a new config IOMMU_DEBUG_PAGEALLOC, which registers new data to
page_ext.
This config will be used by the IOMMU API to track pages mapped in
the IOMMU to catch drivers trying to free kernel memory that they
still map in their domains, causing all types of memory corruption.
This behaviour is disabled by default and can be enabled using
kernel cmdline iommu.debug_pagealloc.
Signed-off-by: Mostafa Saleh <smostafa@google.com>
---
.../admin-guide/kernel-parameters.txt | 6 ++++
drivers/iommu/Kconfig | 19 +++++++++++
drivers/iommu/Makefile | 1 +
drivers/iommu/iommu-debug-pagealloc.c | 32 +++++++++++++++++++
include/linux/iommu-debug-pagealloc.h | 17 ++++++++++
mm/page_ext.c | 4 +++
6 files changed, 79 insertions(+)
create mode 100644 drivers/iommu/iommu-debug-pagealloc.c
create mode 100644 include/linux/iommu-debug-pagealloc.h
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 6c42061ca20e..dddf435a1c11 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2557,6 +2557,12 @@
1 - Bypass the IOMMU for DMA.
unset - Use value of CONFIG_IOMMU_DEFAULT_PASSTHROUGH.
+ iommu.debug_pagealloc=
+ [KNL,EARLY] When CONFIG_IOMMU_DEBUG_PAGEALLOC is set, this
+ parameter enables the feature at boot time. By default, it
+ is disabled and the system behave the same way as a kernel
+ built without CONFIG_IOMMU_DEBUG_PAGEALLOC.
+
io7= [HW] IO7 for Marvel-based Alpha systems
See comment before marvel_specify_io7 in
arch/alpha/kernel/core_marvel.c.
diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig
index 70d29b14d851..c9c1a1c1820e 100644
--- a/drivers/iommu/Kconfig
+++ b/drivers/iommu/Kconfig
@@ -383,4 +383,23 @@ config SPRD_IOMMU
Say Y here if you want to use the multimedia devices listed above.
+config IOMMU_DEBUG_PAGEALLOC
+ bool "Debug IOMMU mappings against page allocations"
+ depends on DEBUG_PAGEALLOC && IOMMU_API && PAGE_EXTENSION
+ help
+ This enables a consistency check between the kernel page allocator and
+ the IOMMU subsystem. It verifies that pages being allocated or freed
+ are not currently mapped in any IOMMU domain.
+
+ This helps detect DMA use-after-free bugs where a driver frees a page
+ but forgets to unmap it from the IOMMU, potentially allowing a device
+ to overwrite memory that the kernel has repurposed.
+
+ These checks are best-effort and may not detect all problems.
+
+ Due to performance overhead, this feature is disabled by default.
+ You must enable "iommu.debug_pagealloc" from the kernel command
+ line to activate the runtime checks.
+
+ If unsure, say N.
endif # IOMMU_SUPPORT
diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile
index 355294fa9033..8f5130b6a671 100644
--- a/drivers/iommu/Makefile
+++ b/drivers/iommu/Makefile
@@ -34,3 +34,4 @@ obj-$(CONFIG_IOMMU_SVA) += iommu-sva.o
obj-$(CONFIG_IOMMU_IOPF) += io-pgfault.o
obj-$(CONFIG_SPRD_IOMMU) += sprd-iommu.o
obj-$(CONFIG_APPLE_DART) += apple-dart.o
+obj-$(CONFIG_IOMMU_DEBUG_PAGEALLOC) += iommu-debug-pagealloc.o
diff --git a/drivers/iommu/iommu-debug-pagealloc.c b/drivers/iommu/iommu-debug-pagealloc.c
new file mode 100644
index 000000000000..4022e9af7f27
--- /dev/null
+++ b/drivers/iommu/iommu-debug-pagealloc.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025 - Google Inc
+ * Author: Mostafa Saleh <smostafa@google.com>
+ * IOMMU API debug page alloc sanitizer
+ */
+#include <linux/atomic.h>
+#include <linux/iommu-debug-pagealloc.h>
+#include <linux/kernel.h>
+#include <linux/page_ext.h>
+
+static bool needed;
+
+struct iommu_debug_metadata {
+ atomic_t ref;
+};
+
+static __init bool need_iommu_debug(void)
+{
+ return needed;
+}
+
+struct page_ext_operations page_iommu_debug_ops = {
+ .size = sizeof(struct iommu_debug_metadata),
+ .need = need_iommu_debug,
+};
+
+static int __init iommu_debug_pagealloc(char *str)
+{
+ return kstrtobool(str, &needed);
+}
+early_param("iommu.debug_pagealloc", iommu_debug_pagealloc);
diff --git a/include/linux/iommu-debug-pagealloc.h b/include/linux/iommu-debug-pagealloc.h
new file mode 100644
index 000000000000..83e64d70bf6c
--- /dev/null
+++ b/include/linux/iommu-debug-pagealloc.h
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2025 - Google Inc
+ * Author: Mostafa Saleh <smostafa@google.com>
+ * IOMMU API debug page alloc sanitizer
+ */
+
+#ifndef __LINUX_IOMMU_DEBUG_PAGEALLOC_H
+#define __LINUX_IOMMU_DEBUG_PAGEALLOC_H
+
+#ifdef CONFIG_IOMMU_DEBUG_PAGEALLOC
+
+extern struct page_ext_operations page_iommu_debug_ops;
+
+#endif /* CONFIG_IOMMU_DEBUG_PAGEALLOC */
+
+#endif /* __LINUX_IOMMU_DEBUG_PAGEALLOC_H */
diff --git a/mm/page_ext.c b/mm/page_ext.c
index d7396a8970e5..297e4cd8ce90 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -11,6 +11,7 @@
#include <linux/page_table_check.h>
#include <linux/rcupdate.h>
#include <linux/pgalloc_tag.h>
+#include <linux/iommu-debug-pagealloc.h>
/*
* struct page extension
@@ -89,6 +90,9 @@ static struct page_ext_operations *page_ext_ops[] __initdata = {
#ifdef CONFIG_PAGE_TABLE_CHECK
&page_table_check_ops,
#endif
+#ifdef CONFIG_IOMMU_DEBUG_PAGEALLOC
+ &page_iommu_debug_ops,
+#endif
};
unsigned long page_ext_size;
--
2.52.0.460.gd25c4c69ec-goog
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH v3 1/4] drivers/iommu: Add page_ext for IOMMU_DEBUG_PAGEALLOC
2025-11-24 20:08 ` [PATCH v3 1/4] drivers/iommu: Add page_ext for IOMMU_DEBUG_PAGEALLOC Mostafa Saleh
@ 2025-11-24 23:13 ` Randy Dunlap
2025-11-25 1:31 ` Randy Dunlap
2025-11-25 7:17 ` Baolu Lu
1 sibling, 1 reply; 14+ messages in thread
From: Randy Dunlap @ 2025-11-24 23:13 UTC (permalink / raw)
To: Mostafa Saleh, linux-mm, iommu, linux-kernel, linux-doc
Cc: corbet, joro, will, robin.murphy, akpm, vbabka, surenb, mhocko,
jackmanb, hannes, ziy, david, lorenzo.stoakes, Liam.Howlett,
rppt, xiaqinxin
On 11/24/25 12:08 PM, Mostafa Saleh wrote:
> Add a new config IOMMU_DEBUG_PAGEALLOC, which registers new data to
> page_ext.
>
> This config will be used by the IOMMU API to track pages mapped in
> the IOMMU to catch drivers trying to free kernel memory that they
> still map in their domains, causing all types of memory corruption.
>
> This behaviour is disabled by default and can be enabled using
> kernel cmdline iommu.debug_pagealloc.
>
> Signed-off-by: Mostafa Saleh <smostafa@google.com>
> ---
> .../admin-guide/kernel-parameters.txt | 6 ++++
> drivers/iommu/Kconfig | 19 +++++++++++
> drivers/iommu/Makefile | 1 +
> drivers/iommu/iommu-debug-pagealloc.c | 32 +++++++++++++++++++
> include/linux/iommu-debug-pagealloc.h | 17 ++++++++++
> mm/page_ext.c | 4 +++
> 6 files changed, 79 insertions(+)
> create mode 100644 drivers/iommu/iommu-debug-pagealloc.c
> create mode 100644 include/linux/iommu-debug-pagealloc.h
>
> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> index 6c42061ca20e..dddf435a1c11 100644
> --- a/Documentation/admin-guide/kernel-parameters.txt
> +++ b/Documentation/admin-guide/kernel-parameters.txt
> @@ -2557,6 +2557,12 @@
> 1 - Bypass the IOMMU for DMA.
> unset - Use value of CONFIG_IOMMU_DEFAULT_PASSTHROUGH.
>
> + iommu.debug_pagealloc=
> + [KNL,EARLY] When CONFIG_IOMMU_DEBUG_PAGEALLOC is set, this
> + parameter enables the feature at boot time. By default, it
> + is disabled and the system behave the same way as a kernel
behaves
> + built without CONFIG_IOMMU_DEBUG_PAGEALLOC.
> +
> io7= [HW] IO7 for Marvel-based Alpha systems
> See comment before marvel_specify_io7 in
> arch/alpha/kernel/core_marvel.c.
--
~Randy
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v3 1/4] drivers/iommu: Add page_ext for IOMMU_DEBUG_PAGEALLOC
2025-11-24 23:13 ` Randy Dunlap
@ 2025-11-25 1:31 ` Randy Dunlap
2025-11-25 9:42 ` Mostafa Saleh
0 siblings, 1 reply; 14+ messages in thread
From: Randy Dunlap @ 2025-11-25 1:31 UTC (permalink / raw)
To: Mostafa Saleh, linux-mm, iommu, linux-kernel, linux-doc
Cc: corbet, joro, will, robin.murphy, akpm, vbabka, surenb, mhocko,
jackmanb, hannes, ziy, david, lorenzo.stoakes, Liam.Howlett,
rppt, xiaqinxin
I missed one other thing:
On 11/24/25 3:13 PM, Randy Dunlap wrote:
>
>
> On 11/24/25 12:08 PM, Mostafa Saleh wrote:
>> Add a new config IOMMU_DEBUG_PAGEALLOC, which registers new data to
>> page_ext.
>>
>> This config will be used by the IOMMU API to track pages mapped in
>> the IOMMU to catch drivers trying to free kernel memory that they
>> still map in their domains, causing all types of memory corruption.
>>
>> This behaviour is disabled by default and can be enabled using
>> kernel cmdline iommu.debug_pagealloc.
>>
>> Signed-off-by: Mostafa Saleh <smostafa@google.com>
>> ---
>> .../admin-guide/kernel-parameters.txt | 6 ++++
>> drivers/iommu/Kconfig | 19 +++++++++++
>> drivers/iommu/Makefile | 1 +
>> drivers/iommu/iommu-debug-pagealloc.c | 32 +++++++++++++++++++
>> include/linux/iommu-debug-pagealloc.h | 17 ++++++++++
>> mm/page_ext.c | 4 +++
>> 6 files changed, 79 insertions(+)
>> create mode 100644 drivers/iommu/iommu-debug-pagealloc.c
>> create mode 100644 include/linux/iommu-debug-pagealloc.h
>>
>> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
>> index 6c42061ca20e..dddf435a1c11 100644
>> --- a/Documentation/admin-guide/kernel-parameters.txt
>> +++ b/Documentation/admin-guide/kernel-parameters.txt
>> @@ -2557,6 +2557,12 @@
>> 1 - Bypass the IOMMU for DMA.
>> unset - Use value of CONFIG_IOMMU_DEFAULT_PASSTHROUGH.
>>
>> + iommu.debug_pagealloc=
Please note what format the option parameter value takes and possible values,
like iommu.passthrough above here in the kernel-parameters.txt file.
>> + [KNL,EARLY] When CONFIG_IOMMU_DEBUG_PAGEALLOC is set, this
>> + parameter enables the feature at boot time. By default, it
>> + is disabled and the system behave the same way as a kernel
>
> behaves
>
>> + built without CONFIG_IOMMU_DEBUG_PAGEALLOC.
>> +
>> io7= [HW] IO7 for Marvel-based Alpha systems
>> See comment before marvel_specify_io7 in
>> arch/alpha/kernel/core_marvel.c.
>
>
--
~Randy
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v3 1/4] drivers/iommu: Add page_ext for IOMMU_DEBUG_PAGEALLOC
2025-11-25 1:31 ` Randy Dunlap
@ 2025-11-25 9:42 ` Mostafa Saleh
0 siblings, 0 replies; 14+ messages in thread
From: Mostafa Saleh @ 2025-11-25 9:42 UTC (permalink / raw)
To: Randy Dunlap
Cc: linux-mm, iommu, linux-kernel, linux-doc, corbet, joro, will,
robin.murphy, akpm, vbabka, surenb, mhocko, jackmanb, hannes,
ziy, david, lorenzo.stoakes, Liam.Howlett, rppt, xiaqinxin
On Mon, Nov 24, 2025 at 05:31:12PM -0800, Randy Dunlap wrote:
> I missed one other thing:
>
> On 11/24/25 3:13 PM, Randy Dunlap wrote:
> >
> >
> > On 11/24/25 12:08 PM, Mostafa Saleh wrote:
> >> Add a new config IOMMU_DEBUG_PAGEALLOC, which registers new data to
> >> page_ext.
> >>
> >> This config will be used by the IOMMU API to track pages mapped in
> >> the IOMMU to catch drivers trying to free kernel memory that they
> >> still map in their domains, causing all types of memory corruption.
> >>
> >> This behaviour is disabled by default and can be enabled using
> >> kernel cmdline iommu.debug_pagealloc.
> >>
> >> Signed-off-by: Mostafa Saleh <smostafa@google.com>
> >> ---
> >> .../admin-guide/kernel-parameters.txt | 6 ++++
> >> drivers/iommu/Kconfig | 19 +++++++++++
> >> drivers/iommu/Makefile | 1 +
> >> drivers/iommu/iommu-debug-pagealloc.c | 32 +++++++++++++++++++
> >> include/linux/iommu-debug-pagealloc.h | 17 ++++++++++
> >> mm/page_ext.c | 4 +++
> >> 6 files changed, 79 insertions(+)
> >> create mode 100644 drivers/iommu/iommu-debug-pagealloc.c
> >> create mode 100644 include/linux/iommu-debug-pagealloc.h
> >>
> >> diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
> >> index 6c42061ca20e..dddf435a1c11 100644
> >> --- a/Documentation/admin-guide/kernel-parameters.txt
> >> +++ b/Documentation/admin-guide/kernel-parameters.txt
> >> @@ -2557,6 +2557,12 @@
> >> 1 - Bypass the IOMMU for DMA.
> >> unset - Use value of CONFIG_IOMMU_DEFAULT_PASSTHROUGH.
> >>
> >> + iommu.debug_pagealloc=
>
> Please note what format the option parameter value takes and possible values,
> like iommu.passthrough above here in the kernel-parameters.txt file.
I see, I was following the kernel's “debug_pagealloc”. Although, the
implementation understands “={0,1}”, I can add something as:
Format: { "0" | "1" }
0 - Sanitizer disabled.
1 - Sanitizer enabled, expect run-time overhead.
To make it more clear.
Thanks,
Mostafa
>
> >> + [KNL,EARLY] When CONFIG_IOMMU_DEBUG_PAGEALLOC is set, this
> >> + parameter enables the feature at boot time. By default, it
> >> + is disabled and the system behave the same way as a kernel
> >
> > behaves
> >
> >> + built without CONFIG_IOMMU_DEBUG_PAGEALLOC.
> >> +
> >> io7= [HW] IO7 for Marvel-based Alpha systems
> >> See comment before marvel_specify_io7 in
> >> arch/alpha/kernel/core_marvel.c.
> >
> >
>
> --
> ~Randy
>
^ permalink raw reply [flat|nested] 14+ messages in thread
* Re: [PATCH v3 1/4] drivers/iommu: Add page_ext for IOMMU_DEBUG_PAGEALLOC
2025-11-24 20:08 ` [PATCH v3 1/4] drivers/iommu: Add page_ext for IOMMU_DEBUG_PAGEALLOC Mostafa Saleh
2025-11-24 23:13 ` Randy Dunlap
@ 2025-11-25 7:17 ` Baolu Lu
2025-11-25 9:54 ` Mostafa Saleh
1 sibling, 1 reply; 14+ messages in thread
From: Baolu Lu @ 2025-11-25 7:17 UTC (permalink / raw)
To: Mostafa Saleh, linux-mm, iommu, linux-kernel, linux-doc
Cc: corbet, joro, will, robin.murphy, akpm, vbabka, surenb, mhocko,
jackmanb, hannes, ziy, david, lorenzo.stoakes, Liam.Howlett,
rppt, xiaqinxin
On 11/25/25 04:08, Mostafa Saleh wrote:
> Add a new config IOMMU_DEBUG_PAGEALLOC, which registers new data to
> page_ext.
>
> This config will be used by the IOMMU API to track pages mapped in
> the IOMMU to catch drivers trying to free kernel memory that they
> still map in their domains, causing all types of memory corruption.
>
> This behaviour is disabled by default and can be enabled using
> kernel cmdline iommu.debug_pagealloc.
>
> Signed-off-by: Mostafa Saleh <smostafa@google.com>
> ---
> .../admin-guide/kernel-parameters.txt | 6 ++++
> drivers/iommu/Kconfig | 19 +++++++++++
> drivers/iommu/Makefile | 1 +
> drivers/iommu/iommu-debug-pagealloc.c | 32 +++++++++++++++++++
> include/linux/iommu-debug-pagealloc.h | 17 ++++++++++
> mm/page_ext.c | 4 +++
> 6 files changed, 79 insertions(+)
> create mode 100644 drivers/iommu/iommu-debug-pagealloc.c
> create mode 100644 include/linux/iommu-debug-pagealloc.h
>
[..]
> diff --git a/include/linux/iommu-debug-pagealloc.h b/include/linux/iommu-debug-pagealloc.h
> new file mode 100644
> index 000000000000..83e64d70bf6c
> --- /dev/null
> +++ b/include/linux/iommu-debug-pagealloc.h
> @@ -0,0 +1,17 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +/*
> + * Copyright (C) 2025 - Google Inc
> + * Author: Mostafa Saleh <smostafa@google.com>
> + * IOMMU API debug page alloc sanitizer
> + */
> +
> +#ifndef __LINUX_IOMMU_DEBUG_PAGEALLOC_H
> +#define __LINUX_IOMMU_DEBUG_PAGEALLOC_H
> +
> +#ifdef CONFIG_IOMMU_DEBUG_PAGEALLOC
> +
> +extern struct page_ext_operations page_iommu_debug_ops;
How about moving above to below mm/page_ext.c and remove iommu-debug-
pagealloc.h header file? ...
> +
> +#endif /* CONFIG_IOMMU_DEBUG_PAGEALLOC */
> +
> +#endif /* __LINUX_IOMMU_DEBUG_PAGEALLOC_H */
> diff --git a/mm/page_ext.c b/mm/page_ext.c
> index d7396a8970e5..297e4cd8ce90 100644
> --- a/mm/page_ext.c
> +++ b/mm/page_ext.c
> @@ -11,6 +11,7 @@
> #include <linux/page_table_check.h>
> #include <linux/rcupdate.h>
> #include <linux/pgalloc_tag.h>
> +#include <linux/iommu-debug-pagealloc.h>
... remove this include line and put the "extern" line here,
extern struct page_ext_operations page_iommu_debug_ops;
>
> /*
> * struct page extension
> @@ -89,6 +90,9 @@ static struct page_ext_operations *page_ext_ops[] __initdata = {
> #ifdef CONFIG_PAGE_TABLE_CHECK
> &page_table_check_ops,
> #endif
> +#ifdef CONFIG_IOMMU_DEBUG_PAGEALLOC
> + &page_iommu_debug_ops,
> +#endif
> };
>
> unsigned long page_ext_size;
Or, put it directly in linux/iommu.h?
diff --git a/include/linux/iommu-debug-pagealloc.h
b/include/linux/iommu-debug-pagealloc.h
index 87f593305465..b2b82cf032e6 100644
--- a/include/linux/iommu-debug-pagealloc.h
+++ b/include/linux/iommu-debug-pagealloc.h
@@ -14,8 +14,6 @@ struct iommu_domain;
DECLARE_STATIC_KEY_FALSE(iommu_debug_initialized);
-extern struct page_ext_operations page_iommu_debug_ops;
-
void __iommu_debug_map(struct iommu_domain *domain, phys_addr_t phys,
size_t size);
void __iommu_debug_unmap_begin(struct iommu_domain *domain,
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index 801b2bd9e8d4..40133031985a 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -1185,6 +1185,10 @@ void iommu_detach_device_pasid(struct
iommu_domain *domain,
struct device *dev, ioasid_t pasid);
ioasid_t iommu_alloc_global_pasid(struct device *dev);
void iommu_free_global_pasid(ioasid_t pasid);
+
+#ifdef CONFIG_IOMMU_DEBUG_PAGEALLOC
+extern struct page_ext_operations page_iommu_debug_ops;
+#endif /* CONFIG_IOMMU_DEBUG_PAGEALLOC */
#else /* CONFIG_IOMMU_API */
struct iommu_ops {};
diff --git a/mm/page_ext.c b/mm/page_ext.c
index 297e4cd8ce90..345af8c9fcce 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -11,7 +11,7 @@
#include <linux/page_table_check.h>
#include <linux/rcupdate.h>
#include <linux/pgalloc_tag.h>
-#include <linux/iommu-debug-pagealloc.h>
+#include <linux/iommu.h>
/*
* struct page extension
Thanks,
baolu
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH v3 1/4] drivers/iommu: Add page_ext for IOMMU_DEBUG_PAGEALLOC
2025-11-25 7:17 ` Baolu Lu
@ 2025-11-25 9:54 ` Mostafa Saleh
0 siblings, 0 replies; 14+ messages in thread
From: Mostafa Saleh @ 2025-11-25 9:54 UTC (permalink / raw)
To: Baolu Lu
Cc: linux-mm, iommu, linux-kernel, linux-doc, corbet, joro, will,
robin.murphy, akpm, vbabka, surenb, mhocko, jackmanb, hannes,
ziy, david, lorenzo.stoakes, Liam.Howlett, rppt, xiaqinxin
On Tue, Nov 25, 2025 at 03:17:47PM +0800, Baolu Lu wrote:
> On 11/25/25 04:08, Mostafa Saleh wrote:
> > Add a new config IOMMU_DEBUG_PAGEALLOC, which registers new data to
> > page_ext.
> >
> > This config will be used by the IOMMU API to track pages mapped in
> > the IOMMU to catch drivers trying to free kernel memory that they
> > still map in their domains, causing all types of memory corruption.
> >
> > This behaviour is disabled by default and can be enabled using
> > kernel cmdline iommu.debug_pagealloc.
> >
> > Signed-off-by: Mostafa Saleh <smostafa@google.com>
> > ---
> > .../admin-guide/kernel-parameters.txt | 6 ++++
> > drivers/iommu/Kconfig | 19 +++++++++++
> > drivers/iommu/Makefile | 1 +
> > drivers/iommu/iommu-debug-pagealloc.c | 32 +++++++++++++++++++
> > include/linux/iommu-debug-pagealloc.h | 17 ++++++++++
> > mm/page_ext.c | 4 +++
> > 6 files changed, 79 insertions(+)
> > create mode 100644 drivers/iommu/iommu-debug-pagealloc.c
> > create mode 100644 include/linux/iommu-debug-pagealloc.h
> >
>
> [..]
>
> > diff --git a/include/linux/iommu-debug-pagealloc.h b/include/linux/iommu-debug-pagealloc.h
> > new file mode 100644
> > index 000000000000..83e64d70bf6c
> > --- /dev/null
> > +++ b/include/linux/iommu-debug-pagealloc.h
> > @@ -0,0 +1,17 @@
> > +// SPDX-License-Identifier: GPL-2.0-only
> > +/*
> > + * Copyright (C) 2025 - Google Inc
> > + * Author: Mostafa Saleh <smostafa@google.com>
> > + * IOMMU API debug page alloc sanitizer
> > + */
> > +
> > +#ifndef __LINUX_IOMMU_DEBUG_PAGEALLOC_H
> > +#define __LINUX_IOMMU_DEBUG_PAGEALLOC_H
> > +
> > +#ifdef CONFIG_IOMMU_DEBUG_PAGEALLOC
> > +
> > +extern struct page_ext_operations page_iommu_debug_ops;
>
> How about moving above to below mm/page_ext.c and remove iommu-debug-
> pagealloc.h header file? ...
>
> > +
> > +#endif /* CONFIG_IOMMU_DEBUG_PAGEALLOC */
> > +
> > +#endif /* __LINUX_IOMMU_DEBUG_PAGEALLOC_H */
> > diff --git a/mm/page_ext.c b/mm/page_ext.c
> > index d7396a8970e5..297e4cd8ce90 100644
> > --- a/mm/page_ext.c
> > +++ b/mm/page_ext.c
> > @@ -11,6 +11,7 @@
> > #include <linux/page_table_check.h>
> > #include <linux/rcupdate.h>
> > #include <linux/pgalloc_tag.h>
> > +#include <linux/iommu-debug-pagealloc.h>
>
> ... remove this include line and put the "extern" line here,
>
> extern struct page_ext_operations page_iommu_debug_ops;
This file is needed for other functions added later, which is then
included by iommu.c mm.h
Also, the pattern in page_ext looks that users have their own headers:
include/linux/page_owner.h:extern struct page_ext_operations page_owner_ops;
include/linux/page_table_check.h:extern struct page_ext_operations page_table_check_ops;
include/linux/pgalloc_tag.h:extern struct page_ext_operations page_alloc_tagging_ops;
>
> > /*
> > * struct page extension
> > @@ -89,6 +90,9 @@ static struct page_ext_operations *page_ext_ops[] __initdata = {
> > #ifdef CONFIG_PAGE_TABLE_CHECK
> > &page_table_check_ops,
> > #endif
> > +#ifdef CONFIG_IOMMU_DEBUG_PAGEALLOC
> > + &page_iommu_debug_ops,
> > +#endif
> > };
> > unsigned long page_ext_size;
>
> Or, put it directly in linux/iommu.h?
>
I tried that, but in the last patch we need to include that in linux/mm.h
which didn't compile and required including other files which seemed
unnecessary and that it would be better to isolate this feature.
Thanks,
Mostafa
> diff --git a/include/linux/iommu-debug-pagealloc.h
> b/include/linux/iommu-debug-pagealloc.h
> index 87f593305465..b2b82cf032e6 100644
> --- a/include/linux/iommu-debug-pagealloc.h
> +++ b/include/linux/iommu-debug-pagealloc.h
> @@ -14,8 +14,6 @@ struct iommu_domain;
>
> DECLARE_STATIC_KEY_FALSE(iommu_debug_initialized);
>
> -extern struct page_ext_operations page_iommu_debug_ops;
> -
> void __iommu_debug_map(struct iommu_domain *domain, phys_addr_t phys,
> size_t size);
> void __iommu_debug_unmap_begin(struct iommu_domain *domain,
> diff --git a/include/linux/iommu.h b/include/linux/iommu.h
> index 801b2bd9e8d4..40133031985a 100644
> --- a/include/linux/iommu.h
> +++ b/include/linux/iommu.h
> @@ -1185,6 +1185,10 @@ void iommu_detach_device_pasid(struct iommu_domain
> *domain,
> struct device *dev, ioasid_t pasid);
> ioasid_t iommu_alloc_global_pasid(struct device *dev);
> void iommu_free_global_pasid(ioasid_t pasid);
> +
> +#ifdef CONFIG_IOMMU_DEBUG_PAGEALLOC
> +extern struct page_ext_operations page_iommu_debug_ops;
> +#endif /* CONFIG_IOMMU_DEBUG_PAGEALLOC */
> #else /* CONFIG_IOMMU_API */
>
> struct iommu_ops {};
> diff --git a/mm/page_ext.c b/mm/page_ext.c
> index 297e4cd8ce90..345af8c9fcce 100644
> --- a/mm/page_ext.c
> +++ b/mm/page_ext.c
> @@ -11,7 +11,7 @@
> #include <linux/page_table_check.h>
> #include <linux/rcupdate.h>
> #include <linux/pgalloc_tag.h>
> -#include <linux/iommu-debug-pagealloc.h>
> +#include <linux/iommu.h>
>
> /*
> * struct page extension
>
> Thanks,
> baolu
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH v3 2/4] drivers/iommu: Add calls for IOMMU_DEBUG_PAGEALLOC
2025-11-24 20:08 [PATCH v3 0/4] iommu: Add IOMMU_DEBUG_PAGEALLOC sanitizer Mostafa Saleh
2025-11-24 20:08 ` [PATCH v3 1/4] drivers/iommu: Add page_ext for IOMMU_DEBUG_PAGEALLOC Mostafa Saleh
@ 2025-11-24 20:08 ` Mostafa Saleh
2025-11-25 7:35 ` Baolu Lu
2025-11-24 20:08 ` [PATCH v3 3/4] drivers/iommu-debug-pagealloc: Track IOMMU pages Mostafa Saleh
2025-11-24 20:08 ` [PATCH v3 4/4] drivers/iommu-debug-pagealloc: Check mapped/unmapped kernel memory Mostafa Saleh
3 siblings, 1 reply; 14+ messages in thread
From: Mostafa Saleh @ 2025-11-24 20:08 UTC (permalink / raw)
To: linux-mm, iommu, linux-kernel, linux-doc
Cc: corbet, joro, will, robin.murphy, akpm, vbabka, surenb, mhocko,
jackmanb, hannes, ziy, david, lorenzo.stoakes, Liam.Howlett,
rppt, xiaqinxin, Mostafa Saleh
Add calls for the new iommu debug config IOMMU_DEBUG_PAGEALLOC:
- iommu_debug_init: Enable the debug mode if configured by the user.
- iommu_debug_map: Track iommu pages mapped, using physical address.
- iommu_debug_unmap_begin: Track start of iommu unmap operation, with
IOVA and size.
- iommu_debug_unmap_end: Track the end of unmap operation, passing the
actual unmapped size versus the tracked one at unmap_begin.
We have to do the unmap_begin/end as once pages are unmapped we lose
the information of the physical address.
This is racy, but the API is racy by construction as it uses refcounts
and doesn't attempt to lock/synchronize with the IOMMU API as that will
be costly, meaning that possibility of false negative exists.
Signed-off-by: Mostafa Saleh <smostafa@google.com>
---
drivers/iommu/iommu-debug-pagealloc.c | 26 +++++++++++++
drivers/iommu/iommu.c | 12 +++++-
include/linux/iommu-debug-pagealloc.h | 56 +++++++++++++++++++++++++++
3 files changed, 92 insertions(+), 2 deletions(-)
diff --git a/drivers/iommu/iommu-debug-pagealloc.c b/drivers/iommu/iommu-debug-pagealloc.c
index 4022e9af7f27..5665fd7360e0 100644
--- a/drivers/iommu/iommu-debug-pagealloc.c
+++ b/drivers/iommu/iommu-debug-pagealloc.c
@@ -5,11 +5,13 @@
* IOMMU API debug page alloc sanitizer
*/
#include <linux/atomic.h>
+#include <linux/iommu.h>
#include <linux/iommu-debug-pagealloc.h>
#include <linux/kernel.h>
#include <linux/page_ext.h>
static bool needed;
+DEFINE_STATIC_KEY_FALSE(iommu_debug_initialized);
struct iommu_debug_metadata {
atomic_t ref;
@@ -25,6 +27,30 @@ struct page_ext_operations page_iommu_debug_ops = {
.need = need_iommu_debug,
};
+void __iommu_debug_map(struct iommu_domain *domain, phys_addr_t phys, size_t size)
+{
+}
+
+void __iommu_debug_unmap_begin(struct iommu_domain *domain,
+ unsigned long iova, size_t size)
+{
+}
+
+void __iommu_debug_unmap_end(struct iommu_domain *domain,
+ unsigned long iova, size_t size,
+ size_t unmapped)
+{
+}
+
+void iommu_debug_init(void)
+{
+ if (!needed)
+ return;
+
+ pr_info("iommu: Debugging page allocations, expect overhead or disable iommu.debug_pagealloc");
+ static_branch_enable(&iommu_debug_initialized);
+}
+
static int __init iommu_debug_pagealloc(char *str)
{
return kstrtobool(str, &needed);
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index 59244c744eab..4d1dc321e400 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -18,6 +18,7 @@
#include <linux/errno.h>
#include <linux/host1x_context_bus.h>
#include <linux/iommu.h>
+#include <linux/iommu-debug-pagealloc.h>
#include <linux/iommufd.h>
#include <linux/idr.h>
#include <linux/err.h>
@@ -231,6 +232,8 @@ static int __init iommu_subsys_init(void)
if (!nb)
return -ENOMEM;
+ iommu_debug_init();
+
for (int i = 0; i < ARRAY_SIZE(iommu_buses); i++) {
nb[i].notifier_call = iommu_bus_notifier;
bus_register_notifier(iommu_buses[i], &nb[i]);
@@ -2544,10 +2547,12 @@ int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova,
}
/* unroll mapping in case something went wrong */
- if (ret)
+ if (ret) {
iommu_unmap(domain, orig_iova, orig_size - size);
- else
+ } else {
trace_map(orig_iova, orig_paddr, orig_size);
+ iommu_debug_map(domain, orig_paddr, orig_size);
+ }
return ret;
}
@@ -2609,6 +2614,8 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
pr_debug("unmap this: iova 0x%lx size 0x%zx\n", iova, size);
+ iommu_debug_unmap_begin(domain, iova, size);
+
/*
* Keep iterating until we either unmap 'size' bytes (or more)
* or we hit an area that isn't mapped.
@@ -2629,6 +2636,7 @@ static size_t __iommu_unmap(struct iommu_domain *domain,
}
trace_unmap(orig_iova, size, unmapped);
+ iommu_debug_unmap_end(domain, orig_iova, size, unmapped);
return unmapped;
}
diff --git a/include/linux/iommu-debug-pagealloc.h b/include/linux/iommu-debug-pagealloc.h
index 83e64d70bf6c..454303ec09c2 100644
--- a/include/linux/iommu-debug-pagealloc.h
+++ b/include/linux/iommu-debug-pagealloc.h
@@ -8,10 +8,66 @@
#ifndef __LINUX_IOMMU_DEBUG_PAGEALLOC_H
#define __LINUX_IOMMU_DEBUG_PAGEALLOC_H
+struct iommu_domain;
+
#ifdef CONFIG_IOMMU_DEBUG_PAGEALLOC
+DECLARE_STATIC_KEY_FALSE(iommu_debug_initialized);
+
extern struct page_ext_operations page_iommu_debug_ops;
+void __iommu_debug_map(struct iommu_domain *domain, phys_addr_t phys,
+ size_t size);
+void __iommu_debug_unmap_begin(struct iommu_domain *domain,
+ unsigned long iova, size_t size);
+void __iommu_debug_unmap_end(struct iommu_domain *domain,
+ unsigned long iova, size_t size, size_t unmapped);
+
+static inline void iommu_debug_map(struct iommu_domain *domain,
+ phys_addr_t phys, size_t size)
+{
+ if (static_branch_unlikely(&iommu_debug_initialized))
+ __iommu_debug_map(domain, phys, size);
+}
+
+static inline void iommu_debug_unmap_begin(struct iommu_domain *domain,
+ unsigned long iova, size_t size)
+{
+ if (static_branch_unlikely(&iommu_debug_initialized))
+ __iommu_debug_unmap_begin(domain, iova, size);
+}
+
+static inline void iommu_debug_unmap_end(struct iommu_domain *domain,
+ unsigned long iova, size_t size,
+ size_t unmapped)
+{
+ if (static_branch_unlikely(&iommu_debug_initialized))
+ __iommu_debug_unmap_end(domain, iova, size, unmapped);
+}
+
+void iommu_debug_init(void);
+
+#else
+static inline void iommu_debug_map(struct iommu_domain *domain,
+ phys_addr_t phys, size_t size)
+{
+}
+
+static inline void iommu_debug_unmap_begin(struct iommu_domain *domain,
+ unsigned long iova, size_t size)
+{
+}
+
+static inline void iommu_debug_unmap_end(struct iommu_domain *domain,
+ unsigned long iova, size_t size,
+ size_t unmapped)
+{
+}
+
+static inline void iommu_debug_init(void)
+{
+}
+
#endif /* CONFIG_IOMMU_DEBUG_PAGEALLOC */
#endif /* __LINUX_IOMMU_DEBUG_PAGEALLOC_H */
--
2.52.0.460.gd25c4c69ec-goog
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH v3 2/4] drivers/iommu: Add calls for IOMMU_DEBUG_PAGEALLOC
2025-11-24 20:08 ` [PATCH v3 2/4] drivers/iommu: Add calls " Mostafa Saleh
@ 2025-11-25 7:35 ` Baolu Lu
2025-11-25 10:01 ` Mostafa Saleh
0 siblings, 1 reply; 14+ messages in thread
From: Baolu Lu @ 2025-11-25 7:35 UTC (permalink / raw)
To: Mostafa Saleh, linux-mm, iommu, linux-kernel, linux-doc
Cc: corbet, joro, will, robin.murphy, akpm, vbabka, surenb, mhocko,
jackmanb, hannes, ziy, david, lorenzo.stoakes, Liam.Howlett,
rppt, xiaqinxin
On 11/25/25 04:08, Mostafa Saleh wrote:
> Add calls for the new iommu debug config IOMMU_DEBUG_PAGEALLOC:
> - iommu_debug_init: Enable the debug mode if configured by the user.
> - iommu_debug_map: Track iommu pages mapped, using physical address.
> - iommu_debug_unmap_begin: Track start of iommu unmap operation, with
> IOVA and size.
> - iommu_debug_unmap_end: Track the end of unmap operation, passing the
> actual unmapped size versus the tracked one at unmap_begin.
>
> We have to do the unmap_begin/end as once pages are unmapped we lose
> the information of the physical address.
> This is racy, but the API is racy by construction as it uses refcounts
> and doesn't attempt to lock/synchronize with the IOMMU API as that will
> be costly, meaning that possibility of false negative exists.
>
> Signed-off-by: Mostafa Saleh <smostafa@google.com>
> ---
> drivers/iommu/iommu-debug-pagealloc.c | 26 +++++++++++++
> drivers/iommu/iommu.c | 12 +++++-
> include/linux/iommu-debug-pagealloc.h | 56 +++++++++++++++++++++++++++
> 3 files changed, 92 insertions(+), 2 deletions(-)
>
Remove "drivers/" from the commit title.
$ git log --oneline drivers/iommu/iommu.c
[...]
> diff --git a/include/linux/iommu-debug-pagealloc.h b/include/linux/iommu-debug-pagealloc.h
> index 83e64d70bf6c..454303ec09c2 100644
> --- a/include/linux/iommu-debug-pagealloc.h
> +++ b/include/linux/iommu-debug-pagealloc.h
> @@ -8,10 +8,66 @@
> #ifndef __LINUX_IOMMU_DEBUG_PAGEALLOC_H
> #define __LINUX_IOMMU_DEBUG_PAGEALLOC_H
>
> +struct iommu_domain;
> +
> #ifdef CONFIG_IOMMU_DEBUG_PAGEALLOC
>
> +DECLARE_STATIC_KEY_FALSE(iommu_debug_initialized);
> +
> extern struct page_ext_operations page_iommu_debug_ops;
>
> +void __iommu_debug_map(struct iommu_domain *domain, phys_addr_t phys,
> + size_t size);
> +void __iommu_debug_unmap_begin(struct iommu_domain *domain,
> + unsigned long iova, size_t size);
> +void __iommu_debug_unmap_end(struct iommu_domain *domain,
> + unsigned long iova, size_t size, size_t unmapped);
> +
> +static inline void iommu_debug_map(struct iommu_domain *domain,
> + phys_addr_t phys, size_t size)
> +{
> + if (static_branch_unlikely(&iommu_debug_initialized))
> + __iommu_debug_map(domain, phys, size);
> +}
> +
> +static inline void iommu_debug_unmap_begin(struct iommu_domain *domain,
> + unsigned long iova, size_t size)
> +{
> + if (static_branch_unlikely(&iommu_debug_initialized))
> + __iommu_debug_unmap_begin(domain, iova, size);
> +}
> +
> +static inline void iommu_debug_unmap_end(struct iommu_domain *domain,
> + unsigned long iova, size_t size,
> + size_t unmapped)
> +{
> + if (static_branch_unlikely(&iommu_debug_initialized))
> + __iommu_debug_unmap_end(domain, iova, size, unmapped);
> +}
> +
> +void iommu_debug_init(void);
> +
> +#else
> +static inline void iommu_debug_map(struct iommu_domain *domain,
> + phys_addr_t phys, size_t size)
> +{
> +}
> +
> +static inline void iommu_debug_unmap_begin(struct iommu_domain *domain,
> + unsigned long iova, size_t size)
> +{
> +}
> +
> +static inline void iommu_debug_unmap_end(struct iommu_domain *domain,
> + unsigned long iova, size_t size,
> + size_t unmapped)
> +{
> +}
> +
> +static inline void iommu_debug_init(void)
> +{
> +}
I suppose that all these should go to drivers/iommu/iommu-priv.h, as
they are for use in other files inside the IOMMU subsystem.
> +
> #endif /* CONFIG_IOMMU_DEBUG_PAGEALLOC */
>
> #endif /* __LINUX_IOMMU_DEBUG_PAGEALLOC_H */
Thanks,
baolu
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH v3 2/4] drivers/iommu: Add calls for IOMMU_DEBUG_PAGEALLOC
2025-11-25 7:35 ` Baolu Lu
@ 2025-11-25 10:01 ` Mostafa Saleh
0 siblings, 0 replies; 14+ messages in thread
From: Mostafa Saleh @ 2025-11-25 10:01 UTC (permalink / raw)
To: Baolu Lu
Cc: linux-mm, iommu, linux-kernel, linux-doc, corbet, joro, will,
robin.murphy, akpm, vbabka, surenb, mhocko, jackmanb, hannes,
ziy, david, lorenzo.stoakes, Liam.Howlett, rppt, xiaqinxin
On Tue, Nov 25, 2025 at 03:35:08PM +0800, Baolu Lu wrote:
> On 11/25/25 04:08, Mostafa Saleh wrote:
> > Add calls for the new iommu debug config IOMMU_DEBUG_PAGEALLOC:
> > - iommu_debug_init: Enable the debug mode if configured by the user.
> > - iommu_debug_map: Track iommu pages mapped, using physical address.
> > - iommu_debug_unmap_begin: Track start of iommu unmap operation, with
> > IOVA and size.
> > - iommu_debug_unmap_end: Track the end of unmap operation, passing the
> > actual unmapped size versus the tracked one at unmap_begin.
> >
> > We have to do the unmap_begin/end as once pages are unmapped we lose
> > the information of the physical address.
> > This is racy, but the API is racy by construction as it uses refcounts
> > and doesn't attempt to lock/synchronize with the IOMMU API as that will
> > be costly, meaning that possibility of false negative exists.
> >
> > Signed-off-by: Mostafa Saleh <smostafa@google.com>
> > ---
> > drivers/iommu/iommu-debug-pagealloc.c | 26 +++++++++++++
> > drivers/iommu/iommu.c | 12 +++++-
> > include/linux/iommu-debug-pagealloc.h | 56 +++++++++++++++++++++++++++
> > 3 files changed, 92 insertions(+), 2 deletions(-)
> >
>
> Remove "drivers/" from the commit title.
>
> $ git log --oneline drivers/iommu/iommu.c
My bad, I will fix it.
>
> [...]
> > diff --git a/include/linux/iommu-debug-pagealloc.h b/include/linux/iommu-debug-pagealloc.h
> > index 83e64d70bf6c..454303ec09c2 100644
> > --- a/include/linux/iommu-debug-pagealloc.h
> > +++ b/include/linux/iommu-debug-pagealloc.h
> > @@ -8,10 +8,66 @@
> > #ifndef __LINUX_IOMMU_DEBUG_PAGEALLOC_H
> > #define __LINUX_IOMMU_DEBUG_PAGEALLOC_H
> > +struct iommu_domain;
> > +
> > #ifdef CONFIG_IOMMU_DEBUG_PAGEALLOC
> > +DECLARE_STATIC_KEY_FALSE(iommu_debug_initialized);
> > +
> > extern struct page_ext_operations page_iommu_debug_ops;
> > +void __iommu_debug_map(struct iommu_domain *domain, phys_addr_t phys,
> > + size_t size);
> > +void __iommu_debug_unmap_begin(struct iommu_domain *domain,
> > + unsigned long iova, size_t size);
> > +void __iommu_debug_unmap_end(struct iommu_domain *domain,
> > + unsigned long iova, size_t size, size_t unmapped);
> > +
> > +static inline void iommu_debug_map(struct iommu_domain *domain,
> > + phys_addr_t phys, size_t size)
> > +{
> > + if (static_branch_unlikely(&iommu_debug_initialized))
> > + __iommu_debug_map(domain, phys, size);
> > +}
> > +
> > +static inline void iommu_debug_unmap_begin(struct iommu_domain *domain,
> > + unsigned long iova, size_t size)
> > +{
> > + if (static_branch_unlikely(&iommu_debug_initialized))
> > + __iommu_debug_unmap_begin(domain, iova, size);
> > +}
> > +
> > +static inline void iommu_debug_unmap_end(struct iommu_domain *domain,
> > + unsigned long iova, size_t size,
> > + size_t unmapped)
> > +{
> > + if (static_branch_unlikely(&iommu_debug_initialized))
> > + __iommu_debug_unmap_end(domain, iova, size, unmapped);
> > +}
> > +
> > +void iommu_debug_init(void);
> > +
> > +#else
> > +static inline void iommu_debug_map(struct iommu_domain *domain,
> > + phys_addr_t phys, size_t size)
> > +{
> > +}
> > +
> > +static inline void iommu_debug_unmap_begin(struct iommu_domain *domain,
> > + unsigned long iova, size_t size)
> > +{
> > +}
> > +
> > +static inline void iommu_debug_unmap_end(struct iommu_domain *domain,
> > + unsigned long iova, size_t size,
> > + size_t unmapped)
> > +{
> > +}
> > +
> > +static inline void iommu_debug_init(void)
> > +{
> > +}
>
> I suppose that all these should go to drivers/iommu/iommu-priv.h, as
> they are for use in other files inside the IOMMU subsystem.
It seemed better to have all the feature functions/declarations in one
isolated file, as it is included outside of the iommu susbsystem also.
I have no strong opinion, I can keep them in drivers/iommu/iommu-priv.h
if you think it's better. But then we will have to include also
"iommu-debug-pagealloc.h" for the static key to avoid including extra
files to linux/mm.h.
Thanks,
Mostafa
>
> > +
> > #endif /* CONFIG_IOMMU_DEBUG_PAGEALLOC */
> > #endif /* __LINUX_IOMMU_DEBUG_PAGEALLOC_H */
>
> Thanks,
> baolu
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH v3 3/4] drivers/iommu-debug-pagealloc: Track IOMMU pages
2025-11-24 20:08 [PATCH v3 0/4] iommu: Add IOMMU_DEBUG_PAGEALLOC sanitizer Mostafa Saleh
2025-11-24 20:08 ` [PATCH v3 1/4] drivers/iommu: Add page_ext for IOMMU_DEBUG_PAGEALLOC Mostafa Saleh
2025-11-24 20:08 ` [PATCH v3 2/4] drivers/iommu: Add calls " Mostafa Saleh
@ 2025-11-24 20:08 ` Mostafa Saleh
2025-11-25 7:54 ` Baolu Lu
2025-11-24 20:08 ` [PATCH v3 4/4] drivers/iommu-debug-pagealloc: Check mapped/unmapped kernel memory Mostafa Saleh
3 siblings, 1 reply; 14+ messages in thread
From: Mostafa Saleh @ 2025-11-24 20:08 UTC (permalink / raw)
To: linux-mm, iommu, linux-kernel, linux-doc
Cc: corbet, joro, will, robin.murphy, akpm, vbabka, surenb, mhocko,
jackmanb, hannes, ziy, david, lorenzo.stoakes, Liam.Howlett,
rppt, xiaqinxin, Mostafa Saleh
Using the new calls, use an atomic refcount to track how many times
a page is mapped in any of the IOMMUs.
For unmap we need to use iova_to_phys() to get the physical address
of the pages.
We use the smallest supported page size as the granularity of tracking
per domain.
This is important as it is possible to map pages and unmap them with
larger sizes (as in map_sg()) cases.
Signed-off-by: Mostafa Saleh <smostafa@google.com>
---
drivers/iommu/iommu-debug-pagealloc.c | 91 +++++++++++++++++++++++++++
1 file changed, 91 insertions(+)
diff --git a/drivers/iommu/iommu-debug-pagealloc.c b/drivers/iommu/iommu-debug-pagealloc.c
index 5665fd7360e0..c420b006ef08 100644
--- a/drivers/iommu/iommu-debug-pagealloc.c
+++ b/drivers/iommu/iommu-debug-pagealloc.c
@@ -27,19 +27,110 @@ struct page_ext_operations page_iommu_debug_ops = {
.need = need_iommu_debug,
};
+static struct page_ext *get_iommu_page_ext(phys_addr_t phys)
+{
+ struct page *page = phys_to_page(phys);
+ struct page_ext *page_ext = page_ext_get(page);
+
+ return page_ext;
+}
+
+static struct iommu_debug_metadata *get_iommu_data(struct page_ext *page_ext)
+{
+ return page_ext_data(page_ext, &page_iommu_debug_ops);
+}
+
+static void iommu_debug_inc_page(phys_addr_t phys)
+{
+ struct page_ext *page_ext = get_iommu_page_ext(phys);
+ struct iommu_debug_metadata *d = get_iommu_data(page_ext);
+
+ WARN_ON(atomic_inc_return_relaxed(&d->ref) <= 0);
+ page_ext_put(page_ext);
+}
+
+static void iommu_debug_dec_page(phys_addr_t phys)
+{
+ struct page_ext *page_ext = get_iommu_page_ext(phys);
+ struct iommu_debug_metadata *d = get_iommu_data(page_ext);
+
+ WARN_ON(atomic_dec_return_relaxed(&d->ref) < 0);
+ page_ext_put(page_ext);
+}
+
+/*
+ * IOMMU page size doesn't have tomatch the CPU page size. So, we use
+ * the smallest IOMMU page size to refcount the pages in the vmemmap.
+ * That is important as both map and unmap has to use the same page size
+ * to update the refcount to avoid double counting the same page.
+ * And as we can't know from iommu_unmap() what was the original page size
+ * used for map, we just use the minimum supported one for both.
+ */
+static size_t iommu_debug_page_size(struct iommu_domain *domain)
+{
+ return 1UL << __ffs(domain->pgsize_bitmap);
+}
+
void __iommu_debug_map(struct iommu_domain *domain, phys_addr_t phys, size_t size)
{
+ size_t off, end;
+ size_t page_size = iommu_debug_page_size(domain);
+
+ if (WARN_ON(!phys || check_add_overflow(phys, size, &end)))
+ return;
+
+ for (off = 0 ; off < size ; off += page_size) {
+ if (!pfn_valid(__phys_to_pfn(phys + off)))
+ continue;
+ iommu_debug_inc_page(phys + off);
+ }
+}
+
+static void __iommu_debug_update_iova(struct iommu_domain *domain,
+ unsigned long iova, size_t size, bool inc)
+{
+ size_t off, end;
+ size_t page_size = iommu_debug_page_size(domain);
+
+ if (WARN_ON(check_add_overflow(iova, size, &end)))
+ return;
+
+ for (off = 0 ; off < size ; off += page_size) {
+ phys_addr_t phys = iommu_iova_to_phys(domain, iova + off);
+
+ if (!phys || !pfn_valid(__phys_to_pfn(phys)))
+ continue;
+
+ if (inc)
+ iommu_debug_inc_page(phys);
+ else
+ iommu_debug_dec_page(phys);
+ }
}
void __iommu_debug_unmap_begin(struct iommu_domain *domain,
unsigned long iova, size_t size)
{
+ __iommu_debug_update_iova(domain, iova, size, false);
}
void __iommu_debug_unmap_end(struct iommu_domain *domain,
unsigned long iova, size_t size,
size_t unmapped)
{
+ if (unmapped == size)
+ return;
+
+ /*
+ * If unmap failed, re-increment the refcount, but if it unmapped
+ * larger size, decrement the extra part.
+ */
+ if (unmapped < size)
+ __iommu_debug_update_iova(domain, iova + unmapped,
+ size - unmapped, true);
+ else
+ __iommu_debug_update_iova(domain, iova + size,
+ unmapped - size, false);
}
void iommu_debug_init(void)
--
2.52.0.460.gd25c4c69ec-goog
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH v3 3/4] drivers/iommu-debug-pagealloc: Track IOMMU pages
2025-11-24 20:08 ` [PATCH v3 3/4] drivers/iommu-debug-pagealloc: Track IOMMU pages Mostafa Saleh
@ 2025-11-25 7:54 ` Baolu Lu
2025-11-25 10:03 ` Mostafa Saleh
0 siblings, 1 reply; 14+ messages in thread
From: Baolu Lu @ 2025-11-25 7:54 UTC (permalink / raw)
To: Mostafa Saleh, linux-mm, iommu, linux-kernel, linux-doc
Cc: corbet, joro, will, robin.murphy, akpm, vbabka, surenb, mhocko,
jackmanb, hannes, ziy, david, lorenzo.stoakes, Liam.Howlett,
rppt, xiaqinxin
On 11/25/25 04:08, Mostafa Saleh wrote:
> void __iommu_debug_unmap_end(struct iommu_domain *domain,
> unsigned long iova, size_t size,
> size_t unmapped)
> {
> + if (unmapped == size)
> + return;
> +
> + /*
> + * If unmap failed, re-increment the refcount, but if it unmapped
> + * larger size, decrement the extra part.
> + */
> + if (unmapped < size)
> + __iommu_debug_update_iova(domain, iova + unmapped,
> + size - unmapped, true);
> + else
> + __iommu_debug_update_iova(domain, iova + size,
> + unmapped - size, false);
In any case, could the 'else' branch become a real operation?
In the __iommu_unmap():
/*
* Keep iterating until we either unmap 'size' bytes (or more)
* or we hit an area that isn't mapped.
*/
while (unmapped < size) {
size_t pgsize, count;
pgsize = iommu_pgsize(domain, iova, iova, size -
unmapped, &count);
unmapped_page = ops->unmap_pages(domain, iova, pgsize,
count, iotlb_gather);
if (!unmapped_page)
break;
pr_debug("unmapped: iova 0x%lx size 0x%zx\n",
iova, unmapped_page);
iova += unmapped_page;
unmapped += unmapped_page;
}
The comments say that it is possible to unmap more bytes than 'size',
but isn't it a bug if this helper unmaps more than the caller desired?
Thanks,
baolu
^ permalink raw reply [flat|nested] 14+ messages in thread* Re: [PATCH v3 3/4] drivers/iommu-debug-pagealloc: Track IOMMU pages
2025-11-25 7:54 ` Baolu Lu
@ 2025-11-25 10:03 ` Mostafa Saleh
0 siblings, 0 replies; 14+ messages in thread
From: Mostafa Saleh @ 2025-11-25 10:03 UTC (permalink / raw)
To: Baolu Lu
Cc: linux-mm, iommu, linux-kernel, linux-doc, corbet, joro, will,
robin.murphy, akpm, vbabka, surenb, mhocko, jackmanb, hannes,
ziy, david, lorenzo.stoakes, Liam.Howlett, rppt, xiaqinxin
On Tue, Nov 25, 2025 at 03:54:56PM +0800, Baolu Lu wrote:
> On 11/25/25 04:08, Mostafa Saleh wrote:
> > void __iommu_debug_unmap_end(struct iommu_domain *domain,
> > unsigned long iova, size_t size,
> > size_t unmapped)
> > {
> > + if (unmapped == size)
> > + return;
> > +
> > + /*
> > + * If unmap failed, re-increment the refcount, but if it unmapped
> > + * larger size, decrement the extra part.
> > + */
> > + if (unmapped < size)
> > + __iommu_debug_update_iova(domain, iova + unmapped,
> > + size - unmapped, true);
> > + else
> > + __iommu_debug_update_iova(domain, iova + size,
> > + unmapped - size, false);
>
> In any case, could the 'else' branch become a real operation?
>
> In the __iommu_unmap():
>
> /*
> * Keep iterating until we either unmap 'size' bytes (or more)
> * or we hit an area that isn't mapped.
> */
> while (unmapped < size) {
> size_t pgsize, count;
>
> pgsize = iommu_pgsize(domain, iova, iova, size - unmapped,
> &count);
> unmapped_page = ops->unmap_pages(domain, iova, pgsize,
> count, iotlb_gather);
> if (!unmapped_page)
> break;
>
> pr_debug("unmapped: iova 0x%lx size 0x%zx\n",
> iova, unmapped_page);
>
> iova += unmapped_page;
> unmapped += unmapped_page;
> }
>
> The comments say that it is possible to unmap more bytes than 'size',
> but isn't it a bug if this helper unmaps more than the caller desired?
I was wondering also why the core API allows that, I couldn't find
useful information from "git blame". But I vaguely remember that some
IOMMUs can't split blocks so they unmap the whole block when a smaller
size is requested.
Thanks,
Mostafa
>
> Thanks,
> baolu
^ permalink raw reply [flat|nested] 14+ messages in thread
* [PATCH v3 4/4] drivers/iommu-debug-pagealloc: Check mapped/unmapped kernel memory
2025-11-24 20:08 [PATCH v3 0/4] iommu: Add IOMMU_DEBUG_PAGEALLOC sanitizer Mostafa Saleh
` (2 preceding siblings ...)
2025-11-24 20:08 ` [PATCH v3 3/4] drivers/iommu-debug-pagealloc: Track IOMMU pages Mostafa Saleh
@ 2025-11-24 20:08 ` Mostafa Saleh
3 siblings, 0 replies; 14+ messages in thread
From: Mostafa Saleh @ 2025-11-24 20:08 UTC (permalink / raw)
To: linux-mm, iommu, linux-kernel, linux-doc
Cc: corbet, joro, will, robin.murphy, akpm, vbabka, surenb, mhocko,
jackmanb, hannes, ziy, david, lorenzo.stoakes, Liam.Howlett,
rppt, xiaqinxin, Mostafa Saleh
Now, as the page_ext holds count of IOMMU mappings, we can use it to
assert that any page allocated/freed is indeed not in the IOMMU.
The sanitizer doesn’t protect against mapping/unmapping during this
period. However, that’s less harmful as the page is not used by the
kernel.
Signed-off-by: Mostafa Saleh <smostafa@google.com>
---
drivers/iommu/iommu-debug-pagealloc.c | 23 +++++++++++++++++++++++
include/linux/iommu-debug-pagealloc.h | 12 ++++++++++++
include/linux/mm.h | 5 +++++
3 files changed, 40 insertions(+)
diff --git a/drivers/iommu/iommu-debug-pagealloc.c b/drivers/iommu/iommu-debug-pagealloc.c
index c420b006ef08..8af11630d0e3 100644
--- a/drivers/iommu/iommu-debug-pagealloc.c
+++ b/drivers/iommu/iommu-debug-pagealloc.c
@@ -9,6 +9,7 @@
#include <linux/iommu-debug-pagealloc.h>
#include <linux/kernel.h>
#include <linux/page_ext.h>
+#include <linux/page_owner.h>
static bool needed;
DEFINE_STATIC_KEY_FALSE(iommu_debug_initialized);
@@ -71,6 +72,28 @@ static size_t iommu_debug_page_size(struct iommu_domain *domain)
return 1UL << __ffs(domain->pgsize_bitmap);
}
+static bool iommu_debug_page_count(const struct page *page)
+{
+ unsigned int ref;
+ struct page_ext *page_ext = page_ext_get(page);
+ struct iommu_debug_metadata *d = get_iommu_data(page_ext);
+
+ ref = atomic_read(&d->ref);
+ page_ext_put(page_ext);
+ return ref != 0;
+}
+
+void __iommu_debug_check_unmapped(const struct page *page, int numpages)
+{
+ while (numpages--) {
+ if (WARN_ON(iommu_debug_page_count(page))) {
+ pr_warn("iommu: Detected page leak!\n");
+ dump_page_owner(page);
+ }
+ page++;
+ }
+}
+
void __iommu_debug_map(struct iommu_domain *domain, phys_addr_t phys, size_t size)
{
size_t off, end;
diff --git a/include/linux/iommu-debug-pagealloc.h b/include/linux/iommu-debug-pagealloc.h
index 454303ec09c2..87f593305465 100644
--- a/include/linux/iommu-debug-pagealloc.h
+++ b/include/linux/iommu-debug-pagealloc.h
@@ -22,6 +22,7 @@ void __iommu_debug_unmap_begin(struct iommu_domain *domain,
unsigned long iova, size_t size);
void __iommu_debug_unmap_end(struct iommu_domain *domain,
unsigned long iova, size_t size, size_t unmapped);
+void __iommu_debug_check_unmapped(const struct page *page, int numpages);
static inline void iommu_debug_map(struct iommu_domain *domain,
phys_addr_t phys, size_t size)
@@ -45,6 +46,12 @@ static inline void iommu_debug_unmap_end(struct iommu_domain *domain,
__iommu_debug_unmap_end(domain, iova, size, unmapped);
}
+static inline void iommu_debug_check_unmapped(const struct page *page, int numpages)
+{
+ if (static_branch_unlikely(&iommu_debug_initialized))
+ __iommu_debug_check_unmapped(page, numpages);
+}
+
void iommu_debug_init(void);
#else
@@ -68,6 +75,11 @@ static inline void iommu_debug_init(void)
{
}
+static inline void iommu_debug_check_unmapped(const struct page *page,
+ int numpages)
+{
+}
+
#endif /* CONFIG_IOMMU_DEBUG_PAGEALLOC */
#endif /* __LINUX_IOMMU_DEBUG_PAGEALLOC_H */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 7c79b3369b82..739cd3e168d7 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -36,6 +36,7 @@
#include <linux/rcuwait.h>
#include <linux/bitmap.h>
#include <linux/bitops.h>
+#include <linux/iommu-debug-pagealloc.h>
struct mempolicy;
struct anon_vma;
@@ -3818,12 +3819,16 @@ extern void __kernel_map_pages(struct page *page, int numpages, int enable);
#ifdef CONFIG_DEBUG_PAGEALLOC
static inline void debug_pagealloc_map_pages(struct page *page, int numpages)
{
+ iommu_debug_check_unmapped(page, numpages);
+
if (debug_pagealloc_enabled_static())
__kernel_map_pages(page, numpages, 1);
}
static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages)
{
+ iommu_debug_check_unmapped(page, numpages);
+
if (debug_pagealloc_enabled_static())
__kernel_map_pages(page, numpages, 0);
}
--
2.52.0.460.gd25c4c69ec-goog
^ permalink raw reply [flat|nested] 14+ messages in thread