From: Pasha Tatashin <pasha.tatashin@soleen.com>
To: akpm@linux-foundation.org, linux-mm@kvack.org,
pasha.tatashin@soleen.com, linux-kernel@vger.kernel.org,
rientjes@google.com, dwmw2@infradead.org,
baolu.lu@linux.intel.com, joro@8bytes.org, will@kernel.org,
robin.murphy@arm.com, iommu@lists.linux.dev
Subject: [RFC v2 2/3] iommu/intel: synchronize page table map and unmap operations
Date: Fri, 26 Apr 2024 03:43:22 +0000
Message-ID: <20240426034323.417219-3-pasha.tatashin@soleen.com>
In-Reply-To: <20240426034323.417219-1-pasha.tatashin@soleen.com>
Since we are going to update parent page table entries when lower
level page tables become empty and are added to the free list, we
need a way to synchronize these operations.

Use domain->pgd_lock to protect all map and unmap operations. This
is a reader/writer lock. For now every path takes it in read mode;
later, when freeing of empty page tables on unmap is added, a writer
section will be introduced as well.
Signed-off-by: Pasha Tatashin <pasha.tatashin@soleen.com>
---
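
Note (illustration only, not part of the patch): the sketch below shows
the locking scheme this series moves toward. Map/unmap walkers take
domain->pgd_lock for reading and can run concurrently; a follow-up
patch that frees empty page tables would take it for writing while it
rewrites parent entries. All names other than pgd_lock are placeholder
examples, not real driver functions.

	#include <linux/spinlock.h>

	struct example_domain {
		rwlock_t pgd_lock;	/* mirrors dmar_domain->pgd_lock */
		/* ... page-table root, etc. ... */
	};

	/*
	 * Map/unmap walkers only read parent entries while populating or
	 * clearing leaf PTEs, so they may run in parallel under the read
	 * lock.
	 */
	static void example_map_range(struct example_domain *d)
	{
		read_lock(&d->pgd_lock);
		/* walk and populate page tables */
		read_unlock(&d->pgd_lock);
	}

	/*
	 * Freeing an empty page table rewrites the parent entry, which
	 * must exclude all concurrent walkers, hence the write lock.
	 */
	static void example_free_empty_tables(struct example_domain *d)
	{
		write_lock(&d->pgd_lock);
		/* clear parent PTE, move the child table to a free list */
		write_unlock(&d->pgd_lock);
	}

In this patch only the read-side sections are introduced.
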
drivers/iommu/intel/iommu.c | 21 +++++++++++++++++++--
drivers/iommu/intel/iommu.h | 3 +++
2 files changed, 22 insertions(+), 2 deletions(-)
diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c
index 1bfb6eccad05..8c7e596728b5 100644
--- a/drivers/iommu/intel/iommu.c
+++ b/drivers/iommu/intel/iommu.c
@@ -995,11 +995,13 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain,
unsigned long last_pfn,
int retain_level)
{
+ read_lock(&domain->pgd_lock);
dma_pte_clear_range(domain, start_pfn, last_pfn);
/* We don't need lock here; nobody else touches the iova range */
dma_pte_free_level(domain, agaw_to_level(domain->agaw), retain_level,
domain->pgd, 0, start_pfn, last_pfn);
+ read_unlock(&domain->pgd_lock);
/* free pgd */
if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
@@ -1093,9 +1095,11 @@ static void domain_unmap(struct dmar_domain *domain, unsigned long start_pfn,
WARN_ON(start_pfn > last_pfn))
return;
+ read_lock(&domain->pgd_lock);
/* we don't need lock here; nobody else touches the iova range */
dma_pte_clear_level(domain, agaw_to_level(domain->agaw),
domain->pgd, 0, start_pfn, last_pfn, freelist);
+ read_unlock(&domain->pgd_lock);
/* free pgd */
if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) {
@@ -2088,6 +2092,7 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | attr;
+ read_lock(&domain->pgd_lock);
while (nr_pages > 0) {
uint64_t tmp;
@@ -2097,8 +2102,10 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl,
gfp);
- if (!pte)
+ if (!pte) {
+ read_unlock(&domain->pgd_lock);
return -ENOMEM;
+ }
first_pte = pte;
lvl_pages = lvl_to_nr_pages(largepage_lvl);
@@ -2158,6 +2165,7 @@ __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn,
pte = NULL;
}
}
+ read_unlock(&domain->pgd_lock);
return 0;
}
@@ -3829,6 +3837,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
domain->pgd = iommu_alloc_page_node(domain->nid, GFP_ATOMIC);
if (!domain->pgd)
return -ENOMEM;
+ rwlock_init(&domain->pgd_lock);
domain_flush_cache(domain, domain->pgd, PAGE_SIZE);
return 0;
}
@@ -4074,11 +4083,15 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain,
unsigned long start_pfn, last_pfn;
int level = 0;
+ read_lock(&dmar_domain->pgd_lock);
/* Cope with horrid API which requires us to unmap more than the
size argument if it happens to be a large-page mapping. */
if (unlikely(!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT,
- &level, GFP_ATOMIC)))
+ &level, GFP_ATOMIC))) {
+ read_unlock(&dmar_domain->pgd_lock);
return 0;
+ }
+ read_unlock(&dmar_domain->pgd_lock);
if (size < VTD_PAGE_SIZE << level_to_offset_bits(level))
size = VTD_PAGE_SIZE << level_to_offset_bits(level);
@@ -4145,8 +4158,10 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain,
int level = 0;
u64 phys = 0;
+ read_lock(&dmar_domain->pgd_lock);
pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level,
GFP_ATOMIC);
+ read_unlock(&dmar_domain->pgd_lock);
if (pte && dma_pte_present(pte))
phys = dma_pte_addr(pte) +
(iova & (BIT_MASK(level_to_offset_bits(level) +
@@ -4801,8 +4816,10 @@ static int intel_iommu_read_and_clear_dirty(struct iommu_domain *domain,
struct dma_pte *pte;
int lvl = 0;
+ read_lock(&dmar_domain->pgd_lock);
pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &lvl,
GFP_ATOMIC);
+ read_unlock(&dmar_domain->pgd_lock);
pgsize = level_size(lvl) << VTD_PAGE_SHIFT;
if (!pte || !dma_pte_present(pte)) {
iova += pgsize;
diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h
index e5c1eb23897f..2f38b087ea4f 100644
--- a/drivers/iommu/intel/iommu.h
+++ b/drivers/iommu/intel/iommu.h
@@ -615,6 +615,9 @@ struct dmar_domain {
struct {
/* virtual address */
struct dma_pte *pgd;
+
+ /* Synchronizes pgd map/unmap operations */
+ rwlock_t pgd_lock;
/* max guest address width */
int gaw;
/*
--
2.44.0.769.g3c40516874-goog
Thread overview: 10+ messages
2024-04-26 3:43 [RFC v2 0/3] iommu/intel: Free empty page tables on unmaps Pasha Tatashin
2024-04-26 3:43 ` [RFC v2 1/3] iommu/intel: Use page->_mapcount to count number of entries in IOMMU Pasha Tatashin
2024-04-26 3:43 ` Pasha Tatashin [this message]
2024-04-29 14:56 ` [RFC v2 2/3] iommu/intel: synchronize page table map and unmap operations Jason Gunthorpe
2024-04-26 3:43 ` [RFC v2 3/3] iommu/intel: free empty page tables on unmaps Pasha Tatashin
2024-04-26 6:42 ` [RFC v2 0/3] iommu/intel: Free " David Hildenbrand
2024-04-26 13:49 ` Pasha Tatashin
2024-04-26 14:39 ` David Hildenbrand
2024-04-26 19:39 ` Matthew Wilcox
2024-04-29 14:46 ` Jason Gunthorpe