Message ID: 20240207174102.1486130-2-pasha.tatashin@soleen.com
State: New
Series: IOMMU memory observability
On 2024-02-07 5:40 pm, Pasha Tatashin wrote: [...]> diff --git a/drivers/iommu/iommu-pages.h b/drivers/iommu/iommu-pages.h > new file mode 100644 > index 000000000000..c412d0aaa399 > --- /dev/null > +++ b/drivers/iommu/iommu-pages.h > @@ -0,0 +1,204 @@ > +/* SPDX-License-Identifier: GPL-2.0-only */ > +/* > + * Copyright (c) 2024, Google LLC. > + * Pasha Tatashin <pasha.tatashin@soleen.com> > + */ > + > +#ifndef __IOMMU_PAGES_H > +#define __IOMMU_PAGES_H > + > +#include <linux/vmstat.h> > +#include <linux/gfp.h> > +#include <linux/mm.h> > + > +/* > + * All page allocation that are performed in the IOMMU subsystem must use one of "All page allocations" is too broad; As before, this is only about pagetable allocations, or I guess for the full nuance, allocations of pagetables and other per-iommu_domain configuration structures which are reasonable to report as "pagetables" to userspace. > + * the functions below. This is necessary for the proper accounting as IOMMU > + * state can be rather large, i.e. multiple gigabytes in size. > + */ > + > +/** > + * __iommu_alloc_pages_node - allocate a zeroed page of a given order from > + * specific NUMA node. > + * @nid: memory NUMA node id > + * @gfp: buddy allocator flags > + * @order: page order > + * > + * returns the head struct page of the allocated page. > + */ > +static inline struct page *__iommu_alloc_pages_node(int nid, gfp_t gfp, > + int order) > +{ > + struct page *page; > + > + page = alloc_pages_node(nid, gfp | __GFP_ZERO, order); > + if (unlikely(!page)) > + return NULL; > + > + return page; > +} All 3 invocations of this only use the returned struct page to trivially derive page_address(), so we really don't need it; just clean up these callsites a bit more. > + > +/** > + * __iommu_alloc_pages - allocate a zeroed page of a given order. > + * @gfp: buddy allocator flags > + * @order: page order > + * > + * returns the head struct page of the allocated page. > + */ > +static inline struct page *__iommu_alloc_pages(gfp_t gfp, int order) > +{ > + struct page *page; > + > + page = alloc_pages(gfp | __GFP_ZERO, order); > + if (unlikely(!page)) > + return NULL; > + > + return page; > +} Same for the single invocation of this one. > + > +/** > + * __iommu_alloc_page_node - allocate a zeroed page at specific NUMA node. > + * @nid: memory NUMA node id > + * @gfp: buddy allocator flags > + * > + * returns the struct page of the allocated page. > + */ > +static inline struct page *__iommu_alloc_page_node(int nid, gfp_t gfp) > +{ > + return __iommu_alloc_pages_node(nid, gfp, 0); > +} There are no users of this at all. > + > +/** > + * __iommu_alloc_page - allocate a zeroed page > + * @gfp: buddy allocator flags > + * > + * returns the struct page of the allocated page. 
> + */ > +static inline struct page *__iommu_alloc_page(gfp_t gfp) > +{ > + return __iommu_alloc_pages(gfp, 0); > +} > + > +/** > + * __iommu_free_pages - free page of a given order > + * @page: head struct page of the page > + * @order: page order > + */ > +static inline void __iommu_free_pages(struct page *page, int order) > +{ > + if (!page) > + return; > + > + __free_pages(page, order); > +} > + > +/** > + * __iommu_free_page - free page > + * @page: struct page of the page > + */ > +static inline void __iommu_free_page(struct page *page) > +{ > + __iommu_free_pages(page, 0); > +} Beyond one more trivial Intel cleanup for __iommu_alloc_pages(), these 3 are then only used by tegra-smmu, so honestly I'd be inclined to just open-code there page_address()/virt_to_page() conversions as appropriate there (once again I think the whole thing could in fact be refactored to not use struct pages at all because all it's ever ultimately doing with them is page_address(), but that would be a bigger job so definitely out-of-scope for this series). > + > +/** > + * iommu_alloc_pages_node - allocate a zeroed page of a given order from > + * specific NUMA node. > + * @nid: memory NUMA node id > + * @gfp: buddy allocator flags > + * @order: page order > + * > + * returns the virtual address of the allocated page > + */ > +static inline void *iommu_alloc_pages_node(int nid, gfp_t gfp, int order) > +{ > + struct page *page = __iommu_alloc_pages_node(nid, gfp, order); > + > + if (unlikely(!page)) > + return NULL; As a general point I'd prefer to fold these checks into the accounting function itself rather than repeat them all over. > + > + return page_address(page); > +} > + > +/** > + * iommu_alloc_pages - allocate a zeroed page of a given order > + * @gfp: buddy allocator flags > + * @order: page order > + * > + * returns the virtual address of the allocated page > + */ > +static inline void *iommu_alloc_pages(gfp_t gfp, int order) > +{ > + struct page *page = __iommu_alloc_pages(gfp, order); > + > + if (unlikely(!page)) > + return NULL; > + > + return page_address(page); > +} > + > +/** > + * iommu_alloc_page_node - allocate a zeroed page at specific NUMA node. > + * @nid: memory NUMA node id > + * @gfp: buddy allocator flags > + * > + * returns the virtual address of the allocated page > + */ > +static inline void *iommu_alloc_page_node(int nid, gfp_t gfp) > +{ > + return iommu_alloc_pages_node(nid, gfp, 0); > +} TBH I'm not entirely convinced that saving 4 characters per invocation times 11 invocations makes this wrapper worthwhile :/ > + > +/** > + * iommu_alloc_page - allocate a zeroed page > + * @gfp: buddy allocator flags > + * > + * returns the virtual address of the allocated page > + */ > +static inline void *iommu_alloc_page(gfp_t gfp) > +{ > + return iommu_alloc_pages(gfp, 0); > +} > + > +/** > + * iommu_free_pages - free page of a given order > + * @virt: virtual address of the page to be freed. > + * @order: page order > + */ > +static inline void iommu_free_pages(void *virt, int order) > +{ > + if (!virt) > + return; > + > + __iommu_free_pages(virt_to_page(virt), order); > +} > + > +/** > + * iommu_free_page - free page > + * @virt: virtual address of the page to be freed. > + */ > +static inline void iommu_free_page(void *virt) > +{ > + iommu_free_pages(virt, 0); > +} > + > +/** > + * iommu_free_pages_list - free a list of pages. > + * @page: the head of the lru list to be freed. 
> + * > + * There are no locking requirement for these pages, as they are going to be > + * put on a free list as soon as refcount reaches 0. Pages are put on this LRU > + * list once they are removed from the IOMMU page tables. However, they can > + * still be access through debugfs. > + */ > +static inline void iommu_free_pages_list(struct list_head *page) Nit: I'd be inclined to call this iommu_put_pages_list for consistency. > +{ > + while (!list_empty(page)) { > + struct page *p = list_entry(page->prev, struct page, lru); > + > + list_del(&p->lru); > + put_page(p); > + } > +} I realise now you've also missed the common freelist freeing sites in iommu-dma. Thanks, Robin. > + > +#endif /* __IOMMU_PAGES_H */
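For illustration, here is a minimal sketch of the call-site cleanup being suggested above, in which a caller that only ever wanted a virtual address skips struct page entirely and uses the VA-returning wrapper from this patch (assuming drivers/iommu/iommu-pages.h is included); the wrapping function name is hypothetical and not taken from any driver:

static void *example_alloc_table(int nid, gfp_t gfp, int order)
{
	/*
	 * Previously such a caller would do:
	 *
	 *	struct page *page = __iommu_alloc_pages_node(nid, gfp, order);
	 *	if (!page)
	 *		return NULL;
	 *	return page_address(page);
	 *
	 * With the cleanup, the VA-returning helper does the conversion.
	 */
	return iommu_alloc_pages_node(nid, gfp, order);
}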
Hi Robin, Thank you for reviewing this. > > +#ifndef __IOMMU_PAGES_H > > +#define __IOMMU_PAGES_H > > + > > +#include <linux/vmstat.h> > > +#include <linux/gfp.h> > > +#include <linux/mm.h> > > + > > +/* > > + * All page allocation that are performed in the IOMMU subsystem must use one of > > "All page allocations" is too broad; As before, this is only about > pagetable allocations, or I guess for the full nuance, allocations of > pagetables and other per-iommu_domain configuration structures which are > reasonable to report as "pagetables" to userspace. I will update the comment. > > > + * the functions below. This is necessary for the proper accounting as IOMMU > > + * state can be rather large, i.e. multiple gigabytes in size. > > + */ > > + > > +/** > > + * __iommu_alloc_pages_node - allocate a zeroed page of a given order from > > + * specific NUMA node. > > + * @nid: memory NUMA node id > > + * @gfp: buddy allocator flags > > + * @order: page order > > + * > > + * returns the head struct page of the allocated page. > > + */ > > +static inline struct page *__iommu_alloc_pages_node(int nid, gfp_t gfp, > > + int order) > > +{ > > + struct page *page; > > + > > + page = alloc_pages_node(nid, gfp | __GFP_ZERO, order); > > + if (unlikely(!page)) > > + return NULL; > > + > > + return page; > > +} > > All 3 invocations of this only use the returned struct page to trivially > derive page_address(), so we really don't need it; just clean up these > callsites a bit more. I will remove this function, and update all invocations to use iommu_alloc_pages_node() directly. > > + * __iommu_alloc_pages - allocate a zeroed page of a given order. > > + * @gfp: buddy allocator flags > > + * @order: page order > > + * > > + * returns the head struct page of the allocated page. > > + */ > > +static inline struct page *__iommu_alloc_pages(gfp_t gfp, int order) > > +{ > > + struct page *page; > > + > > + page = alloc_pages(gfp | __GFP_ZERO, order); > > + if (unlikely(!page)) > > + return NULL; > > + > > + return page; > > +} > > Same for the single invocation of this one. I kept this function, but removed __iommu_alloc_page() that depends on it. This is because tegra-smmu needs a "struct page" allocator. > > > + > > +/** > > + * __iommu_alloc_page_node - allocate a zeroed page at specific NUMA node. > > + * @nid: memory NUMA node id > > + * @gfp: buddy allocator flags > > + * > > + * returns the struct page of the allocated page. > > + */ > > +static inline struct page *__iommu_alloc_page_node(int nid, gfp_t gfp) > > +{ > > + return __iommu_alloc_pages_node(nid, gfp, 0); > > +} > > There are no users of this at all. Yes, I added it just for completeness, I will remove it. > > + * __iommu_alloc_page - allocate a zeroed page > > + * @gfp: buddy allocator flags > > + * > > + * returns the struct page of the allocated page. 
> > + */ > > +static inline struct page *__iommu_alloc_page(gfp_t gfp) > > +{ > > + return __iommu_alloc_pages(gfp, 0); > > +} > > + > > +/** > > + * __iommu_free_pages - free page of a given order > > + * @page: head struct page of the page > > + * @order: page order > > + */ > > +static inline void __iommu_free_pages(struct page *page, int order) > > +{ > > + if (!page) > > + return; > > + > > + __free_pages(page, order); > > +} > > + > > +/** > > + * __iommu_free_page - free page > > + * @page: struct page of the page > > + */ > > +static inline void __iommu_free_page(struct page *page) > > +{ > > + __iommu_free_pages(page, 0); > > +} > > Beyond one more trivial Intel cleanup for __iommu_alloc_pages(), these 3 > are then only used by tegra-smmu, so honestly I'd be inclined to just > open-code there page_address()/virt_to_page() conversions as appropriate > there (once again I think the whole thing could in fact be refactored to > not use struct pages at all because all it's ever ultimately doing with > them is page_address(), but that would be a bigger job so definitely > out-of-scope for this series). I removed __iommu_free_page(), but kept __iommu_free_pages() variant. > > > + > > +/** > > + * iommu_alloc_pages_node - allocate a zeroed page of a given order from > > + * specific NUMA node. > > + * @nid: memory NUMA node id > > + * @gfp: buddy allocator flags > > + * @order: page order > > + * > > + * returns the virtual address of the allocated page > > + */ > > +static inline void *iommu_alloc_pages_node(int nid, gfp_t gfp, int order) > > +{ > > + struct page *page = __iommu_alloc_pages_node(nid, gfp, order); > > + > > + if (unlikely(!page)) > > + return NULL; > > As a general point I'd prefer to fold these checks into the accounting > function itself rather than repeat them all over. For the free functions this saves a few cycles by not repeating this check again inside __free_pages(), to keep things symmetrical it makes sense to keep __iomu_free_account and __iomu_alloc_account the same. With the other clean-up there are not that many of these checks left. > > + */ > > +static inline void *iommu_alloc_page_node(int nid, gfp_t gfp) > > +{ > > + return iommu_alloc_pages_node(nid, gfp, 0); > > +} > > TBH I'm not entirely convinced that saving 4 characters per invocation > times 11 invocations makes this wrapper worthwhile :/ Let's keep them. After the clean-up that you suggested, there are fewer functions left in this file, but I think that it is cleaner to keep these remaining, as it is beneficial to easily distinguish when exactly one page is allocated vs when multiple are allocated via code search. > > + * > > + * There are no locking requirement for these pages, as they are going to be > > + * put on a free list as soon as refcount reaches 0. Pages are put on this LRU > > + * list once they are removed from the IOMMU page tables. However, they can > > + * still be access through debugfs. > > + */ > > +static inline void iommu_free_pages_list(struct list_head *page) > > Nit: I'd be inclined to call this iommu_put_pages_list for consistency. I will rename it to iommu_put_pages_list(), indeed a better name. > > > +{ > > + while (!list_empty(page)) { > > + struct page *p = list_entry(page->prev, struct page, lru); > > + > > + list_del(&p->lru); > > + put_page(p); > > + } > > +} > > I realise now you've also missed the common freelist freeing sites in > iommu-dma. Ah yes, thank you for catching that. I will fix it. Pasha
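For illustration, a minimal sketch of why a struct-page-returning variant is still useful: a driver such as tegra-smmu tracks its page tables as struct page pointers rather than virtual addresses. The data structure and function names below are hypothetical and not taken from the tegra-smmu code:

struct example_as {
	struct page *pts[1024];		/* hypothetical per-domain page-table array */
};

static int example_install_pt(struct example_as *as, unsigned int pde)
{
	struct page *pt = __iommu_alloc_pages(GFP_KERNEL, 0);

	if (!pt)
		return -ENOMEM;

	as->pts[pde] = pt;
	return 0;
}

static void example_remove_pt(struct example_as *as, unsigned int pde)
{
	__iommu_free_pages(as->pts[pde], 0);
	as->pts[pde] = NULL;
}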
On 10/02/2024 2:21 am, Pasha Tatashin wrote:
[...]
>>> +/**
>>> + * iommu_alloc_pages_node - allocate a zeroed page of a given order from
>>> + * specific NUMA node.
>>> + * @nid: memory NUMA node id
>>> + * @gfp: buddy allocator flags
>>> + * @order: page order
>>> + *
>>> + * returns the virtual address of the allocated page
>>> + */
>>> +static inline void *iommu_alloc_pages_node(int nid, gfp_t gfp, int order)
>>> +{
>>> +	struct page *page = __iommu_alloc_pages_node(nid, gfp, order);
>>> +
>>> +	if (unlikely(!page))
>>> +		return NULL;
>>
>> As a general point I'd prefer to fold these checks into the accounting
>> function itself rather than repeat them all over.
>
> For the free functions this saves a few cycles by not repeating this
> check again inside __free_pages(), to keep things symmetrical it makes
> sense to keep __iomu_free_account and __iomu_alloc_account the same.
> With the other clean-up there are not that many of these checks left.

__free_pages() doesn't accept NULL, so __iommu_free_pages() shouldn't need a
check; free_pages() does, but correspondingly iommu_free_pages() needs its own
check up-front to avoid virt_to_page(NULL); either way it means there are no
callers of iommu_free_account() who should be passing NULL.

The VA-returning allocators of course need to avoid page_address(NULL), so I
clearly made this comment in the wrong place to begin with, oops. In the end I
guess that will leave __iommu_alloc_pages() as the only caller of
iommu_alloc_account() who doesn't already need to handle their own NULL.

OK, I'm convinced, apologies for having to bounce it off you to work it
through :)

>>> + */
>>> +static inline void *iommu_alloc_page_node(int nid, gfp_t gfp)
>>> +{
>>> +	return iommu_alloc_pages_node(nid, gfp, 0);
>>> +}
>>
>> TBH I'm not entirely convinced that saving 4 characters per invocation
>> times 11 invocations makes this wrapper worthwhile :/
>
> Let's keep them. After the clean-up that you suggested, there are
> fewer functions left in this file, but I think that it is cleaner to
> keep these remaining, as it is beneficial to easily distinguish when
> exactly one page is allocated vs when multiple are allocated via code
> search.

But is it, really? It's not at all obvious to me *why* it would be
significantly interesting to distinguish fixed order-0 allocations from
higher-order or variable-order (which may still be 0) ones. After all, there's
no regular alloc_page_node() wrapper, yet plenty more callers of
alloc_pages_node(..., 0) :/

Thanks,
Robin.
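For illustration, a rough sketch of what folding the NULL check into the accounting helper could look like. The accounting side (iommu_alloc_account() and an NR_IOMMU_PAGES node counter) belongs to a later patch in this series, so the names and the exact bookkeeping below are assumptions rather than the actual implementation:

static inline void iommu_alloc_account(struct page *page, int order)
{
	/* Tolerate NULL so callers don't have to check before accounting. */
	if (unlikely(!page))
		return;

	/* NR_IOMMU_PAGES is assumed here; the real counter is added later. */
	mod_node_page_state(page_pgdat(page), NR_IOMMU_PAGES, 1UL << order);
}

static inline struct page *__iommu_alloc_pages(gfp_t gfp, int order)
{
	struct page *page = alloc_pages(gfp | __GFP_ZERO, order);

	iommu_alloc_account(page, order);
	return page;
}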
> >>> + */
> >>> +static inline void *iommu_alloc_page_node(int nid, gfp_t gfp)
> >>> +{
> >>> +	return iommu_alloc_pages_node(nid, gfp, 0);
> >>> +}
> >>
> >> TBH I'm not entirely convinced that saving 4 characters per invocation
> >> times 11 invocations makes this wrapper worthwhile :/
> >
> > Let's keep them. After the clean-up that you suggested, there are
> > fewer functions left in this file, but I think that it is cleaner to
> > keep these remaining, as it is beneficial to easily distinguish when
> > exactly one page is allocated vs when multiple are allocated via code
> > search.
>
> But is it, really? It's not at all obvious to me *why* it would be
> significantly interesting to distinguish fixed order-0 allocations from
> higher-order or variable-order (which may still be 0) ones. After all,
> there's no regular alloc_page_node() wrapper, yet plenty more callers of
> alloc_pages_node(..., 0) :/

The pages that are allocated with order > 0 cannot be freed using
iommu_put_pages_list() without messing up refcounts in the tail pages. I think
having a dedicated function that guarantees an order = 0 page allocation makes
it easier for the reviewer to follow the code, and ensures that only these
pages are put on the freelist. Even in the existing code, the order=0
allocation is wrapped in the *alloc_pgtable_page() function.

Pasha

>
> Thanks,
> Robin.
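For illustration, a short sketch of the distinction being argued for here, using this patch's helpers plus the iommu_put_pages_list() rename agreed above; the calling functions are hypothetical:

/* Order-0 page tables may be queued on a freelist for deferred freeing. */
static void example_defer_free_pte(void *pte_page, struct list_head *freelist)
{
	/* put_page() in iommu_put_pages_list() is only correct for order 0. */
	list_add_tail(&virt_to_page(pte_page)->lru, freelist);
}

/* Higher-order allocations must bypass the list and be freed directly. */
static void example_free_queue(void *queue, int order)
{
	iommu_free_pages(queue, order);
}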
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 23cb80d62a9a..f72b1e4334b1 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -32,6 +32,7 @@ #include "iommu.h" #include "../irq_remapping.h" +#include "../iommu-pages.h" #include "perf.h" #include "trace.h" #include "perfmon.h" @@ -1185,7 +1186,7 @@ static void free_iommu(struct intel_iommu *iommu) } if (iommu->qi) { - free_page((unsigned long)iommu->qi->desc); + iommu_free_page(iommu->qi->desc); kfree(iommu->qi->desc_status); kfree(iommu->qi); } @@ -1732,6 +1733,7 @@ int dmar_enable_qi(struct intel_iommu *iommu) { struct q_inval *qi; struct page *desc_page; + int order; if (!ecap_qis(iommu->ecap)) return -ENOENT; @@ -1752,8 +1754,8 @@ int dmar_enable_qi(struct intel_iommu *iommu) * Need two pages to accommodate 256 descriptors of 256 bits each * if the remapping hardware supports scalable mode translation. */ - desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, - !!ecap_smts(iommu->ecap)); + order = ecap_smts(iommu->ecap) ? 1 : 0; + desc_page = __iommu_alloc_pages_node(iommu->node, GFP_ATOMIC, order); if (!desc_page) { kfree(qi); iommu->qi = NULL; @@ -1764,7 +1766,7 @@ int dmar_enable_qi(struct intel_iommu *iommu) qi->desc_status = kcalloc(QI_LENGTH, sizeof(int), GFP_ATOMIC); if (!qi->desc_status) { - free_page((unsigned long) qi->desc); + iommu_free_page(qi->desc); kfree(qi); iommu->qi = NULL; return -ENOMEM; diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 6fb5f6fceea1..a9cfb41a6a94 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -28,6 +28,7 @@ #include "../dma-iommu.h" #include "../irq_remapping.h" #include "../iommu-sva.h" +#include "../iommu-pages.h" #include "pasid.h" #include "cap_audit.h" #include "perfmon.h" @@ -224,22 +225,6 @@ static int __init intel_iommu_setup(char *str) } __setup("intel_iommu=", intel_iommu_setup); -void *alloc_pgtable_page(int node, gfp_t gfp) -{ - struct page *page; - void *vaddr = NULL; - - page = alloc_pages_node(node, gfp | __GFP_ZERO, 0); - if (page) - vaddr = page_address(page); - return vaddr; -} - -void free_pgtable_page(void *vaddr) -{ - free_page((unsigned long)vaddr); -} - static int domain_type_is_si(struct dmar_domain *domain) { return domain->domain.type == IOMMU_DOMAIN_IDENTITY; @@ -473,7 +458,7 @@ struct context_entry *iommu_context_addr(struct intel_iommu *iommu, u8 bus, if (!alloc) return NULL; - context = alloc_pgtable_page(iommu->node, GFP_ATOMIC); + context = iommu_alloc_page_node(iommu->node, GFP_ATOMIC); if (!context) return NULL; @@ -647,17 +632,17 @@ static void free_context_table(struct intel_iommu *iommu) for (i = 0; i < ROOT_ENTRY_NR; i++) { context = iommu_context_addr(iommu, i, 0, 0); if (context) - free_pgtable_page(context); + iommu_free_page(context); if (!sm_supported(iommu)) continue; context = iommu_context_addr(iommu, i, 0x80, 0); if (context) - free_pgtable_page(context); + iommu_free_page(context); } - free_pgtable_page(iommu->root_entry); + iommu_free_page(iommu->root_entry); iommu->root_entry = NULL; } @@ -795,7 +780,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, if (!dma_pte_present(pte)) { uint64_t pteval; - tmp_page = alloc_pgtable_page(domain->nid, gfp); + tmp_page = iommu_alloc_page_node(domain->nid, gfp); if (!tmp_page) return NULL; @@ -807,7 +792,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, if (cmpxchg64(&pte->val, 0ULL, pteval)) /* Someone else set it while we were thinking; use theirs. 
*/ - free_pgtable_page(tmp_page); + iommu_free_page(tmp_page); else domain_flush_cache(domain, pte, sizeof(*pte)); } @@ -920,7 +905,7 @@ static void dma_pte_free_level(struct dmar_domain *domain, int level, last_pfn < level_pfn + level_size(level) - 1)) { dma_clear_pte(pte); domain_flush_cache(domain, pte, sizeof(*pte)); - free_pgtable_page(level_pte); + iommu_free_page(level_pte); } next: pfn += level_size(level); @@ -944,7 +929,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, /* free pgd */ if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) { - free_pgtable_page(domain->pgd); + iommu_free_page(domain->pgd); domain->pgd = NULL; } } @@ -1046,7 +1031,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu) { struct root_entry *root; - root = alloc_pgtable_page(iommu->node, GFP_ATOMIC); + root = iommu_alloc_page_node(iommu->node, GFP_ATOMIC); if (!root) { pr_err("Allocating root entry for %s failed\n", iommu->name); @@ -1718,7 +1703,7 @@ static void domain_exit(struct dmar_domain *domain) LIST_HEAD(freelist); domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw), &freelist); - put_pages_list(&freelist); + iommu_free_pages_list(&freelist); } if (WARN_ON(!list_empty(&domain->devices))) @@ -2452,7 +2437,7 @@ static int copy_context_table(struct intel_iommu *iommu, if (!old_ce) goto out; - new_ce = alloc_pgtable_page(iommu->node, GFP_KERNEL); + new_ce = iommu_alloc_page_node(iommu->node, GFP_KERNEL); if (!new_ce) goto out_unmap; @@ -3385,7 +3370,7 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb, start_vpfn, mhp->nr_pages, list_empty(&freelist), 0); rcu_read_unlock(); - put_pages_list(&freelist); + iommu_free_pages_list(&freelist); } break; } @@ -3816,7 +3801,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) domain->max_addr = 0; /* always allocate the top pgd */ - domain->pgd = alloc_pgtable_page(domain->nid, GFP_ATOMIC); + domain->pgd = iommu_alloc_page_node(domain->nid, GFP_ATOMIC); if (!domain->pgd) return -ENOMEM; domain_flush_cache(domain, domain->pgd, PAGE_SIZE); @@ -3960,7 +3945,7 @@ int prepare_domain_attach_device(struct iommu_domain *domain, pte = dmar_domain->pgd; if (dma_pte_present(pte)) { dmar_domain->pgd = phys_to_virt(dma_pte_addr(pte)); - free_pgtable_page(pte); + iommu_free_page(pte); } dmar_domain->agaw--; } @@ -4107,7 +4092,7 @@ static void intel_iommu_tlb_sync(struct iommu_domain *domain, start_pfn, nrpages, list_empty(&gather->freelist), 0); - put_pages_list(&gather->freelist); + iommu_free_pages_list(&gather->freelist); } static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index d02f916d8e59..9fe04cea29c4 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -1069,8 +1069,6 @@ void domain_update_iommu_cap(struct dmar_domain *domain); int dmar_ir_support(void); -void *alloc_pgtable_page(int node, gfp_t gfp); -void free_pgtable_page(void *vaddr); void iommu_flush_write_buffer(struct intel_iommu *iommu); struct iommu_domain *intel_nested_domain_alloc(struct iommu_domain *parent, const struct iommu_user_data *user_data); diff --git a/drivers/iommu/intel/irq_remapping.c b/drivers/iommu/intel/irq_remapping.c index 566297bc87dd..4af73e87275a 100644 --- a/drivers/iommu/intel/irq_remapping.c +++ b/drivers/iommu/intel/irq_remapping.c @@ -22,6 +22,7 @@ #include "iommu.h" #include "../irq_remapping.h" +#include "../iommu-pages.h" #include "cap_audit.h" enum irq_mode { @@ -536,8 +537,8 @@ 
static int intel_setup_irq_remapping(struct intel_iommu *iommu) if (!ir_table) return -ENOMEM; - pages = alloc_pages_node(iommu->node, GFP_KERNEL | __GFP_ZERO, - INTR_REMAP_PAGE_ORDER); + pages = __iommu_alloc_pages_node(iommu->node, GFP_KERNEL, + INTR_REMAP_PAGE_ORDER); if (!pages) { pr_err("IR%d: failed to allocate pages of order %d\n", iommu->seq_id, INTR_REMAP_PAGE_ORDER); @@ -622,7 +623,7 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) out_free_bitmap: bitmap_free(bitmap); out_free_pages: - __free_pages(pages, INTR_REMAP_PAGE_ORDER); + __iommu_free_pages(pages, INTR_REMAP_PAGE_ORDER); out_free_table: kfree(ir_table); @@ -643,8 +644,7 @@ static void intel_teardown_irq_remapping(struct intel_iommu *iommu) irq_domain_free_fwnode(fn); iommu->ir_domain = NULL; } - free_pages((unsigned long)iommu->ir_table->base, - INTR_REMAP_PAGE_ORDER); + iommu_free_pages(iommu->ir_table->base, INTR_REMAP_PAGE_ORDER); bitmap_free(iommu->ir_table->bitmap); kfree(iommu->ir_table); iommu->ir_table = NULL; diff --git a/drivers/iommu/intel/pasid.c b/drivers/iommu/intel/pasid.c index 3239cefa4c33..b6ecff24bafa 100644 --- a/drivers/iommu/intel/pasid.c +++ b/drivers/iommu/intel/pasid.c @@ -20,6 +20,7 @@ #include "iommu.h" #include "pasid.h" +#include "../iommu-pages.h" /* * Intel IOMMU system wide PASID name space: @@ -59,8 +60,7 @@ int intel_pasid_alloc_table(struct device *dev) size = max_pasid >> (PASID_PDE_SHIFT - 3); order = size ? get_order(size) : 0; - pages = alloc_pages_node(info->iommu->node, - GFP_KERNEL | __GFP_ZERO, order); + pages = __iommu_alloc_pages_node(info->iommu->node, GFP_KERNEL, order); if (!pages) { kfree(pasid_table); return -ENOMEM; @@ -97,10 +97,10 @@ void intel_pasid_free_table(struct device *dev) max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT; for (i = 0; i < max_pde; i++) { table = get_pasid_table_from_pde(&dir[i]); - free_pgtable_page(table); + iommu_free_page(table); } - free_pages((unsigned long)pasid_table->table, pasid_table->order); + iommu_free_pages(pasid_table->table, pasid_table->order); kfree(pasid_table); } @@ -146,7 +146,7 @@ static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid) retry: entries = get_pasid_table_from_pde(&dir[dir_index]); if (!entries) { - entries = alloc_pgtable_page(info->iommu->node, GFP_ATOMIC); + entries = iommu_alloc_page_node(info->iommu->node, GFP_ATOMIC); if (!entries) return NULL; @@ -158,7 +158,7 @@ static struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid) */ if (cmpxchg64(&dir[dir_index].val, 0ULL, (u64)virt_to_phys(entries) | PASID_PTE_PRESENT)) { - free_pgtable_page(entries); + iommu_free_page(entries); goto retry; } if (!ecap_coherent(info->iommu->ecap)) { diff --git a/drivers/iommu/intel/svm.c b/drivers/iommu/intel/svm.c index 40edd282903f..ed3993657283 100644 --- a/drivers/iommu/intel/svm.c +++ b/drivers/iommu/intel/svm.c @@ -23,6 +23,7 @@ #include "pasid.h" #include "perf.h" #include "../iommu-sva.h" +#include "../iommu-pages.h" #include "trace.h" static irqreturn_t prq_event_thread(int irq, void *d); @@ -67,7 +68,7 @@ int intel_svm_enable_prq(struct intel_iommu *iommu) struct page *pages; int irq, ret; - pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER); + pages = __iommu_alloc_pages(GFP_KERNEL, PRQ_ORDER); if (!pages) { pr_warn("IOMMU: %s: Failed to allocate page request queue\n", iommu->name); @@ -118,7 +119,7 @@ int intel_svm_enable_prq(struct intel_iommu *iommu) dmar_free_hwirq(irq); iommu->pr_irq = 0; free_prq: - free_pages((unsigned long)iommu->prq, 
PRQ_ORDER); + iommu_free_pages(iommu->prq, PRQ_ORDER); iommu->prq = NULL; return ret; @@ -141,7 +142,7 @@ int intel_svm_finish_prq(struct intel_iommu *iommu) iommu->iopf_queue = NULL; } - free_pages((unsigned long)iommu->prq, PRQ_ORDER); + iommu_free_pages(iommu->prq, PRQ_ORDER); iommu->prq = NULL; return 0; diff --git a/drivers/iommu/iommu-pages.h b/drivers/iommu/iommu-pages.h new file mode 100644 index 000000000000..c412d0aaa399 --- /dev/null +++ b/drivers/iommu/iommu-pages.h @@ -0,0 +1,204 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (c) 2024, Google LLC. + * Pasha Tatashin <pasha.tatashin@soleen.com> + */ + +#ifndef __IOMMU_PAGES_H +#define __IOMMU_PAGES_H + +#include <linux/vmstat.h> +#include <linux/gfp.h> +#include <linux/mm.h> + +/* + * All page allocation that are performed in the IOMMU subsystem must use one of + * the functions below. This is necessary for the proper accounting as IOMMU + * state can be rather large, i.e. multiple gigabytes in size. + */ + +/** + * __iommu_alloc_pages_node - allocate a zeroed page of a given order from + * specific NUMA node. + * @nid: memory NUMA node id + * @gfp: buddy allocator flags + * @order: page order + * + * returns the head struct page of the allocated page. + */ +static inline struct page *__iommu_alloc_pages_node(int nid, gfp_t gfp, + int order) +{ + struct page *page; + + page = alloc_pages_node(nid, gfp | __GFP_ZERO, order); + if (unlikely(!page)) + return NULL; + + return page; +} + +/** + * __iommu_alloc_pages - allocate a zeroed page of a given order. + * @gfp: buddy allocator flags + * @order: page order + * + * returns the head struct page of the allocated page. + */ +static inline struct page *__iommu_alloc_pages(gfp_t gfp, int order) +{ + struct page *page; + + page = alloc_pages(gfp | __GFP_ZERO, order); + if (unlikely(!page)) + return NULL; + + return page; +} + +/** + * __iommu_alloc_page_node - allocate a zeroed page at specific NUMA node. + * @nid: memory NUMA node id + * @gfp: buddy allocator flags + * + * returns the struct page of the allocated page. + */ +static inline struct page *__iommu_alloc_page_node(int nid, gfp_t gfp) +{ + return __iommu_alloc_pages_node(nid, gfp, 0); +} + +/** + * __iommu_alloc_page - allocate a zeroed page + * @gfp: buddy allocator flags + * + * returns the struct page of the allocated page. + */ +static inline struct page *__iommu_alloc_page(gfp_t gfp) +{ + return __iommu_alloc_pages(gfp, 0); +} + +/** + * __iommu_free_pages - free page of a given order + * @page: head struct page of the page + * @order: page order + */ +static inline void __iommu_free_pages(struct page *page, int order) +{ + if (!page) + return; + + __free_pages(page, order); +} + +/** + * __iommu_free_page - free page + * @page: struct page of the page + */ +static inline void __iommu_free_page(struct page *page) +{ + __iommu_free_pages(page, 0); +} + +/** + * iommu_alloc_pages_node - allocate a zeroed page of a given order from + * specific NUMA node. 
+ * @nid: memory NUMA node id + * @gfp: buddy allocator flags + * @order: page order + * + * returns the virtual address of the allocated page + */ +static inline void *iommu_alloc_pages_node(int nid, gfp_t gfp, int order) +{ + struct page *page = __iommu_alloc_pages_node(nid, gfp, order); + + if (unlikely(!page)) + return NULL; + + return page_address(page); +} + +/** + * iommu_alloc_pages - allocate a zeroed page of a given order + * @gfp: buddy allocator flags + * @order: page order + * + * returns the virtual address of the allocated page + */ +static inline void *iommu_alloc_pages(gfp_t gfp, int order) +{ + struct page *page = __iommu_alloc_pages(gfp, order); + + if (unlikely(!page)) + return NULL; + + return page_address(page); +} + +/** + * iommu_alloc_page_node - allocate a zeroed page at specific NUMA node. + * @nid: memory NUMA node id + * @gfp: buddy allocator flags + * + * returns the virtual address of the allocated page + */ +static inline void *iommu_alloc_page_node(int nid, gfp_t gfp) +{ + return iommu_alloc_pages_node(nid, gfp, 0); +} + +/** + * iommu_alloc_page - allocate a zeroed page + * @gfp: buddy allocator flags + * + * returns the virtual address of the allocated page + */ +static inline void *iommu_alloc_page(gfp_t gfp) +{ + return iommu_alloc_pages(gfp, 0); +} + +/** + * iommu_free_pages - free page of a given order + * @virt: virtual address of the page to be freed. + * @order: page order + */ +static inline void iommu_free_pages(void *virt, int order) +{ + if (!virt) + return; + + __iommu_free_pages(virt_to_page(virt), order); +} + +/** + * iommu_free_page - free page + * @virt: virtual address of the page to be freed. + */ +static inline void iommu_free_page(void *virt) +{ + iommu_free_pages(virt, 0); +} + +/** + * iommu_free_pages_list - free a list of pages. + * @page: the head of the lru list to be freed. + * + * There are no locking requirement for these pages, as they are going to be + * put on a free list as soon as refcount reaches 0. Pages are put on this LRU + * list once they are removed from the IOMMU page tables. However, they can + * still be access through debugfs. + */ +static inline void iommu_free_pages_list(struct list_head *page) +{ + while (!list_empty(page)) { + struct page *p = list_entry(page->prev, struct page, lru); + + list_del(&p->lru); + put_page(p); + } +} + +#endif /* __IOMMU_PAGES_H */
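As a condensed recap of the call-site pattern this patch introduces (modelled on the cmpxchg-based hunks above in pfn_to_dma_pte() and intel_pasid_get_entry(), but simplified into a standalone sketch with hypothetical names and no flag bits):

static void *example_install_table(int nid, u64 *slot, gfp_t gfp)
{
	void *new_table = iommu_alloc_page_node(nid, gfp);

	if (!new_table)
		return NULL;

	/* Another CPU raced and installed a table first: free ours, use theirs. */
	if (cmpxchg64(slot, 0ULL, (u64)virt_to_phys(new_table)))
		iommu_free_page(new_table);

	return phys_to_virt(*slot);
}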