diff mbox series

[18/19] iommu: Update various drivers to pass in lg2sz instead of order to iommu pages

Message ID 18-v1-416f64558c7c+2a5-iommu_pages_jgg@nvidia.com
State New
Headers show
Series iommu: Further abstract iommu-pages | expand

Commit Message

Jason Gunthorpe Feb. 4, 2025, 6:34 p.m. UTC
Convert most of the places that pass get_order() as an argument to the
iommu-pages allocator into order_base_2() or the _sz flavour
instead. These places already have an exact size; there is no particular
reason to use order here.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/iommu/amd/init.c        | 29 ++++++++++++++---------------
 drivers/iommu/intel/dmar.c      |  7 ++++---
 drivers/iommu/io-pgtable-arm.c  |  4 ++--
 drivers/iommu/io-pgtable-dart.c | 12 +++---------
 drivers/iommu/sun50i-iommu.c    |  4 ++--
 5 files changed, 25 insertions(+), 31 deletions(-)

Comments

Robin Murphy Feb. 5, 2025, 6:03 p.m. UTC | #1
On 2025-02-05 4:10 pm, Jason Gunthorpe wrote:
> On Wed, Feb 05, 2025 at 03:47:03PM +0000, Robin Murphy wrote:
>> On 2025-02-04 6:34 pm, Jason Gunthorpe wrote:
>>> Convert most of the places that pass get_order() as an argument to the
>>> iommu-pages allocator into order_base_2() or the _sz flavour
>>> instead. These places already have an exact size; there is no particular
>>> reason to use order here.
>>>
>>> Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
>>> ---
>> [...]
>>> @@ -826,7 +825,7 @@ void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, gfp_t gfp,
>>>    				  size_t size)
>>>    {
>>>    	int order = get_order(size);
>>> -	void *buf = iommu_alloc_pages(gfp, order);
>>> +	void *buf = iommu_alloc_pages_lg2(gfp, order + PAGE_SHIFT);
>>
>> This is a size, really - it's right there above.
> 
> I didn't want to make major surgery to this thing, but yes it
> could be:
> 
> void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, gfp_t gfp,
> 				  size_t size)
> {
> 	void *buf;
> 
> 	size = PAGE_ALIGN(size);
> 	buf = iommu_alloc_pages_sz(gfp, size);
> 	if (buf &&
> 	    check_feature(FEATURE_SNP) &&
> 	    set_memory_4k((unsigned long)buf, size / PAGE_SIZE)) {
> 		iommu_free_page(buf);
> 		buf = NULL;
> 	}
> 
> 	return buf;
> }
> 
>> (although alloc_cwwb_sem() passing 1 looks highly suspicious - judging by
>> other cmd_sem references that probably should be PAGE_SIZE...)
> 
> Indeed, amd folks?
> 
>>>    	if (buf &&
>>>    	    check_feature(FEATURE_SNP) &&
>> [...]
>>> @@ -1702,8 +1701,10 @@ int dmar_enable_qi(struct intel_iommu *iommu)
>>>    	 * Need two pages to accommodate 256 descriptors of 256 bits each
>>>    	 * if the remapping hardware supports scalable mode translation.
>>>    	 */
>>> -	order = ecap_smts(iommu->ecap) ? 1 : 0;
>>> -	desc = iommu_alloc_pages_node(iommu->node, GFP_ATOMIC, order);
>>> +	desc = iommu_alloc_pages_node_lg2(iommu->node, GFP_ATOMIC,
>>> +					  ecap_smts(iommu->ecap) ?
>>> +						  order_base_2(SZ_8K) :
>>> +						  order_base_2(SZ_4K));
>>
>> These are also clearly sizes.
> 
> I didn't make a size wrapper version of the _node_ variation because
> there are only three callers.
> 
>> I don't see any need to have the log2 stuff at all, I think we just
>> switch iommu_alloc_pages{_node}() to take a size and keep things
>> simple.
> 
> Ok it is easy to remove lg2 calls from the drivers, but I would keep
> the internal function like this because most of the size callers have
> constants and the order_base_2() will become a constexpr when
> inlined. Only a few places are not like that.

But what's the benefit of having extra stuff capable of turning a 
constant into a different constant that doesn't represent anything we 
actually want? We still end up doing more runtime arithmetic on lg2sz 
within the allocation function itself to turn it into the order we 
ultimately still need, so that arithmetic could just as well be 
get_order(size) and have nothing to inline at all (other than NUMA_NO_NODE).

Thanks,
Robin.
diff mbox series

Patch

diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c
index 7d77929bc63af3..70c038a80874bf 100644
--- a/drivers/iommu/amd/init.c
+++ b/drivers/iommu/amd/init.c
@@ -640,8 +640,8 @@  static int __init find_last_devid_acpi(struct acpi_table_header *table, u16 pci_
 /* Allocate per PCI segment device table */
 static inline int __init alloc_dev_table(struct amd_iommu_pci_seg *pci_seg)
 {
-	pci_seg->dev_table = iommu_alloc_pages(GFP_KERNEL | GFP_DMA32,
-					       get_order(pci_seg->dev_table_size));
+	pci_seg->dev_table = iommu_alloc_pages_sz(GFP_KERNEL | GFP_DMA32,
+						  pci_seg->dev_table_size);
 	if (!pci_seg->dev_table)
 		return -ENOMEM;
 
@@ -657,8 +657,8 @@  static inline void free_dev_table(struct amd_iommu_pci_seg *pci_seg)
 /* Allocate per PCI segment IOMMU rlookup table. */
 static inline int __init alloc_rlookup_table(struct amd_iommu_pci_seg *pci_seg)
 {
-	pci_seg->rlookup_table = iommu_alloc_pages(GFP_KERNEL,
-						   get_order(pci_seg->rlookup_table_size));
+	pci_seg->rlookup_table =
+		iommu_alloc_pages_sz(GFP_KERNEL, pci_seg->rlookup_table_size);
 	if (pci_seg->rlookup_table == NULL)
 		return -ENOMEM;
 
@@ -673,8 +673,8 @@  static inline void free_rlookup_table(struct amd_iommu_pci_seg *pci_seg)
 
 static inline int __init alloc_irq_lookup_table(struct amd_iommu_pci_seg *pci_seg)
 {
-	pci_seg->irq_lookup_table = iommu_alloc_pages(GFP_KERNEL,
-						      get_order(pci_seg->rlookup_table_size));
+	pci_seg->irq_lookup_table =
+		iommu_alloc_pages_sz(GFP_KERNEL, pci_seg->rlookup_table_size);
 	kmemleak_alloc(pci_seg->irq_lookup_table,
 		       pci_seg->rlookup_table_size, 1, GFP_KERNEL);
 	if (pci_seg->irq_lookup_table == NULL)
@@ -694,8 +694,8 @@  static int __init alloc_alias_table(struct amd_iommu_pci_seg *pci_seg)
 {
 	int i;
 
-	pci_seg->alias_table = iommu_alloc_pages(GFP_KERNEL,
-						 get_order(pci_seg->alias_table_size));
+	pci_seg->alias_table =
+		iommu_alloc_pages_sz(GFP_KERNEL, pci_seg->alias_table_size);
 	if (!pci_seg->alias_table)
 		return -ENOMEM;
 
@@ -721,8 +721,7 @@  static void __init free_alias_table(struct amd_iommu_pci_seg *pci_seg)
  */
 static int __init alloc_command_buffer(struct amd_iommu *iommu)
 {
-	iommu->cmd_buf = iommu_alloc_pages(GFP_KERNEL,
-					   get_order(CMD_BUFFER_SIZE));
+	iommu->cmd_buf = iommu_alloc_pages_sz(GFP_KERNEL, CMD_BUFFER_SIZE);
 
 	return iommu->cmd_buf ? 0 : -ENOMEM;
 }
@@ -826,7 +825,7 @@  void *__init iommu_alloc_4k_pages(struct amd_iommu *iommu, gfp_t gfp,
 				  size_t size)
 {
 	int order = get_order(size);
-	void *buf = iommu_alloc_pages(gfp, order);
+	void *buf = iommu_alloc_pages_lg2(gfp, order + PAGE_SHIFT);
 
 	if (buf &&
 	    check_feature(FEATURE_SNP) &&
@@ -927,11 +926,11 @@  static int iommu_init_ga_log(struct amd_iommu *iommu)
 	if (!AMD_IOMMU_GUEST_IR_VAPIC(amd_iommu_guest_ir))
 		return 0;
 
-	iommu->ga_log = iommu_alloc_pages(GFP_KERNEL, get_order(GA_LOG_SIZE));
+	iommu->ga_log = iommu_alloc_pages_sz(GFP_KERNEL, GA_LOG_SIZE);
 	if (!iommu->ga_log)
 		goto err_out;
 
-	iommu->ga_log_tail = iommu_alloc_pages(GFP_KERNEL, get_order(8));
+	iommu->ga_log_tail = iommu_alloc_pages_sz(GFP_KERNEL, 8);
 	if (!iommu->ga_log_tail)
 		goto err_out;
 
@@ -1026,8 +1025,8 @@  static bool __copy_device_table(struct amd_iommu *iommu)
 	if (!old_devtb)
 		return false;
 
-	pci_seg->old_dev_tbl_cpy = iommu_alloc_pages(GFP_KERNEL | GFP_DMA32,
-						     get_order(pci_seg->dev_table_size));
+	pci_seg->old_dev_tbl_cpy = iommu_alloc_pages_sz(
+		GFP_KERNEL | GFP_DMA32, pci_seg->dev_table_size);
 	if (pci_seg->old_dev_tbl_cpy == NULL) {
 		pr_err("Failed to allocate memory for copying old device table!\n");
 		memunmap(old_devtb);
diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c
index 9f424acf474e94..ce87cfae4eaaa3 100644
--- a/drivers/iommu/intel/dmar.c
+++ b/drivers/iommu/intel/dmar.c
@@ -1681,7 +1681,6 @@  int dmar_enable_qi(struct intel_iommu *iommu)
 {
 	struct q_inval *qi;
 	void *desc;
-	int order;
 
 	if (!ecap_qis(iommu->ecap))
 		return -ENOENT;
@@ -1702,8 +1701,10 @@  int dmar_enable_qi(struct intel_iommu *iommu)
 	 * Need two pages to accommodate 256 descriptors of 256 bits each
 	 * if the remapping hardware supports scalable mode translation.
 	 */
-	order = ecap_smts(iommu->ecap) ? 1 : 0;
-	desc = iommu_alloc_pages_node(iommu->node, GFP_ATOMIC, order);
+	desc = iommu_alloc_pages_node_lg2(iommu->node, GFP_ATOMIC,
+					  ecap_smts(iommu->ecap) ?
+						  order_base_2(SZ_8K) :
+						  order_base_2(SZ_4K));
 	if (!desc) {
 		kfree(qi);
 		iommu->qi = NULL;
diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c
index 8727d8e1e02f45..0e1dd519c3b9b3 100644
--- a/drivers/iommu/io-pgtable-arm.c
+++ b/drivers/iommu/io-pgtable-arm.c
@@ -263,14 +263,14 @@  static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp,
 				    void *cookie)
 {
 	struct device *dev = cfg->iommu_dev;
-	int order = get_order(size);
 	dma_addr_t dma;
 	void *pages;
 
 	if (cfg->alloc)
 		pages = cfg->alloc(cookie, size, gfp);
 	else
-		pages = iommu_alloc_pages_node(dev_to_node(dev), gfp, order);
+		pages = iommu_alloc_pages_node_lg2(dev_to_node(dev), gfp,
+						   order_base_2(size));
 
 	if (!pages)
 		return NULL;
diff --git a/drivers/iommu/io-pgtable-dart.c b/drivers/iommu/io-pgtable-dart.c
index a4cbd8a8a2976e..3b57d14baa9c1d 100644
--- a/drivers/iommu/io-pgtable-dart.c
+++ b/drivers/iommu/io-pgtable-dart.c
@@ -107,13 +107,6 @@  static phys_addr_t iopte_to_paddr(dart_iopte pte,
 	return paddr;
 }
 
-static void *__dart_alloc_pages(size_t size, gfp_t gfp)
-{
-	int order = get_order(size);
-
-	return iommu_alloc_pages(gfp, order);
-}
-
 static int dart_init_pte(struct dart_io_pgtable *data,
 			     unsigned long iova, phys_addr_t paddr,
 			     dart_iopte prot, int num_entries,
@@ -255,7 +248,7 @@  static int dart_map_pages(struct io_pgtable_ops *ops, unsigned long iova,
 
 	/* no L2 table present */
 	if (!pte) {
-		cptep = __dart_alloc_pages(tblsz, gfp);
+		cptep = iommu_alloc_pages_sz(gfp, tblsz);
 		if (!cptep)
 			return -ENOMEM;
 
@@ -412,7 +405,8 @@  apple_dart_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie)
 	cfg->apple_dart_cfg.n_ttbrs = 1 << data->tbl_bits;
 
 	for (i = 0; i < cfg->apple_dart_cfg.n_ttbrs; ++i) {
-		data->pgd[i] = __dart_alloc_pages(DART_GRANULE(data), GFP_KERNEL);
+		data->pgd[i] =
+			iommu_alloc_pages_sz(GFP_KERNEL, DART_GRANULE(data));
 		if (!data->pgd[i])
 			goto out_free_data;
 		cfg->apple_dart_cfg.ttbr[i] = virt_to_phys(data->pgd[i]);
diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c
index 3d77aed8507373..d0e515bf5dd1f6 100644
--- a/drivers/iommu/sun50i-iommu.c
+++ b/drivers/iommu/sun50i-iommu.c
@@ -690,8 +690,8 @@  sun50i_iommu_domain_alloc_paging(struct device *dev)
 	if (!sun50i_domain)
 		return NULL;
 
-	sun50i_domain->dt = iommu_alloc_pages(GFP_KERNEL | GFP_DMA32,
-					      get_order(DT_SIZE));
+	sun50i_domain->dt =
+		iommu_alloc_pages_sz(GFP_KERNEL | GFP_DMA32, DT_SIZE);
 	if (!sun50i_domain->dt)
 		goto err_free_domain;