diff mbox series

[v3,1/3] uio: introduce UIO_MEM_DMA_COHERENT type

Message ID 20240109121458.26475-2-njavali@marvell.com
State New
Headers show
Series UIO_MEM_DMA_COHERENT for cnic/bnx2/bnx2x | expand

Commit Message

Nilesh Javali Jan. 9, 2024, 12:14 p.m. UTC
From: Chris Leech <cleech@redhat.com>

Add a UIO memtype specifically for sharing dma_alloc_coherent
memory with userspace, backed by dma_mmap_coherent.

Reported-by: kernel test robot <lkp@intel.com>
Closes: https://lore.kernel.org/oe-kbuild-all/202401042222.J9GOUiYL-lkp@intel.com/
Signed-off-by: Nilesh Javali <njavali@marvell.com>
Signed-off-by: Chris Leech <cleech@redhat.com>
---
v3:
- fix warnings reported by kernel test robot
v2:
- expose only the dma_addr within uio_mem
- Cleanup newly added unions comprising virtual_addr
  and struct device
 drivers/uio/uio.c          | 40 ++++++++++++++++++++++++++++++++++++++
 include/linux/uio_driver.h |  2 ++
 2 files changed, 42 insertions(+)

Comments

Chris Leech Jan. 24, 2024, 11:14 p.m. UTC | #1
Hi Greg, I'd like to ask you to reconsider accepting UIO API changes for
mapping DMA coherent allocations.

The bnx2i iSCSI driver is currently broken due to incorrect (but
previously functioning) uio use in the "cnic" module.  I believe that
the most direct way to fix it is to specifically handle DMA coherent
allocations in the UIO API backed by dma_mmap_coherent.

After my original posting of these patches[1], Nilesh Javali had made an
attempt to change the cnic allocations[2]. Those were rejected, and
Nilesh has returned to cleaning up the UIO_MEM_DMA_COHERENT patches.

[1] https://lore.kernel.org/all/20230929170023.1020032-1-cleech@redhat.com/
[2] https://lore.kernel.org/linux-scsi/20231219055514.12324-1-njavali@marvell.com/

While I understand the complaints about how these drivers have been
structured, I also don't like letting support bitrot when there's a
reasonable alternative to re-architecting an existing driver.

There are two other uio drivers which are mmaping dma_alloc_coherent
memory as UIO_MEM_PHYS, uio_dmem_genirq and uio_pruss. While a
conversion to use dma_mmap_coherent might be more correct for these as
well, I have no way of testing them and assume that this just hasn't
been an issue for the platforms in question.

Nilesh has kindly removed the unions I had in the original posting, and
done additional testing on the changes.

Although I do believe that we need to go back to somehow storing the
device struct associated with the PCI device, and not using idev->dev
here.

> +	ret = dma_mmap_coherent(&idev->dev,
> +				vma,
> +				addr,
> +				mem->dma_addr,
> +				vma->vm_end - vma->vm_start);

Thank you,
- Chris Leech

On Tue, Jan 09, 2024 at 05:44:56PM +0530, Nilesh Javali wrote:
> From: Chris Leech <cleech@redhat.com>
> 
> Add a UIO memtype specifically for sharing dma_alloc_coherent
> memory with userspace, backed by dma_mmap_coherent.
> 
> Reported-by: kernel test robot <lkp@intel.com>
> Closes: https://lore.kernel.org/oe-kbuild-all/202401042222.J9GOUiYL-lkp@intel.com/
> Signed-off-by: Nilesh Javali <njavali@marvell.com>
> Signed-off-by: Chris Leech <cleech@redhat.com>
> ---
> v3:
> - fix warnings reported by kernel test robot
> v2:
> - expose only the dma_addr within uio_mem
> - Cleanup newly added unions comprising virtual_addr
>   and struct device
>  drivers/uio/uio.c          | 40 ++++++++++++++++++++++++++++++++++++++
>  include/linux/uio_driver.h |  2 ++
>  2 files changed, 42 insertions(+)
> 
> diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
> index 62082d64ece0..01d83728b513 100644
> --- a/drivers/uio/uio.c
> +++ b/drivers/uio/uio.c
> @@ -24,6 +24,7 @@
>  #include <linux/kobject.h>
>  #include <linux/cdev.h>
>  #include <linux/uio_driver.h>
> +#include <linux/dma-mapping.h>
>  
>  #define UIO_MAX_DEVICES		(1U << MINORBITS)
>  
> @@ -759,6 +760,42 @@ static int uio_mmap_physical(struct vm_area_struct *vma)
>  			       vma->vm_page_prot);
>  }
>  
> +static int uio_mmap_dma_coherent(struct vm_area_struct *vma)
> +{
> +	struct uio_device *idev = vma->vm_private_data;
> +	struct uio_mem *mem;
> +	void *addr;
> +	int ret = 0;
> +	int mi;
> +
> +	mi = uio_find_mem_index(vma);
> +	if (mi < 0)
> +		return -EINVAL;
> +
> +	mem = idev->info->mem + mi;
> +
> +	if (mem->dma_addr & ~PAGE_MASK)
> +		return -ENODEV;
> +	if (vma->vm_end - vma->vm_start > mem->size)
> +		return -EINVAL;
> +
> +	/*
> +	 * UIO uses offset to index into the maps for a device.
> +	 * We need to clear vm_pgoff for dma_mmap_coherent.
> +	 */
> +	vma->vm_pgoff = 0;
> +
> +	addr = (void *)mem->addr;
> +	ret = dma_mmap_coherent(&idev->dev,
> +				vma,
> +				addr,
> +				mem->dma_addr,
> +				vma->vm_end - vma->vm_start);
> +	vma->vm_pgoff = mi;
> +
> +	return ret;
> +}
> +
>  static int uio_mmap(struct file *filep, struct vm_area_struct *vma)
>  {
>  	struct uio_listener *listener = filep->private_data;
> @@ -806,6 +843,9 @@ static int uio_mmap(struct file *filep, struct vm_area_struct *vma)
>  	case UIO_MEM_VIRTUAL:
>  		ret = uio_mmap_logical(vma);
>  		break;
> +	case UIO_MEM_DMA_COHERENT:
> +		ret = uio_mmap_dma_coherent(vma);
> +		break;
>  	default:
>  		ret = -EINVAL;
>  	}
> diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h
> index 47c5962b876b..7efa81497183 100644
> --- a/include/linux/uio_driver.h
> +++ b/include/linux/uio_driver.h
> @@ -37,6 +37,7 @@ struct uio_map;
>  struct uio_mem {
>  	const char		*name;
>  	phys_addr_t		addr;
> +	dma_addr_t		dma_addr;
>  	unsigned long		offs;
>  	resource_size_t		size;
>  	int			memtype;
> @@ -158,6 +159,7 @@ extern int __must_check
>  #define UIO_MEM_LOGICAL	2
>  #define UIO_MEM_VIRTUAL 3
>  #define UIO_MEM_IOVA	4
> +#define UIO_MEM_DMA_COHERENT	5
>  
>  /* defines for uio_port->porttype */
>  #define UIO_PORT_NONE	0
> -- 
> 2.23.1
>
Chris Leech Jan. 24, 2024, 11:36 p.m. UTC | #2
Nilesh, 

On Tue, Jan 09, 2024 at 05:44:56PM +0530, Nilesh Javali wrote:
> +	ret = dma_mmap_coherent(&idev->dev,
> +				vma,
> +				addr,
> +				mem->dma_addr,
> +				vma->vm_end - vma->vm_start);

When I asked about the use of idev->dev here in the v2 posting, you
repled as follows.

> While the cnic loads, it registers the cnic_uio_dev->pdev->dev with
> uio and the uio attaches its device to cnic device as it's parent. So
> uio and cnic are attached to the same PCI device.

I still don't think the sysfs parent relationship is enough to get the
correct behavior out of the DMA APIs on all platforms, and
dma_mmap_coherent needs to be using the same device struct as
dma_alloc_coherent.

I had some testing done on an AMD system, where your v2 patch set was
failing with the iommu enabled, and my original changes were reported to
work.  And I believe these v3 patches are functionally the same.

- Chris
Nilesh Javali Jan. 25, 2024, 10:49 a.m. UTC | #3
Chris,

> -----Original Message-----
> From: Chris Leech <cleech@redhat.com>
> Sent: Thursday, January 25, 2024 5:07 AM
> To: Nilesh Javali <njavali@marvell.com>
> Cc: martin.petersen@oracle.com; lduncan@suse.com; linux-
> scsi@vger.kernel.org; GR-QLogic-Storage-Upstream <GR-QLogic-Storage-
> Upstream@marvell.com>; jmeneghi@redhat.com
> Subject: [EXT] Re: [PATCH v3 1/3] uio: introduce UIO_MEM_DMA_COHERENT
> type
> 
> External Email
> 
> ----------------------------------------------------------------------
> Nilesh,
> 
> On Tue, Jan 09, 2024 at 05:44:56PM +0530, Nilesh Javali wrote:
> > +	ret = dma_mmap_coherent(&idev->dev,
> > +				vma,
> > +				addr,
> > +				mem->dma_addr,
> > +				vma->vm_end - vma->vm_start);
> 
> When I asked about the use of idev->dev here in the v2 posting, you
> repled as follows.
> 
> > While the cnic loads, it registers the cnic_uio_dev->pdev->dev with
> > uio and the uio attaches its device to cnic device as it's parent. So
> > uio and cnic are attached to the same PCI device.
> 
> I still don't think the sysfs parent relationship is enough to get the
> correct behavior out of the DMA APIs on all platforms, and
> dma_mmap_coherent needs to be using the same device struct as
> dma_alloc_coherent.
> 
> I had some testing done on an AMD system, where your v2 patch set was
> failing with the iommu enabled, and my original changes were reported to
> work.  And I believe these v3 patches are functionally the same.

We have extensively verified this patch set with IOMMU enabled and see
no regression when VT and SRIOV are enabled. However issues are observed
only when VT, VT-D and SRIOV are enabled in the HW BIOS.
In the failure case, with VT-D enabled, we observe the OS fails to boot with
DMAR timeout error.

"  **] A start job is running for Network Manager (2min 6s / no limit)
[ 147.069016] DMAR: VT-d detected Invalidation Time-out Error: SID 0
[ 147.069016] DMAR: DRHD: handling fault status reg 40
[ 147.080924] DMAR: QI HEAD: Device-TLB Invalidation qw0 = 0xaf0300100003, qw1 = 0x7ffffffffffff001
[ 147.090207] DMAR: QI PRIOR: Invalidation Wait qw0 = 0x200000025, qw1 = 0x10005f634".

With your proposed changes, please confirm if you see no issues with VT-D enabled on Intel/AMD platform.

Also based on our observation the issue with VT-D enabled is not related to the current patch set under test.

Thanks,
Nilesh
Chris Leech Jan. 25, 2024, 7:16 p.m. UTC | #4
On Thu, Jan 25, 2024 at 10:49:56AM +0000, Nilesh Javali wrote:
>
> We have extensively verified this patch set with IOMMU enabled and see
> no regression when VT and SRIOV are enabled. However issues are observed
> only when VT, VT-D and SRIOV are enabled in the HW BIOS.

I don't understand what having IOMMU enabled while VT-d is disabled in
the BIOS settings actually means. Isn't VT-d Intel's name for it's IOMMU
implemenation? Does disabling VT-d support but setting intel_iommu=on
actually do anything?

> In the failure case, with VT-D enabled, we observe the OS fails to boot with
> DMAR timeout error.
> 
> "  **] A start job is running for Network Manager (2min 6s / no limit)
> [ 147.069016] DMAR: VT-d detected Invalidation Time-out Error: SID 0
> [ 147.069016] DMAR: DRHD: handling fault status reg 40
> [ 147.080924] DMAR: QI HEAD: Device-TLB Invalidation qw0 = 0xaf0300100003, qw1 = 0x7ffffffffffff001
> [ 147.090207] DMAR: QI PRIOR: Invalidation Wait qw0 = 0x200000025, qw1 = 0x10005f634".
> 
> With your proposed changes, please confirm if you see no issues with
> VT-D enabled on Intel/AMD platform.

I've just gone back and tested on a R320 with a Xeon E5-2420, this
systems BIOS does not have seperate VT and VT-d setting, but VT-d does
appear to be on when the single virtualization features setting is
enabled.

- A kernel with my v1 patches logs into a target just fine.
- These v3 patches fail.  My configuration is probably different, I
  don't see a network manager online stall but I do see segfaults in
  iscsiuio.
- Adding back the dma_device lines to the v3 patches, keeping the union
  cleanups you did in place, and it goes back to working.

> Also based on our observation the issue with VT-D enabled is not
> related to the current patch set under test.

Yes, Jerry Snitsel noted that the IOMMU code had been clearing the
__GFP_COMP flag for longer than the DMA API has been rejecting it. 
So IOMMU support has had issues for longer, but I think we can fix both
by doing this correctly.

Thanks,
 Chris Leech
gregkh@linuxfoundation.org Jan. 25, 2024, 10:43 p.m. UTC | #5
On Wed, Jan 24, 2024 at 03:14:51PM -0800, Chris Leech wrote:
> Hi Greg, I'd like to ask you to reconsider accepting UIO API changes for
> mapping DMA coherent allocations.

Then why wasn't this patch series actually sent to me?  It's a bit hard
to apply something I am not even aware of :(

greg k-h
diff mbox series

Patch

diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c
index 62082d64ece0..01d83728b513 100644
--- a/drivers/uio/uio.c
+++ b/drivers/uio/uio.c
@@ -24,6 +24,7 @@ 
 #include <linux/kobject.h>
 #include <linux/cdev.h>
 #include <linux/uio_driver.h>
+#include <linux/dma-mapping.h>
 
 #define UIO_MAX_DEVICES		(1U << MINORBITS)
 
@@ -759,6 +760,42 @@  static int uio_mmap_physical(struct vm_area_struct *vma)
 			       vma->vm_page_prot);
 }
 
+static int uio_mmap_dma_coherent(struct vm_area_struct *vma)
+{
+	struct uio_device *idev = vma->vm_private_data;
+	struct uio_mem *mem;
+	void *addr;
+	int ret = 0;
+	int mi;
+
+	mi = uio_find_mem_index(vma);
+	if (mi < 0)
+		return -EINVAL;
+
+	mem = idev->info->mem + mi;
+
+	if (mem->dma_addr & ~PAGE_MASK)
+		return -ENODEV;
+	if (vma->vm_end - vma->vm_start > mem->size)
+		return -EINVAL;
+
+	/*
+	 * UIO uses offset to index into the maps for a device.
+	 * We need to clear vm_pgoff for dma_mmap_coherent.
+	 */
+	vma->vm_pgoff = 0;
+
+	addr = (void *)mem->addr;
+	ret = dma_mmap_coherent(&idev->dev,
+				vma,
+				addr,
+				mem->dma_addr,
+				vma->vm_end - vma->vm_start);
+	vma->vm_pgoff = mi;
+
+	return ret;
+}
+
 static int uio_mmap(struct file *filep, struct vm_area_struct *vma)
 {
 	struct uio_listener *listener = filep->private_data;
@@ -806,6 +843,9 @@  static int uio_mmap(struct file *filep, struct vm_area_struct *vma)
 	case UIO_MEM_VIRTUAL:
 		ret = uio_mmap_logical(vma);
 		break;
+	case UIO_MEM_DMA_COHERENT:
+		ret = uio_mmap_dma_coherent(vma);
+		break;
 	default:
 		ret = -EINVAL;
 	}
diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h
index 47c5962b876b..7efa81497183 100644
--- a/include/linux/uio_driver.h
+++ b/include/linux/uio_driver.h
@@ -37,6 +37,7 @@  struct uio_map;
 struct uio_mem {
 	const char		*name;
 	phys_addr_t		addr;
+	dma_addr_t		dma_addr;
 	unsigned long		offs;
 	resource_size_t		size;
 	int			memtype;
@@ -158,6 +159,7 @@  extern int __must_check
 #define UIO_MEM_LOGICAL	2
 #define UIO_MEM_VIRTUAL 3
 #define UIO_MEM_IOVA	4
+#define UIO_MEM_DMA_COHERENT	5
 
 /* defines for uio_port->porttype */
 #define UIO_PORT_NONE	0