diff mbox series

[v3,1/7] iommu: Introduce a replace API for device pasid

Message ID 20240628090557.50898-2-yi.l.liu@intel.com
State New
Headers show
Series iommufd support pasid attach/replace | expand

Commit Message

Yi Liu June 28, 2024, 9:05 a.m. UTC
Provide a high-level API to allow replacements of one domain with
another for specific pasid of a device. This is similar to
iommu_group_replace_domain() and it is expected to be used only by
IOMMUFD.

Co-developed-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Lu Baolu <baolu.lu@linux.intel.com>
Signed-off-by: Yi Liu <yi.l.liu@intel.com>
---
 drivers/iommu/iommu-priv.h |  3 ++
 drivers/iommu/iommu.c      | 80 ++++++++++++++++++++++++++++++++++++--
 2 files changed, 79 insertions(+), 4 deletions(-)

Comments

Tian, Kevin July 18, 2024, 8:27 a.m. UTC | #1
> From: Liu, Yi L <yi.l.liu@intel.com>
> Sent: Friday, June 28, 2024 5:06 PM
> 
> @@ -3289,7 +3290,20 @@ static int __iommu_set_group_pasid(struct
> iommu_domain *domain,
> 
>  		if (device == last_gdev)
>  			break;
> -		ops->remove_dev_pasid(device->dev, pasid, domain);
> +		/* If no old domain, undo the succeeded devices/pasid */
> +		if (!old) {
> +			ops->remove_dev_pasid(device->dev, pasid, domain);
> +			continue;
> +		}
> +
> +		/*
> +		 * Rollback the succeeded devices/pasid to the old domain.
> +		 * And it is a driver bug to fail attaching with a previously
> +		 * good domain.
> +		 */
> +		if (WARN_ON(old->ops->set_dev_pasid(old, device->dev,
> +						    pasid, domain)))
> +			ops->remove_dev_pasid(device->dev, pasid, domain);

I wonder whether @remove_dev_pasid() can be replaced by having
blocking_domain support @set_dev_pasid?

> +int iommu_replace_device_pasid(struct iommu_domain *domain,
> +			       struct device *dev, ioasid_t pasid)
> +{
> +	/* Caller must be a probed driver on dev */
> +	struct iommu_group *group = dev->iommu_group;
> +	void *curr;
> +	int ret;
> +
> +	if (!domain->ops->set_dev_pasid)
> +		return -EOPNOTSUPP;
> +
> +	if (!group)
> +		return -ENODEV;
> +
> +	if (!dev_has_iommu(dev) || dev_iommu_ops(dev) != domain-
> >owner ||
> +	    pasid == IOMMU_NO_PASID)
> +		return -EINVAL;
> +
> +	mutex_lock(&group->mutex);
> +	/*
> +	 * The recorded domain is inconsistent with the domain pasid is
> +	 * actually attached until pasid is attached to the new domain.
> +	 * This has race condition with the paths that do not hold
> +	 * group->mutex. E.g. the Page Request forwarding.
> +	 */

so?

> +	curr = xa_store(&group->pasid_array, pasid, domain, GFP_KERNEL);
> +	if (!curr) {
> +		xa_erase(&group->pasid_array, pasid);
> +		ret = -EINVAL;
> +		goto out_unlock;
> +	}
> +
> +	ret = xa_err(curr);
> +	if (ret)
> +		goto out_unlock;
> +
> +	if (curr == domain)
> +		goto out_unlock;
> +
> +	ret = __iommu_set_group_pasid(domain, group, pasid, curr);
> +	if (ret)
> +		WARN_ON(domain != xa_store(&group->pasid_array, pasid,
> +					   curr, GFP_KERNEL));

above can follow Jason's suggestion to iommu_group_replace_domain ()
in Baolu's series, i.e. doing a xa_reserve() first.
Yi Liu Aug. 16, 2024, 9:43 a.m. UTC | #2
On 2024/7/18 16:27, Tian, Kevin wrote:
>> From: Liu, Yi L <yi.l.liu@intel.com>
>> Sent: Friday, June 28, 2024 5:06 PM
>>
>> @@ -3289,7 +3290,20 @@ static int __iommu_set_group_pasid(struct
>> iommu_domain *domain,
>>
>>   		if (device == last_gdev)
>>   			break;
>> -		ops->remove_dev_pasid(device->dev, pasid, domain);
>> +		/* If no old domain, undo the succeeded devices/pasid */
>> +		if (!old) {
>> +			ops->remove_dev_pasid(device->dev, pasid, domain);
>> +			continue;
>> +		}
>> +
>> +		/*
>> +		 * Rollback the succeeded devices/pasid to the old domain.
>> +		 * And it is a driver bug to fail attaching with a previously
>> +		 * good domain.
>> +		 */
>> +		if (WARN_ON(old->ops->set_dev_pasid(old, device->dev,
>> +						    pasid, domain)))
>> +			ops->remove_dev_pasid(device->dev, pasid, domain);
> 
> I wonder whether @remove_dev_pasid() can be replaced by having
> blocking_domain support @set_dev_pasid?

how about your thought, @Jason?

>> +int iommu_replace_device_pasid(struct iommu_domain *domain,
>> +			       struct device *dev, ioasid_t pasid)
>> +{
>> +	/* Caller must be a probed driver on dev */
>> +	struct iommu_group *group = dev->iommu_group;
>> +	void *curr;
>> +	int ret;
>> +
>> +	if (!domain->ops->set_dev_pasid)
>> +		return -EOPNOTSUPP;
>> +
>> +	if (!group)
>> +		return -ENODEV;
>> +
>> +	if (!dev_has_iommu(dev) || dev_iommu_ops(dev) != domain-
>>> owner ||
>> +	    pasid == IOMMU_NO_PASID)
>> +		return -EINVAL;
>> +
>> +	mutex_lock(&group->mutex);
>> +	/*
>> +	 * The recorded domain is inconsistent with the domain pasid is
>> +	 * actually attached until pasid is attached to the new domain.
>> +	 * This has race condition with the paths that do not hold
>> +	 * group->mutex. E.g. the Page Request forwarding.
>> +	 */
> 
> so?

This is added per the below comment. Maybe I should have made it clearer.
Due to the order of this xa operations, the domain in the xarray does not
match the actual translation structure, but it will become consistent in
the end.

https://lore.kernel.org/linux-iommu/20240429135512.GC941030@nvidia.com/

>> +	curr = xa_store(&group->pasid_array, pasid, domain, GFP_KERNEL);
>> +	if (!curr) {
>> +		xa_erase(&group->pasid_array, pasid);
>> +		ret = -EINVAL;
>> +		goto out_unlock;
>> +	}
>> +
>> +	ret = xa_err(curr);
>> +	if (ret)
>> +		goto out_unlock;
>> +
>> +	if (curr == domain)
>> +		goto out_unlock;
>> +
>> +	ret = __iommu_set_group_pasid(domain, group, pasid, curr);
>> +	if (ret)
>> +		WARN_ON(domain != xa_store(&group->pasid_array, pasid,
>> +					   curr, GFP_KERNEL));
> 
> above can follow Jason's suggestion to iommu_group_replace_domain ()
> in Baolu's series, i.e. doing a xa_reserve() first.

yeah, I noticed it. But there is a minor difference. In Baolu's series
no need to retrieve the old domain, but this path needs to get it and
pass it to set_dev_pasid().
Yi Liu Sept. 6, 2024, 4:21 a.m. UTC | #3
On 2024/8/16 21:02, Jason Gunthorpe wrote:
> On Fri, Aug 16, 2024 at 05:43:18PM +0800, Yi Liu wrote:
>> On 2024/7/18 16:27, Tian, Kevin wrote:
>>>> From: Liu, Yi L <yi.l.liu@intel.com>
>>>> Sent: Friday, June 28, 2024 5:06 PM
>>>>
>>>> @@ -3289,7 +3290,20 @@ static int __iommu_set_group_pasid(struct
>>>> iommu_domain *domain,
>>>>
>>>>    		if (device == last_gdev)
>>>>    			break;
>>>> -		ops->remove_dev_pasid(device->dev, pasid, domain);
>>>> +		/* If no old domain, undo the succeeded devices/pasid */
>>>> +		if (!old) {
>>>> +			ops->remove_dev_pasid(device->dev, pasid, domain);
>>>> +			continue;
>>>> +		}
>>>> +
>>>> +		/*
>>>> +		 * Rollback the succeeded devices/pasid to the old domain.
>>>> +		 * And it is a driver bug to fail attaching with a previously
>>>> +		 * good domain.
>>>> +		 */
>>>> +		if (WARN_ON(old->ops->set_dev_pasid(old, device->dev,
>>>> +						    pasid, domain)))
>>>> +			ops->remove_dev_pasid(device->dev, pasid, domain);
>>>
>>> I wonder whether @remove_dev_pasid() can be replaced by having
>>> blocking_domain support @set_dev_pasid?
>>
>> how about your thought, @Jason?
> 
> I think we talked about doing that once before, I forget why it was
> not done. Maybe there was an issue?
> 
> But it seems worth trying.

Since remove_dev_pasid() does not return a result, so caller does not
need to check the result of it. If we want to replace it with the
blocked_domain->ops->set_dev_pasid(), shall we enforce that the
set_dev_pasid() op of blocked_domain to be always success. Is it?
Otherwise, this is not an apple-to-apple replacement.

> I would like to see set_dev_pasid pass in the old domain first:
> 
> 	int (*set_dev_pasid)(struct iommu_domain *new_domain,
>                               struct iommu_domain *old_domain,
>                               struct device *dev,
> 			     ioasid_t pasid);
> 
> Replace includes the old_domain as an argument and it is necessary
> information..

sure. Intel iommu driver should be able to support it as well. While
AMD iommu driver does no have the blocking domain stuff yet. Vasant
may keep me honest.

> A quick try on SMMUv3 seems reasonable:
> 
> diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> index 9bc50bded5af72..f512bfe5cd202c 100644
> --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c
> @@ -2931,13 +2931,12 @@ int arm_smmu_set_pasid(struct arm_smmu_master *master,
>          return ret;
>   }
>   
> -static void arm_smmu_remove_dev_pasid(struct device *dev, ioasid_t pasid,
> -                                     struct iommu_domain *domain)
> +static void arm_smmu_blocking_set_dev_pasid(struct iommu_domain *new_domain,
> +                                           struct iommu_domain *old_domain,
> +                                           struct device *dev, ioasid_t pasid)
>   {
>          struct arm_smmu_master *master = dev_iommu_priv_get(dev);
> -       struct arm_smmu_domain *smmu_domain;
> -
> -       smmu_domain = to_smmu_domain(domain);
> +       struct arm_smmu_domain *smmu_domain = to_smmu_domain(old_domain);
>   
>          mutex_lock(&arm_smmu_asid_lock);
>          arm_smmu_clear_cd(master, pasid);
> @@ -3039,6 +3038,7 @@ static int arm_smmu_attach_dev_blocked(struct iommu_domain *domain,
>   
>   static const struct iommu_domain_ops arm_smmu_blocked_ops = {
>          .attach_dev = arm_smmu_attach_dev_blocked,
> +       .set_dev_pasid = arm_smmu_blocked_set_dev_pasid,
>   };
>   
>   static struct iommu_domain arm_smmu_blocked_domain = {
> @@ -3487,7 +3487,6 @@ static struct iommu_ops arm_smmu_ops = {
>          .device_group           = arm_smmu_device_group,
>          .of_xlate               = arm_smmu_of_xlate,
>          .get_resv_regions       = arm_smmu_get_resv_regions,
> -       .remove_dev_pasid       = arm_smmu_remove_dev_pasid,
>          .dev_enable_feat        = arm_smmu_dev_enable_feature,
>          .dev_disable_feat       = arm_smmu_dev_disable_feature,
>          .page_response          = arm_smmu_page_response,
> 
> Jason
>
Yi Liu Sept. 6, 2024, 5:57 a.m. UTC | #4
On 2024/9/6 12:33, Baolu Lu wrote:
> On 9/6/24 12:21 PM, Yi Liu wrote:
>> On 2024/8/16 21:02, Jason Gunthorpe wrote:
>>> On Fri, Aug 16, 2024 at 05:43:18PM +0800, Yi Liu wrote:
>>>> On 2024/7/18 16:27, Tian, Kevin wrote:
>>>>>> From: Liu, Yi L <yi.l.liu@intel.com>
>>>>>> Sent: Friday, June 28, 2024 5:06 PM
>>>>>>
>>>>>> @@ -3289,7 +3290,20 @@ static int __iommu_set_group_pasid(struct
>>>>>> iommu_domain *domain,
>>>>>>
>>>>>>            if (device == last_gdev)
>>>>>>                break;
>>>>>> -        ops->remove_dev_pasid(device->dev, pasid, domain);
>>>>>> +        /* If no old domain, undo the succeeded devices/pasid */
>>>>>> +        if (!old) {
>>>>>> +            ops->remove_dev_pasid(device->dev, pasid, domain);
>>>>>> +            continue;
>>>>>> +        }
>>>>>> +
>>>>>> +        /*
>>>>>> +         * Rollback the succeeded devices/pasid to the old domain.
>>>>>> +         * And it is a driver bug to fail attaching with a previously
>>>>>> +         * good domain.
>>>>>> +         */
>>>>>> +        if (WARN_ON(old->ops->set_dev_pasid(old, device->dev,
>>>>>> +                            pasid, domain)))
>>>>>> +            ops->remove_dev_pasid(device->dev, pasid, domain);
>>>>>
>>>>> I wonder whether @remove_dev_pasid() can be replaced by having
>>>>> blocking_domain support @set_dev_pasid?
>>>>
>>>> how about your thought, @Jason?
>>>
>>> I think we talked about doing that once before, I forget why it was
>>> not done. Maybe there was an issue?
>>>
>>> But it seems worth trying.
>>
>> Since remove_dev_pasid() does not return a result, so caller does not
>> need to check the result of it. If we want to replace it with the
>> blocked_domain->ops->set_dev_pasid(), shall we enforce that the
>> set_dev_pasid() op of blocked_domain to be always success. Is it?
> 
> Yes. The semantics of blocking domain is that the iommu driver must
> ensure successful completion.

great. thanks for the confirmation.
diff mbox series

Patch

diff --git a/drivers/iommu/iommu-priv.h b/drivers/iommu/iommu-priv.h
index 5f731d994803..0949c02cee93 100644
--- a/drivers/iommu/iommu-priv.h
+++ b/drivers/iommu/iommu-priv.h
@@ -20,6 +20,9 @@  static inline const struct iommu_ops *dev_iommu_ops(struct device *dev)
 int iommu_group_replace_domain(struct iommu_group *group,
 			       struct iommu_domain *new_domain);
 
+int iommu_replace_device_pasid(struct iommu_domain *domain,
+			       struct device *dev, ioasid_t pasid);
+
 int iommu_device_register_bus(struct iommu_device *iommu,
 			      const struct iommu_ops *ops,
 			      const struct bus_type *bus,
diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c
index b3a1dabed2dd..2d64582b7c43 100644
--- a/drivers/iommu/iommu.c
+++ b/drivers/iommu/iommu.c
@@ -3268,14 +3268,15 @@  bool iommu_group_dma_owner_claimed(struct iommu_group *group)
 EXPORT_SYMBOL_GPL(iommu_group_dma_owner_claimed);
 
 static int __iommu_set_group_pasid(struct iommu_domain *domain,
-				   struct iommu_group *group, ioasid_t pasid)
+				   struct iommu_group *group, ioasid_t pasid,
+				   struct iommu_domain *old)
 {
 	struct group_device *device, *last_gdev;
 	int ret;
 
 	for_each_group_device(group, device) {
 		ret = domain->ops->set_dev_pasid(domain, device->dev,
-						 pasid, NULL);
+						 pasid, old);
 		if (ret)
 			goto err_revert;
 	}
@@ -3289,7 +3290,20 @@  static int __iommu_set_group_pasid(struct iommu_domain *domain,
 
 		if (device == last_gdev)
 			break;
-		ops->remove_dev_pasid(device->dev, pasid, domain);
+		/* If no old domain, undo the succeeded devices/pasid */
+		if (!old) {
+			ops->remove_dev_pasid(device->dev, pasid, domain);
+			continue;
+		}
+
+		/*
+		 * Rollback the succeeded devices/pasid to the old domain.
+		 * And it is a driver bug to fail attaching with a previously
+		 * good domain.
+		 */
+		if (WARN_ON(old->ops->set_dev_pasid(old, device->dev,
+						    pasid, domain)))
+			ops->remove_dev_pasid(device->dev, pasid, domain);
 	}
 	return ret;
 }
@@ -3348,7 +3362,7 @@  int iommu_attach_device_pasid(struct iommu_domain *domain,
 		goto out_unlock;
 	}
 
-	ret = __iommu_set_group_pasid(domain, group, pasid);
+	ret = __iommu_set_group_pasid(domain, group, pasid, NULL);
 	if (ret)
 		xa_erase(&group->pasid_array, pasid);
 out_unlock:
@@ -3357,6 +3371,64 @@  int iommu_attach_device_pasid(struct iommu_domain *domain,
 }
 EXPORT_SYMBOL_GPL(iommu_attach_device_pasid);
 
+/**
+ * iommu_replace_device_pasid - Replace the domain that a pasid is attached to
+ * @domain: the new iommu domain
+ * @dev: the attached device.
+ * @pasid: the pasid of the device.
+ *
+ * This API allows the pasid to switch domains. Return 0 on success, or an
+ * error. The pasid will keep the old configuration if replacement failed.
+ */
+int iommu_replace_device_pasid(struct iommu_domain *domain,
+			       struct device *dev, ioasid_t pasid)
+{
+	/* Caller must be a probed driver on dev */
+	struct iommu_group *group = dev->iommu_group;
+	void *curr;
+	int ret;
+
+	if (!domain->ops->set_dev_pasid)
+		return -EOPNOTSUPP;
+
+	if (!group)
+		return -ENODEV;
+
+	if (!dev_has_iommu(dev) || dev_iommu_ops(dev) != domain->owner ||
+	    pasid == IOMMU_NO_PASID)
+		return -EINVAL;
+
+	mutex_lock(&group->mutex);
+	/*
+	 * The recorded domain is inconsistent with the domain pasid is
+	 * actually attached until pasid is attached to the new domain.
+	 * This has race condition with the paths that do not hold
+	 * group->mutex. E.g. the Page Request forwarding.
+	 */
+	curr = xa_store(&group->pasid_array, pasid, domain, GFP_KERNEL);
+	if (!curr) {
+		xa_erase(&group->pasid_array, pasid);
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	ret = xa_err(curr);
+	if (ret)
+		goto out_unlock;
+
+	if (curr == domain)
+		goto out_unlock;
+
+	ret = __iommu_set_group_pasid(domain, group, pasid, curr);
+	if (ret)
+		WARN_ON(domain != xa_store(&group->pasid_array, pasid,
+					   curr, GFP_KERNEL));
+out_unlock:
+	mutex_unlock(&group->mutex);
+	return ret;
+}
+EXPORT_SYMBOL_NS_GPL(iommu_replace_device_pasid, IOMMUFD_INTERNAL);
+
 /*
  * iommu_detach_device_pasid() - Detach the domain from pasid of device
  * @domain: the iommu domain.