Message ID | 20191209180514.272727-1-jean-philippe@linaro.org |
---|---|
Headers | show |
Series | iommu: Add PASID support to Arm SMMUv3 | expand |
Hi Jean, On 12/9/19 7:05 PM, Jean-Philippe Brucker wrote: > At the moment, the SMMUv3 driver implements only one stage-1 or stage-2 > page directory per device. However SMMUv3 allows more than one address > space for some devices, by providing multiple stage-1 page directories. In > addition to the Stream ID (SID), that identifies a device, we can now have > Substream IDs (SSID) identifying an address space. In PCIe, SID is called > Requester ID (RID) and SSID is called Process Address-Space ID (PASID). > A complete stage-1 walk goes through the context descriptor table: > > Stream tables Ctx. Desc. tables Page tables > +--------+ ,------->+-------+ ,------->+-------+ > : : | : : | : : > +--------+ | +-------+ | +-------+ > SID->| STE |---' SSID->| CD |---' IOVA->| PTE |--> IPA > +--------+ +-------+ +-------+ > : : : : : : > +--------+ +-------+ +-------+ > > Rewrite arm_smmu_write_ctx_desc() to modify context descriptor table > entries. To keep things simple we only implement one level of context > descriptor tables here, but as with stream and page tables, an SSID can > be split to index multiple levels of tables. > > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> > Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org> > --- > drivers/iommu/arm-smmu-v3.c | 125 +++++++++++++++++++++++++++++------- > 1 file changed, 102 insertions(+), 23 deletions(-) > > diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c > index 43d6a7ded6e4..a01071123c34 100644 > --- a/drivers/iommu/arm-smmu-v3.c > +++ b/drivers/iommu/arm-smmu-v3.c > @@ -227,6 +227,11 @@ > #define STRTAB_STE_0_S1CTXPTR_MASK GENMASK_ULL(51, 6) > #define STRTAB_STE_0_S1CDMAX GENMASK_ULL(63, 59) > > +#define STRTAB_STE_1_S1DSS GENMASK_ULL(1, 0) > +#define STRTAB_STE_1_S1DSS_TERMINATE 0x0 > +#define STRTAB_STE_1_S1DSS_BYPASS 0x1 > +#define STRTAB_STE_1_S1DSS_SSID0 0x2 > + > #define STRTAB_STE_1_S1C_CACHE_NC 0UL > #define STRTAB_STE_1_S1C_CACHE_WBRA 1UL > #define STRTAB_STE_1_S1C_CACHE_WT 2UL > @@ -329,6 +334,7 @@ > #define CMDQ_PREFETCH_1_SIZE GENMASK_ULL(4, 0) > #define CMDQ_PREFETCH_1_ADDR_MASK GENMASK_ULL(63, 12) > > +#define CMDQ_CFGI_0_SSID GENMASK_ULL(31, 12) > #define CMDQ_CFGI_0_SID GENMASK_ULL(63, 32) > #define CMDQ_CFGI_1_LEAF (1UL << 0) > #define CMDQ_CFGI_1_RANGE GENMASK_ULL(4, 0) > @@ -446,8 +452,11 @@ struct arm_smmu_cmdq_ent { > > #define CMDQ_OP_CFGI_STE 0x3 > #define CMDQ_OP_CFGI_ALL 0x4 > + #define CMDQ_OP_CFGI_CD 0x5 > + #define CMDQ_OP_CFGI_CD_ALL 0x6 > struct { > u32 sid; > + u32 ssid; > union { > bool leaf; > u8 span; > @@ -568,6 +577,7 @@ struct arm_smmu_cd_table { > struct arm_smmu_s1_cfg { > struct arm_smmu_cd_table table; > struct arm_smmu_ctx_desc cd; > + u8 s1fmt; > u8 s1cdmax; > }; > > @@ -860,10 +870,16 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) > cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size); > cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK; > break; > + case CMDQ_OP_CFGI_CD: > + cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid); > + /* Fallthrough */ > case CMDQ_OP_CFGI_STE: > cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid); > cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf); > break; > + case CMDQ_OP_CFGI_CD_ALL: > + cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid); > + break; > case CMDQ_OP_CFGI_ALL: > /* Cover the entire SID range */ > cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31); > @@ -1456,6 +1472,33 @@ static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu) > } > > /* Context descriptor manipulation functions */ > +static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain, > + int ssid, bool leaf) > +{ > + size_t i; > + unsigned long flags; > + struct arm_smmu_master *master; > + struct arm_smmu_device *smmu = smmu_domain->smmu; > + struct arm_smmu_cmdq_ent cmd = { > + .opcode = CMDQ_OP_CFGI_CD, > + .cfgi = { > + .ssid = ssid, > + .leaf = leaf, > + }, > + }; > + > + spin_lock_irqsave(&smmu_domain->devices_lock, flags); > + list_for_each_entry(master, &smmu_domain->devices, domain_head) { > + for (i = 0; i < master->num_sids; i++) { > + cmd.cfgi.sid = master->sids[i]; > + arm_smmu_cmdq_issue_cmd(smmu, &cmd); > + } > + } > + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); > + > + arm_smmu_cmdq_issue_sync(smmu); > +} > + > static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu, > struct arm_smmu_cd_table *table, > size_t num_entries) > @@ -1498,34 +1541,65 @@ static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr) > return val; > } > > -static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu, > - struct arm_smmu_s1_cfg *cfg) > +static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, > + int ssid, struct arm_smmu_ctx_desc *cd) > { > - u64 val; > - __le64 *cdptr = cfg->table.ptr; > - > /* > - * We don't need to issue any invalidation here, as we'll invalidate > - * the STE when installing the new entry anyway. > + * This function handles the following cases: > + * > + * (1) Install primary CD, for normal DMA traffic (SSID = 0). > + * (2) Install a secondary CD, for SID+SSID traffic. > + * (3) Update ASID of a CD. Atomically write the first 64 bits of the > + * CD, then invalidate the old entry and mappings. > + * (4) Remove a secondary CD. I see arm_smmu_write_ctx_desc getting called with non null cd in arm_smmu_attach_dev but I do not see it removed in the detach path? > */ > - val = arm_smmu_cpu_tcr_to_cd(cfg->cd.tcr) | > -#ifdef __BIG_ENDIAN > - CTXDESC_CD_0_ENDI | > -#endif > - CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET | > - CTXDESC_CD_0_AA64 | FIELD_PREP(CTXDESC_CD_0_ASID, cfg->cd.asid) | > - CTXDESC_CD_0_V; > + u64 val; > + bool cd_live; > + struct arm_smmu_device *smmu = smmu_domain->smmu; > + __le64 *cdptr = smmu_domain->s1_cfg.table.ptr + ssid * > + CTXDESC_CD_DWORDS;> > - /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */ > - if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE) > - val |= CTXDESC_CD_0_S; > + val = le64_to_cpu(cdptr[0]); > + cd_live = !!(val & CTXDESC_CD_0_V); > > - cdptr[0] = cpu_to_le64(val); > + if (!cd) { /* (4) */ > + val = 0; > + } else if (cd_live) { /* (3) */ > + val &= ~CTXDESC_CD_0_ASID; > + val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid); > + /* > + * Until CD+TLB invalidation, both ASIDs may be used for tagging > + * this substream's traffic > + */ > + } else { /* (1) and (2) */ > + cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK); > + cdptr[2] = 0; > + cdptr[3] = cpu_to_le64(cd->mair); > + > + /* > + * STE is live, and the SMMU might fetch this CD at any > + * time. Ensure that it observes the rest of the CD before we > + * enable it. Mostly, on't you want the invalid state to be seen? > + */ > + arm_smmu_sync_cd(smmu_domain, ssid, true); > > - val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK; > - cdptr[1] = cpu_to_le64(val); > + val = arm_smmu_cpu_tcr_to_cd(cd->tcr) | > +#ifdef __BIG_ENDIAN > + CTXDESC_CD_0_ENDI | > +#endif > + CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET | > + CTXDESC_CD_0_AA64 | > + FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) | > + CTXDESC_CD_0_V; > > - cdptr[3] = cpu_to_le64(cfg->cd.mair); > + /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */ > + if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE) > + val |= CTXDESC_CD_0_S; > + } > + > + WRITE_ONCE(cdptr[0], cpu_to_le64(val)); > + arm_smmu_sync_cd(smmu_domain, ssid, true); > + return 0; > } > > static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain) > @@ -1533,6 +1607,7 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain) > struct arm_smmu_device *smmu = smmu_domain->smmu; > struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; > > + cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR; > return arm_smmu_alloc_cd_leaf_table(smmu, &cfg->table, > 1 << cfg->s1cdmax); > } > @@ -1664,6 +1739,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, > if (s1_cfg) { > BUG_ON(ste_live); > dst[1] = cpu_to_le64( > + FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) | > FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) | > FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) | > FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) | > @@ -1674,7 +1750,9 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, > dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); > > val |= (s1_cfg->table.ptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) | > - FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS); > + FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) | > + FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) | > + FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt); Do I understand correctly that each time we update the CD, we also update the STE. This is not wrong but theoretically this is not needed, right? This also explains why you do not need the CMD_PREFETCH_CONFIG with ssid because you invalidate the config per sid? > } > > if (s2_cfg) { > @@ -2479,7 +2557,8 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) > master->ats_enabled = arm_smmu_ats_supported(master); > > if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) > - arm_smmu_write_ctx_desc(smmu, &smmu_domain->s1_cfg); > + arm_smmu_write_ctx_desc(smmu_domain, 0, > + &smmu_domain->s1_cfg.cd); > > arm_smmu_install_ste_for_dev(master); > > Thanks Eric
Hi Jean, On 12/17/19 4:21 PM, Jean-Philippe Brucker wrote: > Hi Eric, > > On Tue, Dec 17, 2019 at 12:05:18PM +0100, Auger Eric wrote: >>> + fwspec = dev_iommu_fwspec_get(dev); >>> + if (!err && fwspec) >>> + of_property_read_u32(master_np, "pasid-num-bits", >>> + &fwspec->num_pasid_bits); >>> + } >> This patch dedicates to platform devices however I fail to understand, >> at that stage, how/when do you retrieve/store the same max capability >> for PCI devices? > > For PCI devices, the max capability is only described by the PCIe PASID > capability, not by firmware. Patch 13 deals with PCI by setting > masted->ssid_bits from the PASID capability directly, ignoring > fwspec->num_pasid_bits. Get it now. thank you for the explanation. Eric > >> Besides, >> >> Reviewed-by: Eric Auger <eric.auger@redhat.com> > > Thanks! > Jean > > _______________________________________________ > linux-arm-kernel mailing list > linux-arm-kernel@lists.infradead.org > http://lists.infradead.org/mailman/listinfo/linux-arm-kernel >
Hi Jean, On 12/9/19 7:05 PM, Jean-Philippe Brucker wrote: > For platform devices that support SubstreamID (SSID), firmware provides > the number of supported SSID bits. Restrict it to what the SMMU supports > and cache it into master->ssid_bits, which will also be used for PCI > PASID. > > Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org> the title of the patch does not really explain what it actually does. At this stage we are far from supporting SSIDs ;-) Same for 04? Thanks Eric > --- > drivers/iommu/arm-smmu-v3.c | 13 +++++++++++++ > drivers/iommu/of_iommu.c | 6 +++++- > include/linux/iommu.h | 2 ++ > 3 files changed, 20 insertions(+), 1 deletion(-) > > diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c > index d4e8b7f8d9f4..837b4283b4dc 100644 > --- a/drivers/iommu/arm-smmu-v3.c > +++ b/drivers/iommu/arm-smmu-v3.c > @@ -292,6 +292,12 @@ > > #define CTXDESC_CD_1_TTB0_MASK GENMASK_ULL(51, 4) > > +/* > + * When the SMMU only supports linear context descriptor tables, pick a > + * reasonable size limit (64kB). > + */ > +#define CTXDESC_LINEAR_CDMAX ilog2(SZ_64K / (CTXDESC_CD_DWORDS << 3)) > + > /* Convert between AArch64 (CPU) TCR format and SMMU CD format */ > #define ARM_SMMU_TCR2CD(tcr, fld) FIELD_PREP(CTXDESC_CD_0_TCR_##fld, \ > FIELD_GET(ARM64_TCR_##fld, tcr)) > @@ -638,6 +644,7 @@ struct arm_smmu_master { > u32 *sids; > unsigned int num_sids; > bool ats_enabled; > + unsigned int ssid_bits; > }; > > /* SMMU private data for an IOMMU domain */ > @@ -2571,6 +2578,12 @@ static int arm_smmu_add_device(struct device *dev) > } > } > > + master->ssid_bits = min(smmu->ssid_bits, fwspec->num_pasid_bits); > + > + if (!(smmu->features & ARM_SMMU_FEAT_2_LVL_CDTAB)) > + master->ssid_bits = min_t(u8, master->ssid_bits, > + CTXDESC_LINEAR_CDMAX); > + > group = iommu_group_get_for_dev(dev); > if (!IS_ERR(group)) { > iommu_group_put(group); > diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c > index 026ad2b29dcd..b3ccb2f7f1c7 100644 > --- a/drivers/iommu/of_iommu.c > +++ b/drivers/iommu/of_iommu.c > @@ -196,8 +196,12 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, > if (err) > break; > } > - } > > + fwspec = dev_iommu_fwspec_get(dev); > + if (!err && fwspec) > + of_property_read_u32(master_np, "pasid-num-bits", > + &fwspec->num_pasid_bits); > + } > > /* > * Two success conditions can be represented by non-negative err here: > diff --git a/include/linux/iommu.h b/include/linux/iommu.h > index 3a113c5d7394..bd46775c3329 100644 > --- a/include/linux/iommu.h > +++ b/include/linux/iommu.h > @@ -581,6 +581,7 @@ struct iommu_group *fsl_mc_device_group(struct device *dev); > * @ops: ops for this device's IOMMU > * @iommu_fwnode: firmware handle for this device's IOMMU > * @iommu_priv: IOMMU driver private data for this device > + * @num_pasid_bits: number of PASID bits supported by this device > * @num_ids: number of associated device IDs > * @ids: IDs which this device may present to the IOMMU > */ > @@ -589,6 +590,7 @@ struct iommu_fwspec { > struct fwnode_handle *iommu_fwnode; > void *iommu_priv; > u32 flags; > + u32 num_pasid_bits; > unsigned int num_ids; > u32 ids[1]; > }; >
On Wed, Dec 18, 2019 at 11:17:40AM +0100, Auger Eric wrote: > Hi Jean, > > On 12/9/19 7:05 PM, Jean-Philippe Brucker wrote: > > For platform devices that support SubstreamID (SSID), firmware provides > > the number of supported SSID bits. Restrict it to what the SMMU supports > > and cache it into master->ssid_bits, which will also be used for PCI > > PASID. > > > > Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org> > the title of the patch does not really explain what it actually does. At > this stage we are far from supporting SSIDs ;-) Same for 04? Alright I'll make this "Parse PASID devicetree property of platform devices" and "Parse SSID property of named component node" Thanks, Jean
Hi jean, On 12/9/19 7:05 PM, Jean-Philippe Brucker wrote: > At the moment, the SMMUv3 driver implements only one stage-1 or stage-2 > page directory per device. However SMMUv3 allows more than one address > space for some devices, by providing multiple stage-1 page directories. In > addition to the Stream ID (SID), that identifies a device, we can now have > Substream IDs (SSID) identifying an address space. In PCIe, SID is called > Requester ID (RID) and SSID is called Process Address-Space ID (PASID). > A complete stage-1 walk goes through the context descriptor table: > > Stream tables Ctx. Desc. tables Page tables > +--------+ ,------->+-------+ ,------->+-------+ > : : | : : | : : > +--------+ | +-------+ | +-------+ > SID->| STE |---' SSID->| CD |---' IOVA->| PTE |--> IPA > +--------+ +-------+ +-------+ > : : : : : : > +--------+ +-------+ +-------+ > > Rewrite arm_smmu_write_ctx_desc() to modify context descriptor table > entries. To keep things simple we only implement one level of context > descriptor tables here, but as with stream and page tables, an SSID can > be split to index multiple levels of tables. > > Reviewed-by: Jonathan Cameron <Jonathan.Cameron@huawei.com> > Signed-off-by: Jean-Philippe Brucker <jean-philippe@linaro.org> Reviewed-by: Eric Auger <eric.auger@redhat.com> Thanks Eric > --- > drivers/iommu/arm-smmu-v3.c | 125 +++++++++++++++++++++++++++++------- > 1 file changed, 102 insertions(+), 23 deletions(-) > > diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c > index 43d6a7ded6e4..a01071123c34 100644 > --- a/drivers/iommu/arm-smmu-v3.c > +++ b/drivers/iommu/arm-smmu-v3.c > @@ -227,6 +227,11 @@ > #define STRTAB_STE_0_S1CTXPTR_MASK GENMASK_ULL(51, 6) > #define STRTAB_STE_0_S1CDMAX GENMASK_ULL(63, 59) > > +#define STRTAB_STE_1_S1DSS GENMASK_ULL(1, 0) > +#define STRTAB_STE_1_S1DSS_TERMINATE 0x0 > +#define STRTAB_STE_1_S1DSS_BYPASS 0x1 > +#define STRTAB_STE_1_S1DSS_SSID0 0x2 > + > #define STRTAB_STE_1_S1C_CACHE_NC 0UL > #define STRTAB_STE_1_S1C_CACHE_WBRA 1UL > #define STRTAB_STE_1_S1C_CACHE_WT 2UL > @@ -329,6 +334,7 @@ > #define CMDQ_PREFETCH_1_SIZE GENMASK_ULL(4, 0) > #define CMDQ_PREFETCH_1_ADDR_MASK GENMASK_ULL(63, 12) > > +#define CMDQ_CFGI_0_SSID GENMASK_ULL(31, 12) > #define CMDQ_CFGI_0_SID GENMASK_ULL(63, 32) > #define CMDQ_CFGI_1_LEAF (1UL << 0) > #define CMDQ_CFGI_1_RANGE GENMASK_ULL(4, 0) > @@ -446,8 +452,11 @@ struct arm_smmu_cmdq_ent { > > #define CMDQ_OP_CFGI_STE 0x3 > #define CMDQ_OP_CFGI_ALL 0x4 > + #define CMDQ_OP_CFGI_CD 0x5 > + #define CMDQ_OP_CFGI_CD_ALL 0x6 > struct { > u32 sid; > + u32 ssid; > union { > bool leaf; > u8 span; > @@ -568,6 +577,7 @@ struct arm_smmu_cd_table { > struct arm_smmu_s1_cfg { > struct arm_smmu_cd_table table; > struct arm_smmu_ctx_desc cd; > + u8 s1fmt; > u8 s1cdmax; > }; > > @@ -860,10 +870,16 @@ static int arm_smmu_cmdq_build_cmd(u64 *cmd, struct arm_smmu_cmdq_ent *ent) > cmd[1] |= FIELD_PREP(CMDQ_PREFETCH_1_SIZE, ent->prefetch.size); > cmd[1] |= ent->prefetch.addr & CMDQ_PREFETCH_1_ADDR_MASK; > break; > + case CMDQ_OP_CFGI_CD: > + cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SSID, ent->cfgi.ssid); > + /* Fallthrough */ > case CMDQ_OP_CFGI_STE: > cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid); > cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_LEAF, ent->cfgi.leaf); > break; > + case CMDQ_OP_CFGI_CD_ALL: > + cmd[0] |= FIELD_PREP(CMDQ_CFGI_0_SID, ent->cfgi.sid); > + break; > case CMDQ_OP_CFGI_ALL: > /* Cover the entire SID range */ > cmd[1] |= FIELD_PREP(CMDQ_CFGI_1_RANGE, 31); > @@ -1456,6 +1472,33 @@ static int arm_smmu_cmdq_issue_sync(struct arm_smmu_device *smmu) > } > > /* Context descriptor manipulation functions */ > +static void arm_smmu_sync_cd(struct arm_smmu_domain *smmu_domain, > + int ssid, bool leaf) > +{ > + size_t i; > + unsigned long flags; > + struct arm_smmu_master *master; > + struct arm_smmu_device *smmu = smmu_domain->smmu; > + struct arm_smmu_cmdq_ent cmd = { > + .opcode = CMDQ_OP_CFGI_CD, > + .cfgi = { > + .ssid = ssid, > + .leaf = leaf, > + }, > + }; > + > + spin_lock_irqsave(&smmu_domain->devices_lock, flags); > + list_for_each_entry(master, &smmu_domain->devices, domain_head) { > + for (i = 0; i < master->num_sids; i++) { > + cmd.cfgi.sid = master->sids[i]; > + arm_smmu_cmdq_issue_cmd(smmu, &cmd); > + } > + } > + spin_unlock_irqrestore(&smmu_domain->devices_lock, flags); > + > + arm_smmu_cmdq_issue_sync(smmu); > +} > + > static int arm_smmu_alloc_cd_leaf_table(struct arm_smmu_device *smmu, > struct arm_smmu_cd_table *table, > size_t num_entries) > @@ -1498,34 +1541,65 @@ static u64 arm_smmu_cpu_tcr_to_cd(u64 tcr) > return val; > } > > -static void arm_smmu_write_ctx_desc(struct arm_smmu_device *smmu, > - struct arm_smmu_s1_cfg *cfg) > +static int arm_smmu_write_ctx_desc(struct arm_smmu_domain *smmu_domain, > + int ssid, struct arm_smmu_ctx_desc *cd) > { > - u64 val; > - __le64 *cdptr = cfg->table.ptr; > - > /* > - * We don't need to issue any invalidation here, as we'll invalidate > - * the STE when installing the new entry anyway. > + * This function handles the following cases: > + * > + * (1) Install primary CD, for normal DMA traffic (SSID = 0). > + * (2) Install a secondary CD, for SID+SSID traffic. > + * (3) Update ASID of a CD. Atomically write the first 64 bits of the > + * CD, then invalidate the old entry and mappings. > + * (4) Remove a secondary CD. > */ > - val = arm_smmu_cpu_tcr_to_cd(cfg->cd.tcr) | > -#ifdef __BIG_ENDIAN > - CTXDESC_CD_0_ENDI | > -#endif > - CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET | > - CTXDESC_CD_0_AA64 | FIELD_PREP(CTXDESC_CD_0_ASID, cfg->cd.asid) | > - CTXDESC_CD_0_V; > + u64 val; > + bool cd_live; > + struct arm_smmu_device *smmu = smmu_domain->smmu; > + __le64 *cdptr = smmu_domain->s1_cfg.table.ptr + ssid * > + CTXDESC_CD_DWORDS; > > - /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */ > - if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE) > - val |= CTXDESC_CD_0_S; > + val = le64_to_cpu(cdptr[0]); > + cd_live = !!(val & CTXDESC_CD_0_V); > > - cdptr[0] = cpu_to_le64(val); > + if (!cd) { /* (4) */ > + val = 0; > + } else if (cd_live) { /* (3) */ > + val &= ~CTXDESC_CD_0_ASID; > + val |= FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid); > + /* > + * Until CD+TLB invalidation, both ASIDs may be used for tagging > + * this substream's traffic > + */ > + } else { /* (1) and (2) */ > + cdptr[1] = cpu_to_le64(cd->ttbr & CTXDESC_CD_1_TTB0_MASK); > + cdptr[2] = 0; > + cdptr[3] = cpu_to_le64(cd->mair); > + > + /* > + * STE is live, and the SMMU might fetch this CD at any > + * time. Ensure that it observes the rest of the CD before we > + * enable it. > + */ > + arm_smmu_sync_cd(smmu_domain, ssid, true); > > - val = cfg->cd.ttbr & CTXDESC_CD_1_TTB0_MASK; > - cdptr[1] = cpu_to_le64(val); > + val = arm_smmu_cpu_tcr_to_cd(cd->tcr) | > +#ifdef __BIG_ENDIAN > + CTXDESC_CD_0_ENDI | > +#endif > + CTXDESC_CD_0_R | CTXDESC_CD_0_A | CTXDESC_CD_0_ASET | > + CTXDESC_CD_0_AA64 | > + FIELD_PREP(CTXDESC_CD_0_ASID, cd->asid) | > + CTXDESC_CD_0_V; > > - cdptr[3] = cpu_to_le64(cfg->cd.mair); > + /* STALL_MODEL==0b10 && CD.S==0 is ILLEGAL */ > + if (smmu->features & ARM_SMMU_FEAT_STALL_FORCE) > + val |= CTXDESC_CD_0_S; > + } > + > + WRITE_ONCE(cdptr[0], cpu_to_le64(val)); > + arm_smmu_sync_cd(smmu_domain, ssid, true); > + return 0; > } > > static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain) > @@ -1533,6 +1607,7 @@ static int arm_smmu_alloc_cd_tables(struct arm_smmu_domain *smmu_domain) > struct arm_smmu_device *smmu = smmu_domain->smmu; > struct arm_smmu_s1_cfg *cfg = &smmu_domain->s1_cfg; > > + cfg->s1fmt = STRTAB_STE_0_S1FMT_LINEAR; > return arm_smmu_alloc_cd_leaf_table(smmu, &cfg->table, > 1 << cfg->s1cdmax); > } > @@ -1664,6 +1739,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, > if (s1_cfg) { > BUG_ON(ste_live); > dst[1] = cpu_to_le64( > + FIELD_PREP(STRTAB_STE_1_S1DSS, STRTAB_STE_1_S1DSS_SSID0) | > FIELD_PREP(STRTAB_STE_1_S1CIR, STRTAB_STE_1_S1C_CACHE_WBRA) | > FIELD_PREP(STRTAB_STE_1_S1COR, STRTAB_STE_1_S1C_CACHE_WBRA) | > FIELD_PREP(STRTAB_STE_1_S1CSH, ARM_SMMU_SH_ISH) | > @@ -1674,7 +1750,9 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_master *master, u32 sid, > dst[1] |= cpu_to_le64(STRTAB_STE_1_S1STALLD); > > val |= (s1_cfg->table.ptr_dma & STRTAB_STE_0_S1CTXPTR_MASK) | > - FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS); > + FIELD_PREP(STRTAB_STE_0_CFG, STRTAB_STE_0_CFG_S1_TRANS) | > + FIELD_PREP(STRTAB_STE_0_S1CDMAX, s1_cfg->s1cdmax) | > + FIELD_PREP(STRTAB_STE_0_S1FMT, s1_cfg->s1fmt); > } > > if (s2_cfg) { > @@ -2479,7 +2557,8 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) > master->ats_enabled = arm_smmu_ats_supported(master); > > if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) > - arm_smmu_write_ctx_desc(smmu, &smmu_domain->s1_cfg); > + arm_smmu_write_ctx_desc(smmu_domain, 0, > + &smmu_domain->s1_cfg.cd); > > arm_smmu_install_ste_for_dev(master); > >