Message ID | 20181128164939.8329-7-julien.grall@arm.com |
---|---|
State | Superseded |
Headers | show |
Series | xen/arm: Workaround for Cortex-A76 erratum 1165522 | expand |
On 28.11.18 18:49, Julien Grall wrote: > Early version of Cortex-A76 can end-up with corrupt TLBs if they > speculate an AT instruction while the S1/S2 system registers are in an > inconsistent state. > > This can happen during guest context switch and when invalidating the > TLBs for other than the current VMID. > > The workaround implemented in Xen will: > - Use an empty stage-2 with a reserved VMID while context switching > between 2 guests > - Use an empty stage-2 with the VMID where TLBs need to be flushed > > Signed-off-by: Julien Grall <julien.grall@arm.com> Reviewed-by: Andrii Anisov <andrii_anisov@epam.com>
On Wed, 28 Nov 2018, Julien Grall wrote: > Early version of Cortex-A76 can end-up with corrupt TLBs if they > speculate an AT instruction while the S1/S2 system registers are in an > inconsistent state. > > This can happen during guest context switch and when invalidating the > TLBs for other than the current VMID. > > The workaround implemented in Xen will: > - Use an empty stage-2 with a reserved VMID while context switching > between 2 guests > - Use an empty stage-2 with the VMID where TLBs need to be flushed > > Signed-off-by: Julien Grall <julien.grall@arm.com> Thank you for doing this and for setting up a testing environment. I have a couple of questions on a couple of changes below. > --- > docs/misc/arm/silicon-errata.txt | 1 + > xen/arch/arm/cpuerrata.c | 6 ++++ > xen/arch/arm/domain.c | 8 +++-- > xen/arch/arm/p2m.c | 78 ++++++++++++++++++++++++++++++++++++++-- > xen/include/asm-arm/cpufeature.h | 3 +- > xen/include/asm-arm/processor.h | 2 ++ > 6 files changed, 93 insertions(+), 5 deletions(-) > > diff --git a/docs/misc/arm/silicon-errata.txt b/docs/misc/arm/silicon-errata.txt > index 906bf5fd48..6cd1366f15 100644 > --- a/docs/misc/arm/silicon-errata.txt > +++ b/docs/misc/arm/silicon-errata.txt > @@ -48,4 +48,5 @@ stable hypervisors. > | ARM | Cortex-A57 | #852523 | N/A | > | ARM | Cortex-A57 | #832075 | ARM64_ERRATUM_832075 | > | ARM | Cortex-A57 | #834220 | ARM64_ERRATUM_834220 | > +| ARM | Cortex-A76 | #1165522 | N/A | > | ARM | MMU-500 | #842869 | N/A | > diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c > index adf88e7bdc..61c64b9816 100644 > --- a/xen/arch/arm/cpuerrata.c > +++ b/xen/arch/arm/cpuerrata.c > @@ -489,6 +489,12 @@ static const struct arm_cpu_capabilities arm_errata[] = { > .matches = has_ssbd_mitigation, > }, > #endif > + { > + /* Cortex-A76 r0p0 - r2p0 */ > + .desc = "ARM erratum 116522", > + .capability = ARM64_WORKAROUND_AT_SPECULATE, > + MIDR_RANGE(MIDR_CORTEX_A76, 0, 2 << MIDR_VARIANT_SHIFT), > + }, > {}, > }; > > diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c > index 1d926dcb29..3180edd89d 100644 > --- a/xen/arch/arm/domain.c > +++ b/xen/arch/arm/domain.c > @@ -181,8 +181,6 @@ static void ctxt_switch_to(struct vcpu *n) > if ( is_idle_vcpu(n) ) > return; > > - p2m_restore_state(n); > - > vpidr = READ_SYSREG32(MIDR_EL1); > WRITE_SYSREG32(vpidr, VPIDR_EL2); > WRITE_SYSREG(n->arch.vmpidr, VMPIDR_EL2); > @@ -235,6 +233,12 @@ static void ctxt_switch_to(struct vcpu *n) > #endif > isb(); > > + /* > + * ARM64_WORKAROUND_AT_SPECULATE: The P2M should be restored after > + * the stage-1 MMU sysregs have been restored. > + */ > + p2m_restore_state(n); > + > /* Control Registers */ > WRITE_SYSREG(n->arch.cpacr, CPACR_EL1); > > diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c > index 844833c4c3..0facb66096 100644 > --- a/xen/arch/arm/p2m.c > +++ b/xen/arch/arm/p2m.c > @@ -15,6 +15,7 @@ > #include <asm/event.h> > #include <asm/hardirq.h> > #include <asm/page.h> > +#include <asm/alternative.h> > > #define MAX_VMID_8_BIT (1UL << 8) > #define MAX_VMID_16_BIT (1UL << 16) > @@ -47,6 +48,8 @@ static const paddr_t level_masks[] = > static const uint8_t level_orders[] = > { ZEROETH_ORDER, FIRST_ORDER, SECOND_ORDER, THIRD_ORDER }; > > +static mfn_t __read_mostly empty_root_mfn; > + > static uint64_t generate_vttbr(uint16_t vmid, mfn_t root_mfn) > { > return (mfn_to_maddr(root_mfn) | ((uint64_t)vmid << 48)); > @@ -98,9 +101,25 @@ void dump_p2m_lookup(struct domain *d, paddr_t addr) > P2M_ROOT_LEVEL, P2M_ROOT_PAGES); > } > > +/* > + * p2m_save_state and p2m_restore_state works in pair to workaround ^ work > + * ARM64_WORKAROUND_AT_SPECULATE. p2m_save_state will set-up VTTBR to > + * point to the empty page-tables to stop allocating TLB entries. > + */ > void p2m_save_state(struct vcpu *p) > { > p->arch.sctlr = READ_SYSREG(SCTLR_EL1); > + > + if ( cpus_have_const_cap(ARM64_WORKAROUND_AT_SPECULATE) ) > + { > + WRITE_SYSREG64(generate_vttbr(INVALID_VMID, empty_root_mfn), VTTBR_EL2); > + /* > + * Ensure VTTBR_EL2 is correctly synchronized so we can restore > + * the next vCPU context without worrying about AT instruction > + * speculation. > + */ > + isb(); > + } > } OK > void p2m_restore_state(struct vcpu *n) > @@ -111,10 +130,17 @@ void p2m_restore_state(struct vcpu *n) > if ( is_idle_vcpu(n) ) > return; > > - WRITE_SYSREG64(p2m->vttbr, VTTBR_EL2); > WRITE_SYSREG(n->arch.sctlr, SCTLR_EL1); > WRITE_SYSREG(n->arch.hcr_el2, HCR_EL2); > > + /* > + * ARM64_WORKAROUND_AT_SPECULATE: VTTBR_EL2 should be restored after all > + * registers associated to EL1/EL0 translations regime have been > + * synchronized. > + */ > + asm volatile(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_AT_SPECULATE)); Obviously you have done a lot more thinking about this than me, but I don't fully understand the need for this barrier: this is not about ARM64_WORKAROUND_AT_SPECULATE per se, right? Shouldn't the CPU be able to figure out the right execution speculation path given that the instructions ordering is correct? I guess it depends on the nature of the hardware bug. > + WRITE_SYSREG64(p2m->vttbr, VTTBR_EL2); > + > last_vcpu_ran = &p2m->last_vcpu_ran[smp_processor_id()]; > > /* > @@ -157,8 +183,23 @@ static void p2m_force_tlb_flush_sync(struct p2m_domain *p2m) > ovttbr = READ_SYSREG64(VTTBR_EL2); > if ( ovttbr != p2m->vttbr ) > { > + uint64_t vttbr; > + > local_irq_save(flags); > - WRITE_SYSREG64(p2m->vttbr, VTTBR_EL2); > + > + /* > + * ARM64_WORKAROUND_AT_SPECULATE: We need to stop AT to allocate > + * TLBs entries because the context is partially modified. We > + * only need the VMID for flushing the TLBs, so we can generate > + * a new VTTBR with the VMID to flush and the empty root table. > + */ > + if ( !cpus_have_const_cap(ARM64_WORKAROUND_AT_SPECULATE) ) > + vttbr = p2m->vttbr; > + else > + vttbr = generate_vttbr(p2m->vmid, empty_root_mfn); > + > + WRITE_SYSREG64(vttbr, VTTBR_EL2); Good idea, any reasons not to use generate_vttbr(p2m->vmid, empty_root_mfn) in the general case? There should be no downsides, right? > /* Ensure VTTBR_EL2 is synchronized before flushing the TLBs */ > isb(); > } > @@ -1504,6 +1545,23 @@ static uint32_t __read_mostly vtcr; > static void setup_virt_paging_one(void *data) > { > WRITE_SYSREG32(vtcr, VTCR_EL2); > + > + /* > + * ARM64_WORKAROUND_AT_SPECULATE: We want to keep the TLBs free from > + * entries related to EL1/EL0 translation regime until a guest vCPU > + * is running. For that, we need to set-up VTTBR to point to an empty > + * page-table and turn on stage-2 translation. I don't understand why this is needed: isn't the lack of HCR_VM (due to your previous patch) supposed to be sufficient? How can there be speculation without HCR_VM? Even if speculation happens without HCR_EL2, why do we need to set it now? Isn't setting empty_root_mfn enough? > The TLB entries > + * associated with EL1/EL0 translation regime will also be flushed in case > + * an AT instruction was speculated before hand. > + */ > + if ( cpus_have_cap(ARM64_WORKAROUND_AT_SPECULATE) ) > + { > + WRITE_SYSREG64(generate_vttbr(INVALID_VMID, empty_root_mfn), VTTBR_EL2); > + WRITE_SYSREG(READ_SYSREG(HCR_EL2) | HCR_VM, HCR_EL2); > + isb(); > + > + flush_tlb_all_local(); > + } > } > > void __init setup_virt_paging(void) > @@ -1587,6 +1645,22 @@ void __init setup_virt_paging(void) > /* It is not allowed to concatenate a level zero root */ > BUG_ON( P2M_ROOT_LEVEL == 0 && P2M_ROOT_ORDER > 0 ); > vtcr = val; > + > + /* > + * ARM64_WORKAROUND_AT_SPECULATE requires to allocate root table > + * with all entries zeroed. > + */ > + if ( cpus_have_cap(ARM64_WORKAROUND_AT_SPECULATE) ) > + { > + struct page_info *root; > + > + root = p2m_allocate_root(); > + if ( !root ) > + panic("Unable to allocate root table for ARM64_WORKAROUND_AT_SPECULATE\n"); > + > + empty_root_mfn = page_to_mfn(root); > + } OK > setup_virt_paging_one(NULL); > smp_call_function(setup_virt_paging_one, NULL, 1); > } > diff --git a/xen/include/asm-arm/cpufeature.h b/xen/include/asm-arm/cpufeature.h > index 17de928467..c2c8f3417c 100644 > --- a/xen/include/asm-arm/cpufeature.h > +++ b/xen/include/asm-arm/cpufeature.h > @@ -45,8 +45,9 @@ > #define ARM_HARDEN_BRANCH_PREDICTOR 7 > #define ARM_SSBD 8 > #define ARM_SMCCC_1_1 9 > +#define ARM64_WORKAROUND_AT_SPECULATE 10 > > -#define ARM_NCAPS 10 > +#define ARM_NCAPS 11 > > #ifndef __ASSEMBLY__ > > diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h > index 72ddc42778..d03ec6e272 100644 > --- a/xen/include/asm-arm/processor.h > +++ b/xen/include/asm-arm/processor.h > @@ -52,6 +52,7 @@ > #define ARM_CPU_PART_CORTEX_A72 0xD08 > #define ARM_CPU_PART_CORTEX_A73 0xD09 > #define ARM_CPU_PART_CORTEX_A75 0xD0A > +#define ARM_CPU_PART_CORTEX_A76 0xD0B > > #define MIDR_CORTEX_A12 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A12) > #define MIDR_CORTEX_A17 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A17) > @@ -61,6 +62,7 @@ > #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) > #define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A73) > #define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75) > +#define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) > > /* MPIDR Multiprocessor Affinity Register */ > #define _MPIDR_UP (30) > -- > 2.11.0 >
(+ James) Hi Stefano, @James, please correct me if I am wrong below :). On 24/01/2019 00:52, Stefano Stabellini wrote: > On Wed, 28 Nov 2018, Julien Grall wrote: >> void p2m_restore_state(struct vcpu *n) >> @@ -111,10 +130,17 @@ void p2m_restore_state(struct vcpu *n) >> if ( is_idle_vcpu(n) ) >> return; >> >> - WRITE_SYSREG64(p2m->vttbr, VTTBR_EL2); >> WRITE_SYSREG(n->arch.sctlr, SCTLR_EL1); >> WRITE_SYSREG(n->arch.hcr_el2, HCR_EL2); >> >> + /* >> + * ARM64_WORKAROUND_AT_SPECULATE: VTTBR_EL2 should be restored after all >> + * registers associated to EL1/EL0 translations regime have been >> + * synchronized. >> + */ >> + asm volatile(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_AT_SPECULATE)); > > Obviously you have done a lot more thinking about this than me, but > I don't fully understand the need for this barrier: this is not about > ARM64_WORKAROUND_AT_SPECULATE per se, right? This particular isb() is about ARM64_WORKAROUND_AT_SPECULATE. > Shouldn't the CPU be able > to figure out the right execution speculation path given that the > instructions ordering is correct? What instructions ordering? Writing a system registers can be re-ordered and you need an isb() to ensure the full synchronization before executing at AT instruction or flushing the TLBs. In hardware without the erratum, the registers associated with EL1 translation are out-of-context and the AT cannot speculate. The isb() added by patch #5 ensure the context is synchronized so an AT afterwards would use a consistent context. Now... > I guess it depends on the nature of > the hardware bug. ... in the context of the errata, you have to imagine what can happen if an AT instruction is inserted (via speculation) between each instruction and what happen if the system registers are re-ordered. The key of the erratum is VTTBR_EL2. This is what will stop a speculated AT instruction to allocate a TLBs entry because you are not allowed to cache a translation that will fault. Without the isb() here, the VTTBR_EL2 may be synchronized before the rest of the context, so a speculated AT instruction may use an inconsistent state and allocate a TLB entry with an unexpected translation against the guest. So here, we want to ensure the rest of the context is synchronized before writing to VTTBR_EL2, hence the isb(). > > >> + WRITE_SYSREG64(p2m->vttbr, VTTBR_EL2); >> + >> last_vcpu_ran = &p2m->last_vcpu_ran[smp_processor_id()]; >> >> /* >> @@ -157,8 +183,23 @@ static void p2m_force_tlb_flush_sync(struct p2m_domain *p2m) >> ovttbr = READ_SYSREG64(VTTBR_EL2); >> if ( ovttbr != p2m->vttbr ) >> { >> + uint64_t vttbr; >> + >> local_irq_save(flags); >> - WRITE_SYSREG64(p2m->vttbr, VTTBR_EL2); >> + >> + /* >> + * ARM64_WORKAROUND_AT_SPECULATE: We need to stop AT to allocate >> + * TLBs entries because the context is partially modified. We >> + * only need the VMID for flushing the TLBs, so we can generate >> + * a new VTTBR with the VMID to flush and the empty root table. >> + */ >> + if ( !cpus_have_const_cap(ARM64_WORKAROUND_AT_SPECULATE) ) >> + vttbr = p2m->vttbr; >> + else >> + vttbr = generate_vttbr(p2m->vmid, empty_root_mfn); >> + >> + WRITE_SYSREG64(vttbr, VTTBR_EL2); > > Good idea, any reasons not to use generate_vttbr(p2m->vmid, > empty_root_mfn) in the general case? There should be no downsides, > right? An empty root means you need to have the root page-tables allocated. In the configuration we currently support, the could be either 4K or 8K. Even if this seems small, this is a downside for platforms that don't require such trick. > > >> /* Ensure VTTBR_EL2 is synchronized before flushing the TLBs */ >> isb(); >> } >> @@ -1504,6 +1545,23 @@ static uint32_t __read_mostly vtcr; >> static void setup_virt_paging_one(void *data) >> { >> WRITE_SYSREG32(vtcr, VTCR_EL2); >> + >> + /* >> + * ARM64_WORKAROUND_AT_SPECULATE: We want to keep the TLBs free from >> + * entries related to EL1/EL0 translation regime until a guest vCPU >> + * is running. For that, we need to set-up VTTBR to point to an empty >> + * page-table and turn on stage-2 translation. > > I don't understand why this is needed: isn't the lack of HCR_VM (due to > your previous patch) supposed to be sufficient? How can there be > speculation without HCR_VM? HCR_EL2.VM unsets means the stage-2 will not be used for the EL1/EL0 translation regime. In the context of the erratum, the AT can still speculate except it will not take into account the stage-2. The dependencies on VMID stills applies when HCR_EL2.VM is unset, so from my understanding, the entry could get cached to whatever is VTTBR_EL2.VMID. > > Even if speculation happens without HCR_EL2, why do we need to set it > now? Isn't setting empty_root_mfn enough? The main goal here is to have the TLBs in a known state after the CPU has been initialized. After the sequence below, we are sure that the TLBs don't contain entries associated to the EL1/EL0 regime and and a speculated AT instruction will not be able to allocate more. Without HCR_EL2.VM set, the stage-2 page-table will not get used. So a speculated AT instruction could still allocate an entry in TLB. It is not a major issue as it would be against INVALID_VMID, yet it is not a very sane situation for the hypervisor. Cheers,
On Thu, 24 Jan 2019, Julien Grall wrote: > (+ James) > > Hi Stefano, > > @James, please correct me if I am wrong below :). > > On 24/01/2019 00:52, Stefano Stabellini wrote: > > On Wed, 28 Nov 2018, Julien Grall wrote: > > > void p2m_restore_state(struct vcpu *n) > > > @@ -111,10 +130,17 @@ void p2m_restore_state(struct vcpu *n) > > > if ( is_idle_vcpu(n) ) > > > return; > > > - WRITE_SYSREG64(p2m->vttbr, VTTBR_EL2); > > > WRITE_SYSREG(n->arch.sctlr, SCTLR_EL1); > > > WRITE_SYSREG(n->arch.hcr_el2, HCR_EL2); > > > + /* > > > + * ARM64_WORKAROUND_AT_SPECULATE: VTTBR_EL2 should be restored after > > > all > > > + * registers associated to EL1/EL0 translations regime have been > > > + * synchronized. > > > + */ > > > + asm volatile(ALTERNATIVE("nop", "isb", > > > ARM64_WORKAROUND_AT_SPECULATE)); > > > > Obviously you have done a lot more thinking about this than me, but > > I don't fully understand the need for this barrier: this is not about > > ARM64_WORKAROUND_AT_SPECULATE per se, right? > > This particular isb() is about ARM64_WORKAROUND_AT_SPECULATE. > > > Shouldn't the CPU be able > > to figure out the right execution speculation path given that the > > instructions ordering is correct? > > What instructions ordering? Writing a system registers can be re-ordered and > you need an isb() to ensure the full synchronization before executing at AT > instruction or flushing the TLBs. > > In hardware without the erratum, the registers associated with EL1 translation > are out-of-context and the AT cannot speculate. The isb() added by patch #5 > ensure the context is synchronized so an AT afterwards would use a consistent > context. Now... > > > I guess it depends on the nature of > > the hardware bug. > > ... in the context of the errata, you have to imagine what can happen if an AT > instruction is inserted (via speculation) between each instruction and what > happen if the system registers are re-ordered. > > The key of the erratum is VTTBR_EL2. This is what will stop a speculated AT > instruction to allocate a TLBs entry because you are not allowed to cache a > translation that will fault. Without the isb() here, the VTTBR_EL2 may be > synchronized before the rest of the context, so a speculated AT instruction > may use an inconsistent state and allocate a TLB entry with an unexpected > translation against the guest. > > So here, we want to ensure the rest of the context is synchronized before > writing to VTTBR_EL2, hence the isb(). OK. I understand the explanation, thank you. I just thought that the CPU would be smart enough to only reorder system registers writes when appropriate, especially when the CPU is also doing speculation at the same time. Why would it speculate if it knows that it is reordering sysreg writes that can badly affect the speculation itself? Let me say that it doesn't sound like a "sane" behavior to me. But if it behaves this way, it behaves this way... > > > > > + WRITE_SYSREG64(p2m->vttbr, VTTBR_EL2); > > > + > > > last_vcpu_ran = &p2m->last_vcpu_ran[smp_processor_id()]; > > > /* > > > @@ -157,8 +183,23 @@ static void p2m_force_tlb_flush_sync(struct > > > p2m_domain *p2m) > > > ovttbr = READ_SYSREG64(VTTBR_EL2); > > > if ( ovttbr != p2m->vttbr ) > > > { > > > + uint64_t vttbr; > > > + > > > local_irq_save(flags); > > > - WRITE_SYSREG64(p2m->vttbr, VTTBR_EL2); > > > + > > > + /* > > > + * ARM64_WORKAROUND_AT_SPECULATE: We need to stop AT to allocate > > > + * TLBs entries because the context is partially modified. We > > > + * only need the VMID for flushing the TLBs, so we can generate > > > + * a new VTTBR with the VMID to flush and the empty root table. > > > + */ > > > + if ( !cpus_have_const_cap(ARM64_WORKAROUND_AT_SPECULATE) ) > > > + vttbr = p2m->vttbr; > > > + else > > > + vttbr = generate_vttbr(p2m->vmid, empty_root_mfn); > > > + > > > + WRITE_SYSREG64(vttbr, VTTBR_EL2); > > > > Good idea, any reasons not to use generate_vttbr(p2m->vmid, > > empty_root_mfn) in the general case? There should be no downsides, > > right? > An empty root means you need to have the root page-tables allocated. In the > configuration we currently support, the could be either 4K or 8K. > > Even if this seems small, this is a downside for platforms that don't require > such trick. Good point, I was fooled by empty_root_mfn being available to all as a variable. Let's not go down that route then. > > > > > /* Ensure VTTBR_EL2 is synchronized before flushing the TLBs */ > > > isb(); > > > } > > > @@ -1504,6 +1545,23 @@ static uint32_t __read_mostly vtcr; > > > static void setup_virt_paging_one(void *data) > > > { > > > WRITE_SYSREG32(vtcr, VTCR_EL2); > > > + > > > + /* > > > + * ARM64_WORKAROUND_AT_SPECULATE: We want to keep the TLBs free from > > > + * entries related to EL1/EL0 translation regime until a guest vCPU > > > + * is running. For that, we need to set-up VTTBR to point to an empty > > > + * page-table and turn on stage-2 translation. > > > > I don't understand why this is needed: isn't the lack of HCR_VM (due to > > your previous patch) supposed to be sufficient? How can there be > > speculation without HCR_VM? > > HCR_EL2.VM unsets means the stage-2 will not be used for the EL1/EL0 > translation regime. In the context of the erratum, the AT can still speculate > except it will not take into account the stage-2. The dependencies on VMID > stills applies when HCR_EL2.VM is unset, so from my understanding, the entry > could get cached to whatever is VTTBR_EL2.VMID. Damn! Even if at this point of the boot sequence there is no EL1 / EL0 at all? How can that speculation happen? Shouldn't the first EL1 / EL0 speculation occur after the first leave_hypervisor_tail? > > Even if speculation happens without HCR_EL2, why do we need to set it > > now? Isn't setting empty_root_mfn enough? > > The main goal here is to have the TLBs in a known state after the CPU has been > initialized. After the sequence below, we are sure that the TLBs don't contain > entries associated to the EL1/EL0 regime and and a speculated AT instruction > will not be able to allocate more. > > Without HCR_EL2.VM set, the stage-2 page-table will not get used. So a > speculated AT instruction could still allocate an entry in TLB. It is not a > major issue as it would be against INVALID_VMID, yet it is not a very sane > situation for the hypervisor. I have a question on the tlb flush. Do we need it because the tlb is not guaranteed to be clean after boot? Also, do we need a flush_tlb_all_local()? Would flush_tlb_local be enough, maybe executed immediately before switching VTTBR_EL2? I guess it depends on whether the speculation happens on the local VMID only. If it only speculate on the local VMID, then flush_tlb_all_local() should suffice?
Hi, On 1/25/19 9:36 PM, Stefano Stabellini wrote: > On Thu, 24 Jan 2019, Julien Grall wrote: >> @James, please correct me if I am wrong below :). >> >> On 24/01/2019 00:52, Stefano Stabellini wrote: >>> On Wed, 28 Nov 2018, Julien Grall wrote: >> ... in the context of the errata, you have to imagine what can happen if an AT >> instruction is inserted (via speculation) between each instruction and what >> happen if the system registers are re-ordered. >> >> The key of the erratum is VTTBR_EL2. This is what will stop a speculated AT >> instruction to allocate a TLBs entry because you are not allowed to cache a >> translation that will fault. Without the isb() here, the VTTBR_EL2 may be >> synchronized before the rest of the context, so a speculated AT instruction >> may use an inconsistent state and allocate a TLB entry with an unexpected >> translation against the guest. >> >> So here, we want to ensure the rest of the context is synchronized before >> writing to VTTBR_EL2, hence the isb(). > > OK. I understand the explanation, thank you. > > I just thought that the CPU would be smart enough to only reorder system > registers writes when appropriate, especially when the CPU is also doing > speculation at the same time. Why would it speculate if it knows that it > is reordering sysreg writes that can badly affect the speculation > itself? Let me say that it doesn't sound like a "sane" behavior to me. > But if it behaves this way, it behaves this way... I hope you are aware we are speaking about an erratum here... Not what the Arm Arm allows. Aside the erratum, a processor is allowed to do whatever it wants if it is within the Arm Arm. These registers are described as out-of-context and should not be used by speculation in EL2. If you want to use them in EL2, you need an isb() before any instruction in EL2 using them otherwise you may use an inconsistent context. This is giving enough freedom to the processor while the impact in the software is minimal. [...] >>> >>>> /* Ensure VTTBR_EL2 is synchronized before flushing the TLBs */ >>>> isb(); >>>> } >>>> @@ -1504,6 +1545,23 @@ static uint32_t __read_mostly vtcr; >>>> static void setup_virt_paging_one(void *data) >>>> { >>>> WRITE_SYSREG32(vtcr, VTCR_EL2); >>>> + >>>> + /* >>>> + * ARM64_WORKAROUND_AT_SPECULATE: We want to keep the TLBs free from >>>> + * entries related to EL1/EL0 translation regime until a guest vCPU >>>> + * is running. For that, we need to set-up VTTBR to point to an empty >>>> + * page-table and turn on stage-2 translation. >>> >>> I don't understand why this is needed: isn't the lack of HCR_VM (due to >>> your previous patch) supposed to be sufficient? How can there be >>> speculation without HCR_VM? >> >> HCR_EL2.VM unsets means the stage-2 will not be used for the EL1/EL0 >> translation regime. In the context of the erratum, the AT can still speculate >> except it will not take into account the stage-2. The dependencies on VMID >> stills applies when HCR_EL2.VM is unset, so from my understanding, the entry >> could get cached to whatever is VTTBR_EL2.VMID. > > Damn! Even if at this point of the boot sequence there is no EL1 / EL0 > at all? How can that speculation happen? Shouldn't the first EL1 / EL0 > speculation occur after the first leave_hypervisor_tail? How do you know EL1 was not run before hand? Imagine we did a soft reboot or kexec Xen... But the speculation in that context is may be because the processor noticed an AT instruction targeting EL1 in the stream. > > >>> Even if speculation happens without HCR_EL2, why do we need to set it >>> now? Isn't setting empty_root_mfn enough? >> >> The main goal here is to have the TLBs in a known state after the CPU has been >> initialized. After the sequence below, we are sure that the TLBs don't contain >> entries associated to the EL1/EL0 regime and and a speculated AT instruction >> will not be able to allocate more. >> >> Without HCR_EL2.VM set, the stage-2 page-table will not get used. So a >> speculated AT instruction could still allocate an entry in TLB. It is not a >> major issue as it would be against INVALID_VMID, yet it is not a very sane >> situation for the hypervisor. > > I have a question on the tlb flush. Do we need it because the tlb is > not guaranteed to be clean after boot? You don't know the state of the TLBs after boot. > > Also, do we need a flush_tlb_all_local()? Would flush_tlb_local be > enough, maybe executed immediately before switching VTTBR_EL2? I guess > it depends on whether the speculation happens on the local VMID only. Speculation can only happen using system registers. So only on the local VMID only. > If it only speculate on the local VMID, then flush_tlb_all_local() > should suffice? We have two VMIDs in play here: whatever was the value in VTTBR_EL2.VMID before the function and INVALID_VMID. We would need to flush the former and this would require empty root trick because speculation can happen as soon as flush ended. But then, you rely on Xen to only use a single VMID at boot. While this is the case today, I can't tell if it will be in the future. So the flush_tlb_local is the safest. Cheers,
On 1/27/19 9:55 AM, Julien Grall wrote: > Hi, > > On 1/25/19 9:36 PM, Stefano Stabellini wrote: >> On Thu, 24 Jan 2019, Julien Grall wrote: >>> @James, please correct me if I am wrong below :). >>> >>> On 24/01/2019 00:52, Stefano Stabellini wrote: >>>> On Wed, 28 Nov 2018, Julien Grall wrote: >>> ... in the context of the errata, you have to imagine what can happen >>> if an AT >>> instruction is inserted (via speculation) between each instruction >>> and what >>> happen if the system registers are re-ordered. >>> >>> The key of the erratum is VTTBR_EL2. This is what will stop a >>> speculated AT >>> instruction to allocate a TLBs entry because you are not allowed to >>> cache a >>> translation that will fault. Without the isb() here, the VTTBR_EL2 >>> may be >>> synchronized before the rest of the context, so a speculated AT >>> instruction >>> may use an inconsistent state and allocate a TLB entry with an >>> unexpected >>> translation against the guest. >>> >>> So here, we want to ensure the rest of the context is synchronized >>> before >>> writing to VTTBR_EL2, hence the isb(). >> >> OK. I understand the explanation, thank you. >> >> I just thought that the CPU would be smart enough to only reorder system >> registers writes when appropriate, especially when the CPU is also doing >> speculation at the same time. Why would it speculate if it knows that it >> is reordering sysreg writes that can badly affect the speculation >> itself? Let me say that it doesn't sound like a "sane" behavior to me. >> But if it behaves this way, it behaves this way... > > I hope you are aware we are speaking about an erratum here... Not what > the Arm Arm allows. > > Aside the erratum, a processor is allowed to do whatever it wants if it > is within the Arm Arm. These registers are described as out-of-context > and should not be used by speculation in EL2. If you want to use them in > EL2, you need an isb() before any instruction in EL2 using them > otherwise you may use an inconsistent context. This is giving enough > freedom to the processor while the impact in the software is minimal. > > [...] > >>>> >>>>> /* Ensure VTTBR_EL2 is synchronized before flushing the >>>>> TLBs */ >>>>> isb(); >>>>> } >>>>> @@ -1504,6 +1545,23 @@ static uint32_t __read_mostly vtcr; >>>>> static void setup_virt_paging_one(void *data) >>>>> { >>>>> WRITE_SYSREG32(vtcr, VTCR_EL2); >>>>> + >>>>> + /* >>>>> + * ARM64_WORKAROUND_AT_SPECULATE: We want to keep the TLBs >>>>> free from >>>>> + * entries related to EL1/EL0 translation regime until a guest >>>>> vCPU >>>>> + * is running. For that, we need to set-up VTTBR to point to >>>>> an empty >>>>> + * page-table and turn on stage-2 translation. >>>> >>>> I don't understand why this is needed: isn't the lack of HCR_VM (due to >>>> your previous patch) supposed to be sufficient? How can there be >>>> speculation without HCR_VM? >>> >>> HCR_EL2.VM unsets means the stage-2 will not be used for the EL1/EL0 >>> translation regime. In the context of the erratum, the AT can still >>> speculate >>> except it will not take into account the stage-2. The dependencies on >>> VMID >>> stills applies when HCR_EL2.VM is unset, so from my understanding, >>> the entry >>> could get cached to whatever is VTTBR_EL2.VMID. >> >> Damn! Even if at this point of the boot sequence there is no EL1 / EL0 >> at all? How can that speculation happen? Shouldn't the first EL1 / EL0 >> speculation occur after the first leave_hypervisor_tail? > > How do you know EL1 was not run before hand? Imagine we did a soft > reboot or kexec Xen... > > But the speculation in that context is may be because the processor > noticed an AT instruction targeting EL1 in the stream. > >> >> >>>> Even if speculation happens without HCR_EL2, why do we need to set it >>>> now? Isn't setting empty_root_mfn enough? >>> >>> The main goal here is to have the TLBs in a known state after the CPU >>> has been >>> initialized. After the sequence below, we are sure that the TLBs >>> don't contain >>> entries associated to the EL1/EL0 regime and and a speculated AT >>> instruction >>> will not be able to allocate more. >>> >>> Without HCR_EL2.VM set, the stage-2 page-table will not get used. So a >>> speculated AT instruction could still allocate an entry in TLB. It is >>> not a >>> major issue as it would be against INVALID_VMID, yet it is not a very >>> sane >>> situation for the hypervisor. >> >> I have a question on the tlb flush. Do we need it because the tlb is >> not guaranteed to be clean after boot? > > You don't know the state of the TLBs after boot. > >> >> Also, do we need a flush_tlb_all_local()? Would flush_tlb_local be >> enough, maybe executed immediately before switching VTTBR_EL2? I guess >> it depends on whether the speculation happens on the local VMID only. > > Speculation can only happen using system registers. So only on the local > VMID only. > >> If it only speculate on the local VMID, then flush_tlb_all_local() >> should suffice? > > We have two VMIDs in play here: whatever was the value in VTTBR_EL2.VMID > before the function and INVALID_VMID. We would need to flush the former > and this would require empty root trick because speculation can happen > as soon as flush ended. > > But then, you rely on Xen to only use a single VMID at boot. While this > is the case today, I can't tell if it will be in the future. > > So the flush_tlb_local is the safest. Hmmm, I meant flush_tlb_all_local here. Cheers,
On Mon, 28 Jan 2019, Julien Grall wrote: > On 1/27/19 9:55 AM, Julien Grall wrote: > > Hi, > > > > On 1/25/19 9:36 PM, Stefano Stabellini wrote: > > > On Thu, 24 Jan 2019, Julien Grall wrote: > > > > @James, please correct me if I am wrong below :). > > > > > > > > On 24/01/2019 00:52, Stefano Stabellini wrote: > > > > > On Wed, 28 Nov 2018, Julien Grall wrote: > > > > ... in the context of the errata, you have to imagine what can happen if > > > > an AT > > > > instruction is inserted (via speculation) between each instruction and > > > > what > > > > happen if the system registers are re-ordered. > > > > > > > > The key of the erratum is VTTBR_EL2. This is what will stop a speculated > > > > AT > > > > instruction to allocate a TLBs entry because you are not allowed to > > > > cache a > > > > translation that will fault. Without the isb() here, the VTTBR_EL2 may > > > > be > > > > synchronized before the rest of the context, so a speculated AT > > > > instruction > > > > may use an inconsistent state and allocate a TLB entry with an > > > > unexpected > > > > translation against the guest. > > > > > > > > So here, we want to ensure the rest of the context is synchronized > > > > before > > > > writing to VTTBR_EL2, hence the isb(). > > > > > > OK. I understand the explanation, thank you. > > > > > > I just thought that the CPU would be smart enough to only reorder system > > > registers writes when appropriate, especially when the CPU is also doing > > > speculation at the same time. Why would it speculate if it knows that it > > > is reordering sysreg writes that can badly affect the speculation > > > itself? Let me say that it doesn't sound like a "sane" behavior to me. > > > But if it behaves this way, it behaves this way... > > > > I hope you are aware we are speaking about an erratum here... Not what the > > Arm Arm allows. I know -- we are talking about a specific CPU implementation. That is why it seems strange to me that a CPU would reorder things that it should know they cause trouble to speculation. Anyway, no point in discussing hardware design choices at this stage. > > Aside the erratum, a processor is allowed to do whatever it wants if it is > > within the Arm Arm. These registers are described as out-of-context and > > should not be used by speculation in EL2. If you want to use them in EL2, > > you need an isb() before any instruction in EL2 using them otherwise you may > > use an inconsistent context. This is giving enough freedom to the processor > > while the impact in the software is minimal. > > > > [...] > > > > > > > > > > > > > /* Ensure VTTBR_EL2 is synchronized before flushing the > > > > > > TLBs */ > > > > > > isb(); > > > > > > } > > > > > > @@ -1504,6 +1545,23 @@ static uint32_t __read_mostly vtcr; > > > > > > static void setup_virt_paging_one(void *data) > > > > > > { > > > > > > WRITE_SYSREG32(vtcr, VTCR_EL2); > > > > > > + > > > > > > + /* > > > > > > + * ARM64_WORKAROUND_AT_SPECULATE: We want to keep the TLBs free > > > > > > from > > > > > > + * entries related to EL1/EL0 translation regime until a guest > > > > > > vCPU > > > > > > + * is running. For that, we need to set-up VTTBR to point to an > > > > > > empty > > > > > > + * page-table and turn on stage-2 translation. > > > > > > > > > > I don't understand why this is needed: isn't the lack of HCR_VM (due > > > > > to > > > > > your previous patch) supposed to be sufficient? How can there be > > > > > speculation without HCR_VM? > > > > > > > > HCR_EL2.VM unsets means the stage-2 will not be used for the EL1/EL0 > > > > translation regime. In the context of the erratum, the AT can still > > > > speculate > > > > except it will not take into account the stage-2. The dependencies on > > > > VMID > > > > stills applies when HCR_EL2.VM is unset, so from my understanding, the > > > > entry > > > > could get cached to whatever is VTTBR_EL2.VMID. > > > > > > Damn! Even if at this point of the boot sequence there is no EL1 / EL0 > > > at all? How can that speculation happen? Shouldn't the first EL1 / EL0 > > > speculation occur after the first leave_hypervisor_tail? > > > > How do you know EL1 was not run before hand? Imagine we did a soft reboot or > > kexec Xen... > > > > But the speculation in that context is may be because the processor noticed > > an AT instruction targeting EL1 in the stream. This seems to be extremely improbable, borderline impossible to me, but I can imagine that we might want to be extra-paranoid to make sure all potential issues are covered. > > > > > Even if speculation happens without HCR_EL2, why do we need to set it > > > > > now? Isn't setting empty_root_mfn enough? > > > > > > > > The main goal here is to have the TLBs in a known state after the CPU > > > > has been > > > > initialized. After the sequence below, we are sure that the TLBs don't > > > > contain > > > > entries associated to the EL1/EL0 regime and and a speculated AT > > > > instruction > > > > will not be able to allocate more. > > > > > > > > Without HCR_EL2.VM set, the stage-2 page-table will not get used. So a > > > > speculated AT instruction could still allocate an entry in TLB. It is > > > > not a > > > > major issue as it would be against INVALID_VMID, yet it is not a very > > > > sane > > > > situation for the hypervisor. > > > > > > I have a question on the tlb flush. Do we need it because the tlb is > > > not guaranteed to be clean after boot? > > > > You don't know the state of the TLBs after boot. > > > > > > > > Also, do we need a flush_tlb_all_local()? Would flush_tlb_local be > > > enough, maybe executed immediately before switching VTTBR_EL2? I guess > > > it depends on whether the speculation happens on the local VMID only. > > > > Speculation can only happen using system registers. So only on the local > > VMID only. > > > > > If it only speculate on the local VMID, then flush_tlb_all_local() > > > should suffice? > > > > We have two VMIDs in play here: whatever was the value in VTTBR_EL2.VMID > > before the function and INVALID_VMID. We would need to flush the former and > > this would require empty root trick because speculation can happen as soon > > as flush ended. > > > > But then, you rely on Xen to only use a single VMID at boot. While this is > > the case today, I can't tell if it will be in the future. > > > > So the flush_tlb_local is the safest. > > Hmmm, I meant flush_tlb_all_local here. OK. Overall, I think we could probably get away without a couple of changes from this patch, but since it is basically impossible for me to prove it, I'll give my Reviewed-by. I saw that you resent the series already. I'll take care of committing it. Reviewed-by: Stefano Stabellini <sstabellini@kernel.org>
Hi, On 29/01/2019 00:52, Stefano Stabellini wrote: > On Mon, 28 Jan 2019, Julien Grall wrote: >> On 1/27/19 9:55 AM, Julien Grall wrote: >>> Hi, >>> >>> On 1/25/19 9:36 PM, Stefano Stabellini wrote: >>>> On Thu, 24 Jan 2019, Julien Grall wrote: >>>>> @James, please correct me if I am wrong below :). >>>>> >>>>> On 24/01/2019 00:52, Stefano Stabellini wrote: >>>>>> On Wed, 28 Nov 2018, Julien Grall wrote: >>>>> ... in the context of the errata, you have to imagine what can happen if >>>>> an AT >>>>> instruction is inserted (via speculation) between each instruction and >>>>> what >>>>> happen if the system registers are re-ordered. >>>>> >>>>> The key of the erratum is VTTBR_EL2. This is what will stop a speculated >>>>> AT >>>>> instruction to allocate a TLBs entry because you are not allowed to >>>>> cache a >>>>> translation that will fault. Without the isb() here, the VTTBR_EL2 may >>>>> be >>>>> synchronized before the rest of the context, so a speculated AT >>>>> instruction >>>>> may use an inconsistent state and allocate a TLB entry with an >>>>> unexpected >>>>> translation against the guest. >>>>> >>>>> So here, we want to ensure the rest of the context is synchronized >>>>> before >>>>> writing to VTTBR_EL2, hence the isb(). >>>> >>>> OK. I understand the explanation, thank you. >>>> >>>> I just thought that the CPU would be smart enough to only reorder system >>>> registers writes when appropriate, especially when the CPU is also doing >>>> speculation at the same time. Why would it speculate if it knows that it >>>> is reordering sysreg writes that can badly affect the speculation >>>> itself? Let me say that it doesn't sound like a "sane" behavior to me. >>>> But if it behaves this way, it behaves this way... >>> >>> I hope you are aware we are speaking about an erratum here... Not what the >>> Arm Arm allows. > > I know -- we are talking about a specific CPU implementation. That is > why it seems strange to me that a CPU would reorder things that it > should know they cause trouble to speculation. Anyway, no point in > discussing hardware design choices at this stage. I agree that speculation may not happen as I described in my previous e-mail. However, we have to assume that any behavior allowed by the Arm Arm can happen unless stated otherwise by the specific processor documentation. This series is based on the Arm Arm and the erratum description provides in the Software Developer Errata Notice for the Cortex-A76 [1], both are available publicly. Regarding re-ordering, the wording in the document does not provide any strong evidence the writes to system register cannot be re-ordered. The section "conditions" actually suggests the invert (i.e re-ordering is possible). [...] >>>>>> >>>>>>> /* Ensure VTTBR_EL2 is synchronized before flushing the >>>>>>> TLBs */ >>>>>>> isb(); >>>>>>> } >>>>>>> @@ -1504,6 +1545,23 @@ static uint32_t __read_mostly vtcr; >>>>>>> static void setup_virt_paging_one(void *data) >>>>>>> { >>>>>>> WRITE_SYSREG32(vtcr, VTCR_EL2); >>>>>>> + >>>>>>> + /* >>>>>>> + * ARM64_WORKAROUND_AT_SPECULATE: We want to keep the TLBs free >>>>>>> from >>>>>>> + * entries related to EL1/EL0 translation regime until a guest >>>>>>> vCPU >>>>>>> + * is running. For that, we need to set-up VTTBR to point to an >>>>>>> empty >>>>>>> + * page-table and turn on stage-2 translation. >>>>>> >>>>>> I don't understand why this is needed: isn't the lack of HCR_VM (due >>>>>> to >>>>>> your previous patch) supposed to be sufficient? How can there be >>>>>> speculation without HCR_VM? >>>>> >>>>> HCR_EL2.VM unsets means the stage-2 will not be used for the EL1/EL0 >>>>> translation regime. In the context of the erratum, the AT can still >>>>> speculate >>>>> except it will not take into account the stage-2. The dependencies on >>>>> VMID >>>>> stills applies when HCR_EL2.VM is unset, so from my understanding, the >>>>> entry >>>>> could get cached to whatever is VTTBR_EL2.VMID. >>>> >>>> Damn! Even if at this point of the boot sequence there is no EL1 / EL0 >>>> at all? How can that speculation happen? Shouldn't the first EL1 / EL0 >>>> speculation occur after the first leave_hypervisor_tail? >>> >>> How do you know EL1 was not run before hand? Imagine we did a soft reboot or >>> kexec Xen... >>> >>> But the speculation in that context is may be because the processor noticed >>> an AT instruction targeting EL1 in the stream. > > This seems to be extremely improbable, borderline impossible to me, but > I can imagine that we might want to be extra-paranoid to make sure all > potential issues are covered. The "Workaround" section of the erratum contains the following wording: "Note that a workaround is only required if the system software contains an AT instruction as part of an executable page." Xen contains AT instruction in an executable page, so speculation can happen and we don't know when. [...] > Overall, I think we could probably get away without a couple of changes > from this patch, but since it is basically impossible for me to prove > it, I'll give my Reviewed-by. I saw that you resent the series already. > I'll take care of committing it. > > Reviewed-by: Stefano Stabellini <sstabellini@kernel.org> Thank you for the review! Cheers, [1] https://silver.arm.com/download/Documentation/BX500-DA-10008-r0p0-02rel0/Arm_Cortex_A76_MP052_Software_Developer_Errata_Notice_v11.0.pdf
diff --git a/docs/misc/arm/silicon-errata.txt b/docs/misc/arm/silicon-errata.txt index 906bf5fd48..6cd1366f15 100644 --- a/docs/misc/arm/silicon-errata.txt +++ b/docs/misc/arm/silicon-errata.txt @@ -48,4 +48,5 @@ stable hypervisors. | ARM | Cortex-A57 | #852523 | N/A | | ARM | Cortex-A57 | #832075 | ARM64_ERRATUM_832075 | | ARM | Cortex-A57 | #834220 | ARM64_ERRATUM_834220 | +| ARM | Cortex-A76 | #1165522 | N/A | | ARM | MMU-500 | #842869 | N/A | diff --git a/xen/arch/arm/cpuerrata.c b/xen/arch/arm/cpuerrata.c index adf88e7bdc..61c64b9816 100644 --- a/xen/arch/arm/cpuerrata.c +++ b/xen/arch/arm/cpuerrata.c @@ -489,6 +489,12 @@ static const struct arm_cpu_capabilities arm_errata[] = { .matches = has_ssbd_mitigation, }, #endif + { + /* Cortex-A76 r0p0 - r2p0 */ + .desc = "ARM erratum 116522", + .capability = ARM64_WORKAROUND_AT_SPECULATE, + MIDR_RANGE(MIDR_CORTEX_A76, 0, 2 << MIDR_VARIANT_SHIFT), + }, {}, }; diff --git a/xen/arch/arm/domain.c b/xen/arch/arm/domain.c index 1d926dcb29..3180edd89d 100644 --- a/xen/arch/arm/domain.c +++ b/xen/arch/arm/domain.c @@ -181,8 +181,6 @@ static void ctxt_switch_to(struct vcpu *n) if ( is_idle_vcpu(n) ) return; - p2m_restore_state(n); - vpidr = READ_SYSREG32(MIDR_EL1); WRITE_SYSREG32(vpidr, VPIDR_EL2); WRITE_SYSREG(n->arch.vmpidr, VMPIDR_EL2); @@ -235,6 +233,12 @@ static void ctxt_switch_to(struct vcpu *n) #endif isb(); + /* + * ARM64_WORKAROUND_AT_SPECULATE: The P2M should be restored after + * the stage-1 MMU sysregs have been restored. + */ + p2m_restore_state(n); + /* Control Registers */ WRITE_SYSREG(n->arch.cpacr, CPACR_EL1); diff --git a/xen/arch/arm/p2m.c b/xen/arch/arm/p2m.c index 844833c4c3..0facb66096 100644 --- a/xen/arch/arm/p2m.c +++ b/xen/arch/arm/p2m.c @@ -15,6 +15,7 @@ #include <asm/event.h> #include <asm/hardirq.h> #include <asm/page.h> +#include <asm/alternative.h> #define MAX_VMID_8_BIT (1UL << 8) #define MAX_VMID_16_BIT (1UL << 16) @@ -47,6 +48,8 @@ static const paddr_t level_masks[] = static const uint8_t level_orders[] = { ZEROETH_ORDER, FIRST_ORDER, SECOND_ORDER, THIRD_ORDER }; +static mfn_t __read_mostly empty_root_mfn; + static uint64_t generate_vttbr(uint16_t vmid, mfn_t root_mfn) { return (mfn_to_maddr(root_mfn) | ((uint64_t)vmid << 48)); @@ -98,9 +101,25 @@ void dump_p2m_lookup(struct domain *d, paddr_t addr) P2M_ROOT_LEVEL, P2M_ROOT_PAGES); } +/* + * p2m_save_state and p2m_restore_state works in pair to workaround + * ARM64_WORKAROUND_AT_SPECULATE. p2m_save_state will set-up VTTBR to + * point to the empty page-tables to stop allocating TLB entries. + */ void p2m_save_state(struct vcpu *p) { p->arch.sctlr = READ_SYSREG(SCTLR_EL1); + + if ( cpus_have_const_cap(ARM64_WORKAROUND_AT_SPECULATE) ) + { + WRITE_SYSREG64(generate_vttbr(INVALID_VMID, empty_root_mfn), VTTBR_EL2); + /* + * Ensure VTTBR_EL2 is correctly synchronized so we can restore + * the next vCPU context without worrying about AT instruction + * speculation. + */ + isb(); + } } void p2m_restore_state(struct vcpu *n) @@ -111,10 +130,17 @@ void p2m_restore_state(struct vcpu *n) if ( is_idle_vcpu(n) ) return; - WRITE_SYSREG64(p2m->vttbr, VTTBR_EL2); WRITE_SYSREG(n->arch.sctlr, SCTLR_EL1); WRITE_SYSREG(n->arch.hcr_el2, HCR_EL2); + /* + * ARM64_WORKAROUND_AT_SPECULATE: VTTBR_EL2 should be restored after all + * registers associated to EL1/EL0 translations regime have been + * synchronized. + */ + asm volatile(ALTERNATIVE("nop", "isb", ARM64_WORKAROUND_AT_SPECULATE)); + WRITE_SYSREG64(p2m->vttbr, VTTBR_EL2); + last_vcpu_ran = &p2m->last_vcpu_ran[smp_processor_id()]; /* @@ -157,8 +183,23 @@ static void p2m_force_tlb_flush_sync(struct p2m_domain *p2m) ovttbr = READ_SYSREG64(VTTBR_EL2); if ( ovttbr != p2m->vttbr ) { + uint64_t vttbr; + local_irq_save(flags); - WRITE_SYSREG64(p2m->vttbr, VTTBR_EL2); + + /* + * ARM64_WORKAROUND_AT_SPECULATE: We need to stop AT to allocate + * TLBs entries because the context is partially modified. We + * only need the VMID for flushing the TLBs, so we can generate + * a new VTTBR with the VMID to flush and the empty root table. + */ + if ( !cpus_have_const_cap(ARM64_WORKAROUND_AT_SPECULATE) ) + vttbr = p2m->vttbr; + else + vttbr = generate_vttbr(p2m->vmid, empty_root_mfn); + + WRITE_SYSREG64(vttbr, VTTBR_EL2); + /* Ensure VTTBR_EL2 is synchronized before flushing the TLBs */ isb(); } @@ -1504,6 +1545,23 @@ static uint32_t __read_mostly vtcr; static void setup_virt_paging_one(void *data) { WRITE_SYSREG32(vtcr, VTCR_EL2); + + /* + * ARM64_WORKAROUND_AT_SPECULATE: We want to keep the TLBs free from + * entries related to EL1/EL0 translation regime until a guest vCPU + * is running. For that, we need to set-up VTTBR to point to an empty + * page-table and turn on stage-2 translation. The TLB entries + * associated with EL1/EL0 translation regime will also be flushed in case + * an AT instruction was speculated before hand. + */ + if ( cpus_have_cap(ARM64_WORKAROUND_AT_SPECULATE) ) + { + WRITE_SYSREG64(generate_vttbr(INVALID_VMID, empty_root_mfn), VTTBR_EL2); + WRITE_SYSREG(READ_SYSREG(HCR_EL2) | HCR_VM, HCR_EL2); + isb(); + + flush_tlb_all_local(); + } } void __init setup_virt_paging(void) @@ -1587,6 +1645,22 @@ void __init setup_virt_paging(void) /* It is not allowed to concatenate a level zero root */ BUG_ON( P2M_ROOT_LEVEL == 0 && P2M_ROOT_ORDER > 0 ); vtcr = val; + + /* + * ARM64_WORKAROUND_AT_SPECULATE requires to allocate root table + * with all entries zeroed. + */ + if ( cpus_have_cap(ARM64_WORKAROUND_AT_SPECULATE) ) + { + struct page_info *root; + + root = p2m_allocate_root(); + if ( !root ) + panic("Unable to allocate root table for ARM64_WORKAROUND_AT_SPECULATE\n"); + + empty_root_mfn = page_to_mfn(root); + } + setup_virt_paging_one(NULL); smp_call_function(setup_virt_paging_one, NULL, 1); } diff --git a/xen/include/asm-arm/cpufeature.h b/xen/include/asm-arm/cpufeature.h index 17de928467..c2c8f3417c 100644 --- a/xen/include/asm-arm/cpufeature.h +++ b/xen/include/asm-arm/cpufeature.h @@ -45,8 +45,9 @@ #define ARM_HARDEN_BRANCH_PREDICTOR 7 #define ARM_SSBD 8 #define ARM_SMCCC_1_1 9 +#define ARM64_WORKAROUND_AT_SPECULATE 10 -#define ARM_NCAPS 10 +#define ARM_NCAPS 11 #ifndef __ASSEMBLY__ diff --git a/xen/include/asm-arm/processor.h b/xen/include/asm-arm/processor.h index 72ddc42778..d03ec6e272 100644 --- a/xen/include/asm-arm/processor.h +++ b/xen/include/asm-arm/processor.h @@ -52,6 +52,7 @@ #define ARM_CPU_PART_CORTEX_A72 0xD08 #define ARM_CPU_PART_CORTEX_A73 0xD09 #define ARM_CPU_PART_CORTEX_A75 0xD0A +#define ARM_CPU_PART_CORTEX_A76 0xD0B #define MIDR_CORTEX_A12 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A12) #define MIDR_CORTEX_A17 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A17) @@ -61,6 +62,7 @@ #define MIDR_CORTEX_A72 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) #define MIDR_CORTEX_A73 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A73) #define MIDR_CORTEX_A75 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A75) +#define MIDR_CORTEX_A76 MIDR_CPU_MODEL(ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A76) /* MPIDR Multiprocessor Affinity Register */ #define _MPIDR_UP (30)
Early version of Cortex-A76 can end-up with corrupt TLBs if they speculate an AT instruction while the S1/S2 system registers are in an inconsistent state. This can happen during guest context switch and when invalidating the TLBs for other than the current VMID. The workaround implemented in Xen will: - Use an empty stage-2 with a reserved VMID while context switching between 2 guests - Use an empty stage-2 with the VMID where TLBs need to be flushed Signed-off-by: Julien Grall <julien.grall@arm.com> --- docs/misc/arm/silicon-errata.txt | 1 + xen/arch/arm/cpuerrata.c | 6 ++++ xen/arch/arm/domain.c | 8 +++-- xen/arch/arm/p2m.c | 78 ++++++++++++++++++++++++++++++++++++++-- xen/include/asm-arm/cpufeature.h | 3 +- xen/include/asm-arm/processor.h | 2 ++ 6 files changed, 93 insertions(+), 5 deletions(-)