Message ID | e9f542b9f96a3de5bb7983245fa94f293ef96c9f.1738618801.git.ashish.kalra@amd.com |
---|---|
State | New |
Headers | show |
Series | Fix broken SNP support with KVM module built-in | expand |
Hi Ashish, [Sorry. I didn't see this series and responded to v2]. On 2/4/2025 3:26 AM, Ashish Kalra wrote: > From: Ashish Kalra <ashish.kalra@amd.com> > > Fix issues with enabling SNP host support and effectively SNP support > which is broken with respect to the KVM module being built-in. > > SNP host support is enabled in snp_rmptable_init() which is invoked as > device_initcall(). SNP check on IOMMU is done during IOMMU PCI init > (IOMMU_PCI_INIT stage). And for that reason snp_rmptable_init() is > currently invoked via device_initcall() and cannot be invoked via > subsys_initcall() as core IOMMU subsystem gets initialized via > subsys_initcall(). > > Now, if kvm_amd module is built-in, it gets initialized before SNP host > support is enabled in snp_rmptable_init() : > > [ 10.131811] kvm_amd: TSC scaling supported > [ 10.136384] kvm_amd: Nested Virtualization enabled > [ 10.141734] kvm_amd: Nested Paging enabled > [ 10.146304] kvm_amd: LBR virtualization supported > [ 10.151557] kvm_amd: SEV enabled (ASIDs 100 - 509) > [ 10.156905] kvm_amd: SEV-ES enabled (ASIDs 1 - 99) > [ 10.162256] kvm_amd: SEV-SNP enabled (ASIDs 1 - 99) > [ 10.171508] kvm_amd: Virtual VMLOAD VMSAVE supported > [ 10.177052] kvm_amd: Virtual GIF supported > ... > ... .../... > diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c > index c5cd92edada0..4bcb474e2252 100644 > --- a/drivers/iommu/amd/init.c > +++ b/drivers/iommu/amd/init.c > @@ -3194,7 +3194,7 @@ static bool __init detect_ivrs(void) > return true; > } > > -static void iommu_snp_enable(void) > +static __init void iommu_snp_enable(void) > { > #ifdef CONFIG_KVM_AMD_SEV > if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) > @@ -3219,6 +3219,14 @@ static void iommu_snp_enable(void) > goto disable_snp; > } > > + /* > + * Enable host SNP support once SNP support is checked on IOMMU. > + */ > + if (snp_rmptable_init()) { > + pr_warn("SNP: RMP initialization failed, SNP cannot be supported.\n"); > + goto disable_snp; > + } > + > pr_info("IOMMU SNP support enabled.\n"); > return; > > @@ -3318,6 +3326,9 @@ static int __init iommu_go_to_state(enum iommu_init_state state) > ret = state_next(); > } > > + if (ret && !amd_iommu_snp_en && cc_platform_has(CC_ATTR_HOST_SEV_SNP)) I think we should clear when `amd_iommu_snp_en` is true. May be below check is enough? if (ret && amd_iommu_snp_en) -Vasant
On Wed, Feb 05, 2025, Vasant Hegde wrote: > Hi Ashish, > > [Sorry. I didn't see this series and responded to v2]. Heh, and then I saw your other email first and did the same. Copying my response here, too (and fixing a few typos in the process). > > diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c > > index c5cd92edada0..4bcb474e2252 100644 > > --- a/drivers/iommu/amd/init.c > > +++ b/drivers/iommu/amd/init.c > > @@ -3194,7 +3194,7 @@ static bool __init detect_ivrs(void) > > return true; > > } > > > > -static void iommu_snp_enable(void) > > +static __init void iommu_snp_enable(void) > > { > > #ifdef CONFIG_KVM_AMD_SEV > > if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) > > @@ -3219,6 +3219,14 @@ static void iommu_snp_enable(void) > > goto disable_snp; > > } > > > > + /* > > + * Enable host SNP support once SNP support is checked on IOMMU. > > + */ > > + if (snp_rmptable_init()) { > > + pr_warn("SNP: RMP initialization failed, SNP cannot be supported.\n"); > > + goto disable_snp; > > + } > > + > > pr_info("IOMMU SNP support enabled.\n"); > > return; > > > > @@ -3318,6 +3326,9 @@ static int __init iommu_go_to_state(enum iommu_init_state state) > > ret = state_next(); > > } > > > > + if (ret && !amd_iommu_snp_en && cc_platform_has(CC_ATTR_HOST_SEV_SNP)) > > > I think we should clear when `amd_iommu_snp_en` is true. That doesn't address the case where amd_iommu_prepare() fails, because amd_iommu_snp_en will be %false (its init value) and the RMP will be uninitialized, i.e. CC_ATTR_HOST_SEV_SNP will be incorrectly left set. And conversely, IMO clearing CC_ATTR_HOST_SEV_SNP after initializing the IOMMU and RMP is wrong as well. Such a host is probably hosed regardless, but from the CPU's perspective, SNP is supported and enabled. > May be below check is enough? > > if (ret && amd_iommu_snp_en) > > > -Vasant > >
Hi Sean, On 2/5/2025 8:47 PM, Sean Christopherson wrote: > On Wed, Feb 05, 2025, Vasant Hegde wrote: >> Hi Ashish, >> >> [Sorry. I didn't see this series and responded to v2]. > > Heh, and then I saw your other email first and did the same. Copying my response > here, too (and fixing a few typos in the process). > >>> diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c >>> index c5cd92edada0..4bcb474e2252 100644 >>> --- a/drivers/iommu/amd/init.c >>> +++ b/drivers/iommu/amd/init.c >>> @@ -3194,7 +3194,7 @@ static bool __init detect_ivrs(void) >>> return true; >>> } >>> >>> -static void iommu_snp_enable(void) >>> +static __init void iommu_snp_enable(void) >>> { >>> #ifdef CONFIG_KVM_AMD_SEV >>> if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) >>> @@ -3219,6 +3219,14 @@ static void iommu_snp_enable(void) >>> goto disable_snp; >>> } >>> >>> + /* >>> + * Enable host SNP support once SNP support is checked on IOMMU. >>> + */ >>> + if (snp_rmptable_init()) { >>> + pr_warn("SNP: RMP initialization failed, SNP cannot be supported.\n"); >>> + goto disable_snp; >>> + } >>> + >>> pr_info("IOMMU SNP support enabled.\n"); >>> return; >>> >>> @@ -3318,6 +3326,9 @@ static int __init iommu_go_to_state(enum iommu_init_state state) >>> ret = state_next(); >>> } >>> >>> + if (ret && !amd_iommu_snp_en && cc_platform_has(CC_ATTR_HOST_SEV_SNP)) >> >> >> I think we should clear when `amd_iommu_snp_en` is true. > > That doesn't address the case where amd_iommu_prepare() fails, because amd_iommu_snp_en > will be %false (its init value) and the RMP will be uninitialized, i.e. > CC_ATTR_HOST_SEV_SNP will be incorrectly left set. You are right. I missed early failure scenarios :-( > > And conversely, IMO clearing CC_ATTR_HOST_SEV_SNP after initializing the IOMMU > and RMP is wrong as well. Such a host is probably hosed regardless, but from > the CPU's perspective, SNP is supported and enabled. So we don't want to clear CC_ATTR_HOST_SEV_SNP after RMP initialization -OR- clear for all failures? -Vasant
On Wed, Feb 05, 2025, Vasant Hegde wrote: > On 2/5/2025 8:47 PM, Sean Christopherson wrote: > > On Wed, Feb 05, 2025, Vasant Hegde wrote: > >>> @@ -3318,6 +3326,9 @@ static int __init iommu_go_to_state(enum iommu_init_state state) > >>> ret = state_next(); > >>> } > >>> > >>> + if (ret && !amd_iommu_snp_en && cc_platform_has(CC_ATTR_HOST_SEV_SNP)) > >> > >> > >> I think we should clear when `amd_iommu_snp_en` is true. > > > > That doesn't address the case where amd_iommu_prepare() fails, because amd_iommu_snp_en > > will be %false (its init value) and the RMP will be uninitialized, i.e. > > CC_ATTR_HOST_SEV_SNP will be incorrectly left set. > > You are right. I missed early failure scenarios :-( > > > > > And conversely, IMO clearing CC_ATTR_HOST_SEV_SNP after initializing the IOMMU > > and RMP is wrong as well. Such a host is probably hosed regardless, but from > > the CPU's perspective, SNP is supported and enabled. > > So we don't want to clear CC_ATTR_HOST_SEV_SNP after RMP initialization -OR- > clear for all failures? I honestly don't know, because the answer largely depends on what happens with hardware. I asked in an earlier version of this series if IOMMU initialization failure after the RMP is configured is even survivable. For this series, I think it makes sense to match the existing behavior, unless someone from AMD can definitively state that we should do something different. And the existing behavior is that amd_iommu_snp_en and CC_ATTR_HOST_SEV_SNP will be left set if the IOMMU completes iommu_snp_enable(), and the kernel completes RMP setup.
On 2/5/2025 1:31 PM, Sean Christopherson wrote: > On Wed, Feb 05, 2025, Vasant Hegde wrote: >> On 2/5/2025 8:47 PM, Sean Christopherson wrote: >>> On Wed, Feb 05, 2025, Vasant Hegde wrote: >>>>> @@ -3318,6 +3326,9 @@ static int __init iommu_go_to_state(enum iommu_init_state state) >>>>> ret = state_next(); >>>>> } >>>>> >>>>> + if (ret && !amd_iommu_snp_en && cc_platform_has(CC_ATTR_HOST_SEV_SNP)) >>>> >>>> >>>> I think we should clear when `amd_iommu_snp_en` is true. >>> >>> That doesn't address the case where amd_iommu_prepare() fails, because amd_iommu_snp_en >>> will be %false (its init value) and the RMP will be uninitialized, i.e. >>> CC_ATTR_HOST_SEV_SNP will be incorrectly left set. >> >> You are right. I missed early failure scenarios :-( >> >>> >>> And conversely, IMO clearing CC_ATTR_HOST_SEV_SNP after initializing the IOMMU >>> and RMP is wrong as well. Such a host is probably hosed regardless, but from >>> the CPU's perspective, SNP is supported and enabled. >> >> So we don't want to clear CC_ATTR_HOST_SEV_SNP after RMP initialization -OR- >> clear for all failures? > > I honestly don't know, because the answer largely depends on what happens with > hardware. I asked in an earlier version of this series if IOMMU initialization > failure after the RMP is configured is even survivable. > As i mentioned earlier and as part of this series and summarizing this again here: - snp_rmptable_init() enables SNP support system-wide and that means the HW starts doing RMP checks for memory accesses, but as RMP table is zeroed out initially, all memory is configured to be host/HV owned. It is only after SNP_INIT(_EX) that RMP table is configured and initialized with HV_Fixed, firmware pages and stuff like IOMMU RMP enforcement is enabled. If the IOMMU initialization fails after IOMMU support on SNP check is completed and host SNP is enabled, then SNP_INIT(_EX) will fail as IOMMUs need to be enabled for SNP_INIT to succeed. > For this series, I think it makes sense to match the existing behavior, unless > someone from AMD can definitively state that we should do something different. > And the existing behavior is that amd_iommu_snp_en and CC_ATTR_HOST_SEV_SNP will > be left set if the IOMMU completes iommu_snp_enable(), and the kernel completes > RMP setup. Yes, that is true and this behavior is still consistent with this series. Again to reiterate, if iommu_snp_enable() and host SNP enablement is successful, any late IOMMU initialization failures should cause SNP_INIT to fail and that means IOMMU RMP enforcement will never get enabled and RMP table will remain configured for all memory marked as HV/host owned. Thanks, Ashish
Sean, On 2/6/2025 1:01 AM, Sean Christopherson wrote: > On Wed, Feb 05, 2025, Vasant Hegde wrote: >> On 2/5/2025 8:47 PM, Sean Christopherson wrote: >>> On Wed, Feb 05, 2025, Vasant Hegde wrote: >>>>> @@ -3318,6 +3326,9 @@ static int __init iommu_go_to_state(enum iommu_init_state state) >>>>> ret = state_next(); >>>>> } >>>>> >>>>> + if (ret && !amd_iommu_snp_en && cc_platform_has(CC_ATTR_HOST_SEV_SNP)) >>>> >>>> >>>> I think we should clear when `amd_iommu_snp_en` is true. >>> >>> That doesn't address the case where amd_iommu_prepare() fails, because amd_iommu_snp_en >>> will be %false (its init value) and the RMP will be uninitialized, i.e. >>> CC_ATTR_HOST_SEV_SNP will be incorrectly left set. >> >> You are right. I missed early failure scenarios :-( >> >>> >>> And conversely, IMO clearing CC_ATTR_HOST_SEV_SNP after initializing the IOMMU >>> and RMP is wrong as well. Such a host is probably hosed regardless, but from >>> the CPU's perspective, SNP is supported and enabled. >> >> So we don't want to clear CC_ATTR_HOST_SEV_SNP after RMP initialization -OR- >> clear for all failures? > > I honestly don't know, because the answer largely depends on what happens with > hardware. I asked in an earlier version of this series if IOMMU initialization > failure after the RMP is configured is even survivable. > > For this series, I think it makes sense to match the existing behavior, unless > someone from AMD can definitively state that we should do something different. > And the existing behavior is that amd_iommu_snp_en and CC_ATTR_HOST_SEV_SNP will > be left set if the IOMMU completes iommu_snp_enable(), and the kernel completes > RMP setup. Thanks for the clarification. Patch looks OK to me. -Vasant
Hello Sean, On 2/7/2025 9:52 AM, Sean Christopherson wrote: > On Wed, Feb 05, 2025, Ashish Kalra wrote: >> On 2/5/2025 1:31 PM, Sean Christopherson wrote: >>> On Wed, Feb 05, 2025, Vasant Hegde wrote: >>>> So we don't want to clear CC_ATTR_HOST_SEV_SNP after RMP initialization -OR- >>>> clear for all failures? >>> >>> I honestly don't know, because the answer largely depends on what happens with >>> hardware. I asked in an earlier version of this series if IOMMU initialization >>> failure after the RMP is configured is even survivable. >>> >> >> As i mentioned earlier and as part of this series and summarizing this again here: > > Thanks! > >> - snp_rmptable_init() enables SNP support system-wide and that means the HW starts >> doing RMP checks for memory accesses, but as RMP table is zeroed out initially, >> all memory is configured to be host/HV owned. >> >> It is only after SNP_INIT(_EX) that RMP table is configured and initialized with >> HV_Fixed, firmware pages and stuff like IOMMU RMP enforcement is enabled. >> >> If the IOMMU initialization fails after IOMMU support on SNP check is completed >> and host SNP is enabled, then SNP_INIT(_EX) will fail as IOMMUs need to be enabled >> for SNP_INIT to succeed. >> >>> For this series, I think it makes sense to match the existing behavior, unless >>> someone from AMD can definitively state that we should do something different. >>> And the existing behavior is that amd_iommu_snp_en and CC_ATTR_HOST_SEV_SNP will >>> be left set if the IOMMU completes iommu_snp_enable(), and the kernel completes >>> RMP setup. >> >> Yes, that is true and this behavior is still consistent with this series. >> >> Again to reiterate, if iommu_snp_enable() and host SNP enablement is successful, >> any late IOMMU initialization failures should cause SNP_INIT to fail and that means >> IOMMU RMP enforcement will never get enabled and RMP table will remain configured >> for all memory marked as HV/host owned. > > So the kernel should be able to limp along, but CC_ATTR_HOST_SEV_SNP will be in > a half-baked state. > > Would it make sense to WARN if the RMP has been configured? E.g. as a follow-up > change: > > /* > * SNP platform initilazation requires IOMMUs to be fully configured. > * If the RMP has NOT been configured, simply mark SNP as unsupported. > * If the RMP is configured, but RMP enforcement has not been enabled > * in IOMMUs, then the system is in a half-baked state, but can limp > * along as all memory should be Hypervisor-Owned in the RMP. WARN, > * but leave SNP as "supported" to avoid confusing the kernel. > */ > if (ret && cc_platform_has(CC_ATTR_HOST_SEV_SNP) && > !WARN_ON_ONCE(amd_iommu_snp_en)) > cc_platform_clear(CC_ATTR_HOST_SEV_SNP); Yes, i can re-spin the series with this WARN_ON() added and additional comments added. Thanks, Ashish
diff --git a/arch/x86/include/asm/sev.h b/arch/x86/include/asm/sev.h index 5d9685f92e5c..1581246491b5 100644 --- a/arch/x86/include/asm/sev.h +++ b/arch/x86/include/asm/sev.h @@ -531,6 +531,7 @@ static inline void __init snp_secure_tsc_init(void) { } #ifdef CONFIG_KVM_AMD_SEV bool snp_probe_rmptable_info(void); +int snp_rmptable_init(void); int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level); void snp_dump_hva_rmpentry(unsigned long address); int psmash(u64 pfn); @@ -541,6 +542,7 @@ void kdump_sev_callback(void); void snp_fixup_e820_tables(void); #else static inline bool snp_probe_rmptable_info(void) { return false; } +static inline int snp_rmptable_init(void) { return -ENOSYS; } static inline int snp_lookup_rmpentry(u64 pfn, bool *assigned, int *level) { return -ENODEV; } static inline void snp_dump_hva_rmpentry(unsigned long address) {} static inline int psmash(u64 pfn) { return -ENODEV; } diff --git a/arch/x86/virt/svm/sev.c b/arch/x86/virt/svm/sev.c index 1dcc027ec77e..42e74a5a7d78 100644 --- a/arch/x86/virt/svm/sev.c +++ b/arch/x86/virt/svm/sev.c @@ -505,19 +505,19 @@ static bool __init setup_rmptable(void) * described in the SNP_INIT_EX firmware command description in the SNP * firmware ABI spec. */ -static int __init snp_rmptable_init(void) +int __init snp_rmptable_init(void) { unsigned int i; u64 val; - if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) - return 0; + if (WARN_ON_ONCE(!cc_platform_has(CC_ATTR_HOST_SEV_SNP))) + return -ENOSYS; - if (!amd_iommu_snp_en) - goto nosnp; + if (WARN_ON_ONCE(!amd_iommu_snp_en)) + return -ENOSYS; if (!setup_rmptable()) - goto nosnp; + return -ENOSYS; /* * Check if SEV-SNP is already enabled, this can happen in case of @@ -530,7 +530,7 @@ static int __init snp_rmptable_init(void) /* Zero out the RMP bookkeeping area */ if (!clear_rmptable_bookkeeping()) { free_rmp_segment_table(); - goto nosnp; + return -ENOSYS; } /* Zero out the RMP entries */ @@ -562,17 +562,8 @@ static int __init snp_rmptable_init(void) crash_kexec_post_notifiers = true; return 0; - -nosnp: - cc_platform_clear(CC_ATTR_HOST_SEV_SNP); - return -ENOSYS; } -/* - * This must be called after the IOMMU has been initialized. - */ -device_initcall(snp_rmptable_init); - static void set_rmp_segment_info(unsigned int segment_shift) { rmp_segment_shift = segment_shift; diff --git a/drivers/iommu/amd/init.c b/drivers/iommu/amd/init.c index c5cd92edada0..4bcb474e2252 100644 --- a/drivers/iommu/amd/init.c +++ b/drivers/iommu/amd/init.c @@ -3194,7 +3194,7 @@ static bool __init detect_ivrs(void) return true; } -static void iommu_snp_enable(void) +static __init void iommu_snp_enable(void) { #ifdef CONFIG_KVM_AMD_SEV if (!cc_platform_has(CC_ATTR_HOST_SEV_SNP)) @@ -3219,6 +3219,14 @@ static void iommu_snp_enable(void) goto disable_snp; } + /* + * Enable host SNP support once SNP support is checked on IOMMU. + */ + if (snp_rmptable_init()) { + pr_warn("SNP: RMP initialization failed, SNP cannot be supported.\n"); + goto disable_snp; + } + pr_info("IOMMU SNP support enabled.\n"); return; @@ -3318,6 +3326,9 @@ static int __init iommu_go_to_state(enum iommu_init_state state) ret = state_next(); } + if (ret && !amd_iommu_snp_en && cc_platform_has(CC_ATTR_HOST_SEV_SNP)) + cc_platform_clear(CC_ATTR_HOST_SEV_SNP); + return ret; } @@ -3426,18 +3437,23 @@ void __init amd_iommu_detect(void) int ret; if (no_iommu || (iommu_detected && !gart_iommu_aperture)) - return; + goto disable_snp; if (!amd_iommu_sme_check()) - return; + goto disable_snp; ret = iommu_go_to_state(IOMMU_IVRS_DETECTED); if (ret) - return; + goto disable_snp; amd_iommu_detected = true; iommu_detected = 1; x86_init.iommu.iommu_init = amd_iommu_init; + return; + +disable_snp: + if (cc_platform_has(CC_ATTR_HOST_SEV_SNP)) + cc_platform_clear(CC_ATTR_HOST_SEV_SNP); } /****************************************************************************