Message ID | 20210707181506.30489-1-brijesh.singh@amd.com |
---|---|
Headers | show |
Series | Add AMD Secure Nested Paging (SEV-SNP) Guest Support | expand |
* Brijesh Singh (brijesh.singh@amd.com) wrote: > The sev_feature_enabled() helper can be used by the guest to query whether > the SNP - Secure Nested Paging feature is active. > > Signed-off-by: Brijesh Singh <brijesh.singh@amd.com> > --- > arch/x86/include/asm/mem_encrypt.h | 8 ++++++++ > arch/x86/include/asm/msr-index.h | 2 ++ > arch/x86/mm/mem_encrypt.c | 14 ++++++++++++++ > 3 files changed, 24 insertions(+) > > diff --git a/arch/x86/include/asm/mem_encrypt.h b/arch/x86/include/asm/mem_encrypt.h > index 8cc2fd308f65..fb857f2e72cb 100644 > --- a/arch/x86/include/asm/mem_encrypt.h > +++ b/arch/x86/include/asm/mem_encrypt.h > @@ -16,6 +16,12 @@ > > #include <asm/bootparam.h> > > +enum sev_feature_type { > + SEV, > + SEV_ES, > + SEV_SNP > +}; Is this .... > #ifdef CONFIG_AMD_MEM_ENCRYPT > > extern u64 sme_me_mask; > @@ -54,6 +60,7 @@ void __init sev_es_init_vc_handling(void); > bool sme_active(void); > bool sev_active(void); > bool sev_es_active(void); > +bool sev_feature_enabled(unsigned int feature_type); > > #define __bss_decrypted __section(".bss..decrypted") > > @@ -87,6 +94,7 @@ static inline int __init > early_set_memory_encrypted(unsigned long vaddr, unsigned long size) { return 0; } > > static inline void mem_encrypt_free_decrypted_mem(void) { } > +static bool sev_feature_enabled(unsigned int feature_type) { return false; } > > #define __bss_decrypted > > diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h > index a7c413432b33..37589da0282e 100644 > --- a/arch/x86/include/asm/msr-index.h > +++ b/arch/x86/include/asm/msr-index.h > @@ -481,8 +481,10 @@ > #define MSR_AMD64_SEV 0xc0010131 > #define MSR_AMD64_SEV_ENABLED_BIT 0 > #define MSR_AMD64_SEV_ES_ENABLED_BIT 1 > +#define MSR_AMD64_SEV_SNP_ENABLED_BIT 2 Just the same as this ? > #define MSR_AMD64_SEV_ENABLED BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT) > #define MSR_AMD64_SEV_ES_ENABLED BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT) > +#define MSR_AMD64_SEV_SNP_ENABLED BIT_ULL(MSR_AMD64_SEV_SNP_ENABLED_BIT) > > #define MSR_AMD64_VIRT_SPEC_CTRL 0xc001011f > > diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c > index ff08dc463634..63e7799a9a86 100644 > --- a/arch/x86/mm/mem_encrypt.c > +++ b/arch/x86/mm/mem_encrypt.c > @@ -389,6 +389,16 @@ bool noinstr sev_es_active(void) > return sev_status & MSR_AMD64_SEV_ES_ENABLED; > } > > +bool sev_feature_enabled(unsigned int type) In which case, if you want the enum then that would be enum sev_feature_type type ? > +{ > + switch (type) { > + case SEV: return sev_status & MSR_AMD64_SEV_ENABLED; > + case SEV_ES: return sev_status & MSR_AMD64_SEV_ES_ENABLED; > + case SEV_SNP: return sev_status & MSR_AMD64_SEV_SNP_ENABLED; > + default: return false; or, could you just go for making that whole thing a bit test on 1<<type ? > + } > +} > + > /* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */ > bool force_dma_unencrypted(struct device *dev) > { > @@ -461,6 +471,10 @@ static void print_mem_encrypt_feature_info(void) > if (sev_es_active()) > pr_cont(" SEV-ES"); > > + /* Secure Nested Paging */ > + if (sev_feature_enabled(SEV_SNP)) > + pr_cont(" SEV-SNP"); > + > pr_cont("\n"); > } Dave > -- > 2.17.1 > > -- Dr. David Alan Gilbert / dgilbert@redhat.com / Manchester, UK
On 08/07/21 10:50, Dr. David Alan Gilbert wrote: >> +enum sev_feature_type { >> + SEV, >> + SEV_ES, >> + SEV_SNP >> +}; > Is this .... > >> diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h >> index a7c413432b33..37589da0282e 100644 >> --- a/arch/x86/include/asm/msr-index.h >> +++ b/arch/x86/include/asm/msr-index.h >> @@ -481,8 +481,10 @@ >> #define MSR_AMD64_SEV 0xc0010131 >> #define MSR_AMD64_SEV_ENABLED_BIT 0 >> #define MSR_AMD64_SEV_ES_ENABLED_BIT 1 >> +#define MSR_AMD64_SEV_SNP_ENABLED_BIT 2 > Just the same as this ? > No, it's just a coincidence. Paolo
On Wed, Jul 07, 2021 at 01:14:33PM -0500, Brijesh Singh wrote: > diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h > index 11b7d9cea775..23929a3010df 100644 > --- a/arch/x86/include/asm/sev-common.h > +++ b/arch/x86/include/asm/sev-common.h > @@ -45,6 +45,15 @@ > (((unsigned long)reg & GHCB_MSR_CPUID_REG_MASK) << GHCB_MSR_CPUID_REG_POS) | \ > (((unsigned long)fn) << GHCB_MSR_CPUID_FUNC_POS)) > > +/* GHCB Hypervisor Feature Request */ > +#define GHCB_MSR_HV_FT_REQ 0x080 > +#define GHCB_MSR_HV_FT_RESP 0x081 > +#define GHCB_MSR_HV_FT_POS 12 > +#define GHCB_MSR_HV_FT_MASK GENMASK_ULL(51, 0) > + > +#define GHCB_MSR_HV_FT_RESP_VAL(v) \ > + (((unsigned long)((v) >> GHCB_MSR_HV_FT_POS) & GHCB_MSR_HV_FT_MASK)) As I suggested... > @@ -215,6 +216,7 @@ > { SVM_VMGEXIT_NMI_COMPLETE, "vmgexit_nmi_complete" }, \ > { SVM_VMGEXIT_AP_HLT_LOOP, "vmgexit_ap_hlt_loop" }, \ > { SVM_VMGEXIT_AP_JUMP_TABLE, "vmgexit_ap_jump_table" }, \ > + { SVM_VMGEXIT_HYPERVISOR_FEATURES, "vmgexit_hypervisor_feature" }, \ SVM_VMGEXIT_HV_FEATURES > { SVM_EXIT_ERR, "invalid_guest_state" } > > > diff --git a/arch/x86/kernel/sev-shared.c b/arch/x86/kernel/sev-shared.c > index 19c2306ac02d..34821da5f05e 100644 > --- a/arch/x86/kernel/sev-shared.c > +++ b/arch/x86/kernel/sev-shared.c > @@ -23,6 +23,9 @@ > */ > static u16 ghcb_version __section(".data..ro_after_init"); > > +/* Bitmap of SEV features supported by the hypervisor */ > +u64 sev_hv_features __section(".data..ro_after_init") = 0; __ro_after_init > diff --git a/arch/x86/kernel/sev.c b/arch/x86/kernel/sev.c > index 66b7f63ad041..540b81ac54c9 100644 > --- a/arch/x86/kernel/sev.c > +++ b/arch/x86/kernel/sev.c > @@ -96,6 +96,9 @@ struct ghcb_state { > static DEFINE_PER_CPU(struct sev_es_runtime_data*, runtime_data); > DEFINE_STATIC_KEY_FALSE(sev_es_enable_key); > > +/* Bitmap of SEV features supported by the hypervisor */ > +EXPORT_SYMBOL(sev_hv_features); Why is this exported and why not a _GPL export?
On Wed, Jul 07, 2021 at 01:14:34PM -0500, Brijesh Singh wrote: > The sev_feature_enabled() helper can be used by the guest to query whether > the SNP - Secure Nested Paging feature is active. > > Signed-off-by: Brijesh Singh <brijesh.singh@amd.com> > --- > arch/x86/include/asm/mem_encrypt.h | 8 ++++++++ > arch/x86/include/asm/msr-index.h | 2 ++ > arch/x86/mm/mem_encrypt.c | 14 ++++++++++++++ > 3 files changed, 24 insertions(+) This will get replaced by this I presume: https://lkml.kernel.org/r/cover.1627424773.git.thomas.lendacky@amd.com -- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette
On Tue, Aug 10, 2021 at 08:39:02AM -0500, Brijesh Singh wrote: > I was thinking that some driver may need it in future, but nothing in my > series needs it yet. I will drop it and we can revisit it later. Yeah, please never do such exports in anticipation. And if we *ever* need them, they should be _GPL ones - not EXPORT_SYMBOL. And then the API needs to be discussed and potentially proper accessors added instead of exporting naked variables... Thx. -- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette
On 8/10/21 6:25 AM, Borislav Petkov wrote: > On Wed, Jul 07, 2021 at 01:14:34PM -0500, Brijesh Singh wrote: >> The sev_feature_enabled() helper can be used by the guest to query whether >> the SNP - Secure Nested Paging feature is active. >> >> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com> >> --- >> arch/x86/include/asm/mem_encrypt.h | 8 ++++++++ >> arch/x86/include/asm/msr-index.h | 2 ++ >> arch/x86/mm/mem_encrypt.c | 14 ++++++++++++++ >> 3 files changed, 24 insertions(+) > > This will get replaced by this I presume: > > https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flkml.kernel.org%2Fr%2Fcover.1627424773.git.thomas.lendacky%40amd.com&data=04%7C01%7Cbrijesh.singh%40amd.com%7C15d8b87644e148488da408d95bf16ae3%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637641914718165877%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=UMRigkWSG2h%2BZ4L08AUlG0JeUiqMb9te52LprPrq51M%3D&reserved=0 > Yes. thanks
On Wed, Jul 07, 2021 at 01:14:39PM -0500, Brijesh Singh wrote: > @@ -274,16 +274,31 @@ static int set_clr_page_flags(struct x86_mapping_info *info, > /* > * Changing encryption attributes of a page requires to flush it from > * the caches. > + * > + * If the encryption attribute is being cleared, then change the page > + * state to shared in the RMP table. That comment... > */ > - if ((set | clr) & _PAGE_ENC) > + if ((set | clr) & _PAGE_ENC) { > clflush_page(address); > ... goes here: <--- > + if (clr) > + snp_set_page_shared(pte_pfn(*ptep) << PAGE_SHIFT); > + } > + ... > diff --git a/arch/x86/boot/compressed/sev.c b/arch/x86/boot/compressed/sev.c > index 2f3081e9c78c..f386d45a57b6 100644 > --- a/arch/x86/boot/compressed/sev.c > +++ b/arch/x86/boot/compressed/sev.c > @@ -164,6 +164,47 @@ static bool is_vmpl0(void) > return true; > } > > +static void __page_state_change(unsigned long paddr, int op) That op should be: enum psc_op { SNP_PAGE_STATE_SHARED, SNP_PAGE_STATE_PRIVATE, }; and have static void __page_state_change(unsigned long paddr, enum psc_op op) so that the compiler can check you're at least passing from the correct set of defines. > diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h > index ea508835ab33..aee07d1bb138 100644 > --- a/arch/x86/include/asm/sev-common.h > +++ b/arch/x86/include/asm/sev-common.h > @@ -45,6 +45,23 @@ > (((unsigned long)reg & GHCB_MSR_CPUID_REG_MASK) << GHCB_MSR_CPUID_REG_POS) | \ > (((unsigned long)fn) << GHCB_MSR_CPUID_FUNC_POS)) > > +/* SNP Page State Change */ > +#define GHCB_MSR_PSC_REQ 0x014 > +#define SNP_PAGE_STATE_PRIVATE 1 > +#define SNP_PAGE_STATE_SHARED 2 > +#define GHCB_MSR_PSC_GFN_POS 12 > +#define GHCB_MSR_PSC_GFN_MASK GENMASK_ULL(39, 0) > +#define GHCB_MSR_PSC_OP_POS 52 > +#define GHCB_MSR_PSC_OP_MASK 0xf > +#define GHCB_MSR_PSC_REQ_GFN(gfn, op) \ > + (((unsigned long)((op) & GHCB_MSR_PSC_OP_MASK) << GHCB_MSR_PSC_OP_POS) | \ > + ((unsigned long)((gfn) & GHCB_MSR_PSC_GFN_MASK) << GHCB_MSR_PSC_GFN_POS) | \ > + GHCB_MSR_PSC_REQ) > + > +#define GHCB_MSR_PSC_RESP 0x015 > +#define GHCB_MSR_PSC_ERROR_POS 32 > +#define GHCB_MSR_PSC_RESP_VAL(val) ((val) >> GHCB_MSR_PSC_ERROR_POS) > + Also get rid of eccessive defines... -- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette
On Wed, Jul 07, 2021 at 01:14:42PM -0500, Brijesh Singh wrote: > +void __init early_snp_set_memory_private(unsigned long vaddr, unsigned long paddr, > + unsigned int npages) > +{ > + if (!sev_feature_enabled(SEV_SNP)) > + return; > + > + /* Ask hypervisor to add the memory pages in RMP table as a 'private'. */ From a previous review: Ask the hypervisor to mark the memory pages as private in the RMP table. Are you missing my comments, do I have to write them more prominently or what is the problem? DO I NEED TO WRITE IN CAPS ONLY MAYBE? > +void __init early_snp_set_memory_shared(unsigned long vaddr, unsigned long paddr, > + unsigned int npages) > +{ > + if (!sev_feature_enabled(SEV_SNP)) > + return; > + > + /* > + * Invalidate the memory pages before they are marked shared in the > + * RMP table. > + */ > + pvalidate_pages(vaddr, npages, 0); > + > + /* Ask hypervisor to make the memory pages shared in the RMP table. */ From a previous review: s/make/mark/ > + early_set_page_state(paddr, npages, SNP_PAGE_STATE_SHARED); > +} > + > +void __init snp_prep_memory(unsigned long paddr, unsigned int sz, int op) that op should be: enum psc_op { SNP_PAGE_STATE_SHARED, SNP_PAGE_STATE_PRIVATE, }; too. -- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette
On 8/13/21 5:22 AM, Borislav Petkov wrote: >> +static void __page_state_change(unsigned long paddr, int op) > > That op should be: > > enum psc_op { > SNP_PAGE_STATE_SHARED, > SNP_PAGE_STATE_PRIVATE, > }; > Noted. > and have > > static void __page_state_change(unsigned long paddr, enum psc_op op) > > so that the compiler can check you're at least passing from the correct > set of defines. > >> diff --git a/arch/x86/include/asm/sev-common.h b/arch/x86/include/asm/sev-common.h >> index ea508835ab33..aee07d1bb138 100644 >> --- a/arch/x86/include/asm/sev-common.h >> +++ b/arch/x86/include/asm/sev-common.h >> @@ -45,6 +45,23 @@ >> (((unsigned long)reg & GHCB_MSR_CPUID_REG_MASK) << GHCB_MSR_CPUID_REG_POS) | \ >> (((unsigned long)fn) << GHCB_MSR_CPUID_FUNC_POS)) >> >> +/* SNP Page State Change */ >> +#define GHCB_MSR_PSC_REQ 0x014 >> +#define SNP_PAGE_STATE_PRIVATE 1 >> +#define SNP_PAGE_STATE_SHARED 2 >> +#define GHCB_MSR_PSC_GFN_POS 12 >> +#define GHCB_MSR_PSC_GFN_MASK GENMASK_ULL(39, 0) >> +#define GHCB_MSR_PSC_OP_POS 52 >> +#define GHCB_MSR_PSC_OP_MASK 0xf >> +#define GHCB_MSR_PSC_REQ_GFN(gfn, op) \ >> + (((unsigned long)((op) & GHCB_MSR_PSC_OP_MASK) << GHCB_MSR_PSC_OP_POS) | \ >> + ((unsigned long)((gfn) & GHCB_MSR_PSC_GFN_MASK) << GHCB_MSR_PSC_GFN_POS) | \ >> + GHCB_MSR_PSC_REQ) >> + >> +#define GHCB_MSR_PSC_RESP 0x015 >> +#define GHCB_MSR_PSC_ERROR_POS 32 >> +#define GHCB_MSR_PSC_RESP_VAL(val) ((val) >> GHCB_MSR_PSC_ERROR_POS) >> + > > Also get rid of eccessive defines... I am getting conflicting review comments on function naming, comment style, macro etc. While addressing the feedback I try to incorporate all those comments, lets see how I do in next rev. thanks
On Wed, Jul 07, 2021 at 01:14:45PM -0500, Brijesh Singh wrote: > +struct __packed psc_hdr { > + u16 cur_entry; > + u16 end_entry; > + u32 reserved; > +}; > + > +struct __packed psc_entry { > + u64 cur_page : 12, > + gfn : 40, > + operation : 4, > + pagesize : 1, > + reserved : 7; > +}; > + > +struct __packed snp_psc_desc { > + struct psc_hdr hdr; > + struct psc_entry entries[VMGEXIT_PSC_MAX_ENTRY]; > +}; The majority of kernel code puts __packed after the struct definition, let's put it there too pls, out of the way. ... > +static int vmgexit_psc(struct snp_psc_desc *desc) > +{ > + int cur_entry, end_entry, ret; > + struct snp_psc_desc *data; > + struct ghcb_state state; > + struct ghcb *ghcb; > + struct psc_hdr *hdr; > + unsigned long flags; > + > + local_irq_save(flags); > + > + ghcb = __sev_get_ghcb(&state); > + if (unlikely(!ghcb)) > + panic("SEV-SNP: Failed to get GHCB\n"); > + > + /* Copy the input desc into GHCB shared buffer */ > + data = (struct snp_psc_desc *)ghcb->shared_buffer; > + memcpy(ghcb->shared_buffer, desc, sizeof(*desc)); > + > + hdr = &data->hdr; > + cur_entry = hdr->cur_entry; > + end_entry = hdr->end_entry; > + > + /* > + * As per the GHCB specification, the hypervisor can resume the guest > + * before processing all the entries. Checks whether all the entries > + * are processed. If not, then keep retrying. > + * > + * The stragtegy here is to wait for the hypervisor to change the page > + * state in the RMP table before guest access the memory pages. If the > + * page state was not successful, then later memory access will result > + * in the crash. > + */ > + while (hdr->cur_entry <= hdr->end_entry) { > + ghcb_set_sw_scratch(ghcb, (u64)__pa(data)); > + > + ret = sev_es_ghcb_hv_call(ghcb, NULL, SVM_VMGEXIT_PSC, 0, 0); > + > + /* > + * Page State Change VMGEXIT can pass error code through > + * exit_info_2. > + */ > + if (WARN(ret || ghcb->save.sw_exit_info_2, > + "SEV-SNP: page state change failed ret=%d exit_info_2=%llx\n", > + ret, ghcb->save.sw_exit_info_2)) > + return 1; Yikes, you return here and below with interrupts disabled. All your returns need to be "goto out;" instead where you do out: __sev_put_ghcb(&state); local_irq_restore(flags); Yap, you very likely need to put the GHCB too. > + /* > + * Lets do some sanity check that entry processing is not going > + * backward. This will happen only if hypervisor is tricking us. > + */ > + if (WARN((hdr->end_entry > end_entry) || (cur_entry > hdr->cur_entry), > + "SEV-SNP: page state change processing going backward, end_entry " > + "(expected %d got %d) cur_entry (expected %d got %d)\n", > + end_entry, hdr->end_entry, cur_entry, hdr->cur_entry)) > + return 1; WARNING: quoted string split across lines #293: FILE: arch/x86/kernel/sev.c:750: + "SEV-SNP: page state change processing going backward, end_entry " + "(expected %d got %d) cur_entry (expected %d got %d)\n", If you're wondering what to do, yes, you can really stretch that string and shorten it too: if (WARN((hdr->end_entry > end_entry) || (cur_entry > hdr->cur_entry), "SEV-SNP: PSC processing going backwards, end_entry %d (got %d) cur_entry: %d (got %d)\n", end_entry, hdr->end_entry, cur_entry, hdr->cur_entry)) return 1; so that it fits on a single line and grepping can find it. > + /* Lets verify that reserved bit is not set in the header*/ > + if (WARN(hdr->reserved, "Reserved bit is set in the PSC header\n")) psc_entry has a ->reserved field too and since we're iterating over the entries... > + return 1; > + } > + > + __sev_put_ghcb(&state); > + local_irq_restore(flags); > + > + return 0; > +} > + > +static void __set_page_state(struct snp_psc_desc *data, unsigned long vaddr, > + unsigned long vaddr_end, int op) > +{ > + struct psc_hdr *hdr; > + struct psc_entry *e; > + unsigned long pfn; > + int i; > + > + hdr = &data->hdr; > + e = data->entries; > + > + memset(data, 0, sizeof(*data)); > + i = 0; > + > + while (vaddr < vaddr_end) { > + if (is_vmalloc_addr((void *)vaddr)) > + pfn = vmalloc_to_pfn((void *)vaddr); > + else > + pfn = __pa(vaddr) >> PAGE_SHIFT; > + > + e->gfn = pfn; > + e->operation = op; > + hdr->end_entry = i; > + > + /* > + * The GHCB specification provides the flexibility to > + * use either 4K or 2MB page size in the RMP table. > + * The current SNP support does not keep track of the > + * page size used in the RMP table. To avoid the > + * overlap request, use the 4K page size in the RMP > + * table. > + */ > + e->pagesize = RMP_PG_SIZE_4K; > + > + vaddr = vaddr + PAGE_SIZE; > + e++; > + i++; > + } > + > + /* Terminate the guest on page state change failure. */ That comment is kinda obvious :) > + if (vmgexit_psc(data)) > + sev_es_terminate(1, GHCB_TERM_PSC); > +} > + > +static void set_page_state(unsigned long vaddr, unsigned int npages, int op) > +{ > + unsigned long vaddr_end, next_vaddr; > + struct snp_psc_desc *desc; > + > + vaddr = vaddr & PAGE_MASK; > + vaddr_end = vaddr + (npages << PAGE_SHIFT); > + > + desc = kmalloc(sizeof(*desc), GFP_KERNEL_ACCOUNT); kzalloc() so that you don't have to memset() later in __set_page_state(). > + if (!desc) > + panic("failed to allocate memory"); Make that error message more distinctive so that *if* it happens, one can pinpoint the place in the code where the panic comes from. > + while (vaddr < vaddr_end) { > + /* > + * Calculate the last vaddr that can be fit in one > + * struct snp_psc_desc. > + */ > + next_vaddr = min_t(unsigned long, vaddr_end, > + (VMGEXIT_PSC_MAX_ENTRY * PAGE_SIZE) + vaddr); > + > + __set_page_state(desc, vaddr, next_vaddr, op); > + > + vaddr = next_vaddr; > + } > + > + kfree(desc); > +} > + -- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette
On Wed, Jul 07, 2021 at 01:14:46PM -0500, Brijesh Singh wrote: > The hypervisor uses the SEV_FEATURES field (offset 3B0h) in the Save State > Area to control the SEV-SNP guest features such as SNPActive, vTOM, > ReflectVC etc. An SEV-SNP guest can read the SEV_FEATURES fields through > the SEV_STATUS MSR. > > While at it, update the dump_vmcb() to log the VMPL level. > > See APM2 Table 15-34 and B-4 for more details. > > Signed-off-by: Brijesh Singh <brijesh.singh@amd.com> > --- > arch/x86/include/asm/svm.h | 15 +++++++++++++-- > arch/x86/kvm/svm/svm.c | 4 ++-- > 2 files changed, 15 insertions(+), 4 deletions(-) > > diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h > index 772e60efe243..ff614cdcf628 100644 > --- a/arch/x86/include/asm/svm.h > +++ b/arch/x86/include/asm/svm.h > @@ -212,6 +212,15 @@ struct __attribute__ ((__packed__)) vmcb_control_area { > #define SVM_NESTED_CTL_SEV_ENABLE BIT(1) > #define SVM_NESTED_CTL_SEV_ES_ENABLE BIT(2) > > +#define SVM_SEV_FEATURES_SNP_ACTIVE BIT(0) > +#define SVM_SEV_FEATURES_VTOM BIT(1) > +#define SVM_SEV_FEATURES_REFLECT_VC BIT(2) > +#define SVM_SEV_FEATURES_RESTRICTED_INJECTION BIT(3) > +#define SVM_SEV_FEATURES_ALTERNATE_INJECTION BIT(4) > +#define SVM_SEV_FEATURES_DEBUG_SWAP BIT(5) > +#define SVM_SEV_FEATURES_PREVENT_HOST_IBS BIT(6) > +#define SVM_SEV_FEATURES_BTB_ISOLATION BIT(7) Only some of those get used and only later. Please introduce only those with the patch that adds usage. Also, s/SVM_SEV_FEATURES_/SVM_SEV_FEAT_/g at least. And by the way, why is this patch and the next 3 part of the guest set? They look like they belong into the hypervisor set.
On Tue, Aug 17, 2021 at 07:54:15PM +0200, Borislav Petkov wrote: > And by the way, why is this patch and the next 3 part of the guest set? > They look like they belong into the hypervisor set. Aha, patch 20 and further need the definitions. -- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette
On 8/17/21 12:27 PM, Borislav Petkov wrote: > > The majority of kernel code puts __packed after the struct definition, > let's put it there too pls, out of the way. > > ... Noted. >> + if (WARN(ret || ghcb->save.sw_exit_info_2, >> + "SEV-SNP: page state change failed ret=%d exit_info_2=%llx\n", >> + ret, ghcb->save.sw_exit_info_2)) >> + return 1; > > Yikes, you return here and below with interrupts disabled. > > All your returns need to be "goto out;" instead where you do > > out: > __sev_put_ghcb(&state); > local_irq_restore(flags); > > Yap, you very likely need to put the GHCB too. > Sure, let me revisit this code to fix those path. >> + /* >> + * Lets do some sanity check that entry processing is not going >> + * backward. This will happen only if hypervisor is tricking us. >> + */ >> + if (WARN((hdr->end_entry > end_entry) || (cur_entry > hdr->cur_entry), >> + "SEV-SNP: page state change processing going backward, end_entry " >> + "(expected %d got %d) cur_entry (expected %d got %d)\n", >> + end_entry, hdr->end_entry, cur_entry, hdr->cur_entry)) >> + return 1; > > WARNING: quoted string split across lines > #293: FILE: arch/x86/kernel/sev.c:750: > + "SEV-SNP: page state change processing going backward, end_entry " > + "(expected %d got %d) cur_entry (expected %d got %d)\n", > > If you're wondering what to do, yes, you can really stretch that string > and shorten it too: Okay. > > if (WARN((hdr->end_entry > end_entry) || (cur_entry > hdr->cur_entry), > "SEV-SNP: PSC processing going backwards, end_entry %d (got %d) cur_entry: %d (got %d)\n", > end_entry, hdr->end_entry, cur_entry, hdr->cur_entry)) > return 1; > > so that it fits on a single line and grepping can find it. > Noted. >> + /* Lets verify that reserved bit is not set in the header*/ >> + if (WARN(hdr->reserved, "Reserved bit is set in the PSC header\n")) > > psc_entry has a ->reserved field too and since we're iterating over the > entries... > Sure I can add that check. >> + >> + desc = kmalloc(sizeof(*desc), GFP_KERNEL_ACCOUNT); > > kzalloc() so that you don't have to memset() later in > __set_page_state(). Depending on the size, the __set_page_state() can be call multiple times so it should clear the desc memory before filling it. > >> + if (!desc) >> + panic("failed to allocate memory"); > > Make that error message more distinctive so that *if* it happens, one > can pinpoint the place in the code where the panic comes from. > Now I am running checkpatch and notice that it complain about the message too. I can add a BUG() or WARN() to get the stack trace before the crashing. >> + while (vaddr < vaddr_end) { >> + /* >> + * Calculate the last vaddr that can be fit in one >> + * struct snp_psc_desc. >> + */ >> + next_vaddr = min_t(unsigned long, vaddr_end, >> + (VMGEXIT_PSC_MAX_ENTRY * PAGE_SIZE) + vaddr); >> + >> + __set_page_state(desc, vaddr, next_vaddr, op); >> + >> + vaddr = next_vaddr; >> + } >> + >> + kfree(desc); >> +} >> + >
On 8/17/21 12:54 PM, Borislav Petkov wrote: > On Wed, Jul 07, 2021 at 01:14:46PM -0500, Brijesh Singh wrote: >> The hypervisor uses the SEV_FEATURES field (offset 3B0h) in the Save State >> Area to control the SEV-SNP guest features such as SNPActive, vTOM, >> ReflectVC etc. An SEV-SNP guest can read the SEV_FEATURES fields through >> the SEV_STATUS MSR. >> >> While at it, update the dump_vmcb() to log the VMPL level. >> >> See APM2 Table 15-34 and B-4 for more details. >> >> Signed-off-by: Brijesh Singh <brijesh.singh@amd.com> >> --- >> arch/x86/include/asm/svm.h | 15 +++++++++++++-- >> arch/x86/kvm/svm/svm.c | 4 ++-- >> 2 files changed, 15 insertions(+), 4 deletions(-) >> >> diff --git a/arch/x86/include/asm/svm.h b/arch/x86/include/asm/svm.h >> index 772e60efe243..ff614cdcf628 100644 >> --- a/arch/x86/include/asm/svm.h >> +++ b/arch/x86/include/asm/svm.h >> @@ -212,6 +212,15 @@ struct __attribute__ ((__packed__)) vmcb_control_area { >> #define SVM_NESTED_CTL_SEV_ENABLE BIT(1) >> #define SVM_NESTED_CTL_SEV_ES_ENABLE BIT(2) >> >> +#define SVM_SEV_FEATURES_SNP_ACTIVE BIT(0) >> +#define SVM_SEV_FEATURES_VTOM BIT(1) >> +#define SVM_SEV_FEATURES_REFLECT_VC BIT(2) >> +#define SVM_SEV_FEATURES_RESTRICTED_INJECTION BIT(3) >> +#define SVM_SEV_FEATURES_ALTERNATE_INJECTION BIT(4) >> +#define SVM_SEV_FEATURES_DEBUG_SWAP BIT(5) >> +#define SVM_SEV_FEATURES_PREVENT_HOST_IBS BIT(6) >> +#define SVM_SEV_FEATURES_BTB_ISOLATION BIT(7) > > Only some of those get used and only later. Please introduce only those > with the patch that adds usage. > Okay. > Also, > > s/SVM_SEV_FEATURES_/SVM_SEV_FEAT_/g > I can do that. > at least. > > And by the way, why is this patch and the next 3 part of the guest set? > They look like they belong into the hypervisor set. > This is needed by the AP creation, in SNP the AP creation need to populate the VMSA page and thus need to use some of macros and fields etc.
On Tue, Aug 17, 2021 at 01:07:40PM -0500, Brijesh Singh wrote: > > > + if (!desc) > > > + panic("failed to allocate memory"); > > > > Make that error message more distinctive so that *if* it happens, one > > can pinpoint the place in the code where the panic comes from. > > > > Now I am running checkpatch and notice that it complain about the message > too. I can add a BUG() or WARN() to get the stack trace before the crashing. checkpatch complains because there's a kmalloc before it and if it fails, the mm core will issue a warning so there's no need for a warning here. But in this case, you want to panic and checkpatch doesn't see that so you can ignore it here and leave the panic message but make it more distinctive so one can find it by grepping. IOW, something like if (!desc) panic("SEV-SNP: Failed to allocame memory for PSC descriptor"); Thx. -- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette
On 8/17/21 1:17 PM, Borislav Petkov wrote: > On Tue, Aug 17, 2021 at 01:07:40PM -0500, Brijesh Singh wrote: >>>> + if (!desc) >>>> + panic("failed to allocate memory"); >>> >>> Make that error message more distinctive so that *if* it happens, one >>> can pinpoint the place in the code where the panic comes from. >>> >> >> Now I am running checkpatch and notice that it complain about the message >> too. I can add a BUG() or WARN() to get the stack trace before the crashing. > > checkpatch complains because there's a kmalloc before it and if it > fails, the mm core will issue a warning so there's no need for a warning > here. > > But in this case, you want to panic and checkpatch doesn't see that so > you can ignore it here and leave the panic message but make it more > distinctive so one can find it by grepping. IOW, something like > > if (!desc) > panic("SEV-SNP: Failed to allocame memory for PSC descriptor"); > Got it, I will update the message accordingly. thanks
Hi Boris, On 8/17/21 12:27 PM, Borislav Petkov wrote: > >> + /* Lets verify that reserved bit is not set in the header*/ >> + if (WARN(hdr->reserved, "Reserved bit is set in the PSC header\n")) > > psc_entry has a ->reserved field too and since we're iterating over the > entries... > I am not seeing any strong reason to sanity check the reserved bit in the psc_entry. The fields in the psc_entry are input from guest to the hypervisor. The hypervisor cannot trick a guest by changing anything in the psc_entry because guest does not read the hypervisor filled value. I am okay with the psc_hdr because we need to read the current and end entry after the PSC completes to determine whether it was successful and sanity checking PSC header makes much more sense. Let me know if you are okay with it ? thanks
On Tue, Aug 17, 2021 at 03:34:41PM -0500, Brijesh Singh wrote: > I am not seeing any strong reason to sanity check the reserved bit in the > psc_entry. The fields in the psc_entry are input from guest to the > hypervisor. The hypervisor cannot trick a guest by changing anything in the > psc_entry because guest does not read the hypervisor filled value. I am okay > with the psc_hdr because we need to read the current and end entry after the > PSC completes to determine whether it was successful and sanity checking PSC > header makes much more sense. Let me know if you are okay with it ? Ok, fair enough. Thx. -- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette
On Wed, Jul 07, 2021 at 01:14:51PM -0500, Brijesh Singh wrote: > From: Michael Roth <michael.roth@amd.com> > > As of commit 103a4908ad4d ("x86/head/64: Disable stack protection for > head$(BITS).o") kernel/head64.c is compiled with -fno-stack-protector > to allow a call to set_bringup_idt_handler(), which would otherwise > have stack protection enabled with CONFIG_STACKPROTECTOR_STRONG. While > sufficient for that case, this will still cause issues if we attempt to Who's "we"? Please use passive voice in your text: no "we" or "I", etc. Personal pronouns are ambiguous in text, especially with so many parties/companies/etc developing the kernel so let's avoid them please. > call out to any external functions that were compiled with stack > protection enabled that in-turn make stack-protected calls, or if the > exception handlers set up by set_bringup_idt_handler() make calls to > stack-protected functions. > > Subsequent patches for SEV-SNP CPUID validation support will introduce > both such cases. Attempting to disable stack protection for everything > in scope to address that is prohibitive since much of the code, like > SEV-ES #VC handler, is shared code that remains in use after boot and > could benefit from having stack protection enabled. Attempting to inline > calls is brittle and can quickly balloon out to library/helper code > where that's not really an option. > > Instead, set up %gs to point a buffer that stack protector can use for > canary values when needed. > > In doing so, it's likely we can stop using -no-stack-protector for > head64.c, but that hasn't been tested yet, and head32.c would need a > similar solution to be safe, so that is left as a potential follow-up. Well, then fix it properly pls. Remove the -no-stack-protector, test it and send it out, even separately if easier to handle. This version looks half-baked, just so that it gets you what you need for the SNP stuff but we don't do half-baked, sorry. > Signed-off-by: Michael Roth <michael.roth@amd.com> > Signed-off-by: Brijesh Singh <brijesh.singh@amd.com> > --- > arch/x86/kernel/head64.c | 18 +++++++++++++++++- > 1 file changed, 17 insertions(+), 1 deletion(-) > > diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c > index f4c3e632345a..8615418f98f1 100644 > --- a/arch/x86/kernel/head64.c > +++ b/arch/x86/kernel/head64.c > @@ -74,6 +74,9 @@ static struct desc_struct startup_gdt[GDT_ENTRIES] = { > [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc093, 0, 0xfffff), > }; > > +/* For use by stack protector code before switching to virtual addresses */ > +static char startup_gs_area[64]; That needs some CONFIG_STACKPROTECTOR ifdeffery around it, below too. > + > /* > * Address needs to be set at runtime because it references the startup_gdt > * while the kernel still uses a direct mapping. > @@ -598,6 +601,8 @@ void early_setup_idt(void) > */ > void __head startup_64_setup_env(unsigned long physbase) > { > + u64 gs_area = (u64)fixup_pointer(startup_gs_area, physbase); > + > /* Load GDT */ > startup_gdt_descr.address = (unsigned long)fixup_pointer(startup_gdt, physbase); > native_load_gdt(&startup_gdt_descr); > @@ -605,7 +610,18 @@ void __head startup_64_setup_env(unsigned long physbase) > /* New GDT is live - reload data segment registers */ > asm volatile("movl %%eax, %%ds\n" > "movl %%eax, %%ss\n" > - "movl %%eax, %%es\n" : : "a"(__KERNEL_DS) : "memory"); > + "movl %%eax, %%es\n" > + "movl %%eax, %%gs\n" : : "a"(__KERNEL_DS) : "memory"); > + > + /* > + * GCC stack protection needs a place to store canary values. The > + * default is %gs:0x28, which is what the kernel currently uses. > + * Point GS base to a buffer that can be used for this purpose. > + * Note that newer GCCs now allow this location to be configured, > + * so if we change from the default in the future we need to ensure > + * that this buffer overlaps whatever address ends up being used. > + */ > + native_wrmsr(MSR_GS_BASE, gs_area, gs_area >> 32); > > startup_64_load_idt(physbase); > } > -- -- Regards/Gruss, Boris. https://people.kernel.org/tglx/notes-about-netiquette