@@ -6,4 +6,5 @@ struct pci_dev;
extern void __iomem *pci_map_biosrom(struct pci_dev *pdev);
extern void pci_unmap_biosrom(void __iomem *rom);
extern size_t pci_biosrom_size(struct pci_dev *pdev);
+extern void snp_kexec_unprep_rom_memory(void);
#endif
@@ -81,6 +81,10 @@ extern void vc_no_ghcb(void);
extern void vc_boot_ghcb(void);
extern bool handle_vc_boot_ghcb(struct pt_regs *regs);
+extern atomic_t conversions_in_progress;
+extern bool conversion_allowed;
+extern unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot);
+
/* PVALIDATE return codes */
#define PVALIDATE_FAIL_SIZEMISMATCH 6
@@ -213,6 +217,8 @@ int snp_issue_guest_request(u64 exit_code, struct snp_req_data *input, struct sn
void snp_accept_memory(phys_addr_t start, phys_addr_t end);
u64 snp_get_unsupported_features(u64 status);
u64 sev_get_status(void);
+void snp_kexec_unshare_mem(void);
+void snp_kexec_stop_conversion(bool crash);
#else
static inline void sev_es_ist_enter(struct pt_regs *regs) { }
static inline void sev_es_ist_exit(void) { }
@@ -241,6 +247,8 @@ static inline int snp_issue_guest_request(u64 exit_code, struct snp_req_data *in
static inline void snp_accept_memory(phys_addr_t start, phys_addr_t end) { }
static inline u64 snp_get_unsupported_features(u64 status) { return 0; }
static inline u64 sev_get_status(void) { return 0; }
+static inline void snp_kexec_unshare_mem(void) { }
+static inline void snp_kexec_stop_conversion(bool crash) { }
#endif
#endif
@@ -177,6 +177,22 @@ size_t pci_biosrom_size(struct pci_dev *pdev)
}
EXPORT_SYMBOL(pci_biosrom_size);
+void snp_kexec_unprep_rom_memory(void)
+{
+ unsigned long vaddr, npages, sz;
+
+ /*
+ * Switch back ROM regions to shared so that their validation
+ * does not fail during kexec kernel boot.
+ */
+ vaddr = (unsigned long)__va(video_rom_resource.start);
+ sz = (system_rom_resource.end + 1) - video_rom_resource.start;
+ npages = PAGE_ALIGN(sz) >> PAGE_SHIFT;
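+ /* Spans 0xc0000 (video ROM start) through 0xfffff (system ROM end). */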
+
+ snp_set_memory_shared(vaddr, npages);
+}
+EXPORT_SYMBOL(snp_kexec_unprep_rom_memory);
+
#define ROMSIGNATURE 0xaa55
static int __init romsignature(const unsigned char *rom)
@@ -23,6 +23,9 @@
#include <linux/platform_device.h>
#include <linux/io.h>
#include <linux/psp-sev.h>
+#include <linux/pagewalk.h>
+#include <linux/cacheflush.h>
+#include <linux/delay.h>
#include <uapi/linux/sev-guest.h>
#include <asm/cpu_entry_area.h>
@@ -40,6 +43,7 @@
#include <asm/apic.h>
#include <asm/cpuid.h>
#include <asm/cmdline.h>
+#include <asm/probe_roms.h>
#define DR7_RESET_VALUE 0x400
@@ -71,6 +75,13 @@ static struct ghcb *boot_ghcb __section(".data");
/* Bitmap of SEV features supported by the hypervisor */
static u64 sev_hv_features __ro_after_init;
+/* Last address to be switched to private during kexec */
+static unsigned long last_address_shd_kexec;
+
+static bool crash_requested;
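+
+/*
+ * Track in-flight private<->shared conversions so that kexec can stop
+ * new conversions and wait for pending ones to drain.
+ */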
+atomic_t conversions_in_progress;
+bool conversion_allowed = true;
+
/* #VC handler runtime per-CPU data */
struct sev_es_runtime_data {
struct ghcb ghcb_page;
@@ -906,6 +917,206 @@ void snp_accept_memory(phys_addr_t start, phys_addr_t end)
set_pages_state(vaddr, npages, SNP_PAGE_STATE_PRIVATE);
}
+static inline bool pte_decrypted(pte_t pte)
+{
+ return cc_mkdec(pte_val(pte)) == pte_val(pte);
+}
+
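+/*
+ * Set the C-bit in the PTE mapping @va at @level. Flush cached data
+ * first so that nothing is written back later with the wrong C-bit.
+ * Returns 1 on success, 0 if the PTE does not map a valid PFN.
+ */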
+static int set_pte_enc(pte_t *kpte, int level, void *va)
+{
+ pgprot_t old_prot, new_prot;
+ unsigned long pfn, pa, size;
+ pte_t new_pte;
+
+ pfn = pg_level_to_pfn(level, kpte, &old_prot);
+ if (!pfn)
+ return 0;
+
+ new_prot = old_prot;
+ pgprot_val(new_prot) |= _PAGE_ENC;
+ pa = pfn << PAGE_SHIFT;
+ size = page_level_size(level);
+
+ /*
+ * Change the physical page attribute from C=0 to C=1. Flush the
+ * caches to ensure that data gets accessed with the correct C-bit.
+ */
+ clflush_cache_range(va, size);
+
+ /* Change the page encryption mask. */
+ new_pte = pfn_pte(pfn, new_prot);
+ set_pte_atomic(kpte, new_pte);
+
+ return 1;
+}
+
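+/*
+ * Convert one mapping back to private. If the range covers this CPU's
+ * GHCB, defer that conversion: the GHCB must remain shared until the
+ * very last page state change.
+ */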
+static int unshare_pte(pte_t *pte, unsigned long addr, int pages, int level)
+{
+ struct sev_es_runtime_data *data;
+ struct ghcb *ghcb;
+
+ data = this_cpu_read(runtime_data);
+ ghcb = &data->ghcb_page;
+
+ /*
+ * Check whether this CPU's GHCB is part of the range being
+ * converted.
+ */
+ if ((unsigned long)ghcb >= addr &&
+ (unsigned long)ghcb < (addr + (pages * PAGE_SIZE))) {
+ /*
+ * Defer making this GHCB private: record the address so the
+ * page is converted at the end of the unshare loop, using the
+ * MSR protocol, so the RMP entry does not risk getting
+ * PSMASHed while the GHCB is still in use.
+ */
+ pr_debug("setting boot_ghcb to NULL for this cpu ghcb\n");
+ last_address_shd_kexec = addr;
+ return 1;
+ }
+ if (!set_pte_enc(pte, level, (void *)addr))
+ return 0;
+ snp_set_memory_private(addr, pages);
+
+ return 1;
+}
+
+static void unshare_all_memory(void)
+{
+ unsigned long addr, end;
+
+ /*
+ * Walk the direct mapping and convert all shared memory back
+ * to private.
+ */
+
+ addr = PAGE_OFFSET;
+ end = PAGE_OFFSET + get_max_mapped();
+
+ while (addr < end) {
+ unsigned long size;
+ unsigned int level;
+ pte_t *pte;
+
+ pte = lookup_address(addr, &level);
+ size = page_level_size(level);
+
+ /*
+ * The pte_none() check is required to skip physical
+ * memory holes in the direct mapping.
+ */
+ if (pte && !pte_none(*pte) && pte_decrypted(*pte)) {
+ int pages = size / PAGE_SIZE;
+
+ if (!unshare_pte(pte, addr, pages, level)) {
+ pr_err("Failed to unshare range %#lx-%#lx\n",
+ addr, addr + size);
+ }
+ }
+
+ addr += size;
+ }
+ __flush_tlb_all();
+}
+
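+/*
+ * The .bss..decrypted section is mapped shared to exchange data with
+ * the hypervisor; convert it back to private so the kexec'ed kernel
+ * can validate it again.
+ */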
+static void unshare_all_bss_decrypted_memory(void)
+{
+ unsigned long vaddr, vaddr_end;
+ unsigned int level;
+ unsigned int npages;
+ pte_t *pte;
+
+ vaddr = (unsigned long)__start_bss_decrypted;
+ vaddr_end = (unsigned long)__start_bss_decrypted_unused;
+ npages = (vaddr_end - vaddr) >> PAGE_SHIFT;
+ for (; vaddr < vaddr_end; vaddr += PAGE_SIZE) {
+ pte = lookup_address(vaddr, &level);
+ if (!pte || pte_none(*pte) || !pte_decrypted(*pte))
+ continue;
+
+ set_pte_enc(pte, level, (void *)vaddr);
+ }
+ vaddr = (unsigned long)__start_bss_decrypted;
+ snp_set_memory_private(vaddr, npages);
+}
+
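+/*
+ * Stop new page state conversions and give in-flight ones a chance to
+ * drain before kexec starts converting shared memory back to private.
+ */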
+void snp_kexec_stop_conversion(bool crash)
+{
+ /* Stop new private<->shared conversions */
+ conversion_allowed = false;
+ crash_requested = crash;
+
+ /*
+ * Make sure conversion_allowed is cleared before checking
+ * conversions_in_progress.
+ */
+ barrier();
+
+ /*
+ * The crash path reaches here with interrupts disabled, so it
+ * cannot wait for conversions to finish.
+ *
+ * If a conversion raced with the shutdown, just report it and
+ * proceed.
+ */
+ if (!crash) {
+ unsigned long timeout;
+
+ /*
+ * Wait for in-flight conversions to complete.
+ *
+ * Do not wait more than 30 seconds.
+ */
+ timeout = 30 * USEC_PER_SEC;
+ while (atomic_read(&conversions_in_progress) && timeout--)
+ udelay(1);
+ }
+
+ if (atomic_read(&conversions_in_progress))
+ pr_warn("Failed to finish shared<->private conversions\n");
+}
+
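+/*
+ * Convert all shared memory, including the per-CPU GHCBs, back to
+ * private so the kexec'ed kernel can (re)validate the whole of memory.
+ */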
+void snp_kexec_unshare_mem(void)
+{
+ if (!cc_platform_has(CC_ATTR_GUEST_SEV_SNP))
+ return;
+
+ /*
+ * Switch specific memory regions, such as option ROM regions,
+ * back to shared so that (re)validation does not fail when the
+ * kexec kernel boots.
+ */
+ snp_kexec_unprep_rom_memory();
+
+ unshare_all_memory();
+
+ unshare_all_bss_decrypted_memory();
+
+ if (last_address_shd_kexec) {
+ unsigned long size;
+ unsigned int level;
+ pte_t *pte;
+
+ /*
+ * Switch to using the MSR protocol to change this CPU's
+ * GHCB to private.
+ */
+ boot_ghcb = NULL;
+ /*
+ * All the per-CPU GHCBs have been switched back to private,
+ * so no further GHCB calls to the hypervisor are possible
+ * beyond this point, until the kexec'ed kernel starts running.
+ */
+ sev_cfg.ghcbs_initialized = false;
+
+ pr_debug("boot ghcb 0x%lx\n", last_address_shd_kexec);
+ pte = lookup_address(last_address_shd_kexec, &level);
+ size = page_level_size(level);
+ set_pte_enc(pte, level, (void *)last_address_shd_kexec);
+ snp_set_memory_private(last_address_shd_kexec, (size / PAGE_SIZE));
+ }
+}
+
static int snp_set_vmsa(void *va, bool vmsa)
{
u64 attrs;
@@ -214,7 +214,7 @@ void __init sme_map_bootdata(char *real_mode_data)
__sme_early_map_unmap_mem(__va(cmdline_paddr), COMMAND_LINE_SIZE, true);
}
-static unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot)
+unsigned long pg_level_to_pfn(int level, pte_t *kpte, pgprot_t *ret_prot)
{
unsigned long pfn = 0;
pgprot_t prot;
@@ -285,6 +285,17 @@ static void enc_dec_hypercall(unsigned long vaddr, unsigned long size, bool enc)
static int amd_enc_status_change_prepare(unsigned long vaddr, int npages, bool enc)
{
+ atomic_inc(&conversions_in_progress);
+
+ /*
+ * Check after bumping conversions_in_progress to serialize
+ * against snp_kexec_stop_conversion().
+ */
+ if (!conversion_allowed) {
+ atomic_dec(&conversions_in_progress);
+ return -EBUSY;
+ }
+
/*
* To maintain the security guarantees of SEV-SNP guests, make sure
* to invalidate the memory before encryption attribute is cleared.
@@ -308,6 +319,8 @@ static int amd_enc_status_change_finish(unsigned long vaddr, int npages, bool en
if (!cc_platform_has(CC_ATTR_HOST_MEM_ENCRYPT))
enc_dec_hypercall(vaddr, npages << PAGE_SHIFT, enc);
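+
+ /* Pairs with the atomic_inc() in amd_enc_status_change_prepare(). */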
+ atomic_dec(&conversions_in_progress);
+
return 0;
}
@@ -468,6 +481,9 @@ void __init sme_early_init(void)
x86_platform.guest.enc_tlb_flush_required = amd_enc_tlb_flush_required;
x86_platform.guest.enc_cache_flush_required = amd_enc_cache_flush_required;
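+
+ /* Hooks invoked from the kexec/crash shutdown path for SNP guests. */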
+ x86_platform.guest.enc_kexec_stop_conversion = snp_kexec_stop_conversion;
+ x86_platform.guest.enc_kexec_unshare_mem = snp_kexec_unshare_mem;
+
/*
* AMD-SEV-ES intercepts the RDMSR to read the X2APIC ID in the
* parallel bringup low level code. That raises #VC which cannot be