Message ID | 20220405234343.74045-7-kirill.shutemov@linux.intel.com |
---|---|
State | New |
Headers | show |
Series | mm, x86/cc: Implement support for unaccepted memory | expand |
On 4/5/22 16:43, Kirill A. Shutemov wrote: > Core-mm requires few helpers to support unaccepted memory: > > - accept_memory() checks the range of addresses against the bitmap and > accept memory if needed. > > - memory_is_unaccepted() check if anything within the range requires > acceptance. > > Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> > --- > arch/x86/include/asm/page.h | 5 +++ > arch/x86/include/asm/unaccepted_memory.h | 1 + > arch/x86/mm/Makefile | 2 + > arch/x86/mm/unaccepted_memory.c | 53 ++++++++++++++++++++++++ > 4 files changed, 61 insertions(+) > create mode 100644 arch/x86/mm/unaccepted_memory.c > > diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h > index 9cc82f305f4b..9ae0064f97e5 100644 > --- a/arch/x86/include/asm/page.h > +++ b/arch/x86/include/asm/page.h > @@ -19,6 +19,11 @@ > struct page; > > #include <linux/range.h> > + > +#ifdef CONFIG_UNACCEPTED_MEMORY > +#include <asm/unaccepted_memory.h> > +#endif It's a lot nicer to just do the #ifdefs inside the header. Is there a specific reason to do it this way? 
> extern struct range pfn_mapped[]; > extern int nr_pfn_mapped; > > diff --git a/arch/x86/include/asm/unaccepted_memory.h b/arch/x86/include/asm/unaccepted_memory.h > index f1f835d3cd78..a8d12ef1bda8 100644 > --- a/arch/x86/include/asm/unaccepted_memory.h > +++ b/arch/x86/include/asm/unaccepted_memory.h > @@ -10,5 +10,6 @@ struct boot_params; > void mark_unaccepted(struct boot_params *params, u64 start, u64 num); > > void accept_memory(phys_addr_t start, phys_addr_t end); > +bool memory_is_unaccepted(phys_addr_t start, phys_addr_t end); > > #endif > diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile > index fe3d3061fc11..e327f83e6bbf 100644 > --- a/arch/x86/mm/Makefile > +++ b/arch/x86/mm/Makefile > @@ -60,3 +60,5 @@ obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_amd.o > > obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_identity.o > obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o > + > +obj-$(CONFIG_UNACCEPTED_MEMORY) += unaccepted_memory.o > diff --git a/arch/x86/mm/unaccepted_memory.c b/arch/x86/mm/unaccepted_memory.c > new file mode 100644 > index 000000000000..3588a7cb954c > --- /dev/null > +++ b/arch/x86/mm/unaccepted_memory.c > @@ -0,0 +1,53 @@ > +// SPDX-License-Identifier: GPL-2.0-only > +#include <linux/memblock.h> > +#include <linux/mm.h> > +#include <linux/pfn.h> > +#include <linux/spinlock.h> > + > +#include <asm/io.h> > +#include <asm/setup.h> > +#include <asm/unaccepted_memory.h> > + > +static DEFINE_SPINLOCK(unaccepted_memory_lock); We need some documentation on what the lock does, either here or in the changelog. 
> +void accept_memory(phys_addr_t start, phys_addr_t end) > +{ > + unsigned long *unaccepted_memory; > + unsigned long flags; > + unsigned int rs, re; > + > + if (!boot_params.unaccepted_memory) > + return; > + > + unaccepted_memory = __va(boot_params.unaccepted_memory); > + rs = start / PMD_SIZE; > + > + spin_lock_irqsave(&unaccepted_memory_lock, flags); > + for_each_set_bitrange_from(rs, re, unaccepted_memory, > + DIV_ROUND_UP(end, PMD_SIZE)) { > + /* Platform-specific memory-acceptance call goes here */ > + panic("Cannot accept memory"); > + bitmap_clear(unaccepted_memory, rs, re - rs); > + } > + spin_unlock_irqrestore(&unaccepted_memory_lock, flags); > +} That panic() is making me nervous. Is this bisect-safe? Is it safe because there are no callers of this function yet? > +bool memory_is_unaccepted(phys_addr_t start, phys_addr_t end) > +{ > + unsigned long *unaccepted_memory = __va(boot_params.unaccepted_memory); > + unsigned long flags; > + bool ret = false; > + > + spin_lock_irqsave(&unaccepted_memory_lock, flags); > + while (start < end) { > + if (test_bit(start / PMD_SIZE, unaccepted_memory)) { > + ret = true; > + break; > + } > + > + start += PMD_SIZE; > + } > + spin_unlock_irqrestore(&unaccepted_memory_lock, flags); > + > + return ret; > +}
On 4/5/22 16:43, Kirill A. Shutemov wrote: > +void accept_memory(phys_addr_t start, phys_addr_t end) > +{ > + unsigned long *unaccepted_memory; > + unsigned long flags; > + unsigned int rs, re; > + > + if (!boot_params.unaccepted_memory) > + return; > + > + unaccepted_memory = __va(boot_params.unaccepted_memory); > + rs = start / PMD_SIZE; > + > + spin_lock_irqsave(&unaccepted_memory_lock, flags); > + for_each_set_bitrange_from(rs, re, unaccepted_memory, > + DIV_ROUND_UP(end, PMD_SIZE)) { > + /* Platform-specific memory-acceptance call goes here */ > + panic("Cannot accept memory"); > + bitmap_clear(unaccepted_memory, rs, re - rs); > + } > + spin_unlock_irqrestore(&unaccepted_memory_lock, flags); > +} Just to reiterate: this is a global spinlock. It's disabling interrupts. "Platform-specific memory-acceptance call" will soon become: tdx_accept_memory(rs * PMD_SIZE, re * PMD_SIZE); which is a page-by-page __tdx_module_call(): > + for (i = 0; i < (end - start) / PAGE_SIZE; i++) { > + if (__tdx_module_call(TDACCEPTPAGE, start + i * PAGE_SIZE, > + 0, 0, 0, NULL)) { > + error("Cannot accept memory: page accept failed\n"); > + } > + } Each __tdx_module_call() involves a privilege transition that also surely includes things like changing CR3. It can't be cheap. It also is presumably touching the memory and probably flushing it out of the CPU caches. It's also unbounded: spin_lock_irqsave(&unaccepted_memory_lock, flags); for (i = 0; i < (end - start) / PAGE_SIZE; i++) // thousands? tens-of-thousands of cycles?? spin_lock_irqsave(&unaccepted_memory_lock, flags); How far apart can end and start be? It's at *least* 2MB in the page allocator, which is on the order of a millisecond. Are we sure there aren't any callers that want to do this at a gigabyte granularity? That would hold the global lock and disable interrupts on the order of a second. Do we want to bound the time that the lock can be held? Or, should we just let the lockup detectors tell us that we're being naughty?
On Fri, Apr 08, 2022 at 12:21:19PM -0700, Dave Hansen wrote: > On 4/5/22 16:43, Kirill A. Shutemov wrote: > > +void accept_memory(phys_addr_t start, phys_addr_t end) > > +{ > > + unsigned long *unaccepted_memory; > > + unsigned long flags; > > + unsigned int rs, re; > > + > > + if (!boot_params.unaccepted_memory) > > + return; > > + > > + unaccepted_memory = __va(boot_params.unaccepted_memory); > > + rs = start / PMD_SIZE; > > + > > + spin_lock_irqsave(&unaccepted_memory_lock, flags); > > + for_each_set_bitrange_from(rs, re, unaccepted_memory, > > + DIV_ROUND_UP(end, PMD_SIZE)) { > > + /* Platform-specific memory-acceptance call goes here */ > > + panic("Cannot accept memory"); > > + bitmap_clear(unaccepted_memory, rs, re - rs); > > + } > > + spin_unlock_irqrestore(&unaccepted_memory_lock, flags); > > +} > > Just to reiterate: this is a global spinlock. It's disabling > interrupts. "Platform-specific memory-acceptance call" will soon become: > > tdx_accept_memory(rs * PMD_SIZE, re * PMD_SIZE); > > which is a page-by-page __tdx_module_call(): > > > + for (i = 0; i < (end - start) / PAGE_SIZE; i++) { > > + if (__tdx_module_call(TDACCEPTPAGE, start + i * PAGE_SIZE, > > + 0, 0, 0, NULL)) { > > + error("Cannot accept memory: page accept failed\n"); > > + } > > + } > > Each __tdx_module_call() involves a privilege transition that also > surely includes things like changing CR3. It can't be cheap. It also > is presumably touching the memory and probably flushing it out of the > CPU caches. It's also unbounded: > > spin_lock_irqsave(&unaccepted_memory_lock, flags); > for (i = 0; i < (end - start) / PAGE_SIZE; i++) > // thousands? tens-of-thousands of cycles?? > spin_lock_irqsave(&unaccepted_memory_lock, flags); > > How far apart can end and start be? It's at *least* 2MB in the page > allocator, which is on the order of a millisecond. Are we sure there > aren't any callers that want to do this at a gigabyte granularity? 
That > would hold the global lock and disable interrupts on the order of a second. This codepath only gets invoked with orders <MAX_ORDER or 4M on x86-64. > Do we want to bound the time that the lock can be held? Or, should we > just let the lockup detectors tell us that we're being naughty? Host can always DoS the guest, so yes this can lead to lockups.
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h index 9cc82f305f4b..9ae0064f97e5 100644 --- a/arch/x86/include/asm/page.h +++ b/arch/x86/include/asm/page.h @@ -19,6 +19,11 @@ struct page; #include <linux/range.h> + +#ifdef CONFIG_UNACCEPTED_MEMORY +#include <asm/unaccepted_memory.h> +#endif + extern struct range pfn_mapped[]; extern int nr_pfn_mapped; diff --git a/arch/x86/include/asm/unaccepted_memory.h b/arch/x86/include/asm/unaccepted_memory.h index f1f835d3cd78..a8d12ef1bda8 100644 --- a/arch/x86/include/asm/unaccepted_memory.h +++ b/arch/x86/include/asm/unaccepted_memory.h @@ -10,5 +10,6 @@ struct boot_params; void mark_unaccepted(struct boot_params *params, u64 start, u64 num); void accept_memory(phys_addr_t start, phys_addr_t end); +bool memory_is_unaccepted(phys_addr_t start, phys_addr_t end); #endif diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile index fe3d3061fc11..e327f83e6bbf 100644 --- a/arch/x86/mm/Makefile +++ b/arch/x86/mm/Makefile @@ -60,3 +60,5 @@ obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_amd.o obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_identity.o obj-$(CONFIG_AMD_MEM_ENCRYPT) += mem_encrypt_boot.o + +obj-$(CONFIG_UNACCEPTED_MEMORY) += unaccepted_memory.o diff --git a/arch/x86/mm/unaccepted_memory.c b/arch/x86/mm/unaccepted_memory.c new file mode 100644 index 000000000000..3588a7cb954c --- /dev/null +++ b/arch/x86/mm/unaccepted_memory.c @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include <linux/memblock.h> +#include <linux/mm.h> +#include <linux/pfn.h> +#include <linux/spinlock.h> + +#include <asm/io.h> +#include <asm/setup.h> +#include <asm/unaccepted_memory.h> + +static DEFINE_SPINLOCK(unaccepted_memory_lock); + +void accept_memory(phys_addr_t start, phys_addr_t end) +{ + unsigned long *unaccepted_memory; + unsigned long flags; + unsigned int rs, re; + + if (!boot_params.unaccepted_memory) + return; + + unaccepted_memory = __va(boot_params.unaccepted_memory); + rs = start / PMD_SIZE; + + 
spin_lock_irqsave(&unaccepted_memory_lock, flags); + for_each_set_bitrange_from(rs, re, unaccepted_memory, + DIV_ROUND_UP(end, PMD_SIZE)) { + /* Platform-specific memory-acceptance call goes here */ + panic("Cannot accept memory"); + bitmap_clear(unaccepted_memory, rs, re - rs); + } + spin_unlock_irqrestore(&unaccepted_memory_lock, flags); +} + +bool memory_is_unaccepted(phys_addr_t start, phys_addr_t end) +{ + unsigned long *unaccepted_memory = __va(boot_params.unaccepted_memory); + unsigned long flags; + bool ret = false; + + spin_lock_irqsave(&unaccepted_memory_lock, flags); + while (start < end) { + if (test_bit(start / PMD_SIZE, unaccepted_memory)) { + ret = true; + break; + } + + start += PMD_SIZE; + } + spin_unlock_irqrestore(&unaccepted_memory_lock, flags); + + return ret; +}
Core-mm requires a few helpers to support unaccepted memory: - accept_memory() checks the range of addresses against the bitmap and accepts memory if needed. - memory_is_unaccepted() checks if anything within the range requires acceptance. Signed-off-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> --- arch/x86/include/asm/page.h | 5 +++ arch/x86/include/asm/unaccepted_memory.h | 1 + arch/x86/mm/Makefile | 2 + arch/x86/mm/unaccepted_memory.c | 53 ++++++++++++++++++++++++ 4 files changed, 61 insertions(+) create mode 100644 arch/x86/mm/unaccepted_memory.c