@@ -31,4 +31,7 @@ static inline pgprot_t arch_vmap_pgprot_tagged(pgprot_t prot)
return pgprot_tagged(prot);
}
+extern unsigned long code_region_start __ro_after_init;
+extern unsigned long code_region_end __ro_after_init;
+
#endif /* _ASM_ARM64_VMALLOC_H */
@@ -29,6 +29,10 @@
static u64 module_direct_base __ro_after_init = 0;
static u64 module_plt_base __ro_after_init = 0;
+/* For pre-init vmalloc, assume the worst-case code range */
+unsigned long code_region_start __ro_after_init = (u64) (_end - SZ_2G);
+unsigned long code_region_end __ro_after_init = (u64) (_text + SZ_2G);
+
/*
* Choose a random page-aligned base address for a window of 'size' bytes which
* entirely contains the interval [start, end - 1].
@@ -101,6 +105,9 @@ static int __init module_init_limits(void)
module_plt_base = random_bounding_box(SZ_2G, min, max);
}
+ code_region_start = module_plt_base;
+ code_region_end = module_plt_base + SZ_2G;
+
pr_info("%llu pages in range for non-PLT usage",
module_direct_base ? (SZ_128M - kernel_size) / PAGE_SIZE : 0);
pr_info("%llu pages in range for PLT usage",
@@ -131,7 +131,7 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p)
void *alloc_insn_page(void)
{
- return __vmalloc_node_range(PAGE_SIZE, 1, VMALLOC_START, VMALLOC_END,
+ return __vmalloc_node_range(PAGE_SIZE, 1, code_region_start, code_region_end, /* one page, now taken from [code_region_start, code_region_end) instead of the whole vmalloc range */
GFP_KERNEL, PAGE_KERNEL_ROX, VM_FLUSH_RESET_PERMS,
NUMA_NO_NODE, __builtin_return_address(0));
}
@@ -2,7 +2,8 @@
obj-y := dma-mapping.o extable.o fault.o init.o \
cache.o copypage.o flush.o \
ioremap.o mmap.o pgd.o mmu.o \
- context.o proc.o pageattr.o fixmap.o
+ context.o proc.o pageattr.o fixmap.o \
+ vmalloc.o
obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
obj-$(CONFIG_PTDUMP_CORE) += ptdump.o
obj-$(CONFIG_PTDUMP_DEBUGFS) += ptdump_debugfs.o
new file mode 100644
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/vmalloc.h>
+#include <linux/mm.h>
+
+/* Allocate from [start, end) but outside [exclusion_start, exclusion_end); NULL on failure. */
+static void *__vmalloc_node_range_split(unsigned long size, unsigned long align,
+		unsigned long start, unsigned long end, unsigned long exclusion_start,
+		unsigned long exclusion_end, gfp_t gfp_mask, pgprot_t prot,
+		unsigned long vm_flags, int node, const void *caller)
+{
+	void *res = NULL;
+
+	if (exclusion_start > start)	/* try below the hole, clamped so it never passes end */
+		res = __vmalloc_node_range(size, align, start, min(exclusion_start, end),
+					   gfp_mask, prot, vm_flags, node, caller);
+	if (!res && exclusion_end < end)	/* fall back above the hole, clamped to start */
+		res = __vmalloc_node_range(size, align, max(exclusion_end, start), end,
+					   gfp_mask, prot, vm_flags, node, caller);
+	return res;
+}
+
+void *__vmalloc_node(unsigned long size, unsigned long align,
+ gfp_t gfp_mask, unsigned long vm_flags, int node,
+ const void *caller)
+{ /* PAGE_KERNEL allocation from VMALLOC_START..VMALLOC_END with the code region passed as the range to avoid */
+ return __vmalloc_node_range_split(size, align, VMALLOC_START,
+ VMALLOC_END, code_region_start, code_region_end, /* NOTE(review): shares its name with the generic __vmalloc_node(); presumably that one is overridable — confirm */
+ gfp_mask, PAGE_KERNEL, vm_flags, node, caller);
+}
+
+void *vmalloc_huge(unsigned long size, gfp_t gfp_mask)
+{ /* VM_ALLOW_HUGE_VMAP allocation of size bytes, alignment 1, with the code region passed as the range to avoid */
+ return __vmalloc_node_range_split(size, 1, VMALLOC_START, VMALLOC_END,
+ code_region_start, code_region_end,
+ gfp_mask, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP,
+ NUMA_NO_NODE, __builtin_return_address(0)); /* no node preference; the caller's return address is recorded as the allocation's caller */
+}
+
+void *vmalloc_user(unsigned long size)
+{ /* zeroed (__GFP_ZERO), SHMLBA-aligned VM_USERMAP allocation with the code region passed as the range to avoid */
+ return __vmalloc_node_range_split(size, SHMLBA, VMALLOC_START, VMALLOC_END,
+ code_region_start, code_region_end,
+ GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL,
+ VM_USERMAP, NUMA_NO_NODE,
+ __builtin_return_address(0)); /* the caller's return address is recorded as the allocation's caller */
+}
+
+void *vmalloc_32_user(unsigned long size)
+{ /* zeroed (__GFP_ZERO), SHMLBA-aligned VM_USERMAP allocation with the code region passed as the range to avoid */
+ return __vmalloc_node_range_split(size, SHMLBA, VMALLOC_START, VMALLOC_END,
+ code_region_start, code_region_end,
+ GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL, /* NOTE(review): GFP_VMALLOC32 appears to be a private #define in mm/vmalloc.c — confirm it is visible in this file */
+ VM_USERMAP, NUMA_NO_NODE,
+ __builtin_return_address(0)); /* the caller's return address is recorded as the allocation's caller */
+}
+
@@ -13,6 +13,7 @@
#include <linux/memory.h>
#include <linux/printk.h>
#include <linux/slab.h>
+#include <linux/moduleloader.h>
#include <asm/asm-extable.h>
#include <asm/byteorder.h>
@@ -1690,12 +1691,12 @@ u64 bpf_jit_alloc_exec_limit(void)
void *bpf_jit_alloc_exec(unsigned long size)
{
/* Memory is intended to be executable, reset the pointer tag. */
- return kasan_reset_tag(vmalloc(size));
+ return kasan_reset_tag(module_alloc(size)); /* JIT memory now comes from module_alloc() rather than vmalloc() */
}
void bpf_jit_free_exec(void *addr)
{
- return vfree(addr);
+ return module_memfree(addr); /* counterpart of the module_alloc() call in bpf_jit_alloc_exec() */
}
/* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */
Current BPF and kprobe instruction allocation interfaces do not match the base kernel and intermingle code and data pages within the same sections. In the case of BPF, this appears to be a result of code duplication between the kernel's JIT compiler and arm64's JIT. However, this is no longer necessary given the possibility of overriding vmalloc wrapper functions. arm64's vmalloc_node routines now include a layer of indirection which splits the vmalloc region into two segments surrounding the middle module_alloc region determined by ASLR. To support this, code_region_start and code_region_end are defined to match the 2GB boundary chosen by the kernel module ASLR initialization routine. The result is a large benefit to overall kernel security, as code pages now remain protected by this ASLR routine and protections can be defined linearly for code regions rather than through PTE-level tracking. Signed-off-by: Maxwell Bland <mbland@motorola.com> --- arch/arm64/include/asm/vmalloc.h | 3 ++ arch/arm64/kernel/module.c | 7 ++++ arch/arm64/kernel/probes/kprobes.c | 2 +- arch/arm64/mm/Makefile | 3 +- arch/arm64/mm/vmalloc.c | 57 ++++++++++++++++++++++++++++++ arch/arm64/net/bpf_jit_comp.c | 5 +-- 6 files changed, 73 insertions(+), 4 deletions(-) create mode 100644 arch/arm64/mm/vmalloc.c