@@ -61,7 +61,7 @@ static void __init init_irq_stacks(void)
THREAD_SIZE_ORDER);
else
stack = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN,
- THREADINFO_GFP, NUMA_NO_NODE,
+ THREADINFO_GFP, 0, NUMA_NO_NODE,
__builtin_return_address(0));
if (WARN_ON(!stack))
@@ -21,7 +21,7 @@ static inline unsigned long *arch_alloc_vmap_stack(size_t stack_size, int node)
BUILD_BUG_ON(!IS_ENABLED(CONFIG_VMAP_STACK));
- p = __vmalloc_node(stack_size, THREAD_ALIGN, THREADINFO_GFP, node,
+ p = __vmalloc_node(stack_size, THREAD_ALIGN, THREADINFO_GFP, 0, node,
__builtin_return_address(0));
return kasan_reset_tag(p);
}
@@ -205,7 +205,7 @@ static int __init arm64_efi_rt_init(void)
return 0;
p = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, GFP_KERNEL,
- NUMA_NO_NODE, &&l);
+ 0, NUMA_NO_NODE, &&l);
l: if (!p) {
pr_warn("Failed to allocate EFI runtime stack\n");
clear_bit(EFI_RUNTIME_SERVICES, &efi.flags);
@@ -308,7 +308,7 @@ DEFINE_INTERRUPT_HANDLER_ASYNC(do_IRQ)
static void *__init alloc_vm_stack(void)
{
return __vmalloc_node(THREAD_SIZE, THREAD_ALIGN, THREADINFO_GFP,
- NUMA_NO_NODE, (void *)_RET_IP_);
+ 0, NUMA_NO_NODE, (void *)_RET_IP_);
}
static void __init vmap_irqstack_init(void)
@@ -24,7 +24,7 @@ static inline unsigned long *arch_alloc_vmap_stack(size_t stack_size, int node)
{
void *p;
- p = __vmalloc_node(stack_size, THREAD_ALIGN, THREADINFO_GFP, node,
+ p = __vmalloc_node(stack_size, THREAD_ALIGN, THREADINFO_GFP, 0, node,
__builtin_return_address(0));
return kasan_reset_tag(p);
}
@@ -70,7 +70,7 @@ void *diag204_get_buffer(enum diag204_format fmt, int *pages)
return ERR_PTR(-EOPNOTSUPP);
}
diag204_buf = __vmalloc_node(array_size(*pages, PAGE_SIZE),
- PAGE_SIZE, GFP_KERNEL, NUMA_NO_NODE,
+ PAGE_SIZE, GFP_KERNEL, 0, NUMA_NO_NODE,
__builtin_return_address(0));
if (!diag204_buf)
return ERR_PTR(-ENOMEM);
@@ -254,7 +254,7 @@ static void __init conmode_default(void)
cpcmd("QUERY TERM", query_buffer, 1024, NULL);
ptr = strstr(query_buffer, "CONMODE");
/*
- * Set the conmode to 3215 so that the device recognition
+ * Set the conmode to 3215 so that the device recognition
* will set the cu_type of the console to 3215. If the
* conmode is 3270 and we don't set it back then both
* 3215 and the 3270 driver will try to access the console
@@ -314,7 +314,7 @@ static inline void setup_zfcpdump(void) {}
/*
* Reboot, halt and power_off stubs. They just call _machine_restart,
- * _machine_halt or _machine_power_off.
+ * _machine_halt or _machine_power_off.
*/
void machine_restart(char *command)
@@ -364,7 +364,7 @@ unsigned long stack_alloc(void)
void *ret;
ret = __vmalloc_node(THREAD_SIZE, THREAD_SIZE, THREADINFO_GFP,
- NUMA_NO_NODE, __builtin_return_address(0));
+ 0, NUMA_NO_NODE, __builtin_return_address(0));
kmemleak_not_leak(ret);
return (unsigned long)ret;
#else
@@ -318,7 +318,7 @@ static void fill_diag(struct sthyi_sctns *sctns)
return;
diag204_buf = __vmalloc_node(array_size(pages, PAGE_SIZE),
- PAGE_SIZE, GFP_KERNEL, NUMA_NO_NODE,
+ PAGE_SIZE, GFP_KERNEL, 0, NUMA_NO_NODE,
__builtin_return_address(0));
if (!diag204_buf)
return;
@@ -150,7 +150,8 @@ extern void *__vmalloc_node_range(unsigned long size, unsigned long align,
pgprot_t prot, unsigned long vm_flags, int node,
const void *caller) __alloc_size(1);
void *__vmalloc_node(unsigned long size, unsigned long align, gfp_t gfp_mask,
- int node, const void *caller) __alloc_size(1);
+ unsigned long vm_flags, int node, const void *caller)
+ __alloc_size(1);
void *vmalloc_huge(unsigned long size, gfp_t gfp_mask) __alloc_size(1);
extern void *__vmalloc_array(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2);
@@ -295,4 +296,16 @@ bool vmalloc_dump_obj(void *object);
static inline bool vmalloc_dump_obj(void *object) { return false; }
#endif
+#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
+#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
+#elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
+#define GFP_VMALLOC32 (GFP_DMA | GFP_KERNEL)
+#else
+/*
+ * 64b systems should always have either DMA or DMA32 zones. For others
+ * GFP_DMA32 should do the right thing and use the normal zone.
+ */
+#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
+#endif
+
#endif /* _LINUX_VMALLOC_H */
@@ -303,8 +303,8 @@ static void *__bpf_map_area_alloc(u64 size, int numa_node, bool mmapable)
return area;
}
- return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
- gfp | GFP_KERNEL | __GFP_RETRY_MAYFAIL, PAGE_KERNEL,
+ return __vmalloc_node(size, align,
+ gfp | GFP_KERNEL | __GFP_RETRY_MAYFAIL,
flags, numa_node, __builtin_return_address(0));
}
@@ -304,10 +304,8 @@ static int alloc_thread_stack_node(struct task_struct *tsk, int node)
* so memcg accounting is performed manually on assigning/releasing
* stacks to tasks. Drop __GFP_ACCOUNT.
*/
- stack = __vmalloc_node_range(THREAD_SIZE, THREAD_ALIGN,
- VMALLOC_START, VMALLOC_END,
+ stack = __vmalloc_node(THREAD_SIZE, THREAD_ALIGN,
THREADINFO_GFP & ~__GFP_ACCOUNT,
- PAGE_KERNEL,
0, node, __builtin_return_address(0));
if (!stack)
return -ENOMEM;
@@ -43,8 +43,7 @@ static void *__scs_alloc(int node)
}
}
- s = __vmalloc_node_range(SCS_SIZE, 1, VMALLOC_START, VMALLOC_END,
- GFP_SCS, PAGE_KERNEL, 0, node,
+ s = __vmalloc_node(SCS_SIZE, 1, GFP_SCS, 0, node,
__builtin_return_address(0));
out:
@@ -80,7 +80,7 @@ objpool_init_percpu_slots(struct objpool_head *pool, int nr_objs,
slot = kmalloc_node(size, pool->gfp, cpu_to_node(i));
else
slot = __vmalloc_node(size, sizeof(void *), pool->gfp,
- cpu_to_node(i), __builtin_return_address(0));
+ 0, cpu_to_node(i), __builtin_return_address(0));
if (!slot)
return -ENOMEM;
memset(slot, 0, size);
@@ -97,7 +97,7 @@ static int random_size_align_alloc_test(void)
size = ((rnd % 10) + 1) * PAGE_SIZE;
ptr = __vmalloc_node(size, align, GFP_KERNEL | __GFP_ZERO, 0,
- __builtin_return_address(0));
+ 0, __builtin_return_address(0));
if (!ptr)
return -1;
@@ -120,7 +120,7 @@ static int align_shift_alloc_test(void)
align = ((unsigned long) 1) << i;
ptr = __vmalloc_node(PAGE_SIZE, align, GFP_KERNEL|__GFP_ZERO, 0,
- __builtin_return_address(0));
+ 0, __builtin_return_address(0));
if (!ptr)
return -1;
@@ -138,7 +138,7 @@ static int fix_align_alloc_test(void)
for (i = 0; i < test_loop_count; i++) {
ptr = __vmalloc_node(5 * PAGE_SIZE, THREAD_ALIGN << 1,
GFP_KERNEL | __GFP_ZERO, 0,
- __builtin_return_address(0));
+ 0, __builtin_return_address(0));
if (!ptr)
return -1;
@@ -639,8 +639,7 @@ void *kvmalloc_node(size_t size, gfp_t flags, int node)
* about the resulting pointer, and cannot play
* protection games.
*/
- return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
- flags, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP,
+ return __vmalloc_node(size, 1, flags, VM_ALLOW_HUGE_VMAP,
node, __builtin_return_address(0));
}
EXPORT_SYMBOL(kvmalloc_node);
@@ -3119,7 +3119,7 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
/* Please note that the recursion is strictly bounded. */
if (array_size > PAGE_SIZE) {
- area->pages = __vmalloc_node(array_size, 1, nested_gfp, node,
+ area->pages = __vmalloc_node(array_size, 1, nested_gfp, 0, node,
area->caller);
} else {
area->pages = kmalloc_node(array_size, nested_gfp, node);
@@ -3379,11 +3379,12 @@ void *__vmalloc_node_range(unsigned long size, unsigned long align,
*
* Return: pointer to the allocated memory or %NULL on error
*/
-void *__vmalloc_node(unsigned long size, unsigned long align,
- gfp_t gfp_mask, int node, const void *caller)
+__weak void *__vmalloc_node(unsigned long size, unsigned long align,
+ gfp_t gfp_mask, unsigned long vm_flags, int node,
+ const void *caller)
{
return __vmalloc_node_range(size, align, VMALLOC_START, VMALLOC_END,
- gfp_mask, PAGE_KERNEL, 0, node, caller);
+ gfp_mask, PAGE_KERNEL, vm_flags, node, caller);
}
/*
* This is only for performance analysis of vmalloc and stress purpose.
@@ -3396,7 +3397,7 @@ EXPORT_SYMBOL_GPL(__vmalloc_node);
void *__vmalloc(unsigned long size, gfp_t gfp_mask)
{
- return __vmalloc_node(size, 1, gfp_mask, NUMA_NO_NODE,
+ return __vmalloc_node(size, 1, gfp_mask, 0, NUMA_NO_NODE,
__builtin_return_address(0));
}
EXPORT_SYMBOL(__vmalloc);
@@ -3415,7 +3416,7 @@ EXPORT_SYMBOL(__vmalloc);
*/
void *vmalloc(unsigned long size)
{
- return __vmalloc_node(size, 1, GFP_KERNEL, NUMA_NO_NODE,
+ return __vmalloc_node(size, 1, GFP_KERNEL, 0, NUMA_NO_NODE,
__builtin_return_address(0));
}
EXPORT_SYMBOL(vmalloc);
@@ -3432,7 +3433,7 @@ EXPORT_SYMBOL(vmalloc);
*
* Return: pointer to the allocated memory or %NULL on error
*/
-void *vmalloc_huge(unsigned long size, gfp_t gfp_mask)
+__weak void *vmalloc_huge(unsigned long size, gfp_t gfp_mask)
{
return __vmalloc_node_range(size, 1, VMALLOC_START, VMALLOC_END,
gfp_mask, PAGE_KERNEL, VM_ALLOW_HUGE_VMAP,
@@ -3455,7 +3456,7 @@ EXPORT_SYMBOL_GPL(vmalloc_huge);
*/
void *vzalloc(unsigned long size)
{
- return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, NUMA_NO_NODE,
+ return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, 0, NUMA_NO_NODE,
__builtin_return_address(0));
}
EXPORT_SYMBOL(vzalloc);
@@ -3469,7 +3470,7 @@ EXPORT_SYMBOL(vzalloc);
*
* Return: pointer to the allocated memory or %NULL on error
*/
-void *vmalloc_user(unsigned long size)
+__weak void *vmalloc_user(unsigned long size)
{
return __vmalloc_node_range(size, SHMLBA, VMALLOC_START, VMALLOC_END,
GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL,
@@ -3493,7 +3494,7 @@ EXPORT_SYMBOL(vmalloc_user);
*/
void *vmalloc_node(unsigned long size, int node)
{
- return __vmalloc_node(size, 1, GFP_KERNEL, node,
+ return __vmalloc_node(size, 1, GFP_KERNEL, 0, node,
__builtin_return_address(0));
}
EXPORT_SYMBOL(vmalloc_node);
@@ -3511,23 +3512,11 @@ EXPORT_SYMBOL(vmalloc_node);
*/
void *vzalloc_node(unsigned long size, int node)
{
- return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, node,
+ return __vmalloc_node(size, 1, GFP_KERNEL | __GFP_ZERO, 0, node,
__builtin_return_address(0));
}
EXPORT_SYMBOL(vzalloc_node);
-#if defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA32)
-#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
-#elif defined(CONFIG_64BIT) && defined(CONFIG_ZONE_DMA)
-#define GFP_VMALLOC32 (GFP_DMA | GFP_KERNEL)
-#else
-/*
- * 64b systems should always have either DMA or DMA32 zones. For others
- * GFP_DMA32 should do the right thing and use the normal zone.
- */
-#define GFP_VMALLOC32 (GFP_DMA32 | GFP_KERNEL)
-#endif
-
/**
* vmalloc_32 - allocate virtually contiguous memory (32bit addressable)
* @size: allocation size
@@ -3539,7 +3528,7 @@ EXPORT_SYMBOL(vzalloc_node);
*/
void *vmalloc_32(unsigned long size)
{
- return __vmalloc_node(size, 1, GFP_VMALLOC32, NUMA_NO_NODE,
+ return __vmalloc_node(size, 1, GFP_VMALLOC32, 0, NUMA_NO_NODE,
__builtin_return_address(0));
}
EXPORT_SYMBOL(vmalloc_32);
@@ -3553,7 +3542,7 @@ EXPORT_SYMBOL(vmalloc_32);
*
* Return: pointer to the allocated memory or %NULL on error
*/
-void *vmalloc_32_user(unsigned long size)
+__weak void *vmalloc_32_user(unsigned long size)
{
return __vmalloc_node_range(size, SHMLBA, VMALLOC_START, VMALLOC_END,
GFP_VMALLOC32 | __GFP_ZERO, PAGE_KERNEL,
The present non-uniform use of __vmalloc_node and __vmalloc_node_range makes enforcing appropriate code and data separation untenable on certain microarchitectures, as VMALLOC_START and VMALLOC_END are monolithic while the use of the vmalloc interface is non-monolithic: in particular, appropriate randomness in ASLR requires that code regions fall somewhere between VMALLOC_START and VMALLOC_END, but this means that code pages are intermingled with data pages, so code-specific protections, such as arm64's PXNTable, cannot be efficiently enforced at runtime. The solution to this problem allows architectures to override the vmalloc wrapper functions, either by ensuring that the rest of the kernel does not reimplement __vmalloc_node by calling __vmalloc_node_range with the same parameters as __vmalloc_node, or by adding a __weak tag to those functions that call __vmalloc_node_range with parameters repeating those of __vmalloc_node. Two benefits of this approach are (1) greater flexibility for each architecture in its handling of virtual memory without compromising the kernel's vmalloc logic and (2) more uniform use of the __vmalloc_node interface, reserving the more specialized __vmalloc_node_range for more specialized cases, such as kasan's shadow memory. Signed-off-by: Maxwell Bland <mbland@motorola.com> --- arch/arm/kernel/irq.c | 2 +- arch/arm64/include/asm/vmap_stack.h | 2 +- arch/arm64/kernel/efi.c | 2 +- arch/powerpc/kernel/irq.c | 2 +- arch/riscv/include/asm/irq_stack.h | 2 +- arch/s390/hypfs/hypfs_diag.c | 2 +- arch/s390/kernel/setup.c | 6 ++--- arch/s390/kernel/sthyi.c | 2 +- include/linux/vmalloc.h | 15 ++++++++++- kernel/bpf/syscall.c | 4 +-- kernel/fork.c | 4 +-- kernel/scs.c | 3 +-- lib/objpool.c | 2 +- lib/test_vmalloc.c | 6 ++--- mm/util.c | 3 +-- mm/vmalloc.c | 39 +++++++++++------------------ 16 files changed, 47 insertions(+), 49 deletions(-)