@@ -46,3 +46,4 @@ obj-$(CONFIG_TARGET_BCMNS3) += bcmns3/
obj-$(CONFIG_XEN) += xen/
obj-$(CONFIG_ARMV8_CE_SHA1) += sha1_ce_glue.o sha1_ce_core.o
obj-$(CONFIG_ARMV8_CE_SHA256) += sha256_ce_glue.o sha256_ce_core.o
+obj-$(CONFIG_COROUTINES) += co_switch.o
new file mode 100644
@@ -0,0 +1,35 @@
+/*
+ * void *_co_switch(struct co *from_co, struct co *to_co);
+ *
+ * AArch64 context switch (prototype as declared in include/coroutines.h).
+ * Stores a resume address, SP and x19-x30 into the register area at the
+ * start of from_co, reloads the same set from to_co and branches to the
+ * loaded pc. For a coroutine that has not run yet, co_create() stores the
+ * entry function in the pc slot. x0 and x1 are only read by this routine.
+ */
+.text
+.globl _co_switch
+.type _co_switch, @function
+_co_switch:
+ // x0: from_co (context to save into)
+ // x1: to_co (context to load from)
+ // from_co and to_co layout: { pc, sp, x19-x29, x30 (lr) }, 14 slots of 8 bytes
+
+ // Save context to from_co (x0)
+ // AAPCS64 says "A subroutine invocation must preserve the contents of the
+ // registers r19-r29 and SP"
+ adr x2, 1f // pc we should use to resume after this function
+ mov x3, sp // current stack pointer
+ stp x2, x3, [x0, #0] // pc, sp
+ stp x19, x20, [x0, #16] // slots 2-3
+ stp x21, x22, [x0, #32] // slots 4-5
+ stp x23, x24, [x0, #48] // slots 6-7
+ stp x25, x26, [x0, #64] // slots 8-9
+ stp x27, x28, [x0, #80] // slots 10-11
+ stp x29, x30, [x0, #96] // slots 12-13: x29 and x30 (lr)
+
+ // Load new context from to_co (x1)
+ ldp x2, x3, [x1, #0] // pc, sp
+ ldp x19, x20, [x1, #16] // slots 2-3
+ ldp x21, x22, [x1, #32] // slots 4-5
+ ldp x23, x24, [x1, #48] // slots 6-7
+ ldp x25, x26, [x1, #64] // slots 8-9
+ ldp x27, x28, [x1, #80] // slots 10-11
+ ldp x29, x30, [x1, #96] // slots 12-13: x29 and x30 (lr)
+ mov sp, x3 // switch to the stack of to_co
+ br x2 // continue at the pc loaded from to_co
+
+1: // Return to the caller
+ ret // x30 was reloaded from the context, so this returns to whoever called _co_switch() when that context was saved
@@ -9,3 +9,4 @@ ifndef CONFIG_TPL_BUILD
obj-y += interrupt.o
endif
obj-y += setjmp.o
+obj-$(CONFIG_COROUTINES) += co_switch.o
new file mode 100644
@@ -0,0 +1,26 @@
+/*
+ * void *_co_switch(struct co *from_co, struct co *to_co);
+ *
+ * i386 context switch (prototype as declared in include/coroutines.h).
+ * Arguments are read from the stack: [esp] is the return address,
+ * [esp+4] is from_co and [esp+8] is to_co.
+ * Register area layout (4-byte slots): retaddr, esp, ebp, edi, esi, ebx.
+ * The saved esp is the value the stack pointer has once the return address
+ * has been popped; execution resumes with a jmp to the saved retaddr.
+ */
+.text
+.globl _co_switch
+.type _co_switch, @function
+.intel_syntax noprefix
+_co_switch:
+ mov eax,DWORD PTR [esp+0x4] // eax = from_co
+ mov edx,DWORD PTR [esp] // edx = return address of this call
+ lea ecx,[esp+0x4] // ecx = esp as it is once the return address is popped
+ mov DWORD PTR [eax+0x8],ebp // save ebp -> slot 2
+ mov DWORD PTR [eax+0x4],ecx // save esp -> slot 1
+ mov DWORD PTR [eax+0x0],edx // save retaddr -> slot 0
+ mov DWORD PTR [eax+0xc],edi // save edi -> slot 3
+ mov ecx,DWORD PTR [esp+0x8] // ecx = to_co
+ mov DWORD PTR [eax+0x10],esi // save esi -> slot 4
+ mov DWORD PTR [eax+0x14],ebx // save ebx -> slot 5
+ mov edx,DWORD PTR [ecx+0x4] // edx = esp of to_co (slot 1)
+ mov ebp,DWORD PTR [ecx+0x8] // load ebp (slot 2)
+ mov eax,DWORD PTR [ecx+0x0] // eax = address to resume at (slot 0)
+ mov edi,DWORD PTR [ecx+0xc] // load edi (slot 3)
+ mov esi,DWORD PTR [ecx+0x10] // load esi (slot 4)
+ mov ebx,DWORD PTR [ecx+0x14] // load ebx (slot 5)
+ xor ecx,ecx // clear scratch register (held to_co)
+ mov esp,edx // switch to the stack of to_co
+ xor edx,edx // clear scratch register (held the new esp)
+ jmp eax // resume to_co; eax still holds the target address
@@ -8,3 +8,5 @@ obj-y += cpu.o interrupts.o setjmp.o
ifndef CONFIG_EFI
obj-y += misc.o
endif
+
+obj-$(CONFIG_COROUTINES) += co_switch.o
new file mode 100644
@@ -0,0 +1,26 @@
+/*
+ * void *_co_switch(struct co *from_co, struct co *to_co);
+ *
+ * x86_64 context switch (prototype as declared in include/coroutines.h).
+ * from_co is taken from rdi and to_co from rsi; [rsp] is the return address.
+ * Register area layout (8-byte slots): r12, r13, r14, r15, retaddr, rsp,
+ * rbx, rbp. The saved rsp is the value the stack pointer has once the
+ * return address has been popped; execution resumes with a jmp to the
+ * saved retaddr.
+ */
+.text
+.globl _co_switch
+.type _co_switch, @function
+.intel_syntax noprefix
+_co_switch:
+ mov rdx,QWORD PTR [rsp] // rdx = return address of this call
+ lea rcx,[rsp+0x8] // rcx = rsp as it is once the return address is popped
+ mov QWORD PTR [rdi+0x0], r12 // save r12 -> slot 0
+ mov QWORD PTR [rdi+0x8], r13 // save r13 -> slot 1
+ mov QWORD PTR [rdi+0x10],r14 // save r14 -> slot 2
+ mov QWORD PTR [rdi+0x18],r15 // save r15 -> slot 3
+ mov QWORD PTR [rdi+0x20],rdx // save retaddr -> slot 4
+ mov QWORD PTR [rdi+0x28],rcx // save rsp -> slot 5
+ mov QWORD PTR [rdi+0x30],rbx // save rbx -> slot 6
+ mov QWORD PTR [rdi+0x38],rbp // save rbp -> slot 7
+ mov r12,QWORD PTR [rsi+0x0] // load r12 (slot 0)
+ mov r13,QWORD PTR [rsi+0x8] // load r13 (slot 1)
+ mov r14,QWORD PTR [rsi+0x10] // load r14 (slot 2)
+ mov r15,QWORD PTR [rsi+0x18] // load r15 (slot 3)
+ mov rax,QWORD PTR [rsi+0x20] // rax = address to resume at (slot 4)
+ mov rcx,QWORD PTR [rsi+0x28] // rcx = rsp of to_co (slot 5)
+ mov rbx,QWORD PTR [rsi+0x30] // load rbx (slot 6)
+ mov rbp,QWORD PTR [rsi+0x38] // load rbp (slot 7)
+ mov rsp,rcx // switch to the stack of to_co
+ jmp rax // resume to_co
new file mode 100644
@@ -0,0 +1,151 @@
+/* SPDX-License-Identifier: Apache-2.0 */
+/*
+ * Copyright 2018 Sen Han <00hnes@gmail.com>
+ * Copyright 2025 Linaro Limited
+ */
+
+#ifndef _COROUTINES_H_
+#define _COROUTINES_H_
+
+#ifndef CONFIG_COROUTINES
+
+/* Coroutine support is compiled out: yielding is a no-op. */
+static inline void co_yield(void)
+{
+}
+
+/* Coroutine support is compiled out: exiting is a no-op. */
+static inline void co_exit(void)
+{
+}
+
+#else
+
+#ifdef __UBOOT__
+#include <log.h>
+#else
+#include <assert.h>
+#endif
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+#ifdef __i386__
+#define UCO_REG_IDX_RETADDR 0
+#define UCO_REG_IDX_SP 1
+#define UCO_REG_IDX_BP 2
+#elif __x86_64__
+#define UCO_REG_IDX_RETADDR 4
+#define UCO_REG_IDX_SP 5
+#define UCO_REG_IDX_BP 7
+#elif __aarch64__
+#define UCO_REG_IDX_RETADDR 0
+#define UCO_REG_IDX_SP 1
+#else
+#error Architecture no supported
+#endif
+
+/*
+ * Heap buffer that receives a copy of a coroutine's stack contents when
+ * another coroutine takes over the stack it was running on.
+ */
+struct co_save_stack {
+	void *ptr;		/* buffer allocated with malloc() */
+	size_t sz;		/* capacity of the buffer in bytes */
+	size_t valid_sz;	/* number of bytes currently saved in the buffer */
+	size_t max_cpsz;	/* max copy size in bytes */
+};
+
+/*
+ * Execution stack for non-main coroutines, created by co_stack_new().
+ * At any time at most one coroutine (the owner) has its frames on it.
+ */
+struct co_stack {
+	void *ptr;		/* start of the malloc()ed stack area */
+	size_t sz;		/* size of the stack area in bytes */
+	void *align_highptr;	/* 16-byte aligned address near the top of the area */
+	void *align_retptr;	/* align_highptr - sizeof(void *) */
+	size_t align_validsz;	/* restored bytes + sizeof(void *) while owned, else 0 */
+	size_t align_limit;	/* sz - 16 - 2 * sizeof(void *), used in sanity checks */
+	struct co *owner;	/* coroutine whose frames are on the stack, or NULL */
+	void *real_ptr;		/* not referenced by the coroutine code in this patch */
+	size_t real_sz;		/* not referenced by the coroutine code in this patch */
+};
+
+/* A coroutine: saved CPU context plus bookkeeping. */
+struct co {
+	/*
+	 * CPU register state (callee-saved registers plus SP and PC), written
+	 * and read by _co_switch(). Must remain the first member because the
+	 * assembly addresses it at offset 0 of the structure.
+	 */
+#if defined(__i386__)
+	void *reg[6];	/* retaddr, esp, ebp, edi, esi, ebx */
+#elif defined(__x86_64__)
+	void *reg[8];	/* r12, r13, r14, r15, retaddr, rsp, rbx, rbp */
+#elif defined(__aarch64__)
+	void *reg[14];	/* pc, sp, x19-x29, x30 (lr) */
+#endif
+	struct co *main_co;	/* coroutine to yield back to; NULL for a main coroutine */
+	void *arg;		/* opaque argument, returned by co_get_arg() */
+	bool done;		/* set by co_exit(); co_resume() refuses done coroutines */
+
+	void (*fp)(void);	/* entry point */
+
+	struct co_save_stack save_stack;	/* copy of the stack while not owning it */
+	struct co_stack *stack;			/* execution stack; NULL for a main coroutine */
+};
+
+/*
+ * Storage-class qualifier for current_co: thread-local on x86, plain global
+ * elsewhere.
+ * NOTE(review): __thread needs working TLS support in the x86 build; confirm.
+ */
+#if !defined(__i386__) && !defined(__x86_64__)
+#define UCO_THREAD
+#else
+#define UCO_THREAD __thread
+#endif
+
+/* Coroutine currently running (defined in lib/coroutines.c) */
+extern UCO_THREAD struct co *current_co;
+
+/**
+ * co_get_co() - get the coroutine recorded as currently running
+ *
+ * Return: the value of current_co
+ */
+static inline struct co *co_get_co(void)
+{
+	struct co *running = current_co;
+
+	return running;
+}
+
+/**
+ * co_get_arg() - get the argument of the current coroutine
+ *
+ * Like co_yield() and co_exit(), this tolerates being called when no
+ * coroutine is recorded as running instead of dereferencing a NULL pointer.
+ *
+ * Return: the @arg value passed to co_create() for the current coroutine, or
+ * NULL when there is no current coroutine
+ */
+static inline void *co_get_arg(void)
+{
+	struct co *co = co_get_co();
+
+	return co ? co->arg : NULL;
+}
+
+/**
+ * co_stack_new() - allocate an execution stack for coroutines
+ * @sz: requested size in bytes; values below 4096 are raised to 4096
+ *
+ * Return: the new stack, or NULL if memory could not be allocated
+ */
+struct co_stack *co_stack_new(size_t sz);
+
+/**
+ * co_stack_destroy() - free a stack allocated with co_stack_new()
+ * @s: stack to free; NULL is ignored
+ */
+void co_stack_destroy(struct co_stack *s);
+
+/**
+ * co_create() - create a coroutine
+ * @main_co: coroutine to yield back to, or NULL to create a main coroutine
+ * @stack: execution stack; required when @main_co is not NULL
+ * @save_stack_sz: initial size of the stack save buffer; 0 selects 64 bytes
+ * @fp: entry point
+ * @arg: opaque value made available through co_get_arg()
+ *
+ * Return: the new coroutine
+ */
+struct co *co_create(struct co *main_co, struct co_stack *stack,
+		     size_t save_stack_sz, void (*fp)(void), void *arg);
+
+/**
+ * co_resume() - switch from a coroutine's main_co to that coroutine
+ * @resume_co: non-main coroutine that has not called co_exit()
+ *
+ * Returns once @resume_co switches back to its main_co.
+ */
+void co_resume(struct co *resume_co);
+
+/**
+ * co_destroy() - free a coroutine created with co_create()
+ * @co: coroutine to free; NULL is ignored. The stack is not freed.
+ */
+void co_destroy(struct co *co);
+
+/**
+ * _co_switch() - architecture-specific context switch (assembly)
+ * @from_co: coroutine whose register state is saved
+ * @to_co: coroutine whose register state is loaded and resumed
+ *
+ * NOTE(review): the assembly sources describe this function as returning
+ * void, and the callers in this patch ignore the return value.
+ */
+void *_co_switch(struct co *from_co, struct co *to_co);
+
+/**
+ * _co_yield_to_main_co() - switch from a coroutine back to its main_co
+ * @yield_co: coroutine giving up the CPU; must have a main_co
+ */
+static inline void _co_yield_to_main_co(struct co *yield_co)
+{
+	struct co *target;
+
+	assert(yield_co);
+	assert(yield_co->main_co);
+
+	target = yield_co->main_co;
+	_co_switch(yield_co, target);
+}
+
+/**
+ * co_yield() - suspend the current coroutine and return to its main_co
+ *
+ * Does nothing when no coroutine is running. It also does nothing when the
+ * current coroutine is a main coroutine: co_resume() leaves current_co
+ * pointing at the main coroutine once it returns, and a main coroutine
+ * (main_co == NULL) has nothing to yield to.
+ */
+static inline void co_yield(void)
+{
+	struct co *co = current_co;
+
+	if (!co || !co->main_co)
+		return;
+
+	_co_yield_to_main_co(co);
+}
+
+/**
+ * co_is_main_co() - tell whether a coroutine is a main coroutine
+ * @co: coroutine to test (must not be NULL)
+ *
+ * Return: true if @co has no main_co of its own
+ */
+static inline bool co_is_main_co(struct co *co)
+{
+	return co->main_co == NULL;
+}
+
+/**
+ * co_exit() - terminate the current coroutine
+ *
+ * Marks the coroutine as done, releases its stack and switches back to its
+ * main_co. Does nothing when no coroutine is running. It also does nothing
+ * when the current coroutine is a main coroutine, which has neither a stack
+ * nor a main_co; co_resume() leaves current_co pointing at the main coroutine
+ * once it returns.
+ */
+static inline void co_exit(void)
+{
+	struct co *co = co_get_co();
+
+	if (!co || !co->main_co)
+		return;
+
+	co->done = true;
+	assert(co->stack->owner == co);
+	co->stack->owner = NULL;
+	co->stack->align_validsz = 0;
+	_co_yield_to_main_co(co);
+	/* co_resume() asserts !done, so control must never come back here */
+	assert(false);
+}
+
+#endif /* CONFIG_COROUTINES */
+#endif /* _COROUTINES_H_ */
@@ -1226,6 +1226,16 @@ config PHANDLE_CHECK_SEQ
enable this config option to distinguish them using
phandles in fdtdec_get_alias_seq() function.
+config COROUTINES
+	bool "Enable coroutine support"
+	depends on ARM64 || X86
+	help
+	  Coroutines allow implementing a simple form of cooperative
+	  multi-tasking. The main thread of execution registers one or
+	  more functions as coroutine entry points, then it schedules one
+	  of them. At any point the scheduled coroutine may yield, that is,
+	  suspend its execution and return to the main thread. At this
+	  point another coroutine may be scheduled, and so on until all the
+	  registered coroutines are done.
+	  Only architectures that provide a _co_switch() implementation
+	  (currently arm64 and x86) are supported.
endmenu
source "lib/fwu_updates/Kconfig"
@@ -159,6 +159,8 @@ obj-$(CONFIG_LIB_ELF) += elf.o
obj-$(CONFIG_$(PHASE_)SEMIHOSTING) += semihosting.o
+obj-$(CONFIG_COROUTINES) += coroutines.o
+
#
# Build a fast OID lookup registry from include/linux/oid_registry.h
#
new file mode 100644
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: Apache-2.0
+
+// Copyright 2018 Sen Han <00hnes@gmail.com>
+// Copyright 2025 Linaro Limited
+
+#include <coroutines.h>
+#include <stdio.h>
+#include <stdint.h>
+
+
+/*
+ * Coroutine currently executing. co_resume() points it at the coroutine being
+ * resumed before switching and at that coroutine's main_co once the switch
+ * returns; co_destroy() of a main coroutine resets it to NULL.
+ */
+UCO_THREAD struct co *current_co;
+
+/*
+ * Allocate a coroutine stack of at least 4096 bytes and precompute the
+ * aligned addresses used when starting and saving coroutines.
+ * Returns NULL when memory cannot be allocated.
+ */
+struct co_stack *co_stack_new(size_t sz)
+{
+	const size_t slot = sizeof(void *);
+	struct co_stack *stk;
+	uintptr_t top;
+
+	stk = calloc(1, sizeof(*stk));
+	if (!stk)
+		return NULL;
+
+	/* Enforce the minimum stack size */
+	stk->sz = sz < 4096 ? 4096 : sz;
+	stk->ptr = malloc(stk->sz);
+	if (!stk->ptr) {
+		free(stk);
+		return NULL;
+	}
+	stk->owner = NULL;
+
+	/* Two pointer-sized slots below the end, rounded down to 16 bytes */
+	top = (uintptr_t)(stk->sz - (slot << 1) + (uintptr_t)stk->ptr);
+	top &= ~(uintptr_t)0xf;
+	stk->align_highptr = (void *)top;
+	stk->align_retptr = (void *)(top - slot);
+
+	assert(stk->sz > (16 + (slot << 1) + slot));
+	stk->align_limit = stk->sz - 16 - (slot << 1);
+
+	return stk;
+}
+
+/* Free a stack created by co_stack_new(); a NULL pointer is ignored. */
+void co_stack_destroy(struct co_stack *s)
+{
+	if (s) {
+		free(s->ptr);
+		free(s);
+	}
+}
+
+/**
+ * co_create() - allocate and initialise a coroutine
+ * @main_co: coroutine to yield back to, or NULL to create a main coroutine
+ * @stack: execution stack; must not be NULL when @main_co is not NULL
+ * @save_stack_sz: initial size of the stack save buffer; 0 selects 64 bytes
+ * @fp: entry point, executed on the first co_resume()
+ * @arg: opaque value stored in the coroutine
+ *
+ * A main coroutine has no stack of its own and no save buffer. For other
+ * coroutines the saved context is set up so that the first _co_switch() to
+ * the coroutine jumps to @fp with the stack pointer near the top of @stack.
+ *
+ * Return: the new coroutine, or NULL if memory could not be allocated
+ */
+struct co *co_create(struct co *main_co,
+		     struct co_stack *stack,
+		     size_t save_stack_sz,
+		     void (*fp)(void), void *arg)
+{
+	struct co *p;
+
+	/*
+	 * Report allocation failures to the caller rather than relying on
+	 * assert(), which may be compiled out. calloc() leaves registers,
+	 * flags and bookkeeping fields zeroed.
+	 */
+	p = calloc(1, sizeof(*p));
+	if (!p)
+		return NULL;
+
+	p->main_co = main_co;
+	p->arg = arg;
+	p->fp = fp;
+
+	/* Main coroutine: stack and save_stack.ptr stay NULL */
+	if (!main_co)
+		return p;
+
+	assert(stack);
+	p->stack = stack;
+
+	p->reg[UCO_REG_IDX_RETADDR] = (void *)fp;
+#ifdef __aarch64__
+	/*
+	 * AAPCS64 requires SP to be 16-byte aligned. align_retptr is
+	 * align_highptr - sizeof(void *), i.e. only 8-byte aligned, which is
+	 * presumably why using it crashed. Start one 16-byte unit below
+	 * align_highptr instead of at align_highptr itself, so that every
+	 * byte the coroutine uses lies below align_retptr, inside the
+	 * [SP, align_retptr) window that grab_stack() saves and restores.
+	 */
+	p->reg[UCO_REG_IDX_SP] =
+		(void *)((uintptr_t)stack->align_highptr - 16);
+#else
+	/* x86: SP starts at align_retptr */
+	p->reg[UCO_REG_IDX_SP] = stack->align_retptr;
+#endif
+
+	if (!save_stack_sz)
+		save_stack_sz = 64;
+	p->save_stack.ptr = malloc(save_stack_sz);
+	if (!p->save_stack.ptr) {
+		free(p);
+		return NULL;
+	}
+	p->save_stack.sz = save_stack_sz;
+	p->save_stack.valid_sz = 0;
+
+	return p;
+}
+
+/*
+ * Copy the live part of owner_co's stack, i.e. [saved SP, align_retptr), into
+ * its heap save buffer (growing the buffer by doubling when it is too small)
+ * and mark the stack as having no owner.
+ */
+static void stash_owner_stack(struct co *owner_co)
+{
+	struct co_save_stack *save = &owner_co->save_stack;
+	struct co_stack *stack = owner_co->stack;
+	uintptr_t sp = (uintptr_t)owner_co->reg[UCO_REG_IDX_SP];
+	uintptr_t top = (uintptr_t)stack->align_retptr;
+
+	/* The saved SP must lie inside the usable part of the stack */
+	assert(top >= sp);
+	assert((uintptr_t)stack->align_highptr -
+	       (uintptr_t)stack->align_limit <= sp);
+
+	save->valid_sz = top - sp;
+	if (save->sz < save->valid_sz) {
+		/* Old contents are not needed: free and allocate anew */
+		free(save->ptr);
+		save->ptr = NULL;
+		do {
+			save->sz <<= 1;
+			/* catches a size that overflowed to 0 */
+			assert(save->sz > 0);
+		} while (save->sz < save->valid_sz);
+		save->ptr = malloc(save->sz);
+		assert(save->ptr);
+	}
+	if (save->valid_sz > 0)
+		memcpy(save->ptr, owner_co->reg[UCO_REG_IDX_SP],
+		       save->valid_sz);
+	if (save->valid_sz > save->max_cpsz)
+		save->max_cpsz = save->valid_sz;
+
+	stack->owner = NULL;
+	stack->align_validsz = 0;
+}
+
+/*
+ * Copy resume_co's saved stack contents back so that they end at
+ * align_retptr, then record resume_co as the owner of the stack.
+ */
+static void restore_saved_stack(struct co *resume_co)
+{
+	struct co_save_stack *save = &resume_co->save_stack;
+	struct co_stack *stack = resume_co->stack;
+
+	assert(!stack->owner);
+	assert(save->valid_sz <= stack->align_limit - sizeof(void *));
+
+	/*
+	 * Compute the destination in integer arithmetic: arithmetic on
+	 * void * is a GNU extension, not ISO C.
+	 */
+	if (save->valid_sz > 0)
+		memcpy((void *)((uintptr_t)stack->align_retptr -
+				save->valid_sz),
+		       save->ptr, save->valid_sz);
+	if (save->valid_sz > save->max_cpsz)
+		save->max_cpsz = save->valid_sz;
+
+	stack->align_validsz = save->valid_sz + sizeof(void *);
+	stack->owner = resume_co;
+}
+
+/*
+ * Make resume_co the owner of its stack: save the frames of the current
+ * owner (if any) to that owner's save buffer, then restore the frames
+ * previously saved for resume_co.
+ */
+static void grab_stack(struct co *resume_co)
+{
+	struct co *owner_co = resume_co->stack->owner;
+
+	if (owner_co) {
+		assert(owner_co->stack == resume_co->stack);
+		stash_owner_stack(owner_co);
+	}
+	restore_saved_stack(resume_co);
+}
+
+/*
+ * Run resume_co until it switches back to its main_co. The coroutine must be
+ * a non-main coroutine that has not exited. If another coroutine currently
+ * has its frames on the stack, the stack contents are swapped first.
+ */
+void co_resume(struct co *resume_co)
+{
+	struct co_stack *stack;
+
+	assert(resume_co && resume_co->main_co && !resume_co->done);
+
+	stack = resume_co->stack;
+	if (stack->owner != resume_co) {
+		grab_stack(resume_co);
+	}
+
+	current_co = resume_co;
+	_co_switch(resume_co->main_co, resume_co);
+	/* Back in the main coroutine */
+	current_co = resume_co->main_co;
+}
+
+/*
+ * Free a coroutine. Destroying a main coroutine also clears current_co.
+ * Destroying any other coroutine releases its ownership of the stack (the
+ * stack itself is not freed) and frees its save buffer. NULL is ignored.
+ */
+void co_destroy(struct co *co)
+{
+	if (!co)
+		return;
+
+	if (co_is_main_co(co)) {
+		free(co);
+		current_co = NULL;
+		return;
+	}
+
+	if (co->stack->owner == co) {
+		co->stack->owner = NULL;
+		co->stack->align_validsz = 0;
+	}
+	free(co->save_stack.ptr);
+	co->save_stack.ptr = NULL;
+	free(co);
+}
Add the COROUTINES Kconfig symbol which introduces a new internal API for coroutine support. As explained in the Kconfig file, this is meant to provide some kind of cooperative multi-tasking with the goal of improving performance by overlapping lengthy operations. The API as well as the implementation is very much inspired by libaco [1]. The reference implementation is simplified to remove all things not needed in U-Boot, the coding style is updated, and the aco_ prefix is replaced by co_. I believe the stack handling could be simplified: the stack of the main coroutine could probably be used by the secondary coroutines instead of allocating a new stack dynamically. Only i386, x86_64 and aarch64 are supported at the moment. Other architectures need to provide a _co_switch() function in assembly. Only aarch64 has been tested. [1] https://github.com/hnes/libaco/ Signed-off-by: Jerome Forissier <jerome.forissier@linaro.org> --- arch/arm/cpu/armv8/Makefile | 1 + arch/arm/cpu/armv8/co_switch.S | 35 +++++++ arch/x86/cpu/i386/Makefile | 1 + arch/x86/cpu/i386/co_switch.S | 26 +++++ arch/x86/cpu/x86_64/Makefile | 2 + arch/x86/cpu/x86_64/co_switch.S | 26 +++++ include/coroutines.h | 151 +++++++++++++++++++++++++++ lib/Kconfig | 10 ++ lib/Makefile | 2 + lib/coroutines.c | 176 ++++++++++++++++++++++++++++++++ 10 files changed, 430 insertions(+) create mode 100644 arch/arm/cpu/armv8/co_switch.S create mode 100644 arch/x86/cpu/i386/co_switch.S create mode 100644 arch/x86/cpu/x86_64/co_switch.S create mode 100644 include/coroutines.h create mode 100644 lib/coroutines.c