diff mbox series

[4/5] tools/nolibc: add csky support

Message ID 20240929-nolibc-csky-v1-4-bb28031a73b0@weissschuh.net
State New
Headers show
Series csky: add shutdown and nolibc support | expand

Commit Message

Thomas Weißschuh Sept. 29, 2024, 9:47 p.m. UTC
Add support for the C-SKY architecture, which is very similar to
LoongArch.
Only v2 ABI is supported.
Optimizations are disabled as the compiler[0] seems to misoptimize the
code; in particular, the r4 register gets clobbered.
Compile the initramfs directly into the kernel, as qemu does not support
passing the initrd via OF.

There is no mainline qemu support for C-SKY.
Testing was done with commit 1f172a2c7cd5c2e7 of the downstream csky qemu [1].
Some tiny changes were necessary on top [2].

[0] gcc 13.2.0 and 14.2.0 from kernel.org crosstools
[1] https://github.com/XUANTIE-RV/qemu/
[2] QEMU patches, included inline below

Comments

Thomas Weißschuh Sept. 30, 2024, 5:23 a.m. UTC | #1
Hi Willy,

On 2024-09-30 05:49:46+0000, Willy Tarreau wrote:
> On Sun, Sep 29, 2024 at 11:47:39PM +0200, Thomas Weißschuh wrote:
> > Add support for the C-SKY architecture, which is very similar to
> > LoongArch.
> > Only v2 ABI is supported.
> > Optimizations are disabled as the compiler[0] seems to misoptimize the
> > code, especially the r4 register gets clobbered.
> > Compile the initramfs directly into the kernel, as qemu does not support
> > passing the initrd via OF.
> > 
> > There is no qemu mainline support for qemu.
> > Testing was done with commit 1f172a2c7cd5c2e7 of the downstream csky qemu [1].
> > Some tiny changes were necessary on top [2].
> > 
> > [0] gcc 13.2.0 and 14.2.0 from kernel.org crosstools
> > [1] https://github.com/XUANTIE-RV/qemu/
> > [2]
> 
> I think you wanted to place a link or something above for [2].

[2] was supposed to be inline patches for QEMU, I'll try to make that a
bit clearer.

> > diff --git a/target/csky/cpu-param.h b/target/csky/cpu-param.h
> > index 80554cc0fc03..9181b602a26f 100644
> > --- a/target/csky/cpu-param.h
> > +++ b/target/csky/cpu-param.h
> (...)
> > diff --git a/target/csky/op_vdsp2.c b/target/csky/op_vdsp2.c
> > index a9985a03be33..d953f5ea94fe 100644
> > --- a/target/csky/op_vdsp2.c
> > +++ b/target/csky/op_vdsp2.c
> 
> Also, the first two patches look like fixes for the arch itself, they
> should really go outside of the nolibc development tree, at least
> because they might have to be backported to some stable branches,
> or later fixed/reverted in case they wouldn't be optimal.

As mentioned above, these are patches for qemu, not Linux.
I don't know enough about QEMU or C-SKY to know if these are the
generally correct fixes. But they seem to work well enough for nolibc.

Guo, if these QEMU patches look reasonable to you I can also submit them.

> Aside this, it's been a long time since we last added an architecture
> and it's pleasant to see how easy it has become over time, even when
> requiring specific settings ;-)

Agreed!


Thomas
diff mbox series

Patch

diff --git a/target/csky/cpu-param.h b/target/csky/cpu-param.h
index 80554cc0fc03..9181b602a26f 100644
--- a/target/csky/cpu-param.h
+++ b/target/csky/cpu-param.h
@@ -24,11 +24,7 @@ 
 #define TARGET_PAGE_BITS    12
 #define TARGET_PHYS_ADDR_SPACE_BITS 32

-#ifdef CONFIG_USER_ONLY
-#define TARGET_VIRT_ADDR_SPACE_BITS 30
-#else
 #define TARGET_VIRT_ADDR_SPACE_BITS 32
-#endif
 #define TCG_GUEST_DEFAULT_MO      (0)

 #endif
diff --git a/target/csky/op_vdsp2.c b/target/csky/op_vdsp2.c
index a9985a03be33..d953f5ea94fe 100644
--- a/target/csky/op_vdsp2.c
+++ b/target/csky/op_vdsp2.c
@@ -4784,7 +4784,7 @@  void VDSP2_HELPER(vmulae)(CPUCSKYState *env, uint32_t insn)

     wid = ((insn >> 20) & 0x1) | ((insn >> 24) & 0x2);
     lng = 8 * pow(2, wid);
-    cnt = 128 / lng;
+    cnt = 64 / lng;
     sign = (insn >> CSKY_VDSP2_SIGN_SHI) & CSKY_VDSP2_SIGN_MASK;
     rx = (insn >> CSKY_VDSP2_VREG_SHI_VRX) & CSKY_VDSP2_VREG_MASK;
     ry = (insn >> CSKY_VDSP2_VREG_SHI_VRY) & CSKY_VDSP2_VREG_MASK;

Signed-off-by: Thomas Weißschuh <linux@weissschuh.net>
---
 tools/include/nolibc/arch-csky.h        | 161 ++++++++++++++++++++++++++++++++
 tools/include/nolibc/arch.h             |   2 +
 tools/testing/selftests/nolibc/Makefile |   8 ++
 3 files changed, 171 insertions(+)

diff --git a/tools/include/nolibc/arch-csky.h b/tools/include/nolibc/arch-csky.h
new file mode 100644
index 0000000000000000000000000000000000000000..158e5499375c22a6572321337ba4e2b8162d0d65
--- /dev/null
+++ b/tools/include/nolibc/arch-csky.h
@@ -0,0 +1,161 @@ 
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * C-SKY specific definitions for NOLIBC
+ * Copyright (C) 2023 Loongson Technology Corporation Limited
+ * Copyright (C) 2024 Thomas Weißschuh <linux@weissschuh.net>
+ */
+
+#ifndef _NOLIBC_ARCH_CSKY_H
+#define _NOLIBC_ARCH_CSKY_H
+
+#include "compiler.h"
+#include "crt.h"
+
+#if __csky__ != 2
+#error Unsupported csky ABI
+#endif
+
+/* Syscalls for C-SKY :
+ *   - stack is 8-byte aligned
+ *   - syscall number is passed in r7
+ *   - arguments are in r0, r1, r2, r3, r4, r5
+ *   - the system call is performed by calling "trap 0"
+ *   - syscall return comes in r0
+ *   - the arguments are cast to long and assigned into the target
+ *     registers which are then simply passed as registers to the asm code,
+ *     so that we don't have to experience issues with register constraints.
+ */
+
+#define _NOLIBC_SYSCALL_CLOBBERLIST \
+	"memory", "t0", "t1", "t2", "t3", "t4", "t5", "t6", "t7", "t8", "t9"
+
+#define my_syscall0(num)                                                      \
+({                                                                            \
+	register long _num  __asm__ ("r7") = (num);                           \
+	register long _arg1 __asm__ ("r0");                                   \
+									      \
+	__asm__ volatile (                                                    \
+		"trap 0\n"                                                    \
+		: "=r"(_arg1)                                                 \
+		: "r"(_num)                                                   \
+		: _NOLIBC_SYSCALL_CLOBBERLIST                                 \
+	);                                                                    \
+	_arg1;                                                                \
+})
+
+#define my_syscall1(num, arg1)                                                \
+({                                                                            \
+	register long _num  __asm__ ("r7") = (num);                           \
+	register long _arg1 __asm__ ("r0") = (long)(arg1);                    \
+									      \
+	__asm__ volatile (                                                    \
+		"trap 0\n"                                                    \
+		: "+r"(_arg1)                                                 \
+		: "r"(_num)                                                   \
+		: _NOLIBC_SYSCALL_CLOBBERLIST                                 \
+	);                                                                    \
+	_arg1;                                                                \
+})
+
+#define my_syscall2(num, arg1, arg2)                                          \
+({                                                                            \
+	register long _num  __asm__ ("r7") = (num);                           \
+	register long _arg1 __asm__ ("r0") = (long)(arg1);                    \
+	register long _arg2 __asm__ ("r1") = (long)(arg2);                    \
+									      \
+	__asm__ volatile (                                                    \
+		"trap 0\n"                                                    \
+		: "+r"(_arg1)                                                 \
+		: "r"(_arg2),                                                 \
+		  "r"(_num)                                                   \
+		: _NOLIBC_SYSCALL_CLOBBERLIST                                 \
+	);                                                                    \
+	_arg1;                                                                \
+})
+
+#define my_syscall3(num, arg1, arg2, arg3)                                    \
+({                                                                            \
+	register long _num  __asm__ ("r7") = (num);                           \
+	register long _arg1 __asm__ ("r0") = (long)(arg1);                    \
+	register long _arg2 __asm__ ("r1") = (long)(arg2);                    \
+	register long _arg3 __asm__ ("r2") = (long)(arg3);                    \
+									      \
+	__asm__ volatile (                                                    \
+		"trap 0\n"                                                    \
+		: "+r"(_arg1)                                                 \
+		: "r"(_arg2), "r"(_arg3),                                     \
+		  "r"(_num)                                                   \
+		: _NOLIBC_SYSCALL_CLOBBERLIST                                 \
+	);                                                                    \
+	_arg1;                                                                \
+})
+
+#define my_syscall4(num, arg1, arg2, arg3, arg4)                              \
+({                                                                            \
+	register long _num  __asm__ ("r7") = (num);                           \
+	register long _arg1 __asm__ ("r0") = (long)(arg1);                    \
+	register long _arg2 __asm__ ("r1") = (long)(arg2);                    \
+	register long _arg3 __asm__ ("r2") = (long)(arg3);                    \
+	register long _arg4 __asm__ ("r3") = (long)(arg4);                    \
+									      \
+	__asm__ volatile (                                                    \
+		"trap 0\n"                                                    \
+		: "+r"(_arg1)                                                 \
+		: "r"(_arg2), "r"(_arg3), "r"(_arg4),                         \
+		  "r"(_num)                                                   \
+		: _NOLIBC_SYSCALL_CLOBBERLIST                                 \
+	);                                                                    \
+	_arg1;                                                                \
+})
+
+#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5)                        \
+({                                                                            \
+	register long _num  __asm__ ("r7") = (num);                           \
+	register long _arg1 __asm__ ("r0") = (long)(arg1);                    \
+	register long _arg2 __asm__ ("r1") = (long)(arg2);                    \
+	register long _arg3 __asm__ ("r2") = (long)(arg3);                    \
+	register long _arg4 __asm__ ("r3") = (long)(arg4);                    \
+	register long _arg5 __asm__ ("r4") = (long)(arg5);                    \
+									      \
+	__asm__ volatile (                                                    \
+		"trap 0\n"                                                    \
+		: "+r"(_arg1)                                                 \
+		: "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5),             \
+		  "r"(_num)                                                   \
+		: _NOLIBC_SYSCALL_CLOBBERLIST                                 \
+	);                                                                    \
+	_arg1;                                                                \
+})
+
+#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6)                  \
+({                                                                            \
+	register long _num  __asm__ ("r7") = (num);                           \
+	register long _arg1 __asm__ ("r0") = (long)(arg1);                    \
+	register long _arg2 __asm__ ("r1") = (long)(arg2);                    \
+	register long _arg3 __asm__ ("r2") = (long)(arg3);                    \
+	register long _arg4 __asm__ ("r3") = (long)(arg4);                    \
+	register long _arg5 __asm__ ("r4") = (long)(arg5);                    \
+	register long _arg6 __asm__ ("r5") = (long)(arg6);                    \
+									      \
+	__asm__ volatile (                                                    \
+		"trap 0\n"                                                    \
+		: "+r"(_arg1)                                                 \
+		: "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), "r"(_arg6), \
+		  "r"(_num)                                                   \
+		: _NOLIBC_SYSCALL_CLOBBERLIST                                 \
+	);                                                                    \
+	_arg1;                                                                \
+})
+
+/* startup code: hand the initial stack pointer to _start_c() with sp 8-byte aligned */
+void __attribute__((weak, noreturn)) __nolibc_entrypoint __no_stack_protector _start(void)
+{
+	__asm__ volatile (
+		"mov           r0, sp\n"     /* save stack pointer to r0, as arg1 of _start_c */
+		"andni         sp, sp, 7\n"  /* sp &= ~7: sp must be 8-byte aligned in callee */
+		"jbsr          _start_c\n"   /* transfer to c runtime                         */
+	);
+	__nolibc_entrypoint_epilogue();
+}
+
+#endif /* _NOLIBC_ARCH_CSKY_H */
diff --git a/tools/include/nolibc/arch.h b/tools/include/nolibc/arch.h
index c8f4e5d3add9eb5b8a438900c084dc0449fcfbd6..71cdf1eedb2045b9abd22146c72ee891765ad553 100644
--- a/tools/include/nolibc/arch.h
+++ b/tools/include/nolibc/arch.h
@@ -33,6 +33,8 @@ 
 #include "arch-s390.h"
 #elif defined(__loongarch__)
 #include "arch-loongarch.h"
+#elif defined(__csky__)
+#include "arch-csky.h"
 #else
 #error Unsupported Architecture
 #endif
diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
index e8278924cf28f17144044e69724df1d4fde141a3..2f51d8ea45f1c0658584f27553a9c8e1ecf428a9 100644
--- a/tools/testing/selftests/nolibc/Makefile
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -67,6 +67,7 @@  IMAGE_ppc64le    = arch/powerpc/boot/zImage
 IMAGE_riscv      = arch/riscv/boot/Image
 IMAGE_s390       = arch/s390/boot/bzImage
 IMAGE_loongarch  = arch/loongarch/boot/vmlinuz.efi
+IMAGE_csky       = arch/csky/boot/Image
 IMAGE            = $(objtree)/$(IMAGE_$(XARCH))
 IMAGE_NAME       = $(notdir $(IMAGE))
 
@@ -84,9 +85,11 @@  DEFCONFIG_ppc64le    = powernv_defconfig
 DEFCONFIG_riscv      = defconfig
 DEFCONFIG_s390       = defconfig
 DEFCONFIG_loongarch  = defconfig
+DEFCONFIG_csky       = defconfig
 DEFCONFIG            = $(DEFCONFIG_$(XARCH))
 
 EXTRACONFIG_mips32be = -d CONFIG_CPU_LITTLE_ENDIAN -e CONFIG_CPU_BIG_ENDIAN
+EXTRACONFIG_csky     = -e CONFIG_BLK_DEV_INITRD -e CONFIG_VIRT_DRIVERS -e CONFIG_CSKY_EXIT
 EXTRACONFIG           = $(EXTRACONFIG_$(XARCH))
 
 # optional tests to run (default = all)
@@ -106,6 +109,7 @@  QEMU_ARCH_ppc64le    = ppc64
 QEMU_ARCH_riscv      = riscv64
 QEMU_ARCH_s390       = s390x
 QEMU_ARCH_loongarch  = loongarch64
+QEMU_ARCH_csky       = cskyv2
 QEMU_ARCH            = $(QEMU_ARCH_$(XARCH))
 
 QEMU_ARCH_USER_ppc64le = ppc64le
@@ -132,8 +136,11 @@  QEMU_ARGS_ppc64le    = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC
 QEMU_ARGS_riscv      = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_s390       = -M s390-ccw-virtio -m 1G -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS_loongarch  = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_csky       = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
 QEMU_ARGS            = $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_BIOS) $(QEMU_ARGS_EXTRA)
 
+QEMU_RUN_STANDALONE_csky = 1
+
 # OUTPUT is only set when run from the main makefile, otherwise
 # it defaults to this nolibc directory.
 OUTPUT ?= $(CURDIR)/
@@ -151,6 +158,7 @@  CFLAGS_ppc64le = -m64 -mlittle-endian -mno-vsx $(call cc-option,-mabi=elfv2)
 CFLAGS_s390 = -m64
 CFLAGS_mips32le = -EL -mabi=32 -fPIC
 CFLAGS_mips32be = -EB -mabi=32
+CFLAGS_csky = -O0
 CFLAGS_STACKPROTECTOR ?= $(call cc-option,-mstack-protector-guard=global $(call cc-option,-fstack-protector-all))
 CFLAGS  ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wextra \
 		$(call cc-option,-fno-stack-protector) \