diff mbox series

[v4,03/17] tests/tcg: make aarch64 boot.S handle different starting modes

Message ID 20250603110204.838117-4-alex.bennee@linaro.org
State Superseded
Headers show
Series Maintainer updates for May (testing, plugins, virtio-gpu) - pre-PR | expand

Commit Message

Alex Bennée June 3, 2025, 11:01 a.m. UTC
Currently the boot.S code assumes everything starts at EL1. This will
break things like the memory test which will barf on unaligned memory
access when run at a higher level.

Adapt the boot code to do some basic verification of the starting mode
and the minimal configuration to move to the lower exception levels.
With this we can run the memory test with:

  -M virt,secure=on
  -M virt,secure=on,virtualization=on
  -M virt,virtualization=on

If a test needs to be at a particular EL it can use the semihosting
command line to indicate the level we should execute in.

Cc: Julian Armistead <julian.armistead@linaro.org>
Cc: Jim MacArthur <jim.macarthur@linaro.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>

---
v4
  - drop post eret nops
  - proper error string for EL0 error case
  - clamp any invalid target EL value to 1
v3
  - create system stack so we _exit cleanly
  - normalise EL string before compares
  - catch when we start in a lower EL than we asked for
  - default to EL1 when arg unclear
v2
  - allow tests to control the final EL we end up at
  - use tabs consistently
  - validate command line arg is between 1 and 3
---
 tests/tcg/aarch64/Makefile.softmmu-target |   3 +-
 tests/tcg/aarch64/system/boot.S           | 172 +++++++++++++++++++++-
 2 files changed, 169 insertions(+), 6 deletions(-)

Comments

Akihiko Odaki June 5, 2025, 8:29 a.m. UTC | #1
On 2025/06/03 20:01, Alex Bennée wrote:
> Currently the boot.S code assumes everything starts at EL1. This will
> break things like the memory test which will barf on unaligned memory
> access when run at a higher level.
> 
> Adapt the boot code to do some basic verification of the starting mode
> and the minimal configuration to move to the lower exception levels.
> With this we can run the memory test with:
> 
>    -M virt,secure=on
>    -M virt,secure=on,virtualization=on
>    -M virt,virtualisation=on
> 
> If a test needs to be at a particular EL it can use the semihosting
> command line to indicate the level we should execute in.
> 
> Cc: Julian Armistead <julian.armistead@linaro.org>
> Cc: Jim MacArthur <jim.macarthur@linaro.org>
> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
> 
> ---
> v4
>    - drop post eret nops
>    - proper error string for EL0 error case
>    - clamp any invalid target EL value to 1
> v3
>    - create system stack so we _exit cleanly
>    - normalise EL string before compares
>    - catch when we start in a lower EL than we asked for
>    - default to EL1 when arg unclear
> v2
>    - allow tests to control the final EL we end up at
>    - use tabs consistently
>    - validate command line arg is between 1 and 3
> ---
>   tests/tcg/aarch64/Makefile.softmmu-target |   3 +-
>   tests/tcg/aarch64/system/boot.S           | 172 +++++++++++++++++++++-
>   2 files changed, 169 insertions(+), 6 deletions(-)
> 
> diff --git a/tests/tcg/aarch64/Makefile.softmmu-target b/tests/tcg/aarch64/Makefile.softmmu-target
> index 9c52475b7a..f7a7d2b800 100644
> --- a/tests/tcg/aarch64/Makefile.softmmu-target
> +++ b/tests/tcg/aarch64/Makefile.softmmu-target
> @@ -68,7 +68,8 @@ run-plugin-semiconsole-with-%: semiconsole
>   
>   # vtimer test needs EL2
>   QEMU_EL2_MACHINE=-machine virt,virtualization=on,gic-version=2 -cpu cortex-a57 -smp 4
> -run-vtimer: QEMU_OPTS=$(QEMU_EL2_MACHINE) $(QEMU_BASE_ARGS) -kernel
> +QEMU_EL2_BASE_ARGS=-semihosting-config enable=on,target=native,chardev=output,arg="2"
> +run-vtimer: QEMU_OPTS=$(QEMU_EL2_MACHINE) $(QEMU_EL2_BASE_ARGS) -kernel
>   
>   # Simple Record/Replay Test
>   .PHONY: memory-record
> diff --git a/tests/tcg/aarch64/system/boot.S b/tests/tcg/aarch64/system/boot.S
> index a5df9c173d..8bfa4e4efc 100644
> --- a/tests/tcg/aarch64/system/boot.S
> +++ b/tests/tcg/aarch64/system/boot.S
> @@ -16,6 +16,7 @@
>   #define semihosting_call hlt 0xf000
>   #define SYS_WRITEC	0x03	/* character to debug channel */
>   #define SYS_WRITE0	0x04	/* string to debug channel */
> +#define SYS_GET_CMDLINE 0x15	/* get command line */
>   #define SYS_EXIT	0x18
>   
>   	.align	12
> @@ -70,21 +71,172 @@ lower_a32_sync:
>   lower_a32_irq:
>   lower_a32_fiq:
>   lower_a32_serror:
> +	adr	x1, .unexp_excp
> +exit_msg:
>   	mov	x0, SYS_WRITE0
> -	adr	x1, .error
>   	semihosting_call
>   	mov	x0, 1 /* EXIT_FAILURE */
>   	bl 	_exit
>   	/* never returns */
>   
>   	.section .rodata
> -.error:
> -	.string "Terminated by exception.\n"
> +.unexp_excp:
> +	.string "Unexpected exception.\n"
> +.high_el_msg:
> +	.string "Started in lower EL than requested.\n"
> +.unexp_el0:
> +	.string "Started in invalid EL.\n"
> +
> +	.align 8
> +.get_cmd:

Please do not send a new version without addressing all comments for the 
previous versions or at least noting there are unaddressed comments:
https://lore.kernel.org/qemu-devel/7a76e746-9022-48cf-8216-775071e6d631@daynix.com

Following the best practices in docs/devel/submitting-a-patch.rst will 
ensure a smoother patch review. It is fine for me if you submit a new 
version noting unaddressed comments, but some may disagree.

The same goes for "[PATCH v4 11/17] ui/gtk-gl-area: Remove extra draw call
in refresh".

Regards,
Akihiko Odaki

> +	.quad	cmdline
> +	.quad	128
>   
>   	.text
>   	.align 4
>   	.global __start
>   __start:
> +	/*
> +         * Initialise the stack for whatever EL we are in before
> +	 * anything else, we need it to be able to _exit cleanly.
> +	 * It's smaller than the stack we pass to the C code but we
> +	 * don't need much.
> +	 */
> +	adrp	x0, system_stack_end
> +	add	x0, x0, :lo12:system_stack_end
> +	mov	sp, x0
> +
> +	/*
> +	 * The test can set the semihosting command line to the target
> +	 * EL needed for the test. However if no semihosting args are set we will
> +	 * end up with -kernel/-append data (see semihosting_arg_fallback).
> +	 * Keep the normalised target in w11.
> +	 */
> +	mov	x0, SYS_GET_CMDLINE
> +	adr	x1, .get_cmd
> +	semihosting_call
> +	adrp	x10, cmdline
> +	add	x10, x10, :lo12:cmdline
> +	ldrb	w11, [x10]
> +
> +	/* sanity check, normalise char to EL, clamp to 1 if outside range */
> +	subs w11, w11, #'0'
> +	b.lt el_default
> +	cmp  w11, #3
> +	b.gt el_default
> +	b 1f
> +
> +el_high:
> +	adr	x1, .high_el_msg
> +	b	exit_msg
> +
> +el_default:
> +	mov	w11, #1
> +
> +1:
> +	/* Determine current Exception Level */
> +	mrs	x0, CurrentEL
> +	lsr	x0, x0, #2	  /* CurrentEL[3:2] contains the current EL */
> +
> +	/* Are we already in a lower EL than we want? */
> +	cmp	w11, w0
> +	bgt	el_high
> +
> +	/* Branch based on current EL */
> +	cmp	x0, #3
> +	b.eq	setup_el3
> +	cmp	x0, #2
> +	b.eq	setup_el2
> +	cmp	x0, #1
> +	b.eq	at_testel	     /* Already at EL1, skip transition */
> +
> +	/* Should not be at EL0 - error out */
> +	adr 	x1, .unexp_el0
> +	b	exit_msg
> +
> +setup_el3:
> +	/* Ensure we trap if we get anything wrong */
> +	adr	x0, vector_table
> +	msr	vbar_el3, x0
> +
> +	/* Does the test want to be at EL3? */
> +	cmp	w11, #3
> +	beq	at_testel
> +
> +	/* Configure EL3 to for lower states (EL2 or EL1) */
> +	mrs	x0, scr_el3
> +	orr	x0, x0, #(1 << 10)    /* RW = 1: EL2/EL1 execution state is AArch64 */
> +	orr	x0, x0, #(1 << 0)     /* NS = 1: Non-secure state */
> +	msr	scr_el3, x0
> +
> +	/*
> +	 * We need to check if EL2 is actually enabled via ID_AA64PFR0_EL1,
> +	 * otherwise we should just jump straight to EL1.
> +	 */
> +	mrs	x0, id_aa64pfr0_el1
> +	ubfx	x0, x0, #8, #4	      /* Extract EL2 field (bits 11:8) */
> +	cbz	x0, el2_not_present   /* If field is 0 no EL2 */
> +
> +
> +	/* Prepare SPSR for exception return to EL2 */
> +	mov	x0, #0x3c9	      /* DAIF bits and EL2h mode (9) */
> +	msr	spsr_el3, x0
> +
> +	/* Set EL2 entry point */
> +	adr	x0, setup_el2
> +	msr	elr_el3, x0
> +
> +	/* Return to EL2 */
> +	eret
> +
> +el2_not_present:
> +	/* Initialize SCTLR_EL1 with reset value */
> +	msr	sctlr_el1, xzr
> +
> +	/* Set EL1 entry point */
> +	adr	x0, at_testel
> +	msr	elr_el3, x0
> +
> +	/* Prepare SPSR for exception return to EL1h with interrupts masked */
> +	mov	x0, #0x3c5	      /* DAIF bits and EL1h mode (5) */
> +	msr	spsr_el3, x0
> +
> +	isb			      /* Synchronization barrier */
> +	eret			      /* Jump to EL1 */
> +
> +setup_el2:
> +	/* Ensure we trap if we get anything wrong */
> +	adr	x0, vector_table
> +	msr	vbar_el2, x0
> +
> +	/* Does the test want to be at EL2? */
> +	cmp	w11, #2
> +	beq	at_testel
> +
> +	/* Configure EL2 to allow transition to EL1 */
> +	mrs	x0, hcr_el2
> +	orr	x0, x0, #(1 << 31)    /* RW = 1: EL1 execution state is AArch64 */
> +	msr	hcr_el2, x0
> +
> +	/* Initialize SCTLR_EL1 with reset value */
> +	msr	sctlr_el1, xzr
> +
> +	/* Set EL1 entry point */
> +	adr	x0, at_testel
> +	msr	elr_el2, x0
> +
> +	/* Prepare SPSR for exception return to EL1 */
> +	mov	x0, #(0x5 << 0)	      /* EL1h (SPx), with interrupts disabled */
> +	msr	spsr_el2, x0
> +
> +	/* Return to EL1 */
> +	eret
> +
> +	/*
> +	 * At the target EL for the test, usually EL1. Note we still
> +	 * set everything up as if we were at EL1.
> +	 */
> +at_testel:
>   	/* Installs a table of exception vectors to catch and handle all
>   	   exceptions by terminating the process with a diagnostic.  */
>   	adr	x0, vector_table
> @@ -100,7 +252,7 @@ __start:
>   	 * maps RAM to the first Gb. The stage2 tables have two 2mb
>   	 * translation block entries covering a series of adjacent
>   	 * 4k pages.
> -	*/
> +	 */
>   
>   	/* Stage 1 entry: indexed by IA[38:30] */
>   	adr	x1, .				/* phys address */
> @@ -198,7 +350,8 @@ __start:
>   	orr	x0, x0, #(3 << 16)
>   	msr	cpacr_el1, x0
>   
> -	/* Setup some stack space and enter the test code.
> +	/*
> +	 * Setup some stack space before we enter the test code.
>   	 * Assume everything except the return value is garbage when we
>   	 * return, we won't need it.
>   	 */
> @@ -233,6 +386,11 @@ __sys_outc:
>   	ret
>   
>   	.data
> +
> +	.align 8
> +cmdline:
> +	.space 128, 0
> +
>   	.align	12
>   
>   	/* Translation table
> @@ -246,6 +404,10 @@ ttb_stage2:
>   	.space	4096, 0
>   
>   	.align	12
> +system_stack:
> +	.space 4096, 0
> +system_stack_end:
> +
>   stack:
>   	.space 65536, 0
>   stack_end:
Alex Bennée June 5, 2025, 8:51 a.m. UTC | #2
Akihiko Odaki <akihiko.odaki@daynix.com> writes:

> On 2025/06/03 20:01, Alex Bennée wrote:
>> Currently the boot.S code assumes everything starts at EL1. This will
>> break things like the memory test which will barf on unaligned memory
>> access when run at a higher level.
>> Adapt the boot code to do some basic verification of the starting
>> mode
>> and the minimal configuration to move to the lower exception levels.
>> With this we can run the memory test with:
>>    -M virt,secure=on
>>    -M virt,secure=on,virtualization=on
>>    -M virt,virtualisation=on
>> If a test needs to be at a particular EL it can use the semihosting
>> command line to indicate the level we should execute in.
>> Cc: Julian Armistead <julian.armistead@linaro.org>
>> Cc: Jim MacArthur <jim.macarthur@linaro.org>
>> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
>> ---
>> v4
>>    - drop post eret nops
>>    - proper error string for EL0 error case
>>    - clamp any invalid target EL value to 1
>> v3
>>    - create system stack so we _exit cleanly
>>    - normalise EL string before compares
>>    - catch when we start in a lower EL than we asked for
>>    - default to EL1 when arg unclear
>> v2
>>    - allow tests to control the final EL we end up at
>>    - use tabs consistently
>>    - validate command line arg is between 1 and 3
>> ---
>>   tests/tcg/aarch64/Makefile.softmmu-target |   3 +-
>>   tests/tcg/aarch64/system/boot.S           | 172 +++++++++++++++++++++-
>>   2 files changed, 169 insertions(+), 6 deletions(-)
>> diff --git a/tests/tcg/aarch64/Makefile.softmmu-target
>> b/tests/tcg/aarch64/Makefile.softmmu-target
>> index 9c52475b7a..f7a7d2b800 100644
>> --- a/tests/tcg/aarch64/Makefile.softmmu-target
>> +++ b/tests/tcg/aarch64/Makefile.softmmu-target
>> @@ -68,7 +68,8 @@ run-plugin-semiconsole-with-%: semiconsole
>>     # vtimer test needs EL2
>>   QEMU_EL2_MACHINE=-machine virt,virtualization=on,gic-version=2 -cpu cortex-a57 -smp 4
>> -run-vtimer: QEMU_OPTS=$(QEMU_EL2_MACHINE) $(QEMU_BASE_ARGS) -kernel
>> +QEMU_EL2_BASE_ARGS=-semihosting-config enable=on,target=native,chardev=output,arg="2"
>> +run-vtimer: QEMU_OPTS=$(QEMU_EL2_MACHINE) $(QEMU_EL2_BASE_ARGS) -kernel
>>     # Simple Record/Replay Test
>>   .PHONY: memory-record
>> diff --git a/tests/tcg/aarch64/system/boot.S b/tests/tcg/aarch64/system/boot.S
>> index a5df9c173d..8bfa4e4efc 100644
>> --- a/tests/tcg/aarch64/system/boot.S
>> +++ b/tests/tcg/aarch64/system/boot.S
>> @@ -16,6 +16,7 @@
>>   #define semihosting_call hlt 0xf000
>>   #define SYS_WRITEC	0x03	/* character to debug channel */
>>   #define SYS_WRITE0	0x04	/* string to debug channel */
>> +#define SYS_GET_CMDLINE 0x15	/* get command line */
>>   #define SYS_EXIT	0x18
>>     	.align	12
>> @@ -70,21 +71,172 @@ lower_a32_sync:
>>   lower_a32_irq:
>>   lower_a32_fiq:
>>   lower_a32_serror:
>> +	adr	x1, .unexp_excp
>> +exit_msg:
>>   	mov	x0, SYS_WRITE0
>> -	adr	x1, .error
>>   	semihosting_call
>>   	mov	x0, 1 /* EXIT_FAILURE */
>>   	bl 	_exit
>>   	/* never returns */
>>     	.section .rodata
>> -.error:
>> -	.string "Terminated by exception.\n"
>> +.unexp_excp:
>> +	.string "Unexpected exception.\n"
>> +.high_el_msg:
>> +	.string "Started in lower EL than requested.\n"
>> +.unexp_el0:
>> +	.string "Started in invalid EL.\n"
>> +
>> +	.align 8
>> +.get_cmd:
>
> Please do not send a new version without addressing all comments for
> the previous versions or at least noting there are unaddressed
> comments:
> https://lore.kernel.org/qemu-devel/7a76e746-9022-48cf-8216-775071e6d631@daynix.com
>
> Following the best practices in docs/devel/submitting-a-patch.rst will
> ensure a smoother patch review. It is fine for me if you submit a new
> version noting unaddressed comments, but some may disagree.

There is no style guide for assembler. I have made the strings
consistently use the . prefix.
Akihiko Odaki June 6, 2025, 9:53 a.m. UTC | #3
On 2025/06/05 17:51, Alex Bennée wrote:
> Akihiko Odaki <akihiko.odaki@daynix.com> writes:
> 
>> On 2025/06/03 20:01, Alex Bennée wrote:
>>> Currently the boot.S code assumes everything starts at EL1. This will
>>> break things like the memory test which will barf on unaligned memory
>>> access when run at a higher level.
>>> Adapt the boot code to do some basic verification of the starting
>>> mode
>>> and the minimal configuration to move to the lower exception levels.
>>> With this we can run the memory test with:
>>>     -M virt,secure=on
>>>     -M virt,secure=on,virtualization=on
>>>     -M virt,virtualisation=on
>>> If a test needs to be at a particular EL it can use the semihosting
>>> command line to indicate the level we should execute in.
>>> Cc: Julian Armistead <julian.armistead@linaro.org>
>>> Cc: Jim MacArthur <jim.macarthur@linaro.org>
>>> Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
>>> ---
>>> v4
>>>     - drop post eret nops
>>>     - proper error string for EL0 error case
>>>     - clamp any invalid target EL value to 1
>>> v3
>>>     - create system stack so we _exit cleanly
>>>     - normalise EL string before compares
>>>     - catch when we start in a lower EL than we asked for
>>>     - default to EL1 when arg unclear
>>> v2
>>>     - allow tests to control the final EL we end up at
>>>     - use tabs consistently
>>>     - validate command line arg is between 1 and 3
>>> ---
>>>    tests/tcg/aarch64/Makefile.softmmu-target |   3 +-
>>>    tests/tcg/aarch64/system/boot.S           | 172 +++++++++++++++++++++-
>>>    2 files changed, 169 insertions(+), 6 deletions(-)
>>> diff --git a/tests/tcg/aarch64/Makefile.softmmu-target
>>> b/tests/tcg/aarch64/Makefile.softmmu-target
>>> index 9c52475b7a..f7a7d2b800 100644
>>> --- a/tests/tcg/aarch64/Makefile.softmmu-target
>>> +++ b/tests/tcg/aarch64/Makefile.softmmu-target
>>> @@ -68,7 +68,8 @@ run-plugin-semiconsole-with-%: semiconsole
>>>      # vtimer test needs EL2
>>>    QEMU_EL2_MACHINE=-machine virt,virtualization=on,gic-version=2 -cpu cortex-a57 -smp 4
>>> -run-vtimer: QEMU_OPTS=$(QEMU_EL2_MACHINE) $(QEMU_BASE_ARGS) -kernel
>>> +QEMU_EL2_BASE_ARGS=-semihosting-config enable=on,target=native,chardev=output,arg="2"
>>> +run-vtimer: QEMU_OPTS=$(QEMU_EL2_MACHINE) $(QEMU_EL2_BASE_ARGS) -kernel
>>>      # Simple Record/Replay Test
>>>    .PHONY: memory-record
>>> diff --git a/tests/tcg/aarch64/system/boot.S b/tests/tcg/aarch64/system/boot.S
>>> index a5df9c173d..8bfa4e4efc 100644
>>> --- a/tests/tcg/aarch64/system/boot.S
>>> +++ b/tests/tcg/aarch64/system/boot.S
>>> @@ -16,6 +16,7 @@
>>>    #define semihosting_call hlt 0xf000
>>>    #define SYS_WRITEC	0x03	/* character to debug channel */
>>>    #define SYS_WRITE0	0x04	/* string to debug channel */
>>> +#define SYS_GET_CMDLINE 0x15	/* get command line */
>>>    #define SYS_EXIT	0x18
>>>      	.align	12
>>> @@ -70,21 +71,172 @@ lower_a32_sync:
>>>    lower_a32_irq:
>>>    lower_a32_fiq:
>>>    lower_a32_serror:
>>> +	adr	x1, .unexp_excp
>>> +exit_msg:
>>>    	mov	x0, SYS_WRITE0
>>> -	adr	x1, .error
>>>    	semihosting_call
>>>    	mov	x0, 1 /* EXIT_FAILURE */
>>>    	bl 	_exit
>>>    	/* never returns */
>>>      	.section .rodata
>>> -.error:
>>> -	.string "Terminated by exception.\n"
>>> +.unexp_excp:
>>> +	.string "Unexpected exception.\n"
>>> +.high_el_msg:
>>> +	.string "Started in lower EL than requested.\n"
>>> +.unexp_el0:
>>> +	.string "Started in invalid EL.\n"
>>> +
>>> +	.align 8
>>> +.get_cmd:
>>
>> Please do not send a new version without addressing all comments for
>> the previous versions or at least noting there are unaddressed
>> comments:
>> https://lore.kernel.org/qemu-devel/7a76e746-9022-48cf-8216-775071e6d631@daynix.com
>>
>> Following the best practices in docs/devel/submitting-a-patch.rst will
>> ensure a smoother patch review. It is fine for me if you submit a new
>> version noting unaddressed comments, but some may disagree.
> 
> There is no style guide for assembler. I have made the strings
> consistently use the . prefix.
> 

Global symbols share the symbol space with C so the naming convention of 
C can be applied to assembly too.

I also pointed out that ".error" was prefixed with a dot, probably due to a
failed attempt to make it local. There is no point in following a mistake.

I don't see a reason to differentiate the string labels from the others 
either.

Regards,
Akihiko Odaki
diff mbox series

Patch

diff --git a/tests/tcg/aarch64/Makefile.softmmu-target b/tests/tcg/aarch64/Makefile.softmmu-target
index 9c52475b7a..f7a7d2b800 100644
--- a/tests/tcg/aarch64/Makefile.softmmu-target
+++ b/tests/tcg/aarch64/Makefile.softmmu-target
@@ -68,7 +68,8 @@  run-plugin-semiconsole-with-%: semiconsole
 
 # vtimer test needs EL2
 QEMU_EL2_MACHINE=-machine virt,virtualization=on,gic-version=2 -cpu cortex-a57 -smp 4
-run-vtimer: QEMU_OPTS=$(QEMU_EL2_MACHINE) $(QEMU_BASE_ARGS) -kernel
+QEMU_EL2_BASE_ARGS=-semihosting-config enable=on,target=native,chardev=output,arg="2"
+run-vtimer: QEMU_OPTS=$(QEMU_EL2_MACHINE) $(QEMU_EL2_BASE_ARGS) -kernel
 
 # Simple Record/Replay Test
 .PHONY: memory-record
diff --git a/tests/tcg/aarch64/system/boot.S b/tests/tcg/aarch64/system/boot.S
index a5df9c173d..8bfa4e4efc 100644
--- a/tests/tcg/aarch64/system/boot.S
+++ b/tests/tcg/aarch64/system/boot.S
@@ -16,6 +16,7 @@ 
 #define semihosting_call hlt 0xf000
 #define SYS_WRITEC	0x03	/* character to debug channel */
 #define SYS_WRITE0	0x04	/* string to debug channel */
+#define SYS_GET_CMDLINE 0x15	/* get command line */
 #define SYS_EXIT	0x18
 
 	.align	12
@@ -70,21 +71,172 @@  lower_a32_sync:
 lower_a32_irq:
 lower_a32_fiq:
 lower_a32_serror:
+	adr	x1, .unexp_excp
+exit_msg:
 	mov	x0, SYS_WRITE0
-	adr	x1, .error
 	semihosting_call
 	mov	x0, 1 /* EXIT_FAILURE */
 	bl 	_exit
 	/* never returns */
 
 	.section .rodata
-.error:
-	.string "Terminated by exception.\n"
+.unexp_excp:
+	.string "Unexpected exception.\n"
+.high_el_msg:
+	.string "Started in lower EL than requested.\n"
+.unexp_el0:
+	.string "Started in invalid EL.\n"
+
+	.align 8
+.get_cmd:
+	.quad	cmdline
+	.quad	128
 
 	.text
 	.align 4
 	.global __start
 __start:
+	/*
+	 * Initialise the stack for whatever EL we are in before
+	 * anything else, we need it to be able to _exit cleanly.
+	 * It's smaller than the stack we pass to the C code but we
+	 * don't need much.
+	 */
+	adrp	x0, system_stack_end
+	add	x0, x0, :lo12:system_stack_end
+	mov	sp, x0
+
+	/*
+	 * The test can set the semihosting command line to the target
+	 * EL needed for the test. However if no semihosting args are set we will
+	 * end up with -kernel/-append data (see semihosting_arg_fallback).
+	 * Keep the normalised target in w11.
+	 */
+	mov	x0, SYS_GET_CMDLINE
+	adr	x1, .get_cmd
+	semihosting_call
+	adrp	x10, cmdline
+	add	x10, x10, :lo12:cmdline
+	ldrb	w11, [x10]
+
+	/* sanity check, normalise char to EL, clamp to 1 if outside range */
+	subs w11, w11, #'0'
+	b.lt el_default
+	cmp  w11, #3
+	b.gt el_default
+	b 1f
+
+el_high:
+	adr	x1, .high_el_msg
+	b	exit_msg
+
+el_default:
+	mov	w11, #1
+
+1:
+	/* Determine current Exception Level */
+	mrs	x0, CurrentEL
+	lsr	x0, x0, #2	  /* CurrentEL[3:2] contains the current EL */
+
+	/* Are we already in a lower EL than we want? */
+	cmp	w11, w0
+	bgt	el_high
+
+	/* Branch based on current EL */
+	cmp	x0, #3
+	b.eq	setup_el3
+	cmp	x0, #2
+	b.eq	setup_el2
+	cmp	x0, #1
+	b.eq	at_testel	     /* Already at EL1, skip transition */
+
+	/* Should not be at EL0 - error out */
+	adr 	x1, .unexp_el0
+	b	exit_msg
+
+setup_el3:
+	/* Ensure we trap if we get anything wrong */
+	adr	x0, vector_table
+	msr	vbar_el3, x0
+
+	/* Does the test want to be at EL3? */
+	cmp	w11, #3
+	beq	at_testel
+
+	/* Configure EL3 for lower states (EL2 or EL1) */
+	mrs	x0, scr_el3
+	orr	x0, x0, #(1 << 10)    /* RW = 1: EL2/EL1 execution state is AArch64 */
+	orr	x0, x0, #(1 << 0)     /* NS = 1: Non-secure state */
+	msr	scr_el3, x0
+
+	/*
+	 * We need to check if EL2 is actually enabled via ID_AA64PFR0_EL1,
+	 * otherwise we should just jump straight to EL1.
+	 */
+	mrs	x0, id_aa64pfr0_el1
+	ubfx	x0, x0, #8, #4	      /* Extract EL2 field (bits 11:8) */
+	cbz	x0, el2_not_present   /* If field is 0 no EL2 */
+
+
+	/* Prepare SPSR for exception return to EL2 */
+	mov	x0, #0x3c9	      /* DAIF bits and EL2h mode (9) */
+	msr	spsr_el3, x0
+
+	/* Set EL2 entry point */
+	adr	x0, setup_el2
+	msr	elr_el3, x0
+
+	/* Return to EL2 */
+	eret
+
+el2_not_present:
+	/* Initialize SCTLR_EL1 with reset value */
+	msr	sctlr_el1, xzr
+
+	/* Set EL1 entry point */
+	adr	x0, at_testel
+	msr	elr_el3, x0
+
+	/* Prepare SPSR for exception return to EL1h with interrupts masked */
+	mov	x0, #0x3c5	      /* DAIF bits and EL1h mode (5) */
+	msr	spsr_el3, x0
+
+	isb			      /* Synchronization barrier */
+	eret			      /* Jump to EL1 */
+
+setup_el2:
+	/* Ensure we trap if we get anything wrong */
+	adr	x0, vector_table
+	msr	vbar_el2, x0
+
+	/* Does the test want to be at EL2? */
+	cmp	w11, #2
+	beq	at_testel
+
+	/* Configure EL2 to allow transition to EL1 */
+	mrs	x0, hcr_el2
+	orr	x0, x0, #(1 << 31)    /* RW = 1: EL1 execution state is AArch64 */
+	msr	hcr_el2, x0
+
+	/* Initialize SCTLR_EL1 with reset value */
+	msr	sctlr_el1, xzr
+
+	/* Set EL1 entry point */
+	adr	x0, at_testel
+	msr	elr_el2, x0
+
+	/* Prepare SPSR for exception return to EL1 */
+	mov	x0, #(0x5 << 0)	      /* EL1h (SPx); DAIF bits left clear, so interrupts are not masked */
+	msr	spsr_el2, x0
+
+	/* Return to EL1 */
+	eret
+
+	/*
+	 * At the target EL for the test, usually EL1. Note we still
+	 * set everything up as if we were at EL1.
+	 */
+at_testel:
 	/* Installs a table of exception vectors to catch and handle all
 	   exceptions by terminating the process with a diagnostic.  */
 	adr	x0, vector_table
@@ -100,7 +252,7 @@  __start:
 	 * maps RAM to the first Gb. The stage2 tables have two 2mb
 	 * translation block entries covering a series of adjacent
 	 * 4k pages.
-	*/
+	 */
 
 	/* Stage 1 entry: indexed by IA[38:30] */
 	adr	x1, .				/* phys address */
@@ -198,7 +350,8 @@  __start:
 	orr	x0, x0, #(3 << 16)
 	msr	cpacr_el1, x0
 
-	/* Setup some stack space and enter the test code.
+	/*
+	 * Setup some stack space before we enter the test code.
 	 * Assume everything except the return value is garbage when we
 	 * return, we won't need it.
 	 */
@@ -233,6 +386,11 @@  __sys_outc:
 	ret
 
 	.data
+
+	.align 8
+cmdline:
+	.space 128, 0
+
 	.align	12
 
 	/* Translation table
@@ -246,6 +404,10 @@  ttb_stage2:
 	.space	4096, 0
 
 	.align	12
+system_stack:
+	.space 4096, 0
+system_stack_end:
+
 stack:
 	.space 65536, 0
 stack_end: