Message ID | 20190812173019.11956-28-julien.grall@arm.com |
---|---|
State | Superseded |
Headers | show |
Series | xen/arm: Rework head.S to make it more compliant with the Arm Arm | expand |
On Mon, 12 Aug 2019, Julien Grall wrote: > At the moment, any update to the boot-pages are open-coded. This is > making more difficult to understand the logic of a function as each > update roughly requires 6 instructions. > > To ease the readability, two new macros are introduced: > - create_table_entry: Create a page-table entry in a given table. > This can work at any level. > - create_mapping_entry: Create a mapping entry in a given table. > None of the users will require to map at any other level than 3rd > (i.e page granularity). So the macro is supporting support 3rd level > mapping. > > Unlike arm64, there are no easy way to have a PC relative address within > the range -/+4GB. In order to have the possibility to use the macro in > context with MMU on/off, the user needs to tell the state of the MMU. > > Lastly, take the opportunity to replace open-coded version in > setup_fixmap() by the two new macros. The ones in create_page_tables() > will be replaced in a follow-up patch. > > Signed-off-by: Julien Grall <julien.grall@arm.com> > > --- > The adr_l hack is a bit ugly, but I can't find nicer way to avoid > code duplication and improve readability. > > Changes in v3: > - Patch added > --- > xen/arch/arm/arm32/head.S | 108 ++++++++++++++++++++++++++++++++++++++-------- > 1 file changed, 89 insertions(+), 19 deletions(-) > > diff --git a/xen/arch/arm/arm32/head.S b/xen/arch/arm/arm32/head.S > index e86a9f95e7..6d03fecaf2 100644 > --- a/xen/arch/arm/arm32/head.S > +++ b/xen/arch/arm/arm32/head.S > @@ -50,6 +50,20 @@ > .endm > > /* > + * There are no easy way to have a PC relative address within the range > + * +/- 4GB of the PC. > + * > + * This macro workaround it by asking the user to tell whether the MMU > + * has been turned on or not. I would add one statement saying why we are using r10 below in the implementation. Just a suggestion to make things clearer. > + */ > +.macro adr_l, dst, sym, mmu > + ldr \dst, =\sym > + .if \mmu == 0 > + add \dst, \dst, r10 > + .endif > +.endm > + > +/* > * Common register usage in this file: > * r0 - > * r1 - > @@ -342,6 +356,76 @@ cpu_init_done: > ENDPROC(cpu_init) > > /* > + * Macro to create a page table entry in \ptbl to \tbl > + * > + * ptbl: table symbol where the entry will be created > + * tbl: table symbol to point to > + * virt: virtual address > + * shift: #imm page table shift > + * mmu: Is the MMU turned on/off. If not specified it will be off > + * > + * Preserves \virt > + * Clobbers r1 - r4 In the 64bit version you added the temp registers to the parameter list. Why do things differently here, hard-coding the usage of r1-r4? > + * Also use r10 for the phys offset. > + * > + * Note that \virt should be in a register other than r1 - r4 > + */ > +.macro create_table_entry, ptbl, tbl, virt, shift, mmu=0 > + lsr r1, \virt, #\shift > + mov_w r2, LPAE_ENTRY_MASK > + and r1, r1, r2 /* r1 := slot in \tlb */ > + lsl r1, r1, #3 /* r1 := slot offset in \tlb */ > + > + ldr r4, =\tbl > + add r4, r4, r10 /* r4 := paddr(\tlb) */ you could use adr_l? > + mov r2, #PT_PT /* r2:r3 := right for linear PT */ > + orr r2, r2, r4 /* + \tlb paddr */ > + mov r3, #0 > + > + adr_l r4, \ptbl, \mmu > + > + strd r2, r3, [r4, r1] > +.endm > + > +/* > + * Macro to create a mapping entry in \tbl to \paddr. Only mapping in 3rd > + * level table (i.e page granularity) is supported. > + * > + * tbl: table symbol where the entry will be created > + * virt: virtual address > + * phys: physical address > + * type: mapping type. If not specified it will be normal memory (PT_MEM_L3) > + * mmu: Is the MMU turned on/off. If not specified it will be off > + * > + * Preserves \virt, \phys > + * Clobbers r1 - r4 Same here > + * * Also use r10 for the phys offset. > + * > + * Note that \virt and \paddr should be in other registers than r1 - r4 > + * and be distinct. > + */ > +.macro create_mapping_entry, tbl, virt, phys, type=PT_MEM_L3, mmu=0 > + lsr r4, \phys, #THIRD_SHIFT > + lsl r4, r4, #THIRD_SHIFT /* r4 := PAGE_ALIGNED(phys) */ and THIRD_MASK like for arm64? Doesn't really matter but it would be nicer if we could keep them similar. > + mov_w r2, LPAE_ENTRY_MASK > + lsr r1, \virt, #THIRD_SHIFT > + and r1, r1, r2 /* r1 := slot in \tlb */ > + lsl r1, r1, #3 /* r1 := slot offset in \tlb */ I would prefer if you moved these for lines up, like you have down in create_table_entry, it is clearer to read for me. Just an optional suggestion. > + mov r2, #\type /* r2:r3 := right for section PT */ > + orr r2, r2, r4 /* + PAGE_ALIGNED(phys) */ > + mov r3, #0 > + > + adr_l r4, \tbl, \mmu > + > + strd r2, r3, [r4, r1] > +.endm > + > +/* > * Rebuild the boot pagetable's first-level entries. The structure > * is described in mm.c. > * > @@ -559,31 +643,17 @@ ENDPROC(remove_identity_mapping) > * r10: Physical offset > * r11: Early UART base physical address > * > - * Clobbers r1 - r4 > + * Clobbers r0 - r4 > */ > setup_fixmap: > #if defined(CONFIG_EARLY_PRINTK) > /* Add UART to the fixmap table */ > - ldr r1, =xen_fixmap /* r1 := vaddr (xen_fixmap) */ > - lsr r2, r11, #THIRD_SHIFT > - lsl r2, r2, #THIRD_SHIFT /* 4K aligned paddr of UART */ > - orr r2, r2, #PT_UPPER(DEV_L3) > - orr r2, r2, #PT_LOWER(DEV_L3) /* r2:r3 := 4K dev map including UART */ > - mov r3, #0x0 > - strd r2, r3, [r1, #(FIXMAP_CONSOLE*8)] /* Map it in the first fixmap's slot */ > + ldr r0, =EARLY_UART_VIRTUAL_ADDRESS > + create_mapping_entry xen_fixmap, r0, r11, type=PT_DEV_L3, mmu=1 > #endif > - > /* Map fixmap into boot_second */ > - ldr r1, =boot_second /* r1 := vaddr (boot_second) */ > - ldr r2, =xen_fixmap > - add r2, r2, r10 /* r2 := paddr (xen_fixmap) */ > - orr r2, r2, #PT_UPPER(PT) > - orr r2, r2, #PT_LOWER(PT) /* r2:r3 := table map of xen_fixmap */ > - ldr r4, =FIXMAP_ADDR(0) > - mov r4, r4, lsr #(SECOND_SHIFT - 3) /* r4 := Slot for FIXMAP(0) */ > - mov r3, #0x0 > - strd r2, r3, [r1, r4] /* Map it in the fixmap's slot */ > - > + mov_w r0, FIXMAP_ADDR(0) > + create_table_entry boot_second, xen_fixmap, r0, SECOND_SHIFT, mmu=1 > /* Ensure any page table updates made above have occurred. */ > dsb nshst > > -- > 2.11.0 >
Hi Stefano, On 23/08/2019 02:10, Stefano Stabellini wrote: > On Mon, 12 Aug 2019, Julien Grall wrote: >> At the moment, any update to the boot-pages are open-coded. This is >> making more difficult to understand the logic of a function as each >> update roughly requires 6 instructions. >> >> To ease the readability, two new macros are introduced: >> - create_table_entry: Create a page-table entry in a given table. >> This can work at any level. >> - create_mapping_entry: Create a mapping entry in a given table. >> None of the users will require to map at any other level than 3rd >> (i.e page granularity). So the macro is supporting support 3rd level >> mapping. >> >> Unlike arm64, there are no easy way to have a PC relative address within >> the range -/+4GB. In order to have the possibility to use the macro in >> context with MMU on/off, the user needs to tell the state of the MMU. >> >> Lastly, take the opportunity to replace open-coded version in >> setup_fixmap() by the two new macros. The ones in create_page_tables() >> will be replaced in a follow-up patch. >> >> Signed-off-by: Julien Grall <julien.grall@arm.com> >> >> --- >> The adr_l hack is a bit ugly, but I can't find nicer way to avoid >> code duplication and improve readability. >> >> Changes in v3: >> - Patch added >> --- >> xen/arch/arm/arm32/head.S | 108 ++++++++++++++++++++++++++++++++++++++-------- >> 1 file changed, 89 insertions(+), 19 deletions(-) >> >> diff --git a/xen/arch/arm/arm32/head.S b/xen/arch/arm/arm32/head.S >> index e86a9f95e7..6d03fecaf2 100644 >> --- a/xen/arch/arm/arm32/head.S >> +++ b/xen/arch/arm/arm32/head.S >> @@ -50,6 +50,20 @@ >> .endm >> >> /* >> + * There are no easy way to have a PC relative address within the range >> + * +/- 4GB of the PC. >> + * >> + * This macro workaround it by asking the user to tell whether the MMU >> + * has been turned on or not. > > I would add one statement saying why we are using r10 below in the > implementation. Just a suggestion to make things clearer. Sure. > > >> + */ >> +.macro adr_l, dst, sym, mmu >> + ldr \dst, =\sym >> + .if \mmu == 0 >> + add \dst, \dst, r10 >> + .endif >> +.endm >> + >> +/* >> * Common register usage in this file: >> * r0 - >> * r1 - >> @@ -342,6 +356,76 @@ cpu_init_done: >> ENDPROC(cpu_init) >> >> /* >> + * Macro to create a page table entry in \ptbl to \tbl >> + * >> + * ptbl: table symbol where the entry will be created >> + * tbl: table symbol to point to >> + * virt: virtual address >> + * shift: #imm page table shift >> + * mmu: Is the MMU turned on/off. If not specified it will be off >> + * >> + * Preserves \virt >> + * Clobbers r1 - r4 > > In the 64bit version you added the temp registers to the parameter list. > Why do things differently here, hard-coding the usage of r1-r4? A few reasons: 1) There are 2 more parameters for the arm32 version (one more tmp register + mmu). So the line will be far over 80 characters. A split of the line will not be inline with the idea of one line per "instruction" within the file. 2) strd impose the two registers that will be stored to be contiguous (the start register should be an even number). So we would have to convey it to the user. arm64 version is using the parameter list because I feel macro is not meant to modify pre-defined registers. I am aware this is what we do on arm32 but we also have symbols in parameters and a function would not have made things much better in the callers (you would have to load all symbols in registers before hand). So this is the best compromised I have found. > > >> + * Also use r10 for the phys offset. >> + * >> + * Note that \virt should be in a register other than r1 - r4 >> + */ >> +.macro create_table_entry, ptbl, tbl, virt, shift, mmu=0 >> + lsr r1, \virt, #\shift >> + mov_w r2, LPAE_ENTRY_MASK >> + and r1, r1, r2 /* r1 := slot in \tlb */ >> + lsl r1, r1, #3 /* r1 := slot offset in \tlb */ >> + >> + ldr r4, =\tbl >> + add r4, r4, r10 /* r4 := paddr(\tlb) */ > > you could use adr_l? adr_l is for loading relative address. Here we want the physical address regarding the state of the MMU. >> + mov r2, #PT_PT /* r2:r3 := right for linear PT */ >> + orr r2, r2, r4 /* + \tlb paddr */ >> + mov r3, #0 >> + >> + adr_l r4, \ptbl, \mmu >> + >> + strd r2, r3, [r4, r1] >> +.endm >> + >> +/* >> + * Macro to create a mapping entry in \tbl to \paddr. Only mapping in 3rd >> + * level table (i.e page granularity) is supported. >> + * >> + * tbl: table symbol where the entry will be created >> + * virt: virtual address >> + * phys: physical address >> + * type: mapping type. If not specified it will be normal memory (PT_MEM_L3) >> + * mmu: Is the MMU turned on/off. If not specified it will be off >> + * >> + * Preserves \virt, \phys >> + * Clobbers r1 - r4 > > Same here > > >> + * * Also use r10 for the phys offset. >> + * >> + * Note that \virt and \paddr should be in other registers than r1 - r4 >> + * and be distinct. >> + */ >> +.macro create_mapping_entry, tbl, virt, phys, type=PT_MEM_L3, mmu=0 >> + lsr r4, \phys, #THIRD_SHIFT >> + lsl r4, r4, #THIRD_SHIFT /* r4 := PAGE_ALIGNED(phys) */ > > and THIRD_MASK like for arm64? Doesn't really matter but it would be > nicer if we could keep them similar. I am with you on this point. But sometimes it is not possible (they are two distinct instructions set with similarities) or make the code less obvious for the arch. In this case, the instruction 'and' is taking a "modified immediate constant". The immediate will be encoded as an 8-bit number than can be rotate. The value of THIRD_MASK is 0xfffffffffffff000. Therefore it can't be encoded as an immediate for the instruction 'and'. One solution would be to load the immediate in a register, and then using the instruction 'and'. But I felt, the lsr/lsl solution was nicer. > > >> + mov_w r2, LPAE_ENTRY_MASK >> + lsr r1, \virt, #THIRD_SHIFT >> + and r1, r1, r2 /* r1 := slot in \tlb */ >> + lsl r1, r1, #3 /* r1 := slot offset in \tlb */ > > I would prefer if you moved these for lines up, like you have down in > create_table_entry, it is clearer to read for me. Just an optional > suggestion. Sure. Cheers,
diff --git a/xen/arch/arm/arm32/head.S b/xen/arch/arm/arm32/head.S index e86a9f95e7..6d03fecaf2 100644 --- a/xen/arch/arm/arm32/head.S +++ b/xen/arch/arm/arm32/head.S @@ -50,6 +50,20 @@ .endm /* + * There are no easy way to have a PC relative address within the range + * +/- 4GB of the PC. + * + * This macro workaround it by asking the user to tell whether the MMU + * has been turned on or not. + */ +.macro adr_l, dst, sym, mmu + ldr \dst, =\sym + .if \mmu == 0 + add \dst, \dst, r10 + .endif +.endm + +/* * Common register usage in this file: * r0 - * r1 - @@ -342,6 +356,76 @@ cpu_init_done: ENDPROC(cpu_init) /* + * Macro to create a page table entry in \ptbl to \tbl + * + * ptbl: table symbol where the entry will be created + * tbl: table symbol to point to + * virt: virtual address + * shift: #imm page table shift + * mmu: Is the MMU turned on/off. If not specified it will be off + * + * Preserves \virt + * Clobbers r1 - r4 + * + * Also use r10 for the phys offset. + * + * Note that \virt should be in a register other than r1 - r4 + */ +.macro create_table_entry, ptbl, tbl, virt, shift, mmu=0 + lsr r1, \virt, #\shift + mov_w r2, LPAE_ENTRY_MASK + and r1, r1, r2 /* r1 := slot in \tlb */ + lsl r1, r1, #3 /* r1 := slot offset in \tlb */ + + ldr r4, =\tbl + add r4, r4, r10 /* r4 := paddr(\tlb) */ + + mov r2, #PT_PT /* r2:r3 := right for linear PT */ + orr r2, r2, r4 /* + \tlb paddr */ + mov r3, #0 + + adr_l r4, \ptbl, \mmu + + strd r2, r3, [r4, r1] +.endm + +/* + * Macro to create a mapping entry in \tbl to \paddr. Only mapping in 3rd + * level table (i.e page granularity) is supported. + * + * tbl: table symbol where the entry will be created + * virt: virtual address + * phys: physical address + * type: mapping type. If not specified it will be normal memory (PT_MEM_L3) + * mmu: Is the MMU turned on/off. If not specified it will be off + * + * Preserves \virt, \phys + * Clobbers r1 - r4 + * + * * Also use r10 for the phys offset. + * + * Note that \virt and \paddr should be in other registers than r1 - r4 + * and be distinct. + */ +.macro create_mapping_entry, tbl, virt, phys, type=PT_MEM_L3, mmu=0 + lsr r4, \phys, #THIRD_SHIFT + lsl r4, r4, #THIRD_SHIFT /* r4 := PAGE_ALIGNED(phys) */ + + mov_w r2, LPAE_ENTRY_MASK + lsr r1, \virt, #THIRD_SHIFT + and r1, r1, r2 /* r1 := slot in \tlb */ + lsl r1, r1, #3 /* r1 := slot offset in \tlb */ + + mov r2, #\type /* r2:r3 := right for section PT */ + orr r2, r2, r4 /* + PAGE_ALIGNED(phys) */ + mov r3, #0 + + adr_l r4, \tbl, \mmu + + strd r2, r3, [r4, r1] +.endm + +/* * Rebuild the boot pagetable's first-level entries. The structure * is described in mm.c. * @@ -559,31 +643,17 @@ ENDPROC(remove_identity_mapping) * r10: Physical offset * r11: Early UART base physical address * - * Clobbers r1 - r4 + * Clobbers r0 - r4 */ setup_fixmap: #if defined(CONFIG_EARLY_PRINTK) /* Add UART to the fixmap table */ - ldr r1, =xen_fixmap /* r1 := vaddr (xen_fixmap) */ - lsr r2, r11, #THIRD_SHIFT - lsl r2, r2, #THIRD_SHIFT /* 4K aligned paddr of UART */ - orr r2, r2, #PT_UPPER(DEV_L3) - orr r2, r2, #PT_LOWER(DEV_L3) /* r2:r3 := 4K dev map including UART */ - mov r3, #0x0 - strd r2, r3, [r1, #(FIXMAP_CONSOLE*8)] /* Map it in the first fixmap's slot */ + ldr r0, =EARLY_UART_VIRTUAL_ADDRESS + create_mapping_entry xen_fixmap, r0, r11, type=PT_DEV_L3, mmu=1 #endif - /* Map fixmap into boot_second */ - ldr r1, =boot_second /* r1 := vaddr (boot_second) */ - ldr r2, =xen_fixmap - add r2, r2, r10 /* r2 := paddr (xen_fixmap) */ - orr r2, r2, #PT_UPPER(PT) - orr r2, r2, #PT_LOWER(PT) /* r2:r3 := table map of xen_fixmap */ - ldr r4, =FIXMAP_ADDR(0) - mov r4, r4, lsr #(SECOND_SHIFT - 3) /* r4 := Slot for FIXMAP(0) */ - mov r3, #0x0 - strd r2, r3, [r1, r4] /* Map it in the fixmap's slot */ - + mov_w r0, FIXMAP_ADDR(0) + create_table_entry boot_second, xen_fixmap, r0, SECOND_SHIFT, mmu=1 /* Ensure any page table updates made above have occurred. */ dsb nshst
At the moment, any update to the boot-pages are open-coded. This is making more difficult to understand the logic of a function as each update roughly requires 6 instructions. To ease the readability, two new macros are introduced: - create_table_entry: Create a page-table entry in a given table. This can work at any level. - create_mapping_entry: Create a mapping entry in a given table. None of the users will require to map at any other level than 3rd (i.e page granularity). So the macro is supporting support 3rd level mapping. Unlike arm64, there are no easy way to have a PC relative address within the range -/+4GB. In order to have the possibility to use the macro in context with MMU on/off, the user needs to tell the state of the MMU. Lastly, take the opportunity to replace open-coded version in setup_fixmap() by the two new macros. The ones in create_page_tables() will be replaced in a follow-up patch. Signed-off-by: Julien Grall <julien.grall@arm.com> --- The adr_l hack is a bit ugly, but I can't find nicer way to avoid code duplication and improve readability. Changes in v3: - Patch added --- xen/arch/arm/arm32/head.S | 108 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 89 insertions(+), 19 deletions(-)