
[Xen-devel,17/17] xen/arm64: Zero BSS after the MMU and D-cache are turned on

Message ID 20190610193215.23704-18-julien.grall@arm.com
State New
Series xen/arm64: Rework head.S to make it more compliant with the Arm Arm

Commit Message

Julien Grall June 10, 2019, 7:32 p.m. UTC
At the moment, BSS is zeroed before the MMU and D-cache are turned on.
In other words, the cache is bypassed when zeroing the BSS section.

Per the Image protocol [1], the state of the cache for the BSS region is
not known because it is not part of the "loaded kernel image".

This means that the cache would need to be invalidated twice for the BSS
region:
    1) Before zeroing, to remove any dirty cache lines. Otherwise they
    may get evicted while zeroing and thereby overwrite the zeroed
    values.
    2) After zeroing, to remove any cache lines that may have been
    speculatively fetched. Otherwise, when the MMU and D-cache are
    turned on, the CPU may see the old values.
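
For illustration, a sketch of the extra invalidate-by-VA pass this would
require (this is not code from the patch, and the exact sequence is an
assumption; it would have to run twice, once before and once after the
zeroing loop). With x0/x1 holding the line-aligned start/end of the
region, as in zero_bss:

        mrs   x2, ctr_el0            /* CTR_EL0[19:16] = log2(words per D-cache line) */
        ubfx  x2, x2, #16, #4
        mov   x3, #4
        lsl   x2, x3, x2             /* x2 := D-cache line size in bytes */
1:      dc    ivac, x0               /* invalidate line by VA to PoC (discards dirty data) */
        add   x0, x0, x2
        cmp   x0, x1
        b.lo  1b
        dsb   sy                     /* ensure the invalidation is complete */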

However, the only reason to zero BSS early is that the boot page tables
are part of BSS. To avoid the two cache invalidations, it is possible to
move the page tables to the section .data.page_aligned.

A new macro, DEFINE_BOOT_PAGE_TABLE, is introduced to create and mark
page tables that are used before BSS is zeroed. This covers all the
boot_* tables, but also xen_fixmap, because zero_bss() will print a
message when earlyprintk is enabled.
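
For instance, DEFINE_BOOT_PAGE_TABLE(boot_pgtable) expands (per the
patch below) to:

    lpae_t __aligned(PAGE_SIZE) __section(".data.page_aligned")
        boot_pgtable[LPAE_ENTRIES];

so the table's zeros are emitted into the binary image itself instead of
relying on zero_bss().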

[1] linux/Documentation/arm64/booting.txt

---

    Note that the arm32 support is not there yet. This will need to be
    addressed here or separately depending on when the Arm32 boot rework
    is sent.
---
 xen/arch/arm/arm64/head.S |  6 +++---
 xen/arch/arm/mm.c         | 23 +++++++++++++++++------
 2 files changed, 20 insertions(+), 9 deletions(-)

Comments

Stefano Stabellini June 26, 2019, 7:29 p.m. UTC | #1
On Mon, 10 Jun 2019, Julien Grall wrote:
> At the moment, BSS is zeroed before the MMU and D-cache are turned on.
> In other words, the cache is bypassed when zeroing the BSS section.
> 
> Per the Image protocol [1], the state of the cache for the BSS region
> is not known because it is not part of the "loaded kernel image".
> 
> This means that the cache would need to be invalidated twice for the
> BSS region:
>     1) Before zeroing, to remove any dirty cache lines. Otherwise they
>     may get evicted while zeroing and thereby overwrite the zeroed
>     values.
>     2) After zeroing, to remove any cache lines that may have been
>     speculatively fetched. Otherwise, when the MMU and D-cache are
>     turned on, the CPU may see the old values.
> 
> However, the only reason to zero BSS early is that the boot page tables
> are part of BSS. To avoid the two cache invalidations, it is possible
> to move the page tables to the section .data.page_aligned.

I am not following the last part. How is moving the boot page tables to
.data.page_aligned solving the problem? Do we need to zero
.data.page_aligned early or can we skip it because it is guaranteed to
already be zero?


> A new macro, DEFINE_BOOT_PAGE_TABLE, is introduced to create and mark
> page tables that are used before BSS is zeroed. This covers all the
> boot_* tables, but also xen_fixmap, because zero_bss() will print a
> message when earlyprintk is enabled.

On a similar note, and continuing from what I wrote above, do we need to
make sure to zero the xen_fixmap before hooking it up in setup_fixmap?


> [1] linux/Documentation/arm64/booting.txt
> 
> ---
> 
>     Note that the arm32 support is not there yet. This will need to be
>     addressed here or separately depending on when the Arm32 boot rework
>     is sent.
> ---
>  xen/arch/arm/arm64/head.S |  6 +++---
>  xen/arch/arm/mm.c         | 23 +++++++++++++++++------
>  2 files changed, 20 insertions(+), 9 deletions(-)
> 
> diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S
> index 130ab66d8e..6c3edbbc81 100644
> --- a/xen/arch/arm/arm64/head.S
> +++ b/xen/arch/arm/arm64/head.S
> @@ -291,7 +291,6 @@ real_start_efi:
>          mov   x22, #0                /* x22 := is_secondary_cpu */
>  
>          bl    check_cpu_mode
> -        bl    zero_bss
>          bl    cpu_init
>          bl    create_page_tables
>          bl    enable_mmu
> @@ -312,6 +311,7 @@ primary_switched:
>          /* Use a virtual address to access the UART. */
>          ldr   x23, =EARLY_UART_VIRTUAL_ADDRESS
>  #endif
> +        bl    zero_bss
>          PRINT("- Ready -\r\n")
>          /* Setup the arguments for start_xen and jump to C world */
>          mov   x0, x20                /* x0 := phys_offset */
> @@ -423,8 +423,8 @@ zero_bss:
>          cbnz  x26, skip_bss
>  
>          PRINT("- Zero BSS -\r\n")
> -        load_paddr x0, __bss_start    /* Load paddr of start & end of bss */
> -        load_paddr x1, __bss_end
> +        ldr   x0, =__bss_start       /* x0 := vaddr(__bss_start) */
> +        ldr   x1, =__bss_end         /* x1 := vaddr(__bss_end) */
>  
>  1:      str   xzr, [x0], #8
>          cmp   x0, x1
> diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
> index 6a549e9283..0b2d07a258 100644
> --- a/xen/arch/arm/mm.c
> +++ b/xen/arch/arm/mm.c
> @@ -48,6 +48,17 @@
>  #undef mfn_to_virt
>  #define mfn_to_virt(mfn) __mfn_to_virt(mfn_x(mfn))
>  
> +/*
> + * Macros to define page-tables:
> + *  - DEFINE_BOOT_PAGE_TABLE is used to define page-tables that are used
> + *  in assembly code before BSS is zeroed.
> + *  - DEFINE_PAGE_TABLE{,S} are used to define one or multiple
> + *  page-tables to be used after BSS is zeroed (typically they are only used
> + *  in C).
> + */
> +#define DEFINE_BOOT_PAGE_TABLE(name)                                          \
> +lpae_t __aligned(PAGE_SIZE) __section(".data.page_aligned") name[LPAE_ENTRIES]
> +
>  #define DEFINE_PAGE_TABLES(name, nr)                    \
>  lpae_t __aligned(PAGE_SIZE) name[LPAE_ENTRIES * (nr)]
>  
> @@ -76,13 +87,13 @@ lpae_t __aligned(PAGE_SIZE) name[LPAE_ENTRIES * (nr)]
>   * Finally, if EARLY_PRINTK is enabled then xen_fixmap will be mapped
>   * by the CPU once it has moved off the 1:1 mapping.
>   */
> -DEFINE_PAGE_TABLE(boot_pgtable);
> +DEFINE_BOOT_PAGE_TABLE(boot_pgtable);
>  #ifdef CONFIG_ARM_64
> -DEFINE_PAGE_TABLE(boot_first);
> -DEFINE_PAGE_TABLE(boot_first_id);
> +DEFINE_BOOT_PAGE_TABLE(boot_first);
> +DEFINE_BOOT_PAGE_TABLE(boot_first_id);
>  #endif
> -DEFINE_PAGE_TABLE(boot_second);
> -DEFINE_PAGE_TABLE(boot_third);
> +DEFINE_BOOT_PAGE_TABLE(boot_second);
> +DEFINE_BOOT_PAGE_TABLE(boot_third);
>  
>  /* Main runtime page tables */
>  
> @@ -135,7 +146,7 @@ static __initdata int xenheap_first_first_slot = -1;
>   */
>  static DEFINE_PAGE_TABLES(xen_second, 2);
>  /* First level page table used for fixmap */
> -DEFINE_PAGE_TABLE(xen_fixmap);
> +DEFINE_BOOT_PAGE_TABLE(xen_fixmap);
>  /* First level page table used to map Xen itself with the XN bit set
>   * as appropriate. */
>  static DEFINE_PAGE_TABLE(xen_xenmap);
> -- 
> 2.11.0
>
Julien Grall June 26, 2019, 8:07 p.m. UTC | #2
Hi Stefano,

On 6/26/19 8:29 PM, Stefano Stabellini wrote:
> On Mon, 10 Jun 2019, Julien Grall wrote:
>> At the moment, BSS is zeroed before the MMU and D-cache are turned on.
>> In other words, the cache is bypassed when zeroing the BSS section.
>>
>> Per the Image protocol [1], the state of the cache for the BSS region
>> is not known because it is not part of the "loaded kernel image".
>>
>> This means that the cache would need to be invalidated twice for the
>> BSS region:
>>      1) Before zeroing, to remove any dirty cache lines. Otherwise
>>      they may get evicted while zeroing and thereby overwrite the
>>      zeroed values.
>>      2) After zeroing, to remove any cache lines that may have been
>>      speculatively fetched. Otherwise, when the MMU and D-cache are
>>      turned on, the CPU may see the old values.
>>
>> However, the only reason to zero BSS early is that the boot page
>> tables are part of BSS. To avoid the two cache invalidations, it is
>> possible to move the page tables to the section .data.page_aligned.
> 
> I am not following the last part. How is moving the boot page tables to
> .data.page_aligned solving the problem? Do we need to zero
> .data.page_aligned early or can we skip it because it is guaranteed to
> already be zero?

Global variables are initialized to zero by default regardless of the
section they reside in. Usually they are stored in BSS because that
saves space in the binary.

With the Image protocol, BSS is not initialized by the bootloader, so we
have to do it ourselves.

The section .data.page_aligned is always part of the binary, so the
compiler will write zeros into the binary for any global variable placed
in that section, and the corresponding memory will therefore be zeroed
when the binary is loaded.

If it makes sense, I can try to incorporate that in the commit message.
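
For instance (an illustrative sketch, not code from the patch; the
variable names are made up):

    /* Both arrays read as zero at runtime. */
    char in_bss[4096];                 /* .bss: must be zeroed by zero_bss() */
    char __aligned(4096) __section(".data.page_aligned")
        in_data[4096];                 /* zeros are stored in the binary image */

Only in_data has its zeros written into the image at build time, so the
memory is already zeroed once the bootloader has copied the binary.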

> 
> 
>> A new macro, DEFINE_BOOT_PAGE_TABLE, is introduced to create and mark
>> page tables that are used before BSS is zeroed. This covers all the
>> boot_* tables, but also xen_fixmap, because zero_bss() will print a
>> message when earlyprintk is enabled.
> 
> On a similar note, and continuing from what I wrote above, do we need
> to make sure to zero the xen_fixmap before hooking it up in
> setup_fixmap?

See above.

Cheers,
Stefano Stabellini June 26, 2019, 9:08 p.m. UTC | #3
On Wed, 26 Jun 2019, Julien Grall wrote:
> Hi Stefano,
> 
> On 6/26/19 8:29 PM, Stefano Stabellini wrote:
> > On Mon, 10 Jun 2019, Julien Grall wrote:
> > > At the moment, BSS is zeroed before the MMU and D-cache are turned on.
> > > In other words, the cache is bypassed when zeroing the BSS section.
> > > 
> > > Per the Image protocol [1], the state of the cache for the BSS region
> > > is not known because it is not part of the "loaded kernel image".
> > > 
> > > This means that the cache would need to be invalidated twice for the
> > > BSS region:
> > >      1) Before zeroing, to remove any dirty cache lines. Otherwise
> > >      they may get evicted while zeroing and thereby overwrite the
> > >      zeroed values.
> > >      2) After zeroing, to remove any cache lines that may have been
> > >      speculatively fetched. Otherwise, when the MMU and D-cache are
> > >      turned on, the CPU may see the old values.
> > > 
> > > However, the only reason to zero BSS early is that the boot page
> > > tables are part of BSS. To avoid the two cache invalidations, it is
> > > possible to move the page tables to the section .data.page_aligned.
> > 
> > I am not following the last part. How is moving the boot page tables to
> > .data.page_aligned solving the problem? Do we need to zero
> > .data.page_aligned early or can we skip it because it is guaranteed to
> > already be zero?
> 
> Global variables are initialized to zero by default regardless of the
> section they reside in. Usually they are stored in BSS because that
> saves space in the binary.
> 
> With the Image protocol, BSS is not initialized by the bootloader, so
> we have to do it ourselves.
> 
> The section .data.page_aligned is always part of the binary, so the
> compiler will write zeros into the binary for any global variable
> placed in that section, and the corresponding memory will therefore be
> zeroed when the binary is loaded.
> 
> If it makes sense, I can try to incorporate that in the commit message.

So basically the problem is really only BSS. All right, looks good to
me.

Acked-by: Stefano Stabellini <sstabellini@kernel.org>


> > > A new macro, DEFINE_BOOT_PAGE_TABLE, is introduced to create and mark
> > > page tables that are used before BSS is zeroed. This covers all the
> > > boot_* tables, but also xen_fixmap, because zero_bss() will print a
> > > message when earlyprintk is enabled.
> > 
> > On a similar note, and continuing from what I wrote above, do we need
> > to make sure to zero the xen_fixmap before hooking it up in
> > setup_fixmap?
> 
> See above.
> 
> Cheers,
> 
> -- 
> Julien Grall
>
Julien Grall June 27, 2019, 11:04 a.m. UTC | #4
Hi Stefano,

On 26/06/2019 22:08, Stefano Stabellini wrote:
> On Wed, 26 Jun 2019, Julien Grall wrote:
>> Hi Stefano,
>>
>> On 6/26/19 8:29 PM, Stefano Stabellini wrote:
>>> On Mon, 10 Jun 2019, Julien Grall wrote:
>>>> At the moment, BSS is zeroed before the MMU and D-cache are turned on.
>>>> In other words, the cache is bypassed when zeroing the BSS section.
>>>>
>>>> Per the Image protocol [1], the state of the cache for the BSS region
>>>> is not known because it is not part of the "loaded kernel image".
>>>>
>>>> This means that the cache would need to be invalidated twice for the
>>>> BSS region:
>>>>       1) Before zeroing, to remove any dirty cache lines. Otherwise
>>>>       they may get evicted while zeroing and thereby overwrite the
>>>>       zeroed values.
>>>>       2) After zeroing, to remove any cache lines that may have been
>>>>       speculatively fetched. Otherwise, when the MMU and D-cache are
>>>>       turned on, the CPU may see the old values.
>>>>
>>>> However, the only reason to zero BSS early is that the boot page
>>>> tables are part of BSS. To avoid the two cache invalidations, it is
>>>> possible to move the page tables to the section .data.page_aligned.
>>>
>>> I am not following the last part. How is moving the boot page tables to
>>> .data.page_aligned solving the problem? Do we need to zero
>>> .data.page_aligned early or can we skip it because it is guaranteed to
>>> already be zero?
>>
>> Global variables are initialized to zero by default regardless of the
>> section they reside in. Usually they are stored in BSS because that
>> saves space in the binary.
>>
>> With the Image protocol, BSS is not initialized by the bootloader, so
>> we have to do it ourselves.
>>
>> The section .data.page_aligned is always part of the binary, so the
>> compiler will write zeros into the binary for any global variable
>> placed in that section, and the corresponding memory will therefore be
>> zeroed when the binary is loaded.
>>
>> If it makes sense, I can try to incorporate that in the commit message.
> 
> So basically the problem is really only BSS. All right, looks good
> to me.

Yes that's correct.

> 
> Acked-by: Stefano Stabellini <sstabellini@kernel.org>

Thank you!

Cheers,

Patch

diff --git a/xen/arch/arm/arm64/head.S b/xen/arch/arm/arm64/head.S
index 130ab66d8e..6c3edbbc81 100644
--- a/xen/arch/arm/arm64/head.S
+++ b/xen/arch/arm/arm64/head.S
@@ -291,7 +291,6 @@  real_start_efi:
         mov   x22, #0                /* x22 := is_secondary_cpu */
 
         bl    check_cpu_mode
-        bl    zero_bss
         bl    cpu_init
         bl    create_page_tables
         bl    enable_mmu
@@ -312,6 +311,7 @@  primary_switched:
         /* Use a virtual address to access the UART. */
         ldr   x23, =EARLY_UART_VIRTUAL_ADDRESS
 #endif
+        bl    zero_bss
         PRINT("- Ready -\r\n")
         /* Setup the arguments for start_xen and jump to C world */
         mov   x0, x20                /* x0 := phys_offset */
@@ -423,8 +423,8 @@  zero_bss:
         cbnz  x26, skip_bss
 
         PRINT("- Zero BSS -\r\n")
-        load_paddr x0, __bss_start    /* Load paddr of start & end of bss */
-        load_paddr x1, __bss_end
+        ldr   x0, =__bss_start       /* x0 := vaddr(__bss_start) */
+        ldr   x1, =__bss_end         /* x1 := vaddr(__bss_end) */
 
 1:      str   xzr, [x0], #8
         cmp   x0, x1
diff --git a/xen/arch/arm/mm.c b/xen/arch/arm/mm.c
index 6a549e9283..0b2d07a258 100644
--- a/xen/arch/arm/mm.c
+++ b/xen/arch/arm/mm.c
@@ -48,6 +48,17 @@ 
 #undef mfn_to_virt
 #define mfn_to_virt(mfn) __mfn_to_virt(mfn_x(mfn))
 
+/*
+ * Macros to define page-tables:
+ *  - DEFINE_BOOT_PAGE_TABLE is used to define page-tables that are used
+ *  in assembly code before BSS is zeroed.
+ *  - DEFINE_PAGE_TABLE{,S} are used to define one or multiple
+ *  page-tables to be used after BSS is zeroed (typically they are only used
+ *  in C).
+ */
+#define DEFINE_BOOT_PAGE_TABLE(name)                                          \
+lpae_t __aligned(PAGE_SIZE) __section(".data.page_aligned") name[LPAE_ENTRIES]
+
 #define DEFINE_PAGE_TABLES(name, nr)                    \
 lpae_t __aligned(PAGE_SIZE) name[LPAE_ENTRIES * (nr)]
 
@@ -76,13 +87,13 @@  lpae_t __aligned(PAGE_SIZE) name[LPAE_ENTRIES * (nr)]
  * Finally, if EARLY_PRINTK is enabled then xen_fixmap will be mapped
  * by the CPU once it has moved off the 1:1 mapping.
  */
-DEFINE_PAGE_TABLE(boot_pgtable);
+DEFINE_BOOT_PAGE_TABLE(boot_pgtable);
 #ifdef CONFIG_ARM_64
-DEFINE_PAGE_TABLE(boot_first);
-DEFINE_PAGE_TABLE(boot_first_id);
+DEFINE_BOOT_PAGE_TABLE(boot_first);
+DEFINE_BOOT_PAGE_TABLE(boot_first_id);
 #endif
-DEFINE_PAGE_TABLE(boot_second);
-DEFINE_PAGE_TABLE(boot_third);
+DEFINE_BOOT_PAGE_TABLE(boot_second);
+DEFINE_BOOT_PAGE_TABLE(boot_third);
 
 /* Main runtime page tables */
 
@@ -135,7 +146,7 @@  static __initdata int xenheap_first_first_slot = -1;
  */
 static DEFINE_PAGE_TABLES(xen_second, 2);
 /* First level page table used for fixmap */
-DEFINE_PAGE_TABLE(xen_fixmap);
+DEFINE_BOOT_PAGE_TABLE(xen_fixmap);
 /* First level page table used to map Xen itself with the XN bit set
  * as appropriate. */
 static DEFINE_PAGE_TABLE(xen_xenmap);