[v2,05/20] x86: decompressor: Avoid the need for a stack in the 32-bit trampoline

Message ID 20230508070330.582131-6-ardb@kernel.org
State Superseded
Series efi/x86: Avoid bare metal decompressor during EFI boot

Commit Message

Ard Biesheuvel May 8, 2023, 7:03 a.m. UTC
The 32-bit trampoline no longer uses the stack for anything except
performing a long return back to long mode. Currently, this stack is
allocated in the same page that carries the trampoline code, which means
this page must be mapped writable and executable, and the stack is
therefore executable as well.

So let's do a long jump instead: that way, we can pre-calculate the
return address and poke it into the code before we call it. In a later
patch, we will take advantage of this by removing writable permissions
(and adding executable ones) explicitly when booting via the EFI stub.

Not playing with the stack pointer also makes it more straightforward
to call the trampoline code as an ordinary 64-bit function from C code.

Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
---
 arch/x86/boot/compressed/head_64.S    | 34 ++++----------------
 arch/x86/boot/compressed/pgtable.h    |  6 ++--
 arch/x86/boot/compressed/pgtable_64.c | 12 ++++++-
 3 files changed, 21 insertions(+), 31 deletions(-)
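
To illustrate the scheme in isolation: the assembler records the offset of the
LJMPL instruction's 32-bit absolute operand inside the trampoline blob, and C
code rebases that operand after copying the blob to its runtime location. A
minimal standalone sketch follows (the function name and parameters are
invented for illustration; the real fixup is the single "+=" statement this
patch adds to paging_prepare()):

	#include <stdint.h>
	#include <string.h>

	/*
	 * Copy a code blob into place and rebase the 32-bit absolute
	 * operand of the far jump it contains. Assumes, like the real
	 * trampoline, that dst lies below 4 GiB so its address fits in
	 * the 32-bit immediate.
	 */
	void place_trampoline(uint8_t *dst, const uint8_t *src, size_t size,
			      uint16_t ljmp_imm_offset)
	{
		uint32_t imm;

		memcpy(dst, src, size);

		/*
		 * The immediate was assembled as an offset from the start
		 * of the blob; add the blob's load address to turn it into
		 * an absolute target. memcpy sidesteps alignment concerns.
		 */
		memcpy(&imm, dst + ljmp_imm_offset, sizeof(imm));
		imm += (uint32_t)(uintptr_t)dst;
		memcpy(dst + ljmp_imm_offset, &imm, sizeof(imm));
	}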

Comments

Kirill A. Shutemov May 15, 2023, 2:03 p.m. UTC | #1
On Mon, May 08, 2023 at 09:03:15AM +0200, Ard Biesheuvel wrote:
> The 32-bit trampoline no longer uses the stack for anything except
> performing a long return back to long mode. Currently, this stack is
> allocated in the same page that carries the trampoline code, which means
> this page must be mapped writable and executable, and the stack is
> therefore executable as well.
> 
> So let's do a long jump instead: that way, we can pre-calculate the
> return address and poke it into the code before we call it. In a later
> patch, we will take advantage of this by removing writable permissions
> (and adding executable ones) explicitly when booting via the EFI stub.
> 
> Not playing with the stack pointer also makes it more straightforward
> to call the trampoline code as an ordinary 64-bit function from C code.
> 
> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>

Acked-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com>
Tom Lendacky May 17, 2023, 10:40 p.m. UTC | #2
On 5/8/23 02:03, Ard Biesheuvel wrote:
> The 32-bit trampoline no longer uses the stack for anything except
> performing a long return back to long mode. Currently, this stack is
> allocated in the same page that carries the trampoline code, which means
> this page must be mapped writable and executable, and the stack is
> therefore executable as well.
> 
> So let's do a long jump instead: that way, we can pre-calculate the
> return address and poke it into the code before we call it. In a later
> patch, we will take advantage of this by removing writable permissions
> (and adding executable ones) explicitly when booting via the EFI stub.
> 
> Not playing with the stack pointer also makes it more straightforward
> to call the trampoline code as an ordinary 64-bit function from C code.
> 
> Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> ---
>   arch/x86/boot/compressed/head_64.S    | 34 ++++----------------
>   arch/x86/boot/compressed/pgtable.h    |  6 ++--
>   arch/x86/boot/compressed/pgtable_64.c | 12 ++++++-
>   3 files changed, 21 insertions(+), 31 deletions(-)
> 
> diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
> index b1f8a867777120bb..3b5fc851737ffc39 100644
> --- a/arch/x86/boot/compressed/head_64.S
> +++ b/arch/x86/boot/compressed/head_64.S
> @@ -460,9 +460,6 @@ SYM_CODE_START(startup_64)
>   	leaq	TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax
>   	call	*%rax
>   
> -	/* Restore the stack, the 32-bit trampoline uses its own stack */
> -	leaq	rva(boot_stack_end)(%rbx), %rsp
> -
>   	/*
>   	 * cleanup_trampoline() would restore trampoline memory.
>   	 *
> @@ -563,24 +560,17 @@ SYM_FUNC_END(.Lrelocated)
>    * EDI contains the base address of the trampoline memory.
>    * Non-zero ESI means trampoline needs to enable 5-level paging.
>    */
> +	.section ".rodata", "a", @progbits
>   SYM_CODE_START(trampoline_32bit_src)
> -	popq	%r8
>   	/* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
>   	pushq	$__KERNEL32_CS
>   	leaq	0f(%rip), %rax
>   	pushq	%rax
>   	lretq
> +.Lret:	retq

Maybe just add a comment above this to explain that it is the target of 
the long jump below, used to get back into long mode and return without 
setting up a new stack for the 32-bit code.

And then a corresponding comment on the long jump itself. I think it would 
make it easier to understand what is going on in this part of the code.

Thanks,
Tom

>   
>   	.code32
> -0:	/* Set up data and stack segments */
> -	movl	$__KERNEL_DS, %eax
> -	movl	%eax, %ds
> -	movl	%eax, %ss
> -
> -	/* Set up new stack */
> -	leal	TRAMPOLINE_32BIT_STACK_END(%edi), %esp
> -
> -	/* Disable paging */
> +0:	/* Disable paging */
>   	movl	%cr0, %eax
>   	btrl	$X86_CR0_PG_BIT, %eax
>   	movl	%eax, %cr0
> @@ -634,26 +624,16 @@ SYM_CODE_START(trampoline_32bit_src)
>   1:
>   	movl	%eax, %cr4
>   
> -	/* Calculate address of paging_enabled() once we are executing in the trampoline */
> -	leal	.Lpaging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%edi), %eax
> -
> -	/* Prepare the stack for far return to Long Mode */
> -	pushl	$__KERNEL_CS
> -	pushl	%eax
> -
>   	/* Enable paging again. */
>   	movl	%cr0, %eax
>   	btsl	$X86_CR0_PG_BIT, %eax
>   	movl	%eax, %cr0
>   
> -	lret
> +.Ljmp:	ljmpl	$__KERNEL_CS, $(.Lret - trampoline_32bit_src)
>   SYM_CODE_END(trampoline_32bit_src)
>   
> -	.code64
> -SYM_FUNC_START_LOCAL_NOALIGN(.Lpaging_enabled)
> -	/* Return from the trampoline */
> -	jmp	*%r8
> -SYM_FUNC_END(.Lpaging_enabled)
> +/* keep this right after trampoline_32bit_src() so we can infer its size */
> +SYM_DATA(trampoline_ljmp_imm_offset, .word  .Ljmp + 1 - trampoline_32bit_src)
>   
>   	/*
>            * The trampoline code has a size limit.
> @@ -662,7 +642,7 @@ SYM_FUNC_END(.Lpaging_enabled)
>   	 */
>   	.org	trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE
>   
> -	.code32
> +	.text
>   SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode)
>   	/* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
>   1:
> diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h
> index 4e8cef135226bcbb..131488f50af55d0a 100644
> --- a/arch/x86/boot/compressed/pgtable.h
> +++ b/arch/x86/boot/compressed/pgtable.h
> @@ -6,9 +6,7 @@
>   #define TRAMPOLINE_32BIT_PGTABLE_OFFSET	0
>   
>   #define TRAMPOLINE_32BIT_CODE_OFFSET	PAGE_SIZE
> -#define TRAMPOLINE_32BIT_CODE_SIZE	0xA0
> -
> -#define TRAMPOLINE_32BIT_STACK_END	TRAMPOLINE_32BIT_SIZE
> +#define TRAMPOLINE_32BIT_CODE_SIZE	0x80
>   
>   #ifndef __ASSEMBLER__
>   
> @@ -16,5 +14,7 @@ extern unsigned long *trampoline_32bit;
>   
>   extern void trampoline_32bit_src(void *trampoline, bool enable_5lvl);
>   
> +extern const u16 trampoline_ljmp_imm_offset;
> +
>   #endif /* __ASSEMBLER__ */
>   #endif /* BOOT_COMPRESSED_PAGETABLE_H */
> diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
> index 2ac12ff4111bf8c0..09fc18180929fab3 100644
> --- a/arch/x86/boot/compressed/pgtable_64.c
> +++ b/arch/x86/boot/compressed/pgtable_64.c
> @@ -109,6 +109,7 @@ static unsigned long find_trampoline_placement(void)
>   struct paging_config paging_prepare(void *rmode)
>   {
>   	struct paging_config paging_config = {};
> +	void *tramp_code;
>   
>   	/* Initialize boot_params. Required for cmdline_find_option_bool(). */
>   	boot_params = rmode;
> @@ -143,9 +144,18 @@ struct paging_config paging_prepare(void *rmode)
>   	memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE);
>   
>   	/* Copy trampoline code in place */
> -	memcpy(trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long),
> +	tramp_code = memcpy(trampoline_32bit +
> +			TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long),
>   			&trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE);
>   
> +	/*
> +	 * Avoid the need for a stack in the 32-bit trampoline code, by using
> +	 * LJMP rather than LRET to return back to long mode. LJMP takes an
> +	 * immediate absolute address, so we have to adjust that based on the
> +	 * placement of the trampoline.
> +	 */
> +	*(u32 *)(tramp_code + trampoline_ljmp_imm_offset) += (unsigned long)tramp_code;
> +
>   	/*
>   	 * The code below prepares page table in trampoline memory.
>   	 *
Ard Biesheuvel May 18, 2023, 2:55 p.m. UTC | #3
On Thu, 18 May 2023 at 00:40, Tom Lendacky <thomas.lendacky@amd.com> wrote:
>
> On 5/8/23 02:03, Ard Biesheuvel wrote:
> > The 32-bit trampoline no longer uses the stack for anything except
> > performing a long return back to long mode. Currently, this stack is
> > allocated in the same page that carries the trampoline code, which means
> > this page must be mapped writable and executable, and the stack is
> > therefore executable as well.
> >
> > So let's do a long jump instead: that way, we can pre-calculate the
> > return address and poke it into the code before we call it. In a later
> > patch, we will take advantage of this by removing writable permissions
> > (and adding executable ones) explicitly when booting via the EFI stub.
> >
> > Not playing with the stack pointer also makes it more straightforward
> > to call the trampoline code as an ordinary 64-bit function from C code.
> >
> > Signed-off-by: Ard Biesheuvel <ardb@kernel.org>
> > ---
> >   arch/x86/boot/compressed/head_64.S    | 34 ++++----------------
> >   arch/x86/boot/compressed/pgtable.h    |  6 ++--
> >   arch/x86/boot/compressed/pgtable_64.c | 12 ++++++-
> >   3 files changed, 21 insertions(+), 31 deletions(-)
> >
> > diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
> > index b1f8a867777120bb..3b5fc851737ffc39 100644
> > --- a/arch/x86/boot/compressed/head_64.S
> > +++ b/arch/x86/boot/compressed/head_64.S
> > @@ -460,9 +460,6 @@ SYM_CODE_START(startup_64)
> >       leaq    TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax
> >       call    *%rax
> >
> > -     /* Restore the stack, the 32-bit trampoline uses its own stack */
> > -     leaq    rva(boot_stack_end)(%rbx), %rsp
> > -
> >       /*
> >        * cleanup_trampoline() would restore trampoline memory.
> >        *
> > @@ -563,24 +560,17 @@ SYM_FUNC_END(.Lrelocated)
> >    * EDI contains the base address of the trampoline memory.
> >    * Non-zero ESI means trampoline needs to enable 5-level paging.
> >    */
> > +     .section ".rodata", "a", @progbits
> >   SYM_CODE_START(trampoline_32bit_src)
> > -     popq    %r8
> >       /* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
> >       pushq   $__KERNEL32_CS
> >       leaq    0f(%rip), %rax
> >       pushq   %rax
> >       lretq
> > +.Lret:       retq
>
> Maybe just add a comment above this to explain that it is the target of
> the long jump below, used to get back into long mode and return without
> setting up a new stack for the 32-bit code.
>
> And then a corresponding comment on the long jump itself. I think it would
> make it easier to understand what is going on in this part of the code.
>

Fair point. I'll add that in the next version.
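
A sketch of the kind of comments being discussed, purely illustrative (not
taken from the eventual respin):

	/*
	 * Target of the far jump below: once the 32-bit code has toggled
	 * paging, it jumps back here via an absolute LJMP whose immediate
	 * was patched by the caller with the trampoline's runtime address.
	 * At this point we are back in long mode, still on the caller's
	 * stack, so a plain RET suffices.
	 */
.Lret:	retq

	...

	/* Jump back to long mode, to .Lret above; no stack needed. */
.Ljmp:	ljmpl	$__KERNEL_CS, $(.Lret - trampoline_32bit_src)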

Patch

diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S
index b1f8a867777120bb..3b5fc851737ffc39 100644
--- a/arch/x86/boot/compressed/head_64.S
+++ b/arch/x86/boot/compressed/head_64.S
@@ -460,9 +460,6 @@  SYM_CODE_START(startup_64)
 	leaq	TRAMPOLINE_32BIT_CODE_OFFSET(%rax), %rax
 	call	*%rax
 
-	/* Restore the stack, the 32-bit trampoline uses its own stack */
-	leaq	rva(boot_stack_end)(%rbx), %rsp
-
 	/*
 	 * cleanup_trampoline() would restore trampoline memory.
 	 *
@@ -563,24 +560,17 @@  SYM_FUNC_END(.Lrelocated)
  * EDI contains the base address of the trampoline memory.
  * Non-zero ESI means trampoline needs to enable 5-level paging.
  */
+	.section ".rodata", "a", @progbits
 SYM_CODE_START(trampoline_32bit_src)
-	popq	%r8
 	/* Switch to compatibility mode (CS.L = 0 CS.D = 1) via far return */
 	pushq	$__KERNEL32_CS
 	leaq	0f(%rip), %rax
 	pushq	%rax
 	lretq
+.Lret:	retq
 
 	.code32
-0:	/* Set up data and stack segments */
-	movl	$__KERNEL_DS, %eax
-	movl	%eax, %ds
-	movl	%eax, %ss
-
-	/* Set up new stack */
-	leal	TRAMPOLINE_32BIT_STACK_END(%edi), %esp
-
-	/* Disable paging */
+0:	/* Disable paging */
 	movl	%cr0, %eax
 	btrl	$X86_CR0_PG_BIT, %eax
 	movl	%eax, %cr0
@@ -634,26 +624,16 @@  SYM_CODE_START(trampoline_32bit_src)
 1:
 	movl	%eax, %cr4
 
-	/* Calculate address of paging_enabled() once we are executing in the trampoline */
-	leal	.Lpaging_enabled - trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_OFFSET(%edi), %eax
-
-	/* Prepare the stack for far return to Long Mode */
-	pushl	$__KERNEL_CS
-	pushl	%eax
-
 	/* Enable paging again. */
 	movl	%cr0, %eax
 	btsl	$X86_CR0_PG_BIT, %eax
 	movl	%eax, %cr0
 
-	lret
+.Ljmp:	ljmpl	$__KERNEL_CS, $(.Lret - trampoline_32bit_src)
 SYM_CODE_END(trampoline_32bit_src)
 
-	.code64
-SYM_FUNC_START_LOCAL_NOALIGN(.Lpaging_enabled)
-	/* Return from the trampoline */
-	jmp	*%r8
-SYM_FUNC_END(.Lpaging_enabled)
+/* keep this right after trampoline_32bit_src() so we can infer its size */
+SYM_DATA(trampoline_ljmp_imm_offset, .word  .Ljmp + 1 - trampoline_32bit_src)
 
 	/*
          * The trampoline code has a size limit.
@@ -662,7 +642,7 @@  SYM_FUNC_END(.Lpaging_enabled)
 	 */
 	.org	trampoline_32bit_src + TRAMPOLINE_32BIT_CODE_SIZE
 
-	.code32
+	.text
 SYM_FUNC_START_LOCAL_NOALIGN(.Lno_longmode)
 	/* This isn't an x86-64 CPU, so hang intentionally, we cannot continue */
 1:
diff --git a/arch/x86/boot/compressed/pgtable.h b/arch/x86/boot/compressed/pgtable.h
index 4e8cef135226bcbb..131488f50af55d0a 100644
--- a/arch/x86/boot/compressed/pgtable.h
+++ b/arch/x86/boot/compressed/pgtable.h
@@ -6,9 +6,7 @@ 
 #define TRAMPOLINE_32BIT_PGTABLE_OFFSET	0
 
 #define TRAMPOLINE_32BIT_CODE_OFFSET	PAGE_SIZE
-#define TRAMPOLINE_32BIT_CODE_SIZE	0xA0
-
-#define TRAMPOLINE_32BIT_STACK_END	TRAMPOLINE_32BIT_SIZE
+#define TRAMPOLINE_32BIT_CODE_SIZE	0x80
 
 #ifndef __ASSEMBLER__
 
@@ -16,5 +14,7 @@  extern unsigned long *trampoline_32bit;
 
 extern void trampoline_32bit_src(void *trampoline, bool enable_5lvl);
 
+extern const u16 trampoline_ljmp_imm_offset;
+
 #endif /* __ASSEMBLER__ */
 #endif /* BOOT_COMPRESSED_PAGETABLE_H */
diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c
index 2ac12ff4111bf8c0..09fc18180929fab3 100644
--- a/arch/x86/boot/compressed/pgtable_64.c
+++ b/arch/x86/boot/compressed/pgtable_64.c
@@ -109,6 +109,7 @@  static unsigned long find_trampoline_placement(void)
 struct paging_config paging_prepare(void *rmode)
 {
 	struct paging_config paging_config = {};
+	void *tramp_code;
 
 	/* Initialize boot_params. Required for cmdline_find_option_bool(). */
 	boot_params = rmode;
@@ -143,9 +144,18 @@  struct paging_config paging_prepare(void *rmode)
 	memset(trampoline_32bit, 0, TRAMPOLINE_32BIT_SIZE);
 
 	/* Copy trampoline code in place */
-	memcpy(trampoline_32bit + TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long),
+	tramp_code = memcpy(trampoline_32bit +
+			TRAMPOLINE_32BIT_CODE_OFFSET / sizeof(unsigned long),
 			&trampoline_32bit_src, TRAMPOLINE_32BIT_CODE_SIZE);
 
+	/*
+	 * Avoid the need for a stack in the 32-bit trampoline code, by using
+	 * LJMP rather than LRET to return back to long mode. LJMP takes an
+	 * immediate absolute address, so we have to adjust that based on the
+	 * placement of the trampoline.
+	 */
+	*(u32 *)(tramp_code + trampoline_ljmp_imm_offset) += (unsigned long)tramp_code;
+
 	/*
 	 * The code below prepares page table in trampoline memory.
 	 *