diff mbox series

x86/acpi: fix suspend with Xen

Message ID 20230113140610.7132-1-jgross@suse.com
State New
Headers show
Series x86/acpi: fix suspend with Xen | expand

Commit Message

Juergen Gross Jan. 13, 2023, 2:06 p.m. UTC
Commit f1e525009493 ("x86/boot: Skip realmode init code when running as
Xen PV guest") missed one code path accessing real_mode_header, leading
to dereferencing NULL when suspending the system under Xen:

    [  348.284004] PM: suspend entry (deep)
    [  348.289532] Filesystems sync: 0.005 seconds
    [  348.291545] Freezing user space processes ... (elapsed 0.000 seconds) done.
    [  348.292457] OOM killer disabled.
    [  348.292462] Freezing remaining freezable tasks ... (elapsed 0.104 seconds) done.
    [  348.396612] printk: Suspending console(s) (use no_console_suspend to debug)
    [  348.749228] PM: suspend devices took 0.352 seconds
    [  348.769713] ACPI: EC: interrupt blocked
    [  348.816077] BUG: kernel NULL pointer dereference, address: 000000000000001c
    [  348.816080] #PF: supervisor read access in kernel mode
    [  348.816081] #PF: error_code(0x0000) - not-present page
    [  348.816083] PGD 0 P4D 0
    [  348.816086] Oops: 0000 [#1] PREEMPT SMP NOPTI
    [  348.816089] CPU: 0 PID: 6764 Comm: systemd-sleep Not tainted 6.1.3-1.fc32.qubes.x86_64 #1
    [  348.816092] Hardware name: Star Labs StarBook/StarBook, BIOS 8.01 07/03/2022
    [  348.816093] RIP: e030:acpi_get_wakeup_address+0xc/0x20

Fix that by adding an indirection for acpi_get_wakeup_address() which
Xen PV dom0 can use to return a dummy non-zero wakeup address (this
address won't ever be used, as the real suspend handling is done by the
hypervisor).

Fixes: f1e525009493 ("x86/boot: Skip realmode init code when running as Xen PV guest")
Reported-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
Signed-off-by: Juergen Gross <jgross@suse.com>
---
 arch/x86/include/asm/acpi.h  | 2 +-
 arch/x86/kernel/acpi/sleep.c | 3 ++-
 include/xen/acpi.h           | 9 +++++++++
 3 files changed, 12 insertions(+), 2 deletions(-)

Comments

Marek Marczykowski-Górecki Jan. 13, 2023, 10:52 p.m. UTC | #1
On Fri, Jan 13, 2023 at 08:40:15PM +0100, Rafael J. Wysocki wrote:
> On Fri, Jan 13, 2023 at 3:06 PM Juergen Gross <jgross@suse.com> wrote:
> >
> > Commit f1e525009493 ("x86/boot: Skip realmode init code when running as
> > Xen PV guest") missed one code path accessing real_mode_header, leading
> > to dereferencing NULL when suspending the system under Xen:
> >
> >     [  348.284004] PM: suspend entry (deep)
> >     [  348.289532] Filesystems sync: 0.005 seconds
> >     [  348.291545] Freezing user space processes ... (elapsed 0.000 seconds) done.
> >     [  348.292457] OOM killer disabled.
> >     [  348.292462] Freezing remaining freezable tasks ... (elapsed 0.104 seconds) done.
> >     [  348.396612] printk: Suspending console(s) (use no_console_suspend to debug)
> >     [  348.749228] PM: suspend devices took 0.352 seconds
> >     [  348.769713] ACPI: EC: interrupt blocked
> >     [  348.816077] BUG: kernel NULL pointer dereference, address: 000000000000001c
> >     [  348.816080] #PF: supervisor read access in kernel mode
> >     [  348.816081] #PF: error_code(0x0000) - not-present page
> >     [  348.816083] PGD 0 P4D 0
> >     [  348.816086] Oops: 0000 [#1] PREEMPT SMP NOPTI
> >     [  348.816089] CPU: 0 PID: 6764 Comm: systemd-sleep Not tainted 6.1.3-1.fc32.qubes.x86_64 #1
> >     [  348.816092] Hardware name: Star Labs StarBook/StarBook, BIOS 8.01 07/03/2022
> >     [  348.816093] RIP: e030:acpi_get_wakeup_address+0xc/0x20
> >
> > Fix that by adding an indirection for acpi_get_wakeup_address() which
> > Xen PV dom0 can use to return a dummy non-zero wakeup address (this
> > address won't ever be used, as the real suspend handling is done by the
> > hypervisor).
> 
> How exactly does this help?

By not calling the original acpi_get_wakeup_address() (renamed by the
patch to x86_acpi_get_wakeup_address()) during PV dom0 suspend, which
would otherwise access the uninitialized real_mode_header.

I confirm this patch fixes the issue.

> > Fixes: f1e525009493 ("x86/boot: Skip realmode init code when running as Xen PV guest")
> > Reported-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com>
> > Signed-off-by: Juergen Gross <jgross@suse.com>
> > ---
> >  arch/x86/include/asm/acpi.h  | 2 +-
> >  arch/x86/kernel/acpi/sleep.c | 3 ++-
> >  include/xen/acpi.h           | 9 +++++++++
> >  3 files changed, 12 insertions(+), 2 deletions(-)
> >
> > diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
> > index 65064d9f7fa6..137259ff8f03 100644
> > --- a/arch/x86/include/asm/acpi.h
> > +++ b/arch/x86/include/asm/acpi.h
> > @@ -61,7 +61,7 @@ static inline void acpi_disable_pci(void)
> >  extern int (*acpi_suspend_lowlevel)(void);
> >
> >  /* Physical address to resume after wakeup */
> > -unsigned long acpi_get_wakeup_address(void);
> > +extern unsigned long (*acpi_get_wakeup_address)(void);
> >
> >  /*
> >   * Check if the CPU can handle C2 and deeper
> > diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
> > index 3b7f4cdbf2e0..1a3cd5e24cd0 100644
> > --- a/arch/x86/kernel/acpi/sleep.c
> > +++ b/arch/x86/kernel/acpi/sleep.c
> > @@ -33,10 +33,11 @@ static char temp_stack[4096];
> >   * Returns the physical address where the kernel should be resumed after the
> >   * system awakes from S3, e.g. for programming into the firmware waking vector.
> >   */
> > -unsigned long acpi_get_wakeup_address(void)
> > +static unsigned long x86_acpi_get_wakeup_address(void)
> >  {
> >         return ((unsigned long)(real_mode_header->wakeup_start));
> >  }
> > +unsigned long (*acpi_get_wakeup_address)(void) = x86_acpi_get_wakeup_address;
> >
> >  /**
> >   * x86_acpi_enter_sleep_state - enter sleep state
> > diff --git a/include/xen/acpi.h b/include/xen/acpi.h
> > index b1e11863144d..7e1e5dbfb77c 100644
> > --- a/include/xen/acpi.h
> > +++ b/include/xen/acpi.h
> > @@ -56,6 +56,12 @@ static inline int xen_acpi_suspend_lowlevel(void)
> >         return 0;
> >  }
> >
> > +static inline unsigned long xen_acpi_get_wakeup_address(void)
> > +{
> > +       /* Just return a dummy non-zero value, it will never be used. */
> > +       return 1;
> > +}
> > +
> >  static inline void xen_acpi_sleep_register(void)
> >  {
> >         if (xen_initial_domain()) {
> > @@ -65,6 +71,9 @@ static inline void xen_acpi_sleep_register(void)
> >                         &xen_acpi_notify_hypervisor_extended_sleep);
> >
> >                 acpi_suspend_lowlevel = xen_acpi_suspend_lowlevel;
> > +#ifdef CONFIG_ACPI_SLEEP
> > +               acpi_get_wakeup_address = xen_acpi_get_wakeup_address;
> > +#endif
> >         }
> >  }
> >  #else
> > --
> > 2.35.3
> >
Juergen Gross Jan. 16, 2023, 6:45 a.m. UTC | #2
On 13.01.23 20:40, Rafael J. Wysocki wrote:
> On Fri, Jan 13, 2023 at 3:06 PM Juergen Gross <jgross@suse.com> wrote:
>>
>> Commit f1e525009493 ("x86/boot: Skip realmode init code when running as
>> Xen PV guest") missed one code path accessing real_mode_header, leading
>> to dereferencing NULL when suspending the system under Xen:
>>
>>      [  348.284004] PM: suspend entry (deep)
>>      [  348.289532] Filesystems sync: 0.005 seconds
>>      [  348.291545] Freezing user space processes ... (elapsed 0.000 seconds) done.
>>      [  348.292457] OOM killer disabled.
>>      [  348.292462] Freezing remaining freezable tasks ... (elapsed 0.104 seconds) done.
>>      [  348.396612] printk: Suspending console(s) (use no_console_suspend to debug)
>>      [  348.749228] PM: suspend devices took 0.352 seconds
>>      [  348.769713] ACPI: EC: interrupt blocked
>>      [  348.816077] BUG: kernel NULL pointer dereference, address: 000000000000001c
>>      [  348.816080] #PF: supervisor read access in kernel mode
>>      [  348.816081] #PF: error_code(0x0000) - not-present page
>>      [  348.816083] PGD 0 P4D 0
>>      [  348.816086] Oops: 0000 [#1] PREEMPT SMP NOPTI
>>      [  348.816089] CPU: 0 PID: 6764 Comm: systemd-sleep Not tainted 6.1.3-1.fc32.qubes.x86_64 #1
>>      [  348.816092] Hardware name: Star Labs StarBook/StarBook, BIOS 8.01 07/03/2022
>>      [  348.816093] RIP: e030:acpi_get_wakeup_address+0xc/0x20
>>
>> Fix that by adding an indirection for acpi_get_wakeup_address() which
>> Xen PV dom0 can use to return a dummy non-zero wakeup address (this
>> address won't ever be used, as the real suspend handling is done by the
>> hypervisor).
> 
> How exactly does this help?

I believed the first sentence of the commit message would make this
clear enough.

I can expand the commit message to go more into detail if you think
this is really needed.


Juergen
Rafael J. Wysocki Jan. 17, 2023, 2:09 p.m. UTC | #3
On Mon, Jan 16, 2023 at 7:45 AM Juergen Gross <jgross@suse.com> wrote:
>
> On 13.01.23 20:40, Rafael J. Wysocki wrote:
> > On Fri, Jan 13, 2023 at 3:06 PM Juergen Gross <jgross@suse.com> wrote:
> >>
> >> Commit f1e525009493 ("x86/boot: Skip realmode init code when running as
> >> Xen PV guest") missed one code path accessing real_mode_header, leading
> >> to dereferencing NULL when suspending the system under Xen:
> >>
> >>      [  348.284004] PM: suspend entry (deep)
> >>      [  348.289532] Filesystems sync: 0.005 seconds
> >>      [  348.291545] Freezing user space processes ... (elapsed 0.000 seconds) done.
> >>      [  348.292457] OOM killer disabled.
> >>      [  348.292462] Freezing remaining freezable tasks ... (elapsed 0.104 seconds) done.
> >>      [  348.396612] printk: Suspending console(s) (use no_console_suspend to debug)
> >>      [  348.749228] PM: suspend devices took 0.352 seconds
> >>      [  348.769713] ACPI: EC: interrupt blocked
> >>      [  348.816077] BUG: kernel NULL pointer dereference, address: 000000000000001c
> >>      [  348.816080] #PF: supervisor read access in kernel mode
> >>      [  348.816081] #PF: error_code(0x0000) - not-present page
> >>      [  348.816083] PGD 0 P4D 0
> >>      [  348.816086] Oops: 0000 [#1] PREEMPT SMP NOPTI
> >>      [  348.816089] CPU: 0 PID: 6764 Comm: systemd-sleep Not tainted 6.1.3-1.fc32.qubes.x86_64 #1
> >>      [  348.816092] Hardware name: Star Labs StarBook/StarBook, BIOS 8.01 07/03/2022
> >>      [  348.816093] RIP: e030:acpi_get_wakeup_address+0xc/0x20
> >>
> >> Fix that by adding an indirection for acpi_get_wakeup_address() which
> >> Xen PV dom0 can use to return a dummy non-zero wakeup address (this
> >> address won't ever be used, as the real suspend handling is done by the
> >> hypervisor).
> >
> > How exactly does this help?
>
> I believed the first sentence of the commit message would make this
> clear enough.

That was clear, but the fix part wasn't really.

> I can expand the commit message to go more into detail if you think
> this is really needed.

IMO calling acpi_set_waking_vector() with a known-invalid wakeup
vector address in dom0 is plain confusing.

I'm not sure what to do about it yet, but IMV something needs to be done.
Juergen Gross Jan. 17, 2023, 3:32 p.m. UTC | #4
On 17.01.23 15:09, Rafael J. Wysocki wrote:
> On Mon, Jan 16, 2023 at 7:45 AM Juergen Gross <jgross@suse.com> wrote:
>>
>> On 13.01.23 20:40, Rafael J. Wysocki wrote:
>>> On Fri, Jan 13, 2023 at 3:06 PM Juergen Gross <jgross@suse.com> wrote:
>>>>
>>>> Commit f1e525009493 ("x86/boot: Skip realmode init code when running as
>>>> Xen PV guest") missed one code path accessing real_mode_header, leading
>>>> to dereferencing NULL when suspending the system under Xen:
>>>>
>>>>       [  348.284004] PM: suspend entry (deep)
>>>>       [  348.289532] Filesystems sync: 0.005 seconds
>>>>       [  348.291545] Freezing user space processes ... (elapsed 0.000 seconds) done.
>>>>       [  348.292457] OOM killer disabled.
>>>>       [  348.292462] Freezing remaining freezable tasks ... (elapsed 0.104 seconds) done.
>>>>       [  348.396612] printk: Suspending console(s) (use no_console_suspend to debug)
>>>>       [  348.749228] PM: suspend devices took 0.352 seconds
>>>>       [  348.769713] ACPI: EC: interrupt blocked
>>>>       [  348.816077] BUG: kernel NULL pointer dereference, address: 000000000000001c
>>>>       [  348.816080] #PF: supervisor read access in kernel mode
>>>>       [  348.816081] #PF: error_code(0x0000) - not-present page
>>>>       [  348.816083] PGD 0 P4D 0
>>>>       [  348.816086] Oops: 0000 [#1] PREEMPT SMP NOPTI
>>>>       [  348.816089] CPU: 0 PID: 6764 Comm: systemd-sleep Not tainted 6.1.3-1.fc32.qubes.x86_64 #1
>>>>       [  348.816092] Hardware name: Star Labs StarBook/StarBook, BIOS 8.01 07/03/2022
>>>>       [  348.816093] RIP: e030:acpi_get_wakeup_address+0xc/0x20
>>>>
>>>> Fix that by adding an indirection for acpi_get_wakeup_address() which
>>>> Xen PV dom0 can use to return a dummy non-zero wakeup address (this
>>>> address won't ever be used, as the real suspend handling is done by the
>>>> hypervisor).
>>>
>>> How exactly does this help?
>>
>> I believed the first sentence of the commit message would make this
>> clear enough.
> 
> That was clear, but the fix part wasn't really.
> 
>> I can expand the commit message to go more into detail if you think
>> this is really needed.
> 
> IMO calling acpi_set_waking_vector() with a known-invalid wakeup
> vector address in dom0 is plain confusing.
> 
> I'm not sure what to do about it yet, but IMV something needs to be done.

Another possibility would be to modify acpi_sleep_prepare(), e.g. like the
attached patch (compile tested only).


Juergen
Rafael J. Wysocki Jan. 17, 2023, 3:36 p.m. UTC | #5
On Tue, Jan 17, 2023 at 4:32 PM Juergen Gross <jgross@suse.com> wrote:
>
> On 17.01.23 15:09, Rafael J. Wysocki wrote:
> > On Mon, Jan 16, 2023 at 7:45 AM Juergen Gross <jgross@suse.com> wrote:
> >>
> >> On 13.01.23 20:40, Rafael J. Wysocki wrote:
> >>> On Fri, Jan 13, 2023 at 3:06 PM Juergen Gross <jgross@suse.com> wrote:
> >>>>
> >>>> Commit f1e525009493 ("x86/boot: Skip realmode init code when running as
> >>>> Xen PV guest") missed one code path accessing real_mode_header, leading
> >>>> to dereferencing NULL when suspending the system under Xen:
> >>>>
> >>>>       [  348.284004] PM: suspend entry (deep)
> >>>>       [  348.289532] Filesystems sync: 0.005 seconds
> >>>>       [  348.291545] Freezing user space processes ... (elapsed 0.000 seconds) done.
> >>>>       [  348.292457] OOM killer disabled.
> >>>>       [  348.292462] Freezing remaining freezable tasks ... (elapsed 0.104 seconds) done.
> >>>>       [  348.396612] printk: Suspending console(s) (use no_console_suspend to debug)
> >>>>       [  348.749228] PM: suspend devices took 0.352 seconds
> >>>>       [  348.769713] ACPI: EC: interrupt blocked
> >>>>       [  348.816077] BUG: kernel NULL pointer dereference, address: 000000000000001c
> >>>>       [  348.816080] #PF: supervisor read access in kernel mode
> >>>>       [  348.816081] #PF: error_code(0x0000) - not-present page
> >>>>       [  348.816083] PGD 0 P4D 0
> >>>>       [  348.816086] Oops: 0000 [#1] PREEMPT SMP NOPTI
> >>>>       [  348.816089] CPU: 0 PID: 6764 Comm: systemd-sleep Not tainted 6.1.3-1.fc32.qubes.x86_64 #1
> >>>>       [  348.816092] Hardware name: Star Labs StarBook/StarBook, BIOS 8.01 07/03/2022
> >>>>       [  348.816093] RIP: e030:acpi_get_wakeup_address+0xc/0x20
> >>>>
> >>>> Fix that by adding an indirection for acpi_get_wakeup_address() which
> >>>> Xen PV dom0 can use to return a dummy non-zero wakeup address (this
> >>>> address won't ever be used, as the real suspend handling is done by the
> >>>> hypervisor).
> >>>
> >>> How exactly does this help?
> >>
> >> I believed the first sentence of the commit message would make this
> >> clear enough.
> >
> > That was clear, but the fix part wasn't really.
> >
> >> I can expand the commit message to go more into detail if you think
> >> this is really needed.
> >
> > IMO calling acpi_set_waking_vector() with a known-invalid wakeup
> > vector address in dom0 is plain confusing.
> >
> > I'm not sure what to do about it yet, but IMV something needs to be done.
>
> Another possibility would be to modify acpi_sleep_prepare(), e.g. like the
> attached patch (compile tested only).

I prefer this to the previous version.  It is much more straightforward IMV.
diff mbox series

Patch

diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h
index 65064d9f7fa6..137259ff8f03 100644
--- a/arch/x86/include/asm/acpi.h
+++ b/arch/x86/include/asm/acpi.h
@@ -61,7 +61,7 @@  static inline void acpi_disable_pci(void)
 extern int (*acpi_suspend_lowlevel)(void);
 
 /* Physical address to resume after wakeup */
-unsigned long acpi_get_wakeup_address(void);
+extern unsigned long (*acpi_get_wakeup_address)(void);
 
 /*
  * Check if the CPU can handle C2 and deeper
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 3b7f4cdbf2e0..1a3cd5e24cd0 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -33,10 +33,11 @@  static char temp_stack[4096];
  * Returns the physical address where the kernel should be resumed after the
  * system awakes from S3, e.g. for programming into the firmware waking vector.
  */
-unsigned long acpi_get_wakeup_address(void)
+static unsigned long x86_acpi_get_wakeup_address(void)
 {
 	return ((unsigned long)(real_mode_header->wakeup_start));
 }
+unsigned long (*acpi_get_wakeup_address)(void) = x86_acpi_get_wakeup_address;
 
 /**
  * x86_acpi_enter_sleep_state - enter sleep state
diff --git a/include/xen/acpi.h b/include/xen/acpi.h
index b1e11863144d..7e1e5dbfb77c 100644
--- a/include/xen/acpi.h
+++ b/include/xen/acpi.h
@@ -56,6 +56,12 @@  static inline int xen_acpi_suspend_lowlevel(void)
 	return 0;
 }
 
+static inline unsigned long xen_acpi_get_wakeup_address(void)
+{
+	/* Just return a dummy non-zero value, it will never be used. */
+	return 1;
+}
+
 static inline void xen_acpi_sleep_register(void)
 {
 	if (xen_initial_domain()) {
@@ -65,6 +71,9 @@  static inline void xen_acpi_sleep_register(void)
 			&xen_acpi_notify_hypervisor_extended_sleep);
 
 		acpi_suspend_lowlevel = xen_acpi_suspend_lowlevel;
+#ifdef CONFIG_ACPI_SLEEP
+		acpi_get_wakeup_address = xen_acpi_get_wakeup_address;
+#endif
 	}
 }
 #else