
[3/3] x86/pm: Add enumeration check before spec MSRs save/restore setup

Message ID 206cec042e17f15432d523d12ce6f5ae9267dc77.1663025154.git.pawan.kumar.gupta@linux.intel.com
State New
Series Check enumeration before MSR save/restore

Commit Message

Pawan Gupta Sept. 12, 2022, 11:41 p.m. UTC
On an Intel Atom N2600 (and presumably other Cedar Trail models),
MSR_IA32_TSX_CTRL can be read, causing saved_msr.valid to be set for it
by msr_build_context().
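
(For reference, msr_build_context() decides "valid" purely from whether the
RDMSR succeeds; a simplified sketch of the relevant loop in
arch/x86/power/cpu.c, details may differ from the exact upstream code:)

	for (i = saved_msrs->num, j = 0; i < total_num; i++, j++) {
		u64 dummy;

		msr_array[i].info.msr_no = msr_id[j];
		/*
		 * A successful RDMSR marks the MSR for a later WRMSR on
		 * resume, even if writes to it #GP on this CPU.
		 */
		msr_array[i].valid = !rdmsrl_safe(msr_id[j], &dummy);
		msr_array[i].info.reg.q = 0;
	}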

This causes restore_processor_state() to try to restore it, but writing
this MSR is not allowed on the Intel Atom N2600, leading to:

[   99.955141] unchecked MSR access error: WRMSR to 0x122 (tried to write 0x0000000000000002) at rIP: 0xffffffff8b07a574 (native_write_msr+0x4/0x20)
[   99.955176] Call Trace:
[   99.955186]  <TASK>
[   99.955195]  restore_processor_state+0x275/0x2c0
[   99.955246]  x86_acpi_suspend_lowlevel+0x10e/0x140
[   99.955273]  acpi_suspend_enter+0xd3/0x100
[   99.955297]  suspend_devices_and_enter+0x7e2/0x830
[   99.955341]  pm_suspend.cold+0x2d2/0x35e
[   99.955368]  state_store+0x68/0xd0
[   99.955402]  kernfs_fop_write_iter+0x15e/0x210
[   99.955442]  vfs_write+0x225/0x4b0
[   99.955523]  ksys_write+0x59/0xd0
[   99.955557]  do_syscall_64+0x58/0x80
[   99.955579]  ? do_syscall_64+0x67/0x80
[   99.955600]  ? up_read+0x17/0x20
[   99.955631]  ? lock_is_held_type+0xe3/0x140
[   99.955670]  ? asm_exc_page_fault+0x22/0x30
[   99.955688]  ? lockdep_hardirqs_on+0x7d/0x100
[   99.955710]  entry_SYSCALL_64_after_hwframe+0x63/0xcd
[   99.955723] RIP: 0033:0x7f7d0fb018f7
[   99.955741] Code: 0f 00 f7 d8 64 89 02 48 c7 c0 ff ff ff ff eb b7 0f 1f 00 f3 0f 1e fa 64 8b 04 25 18 00 00 00 85 c0 75 10 b8 01 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 51 c3 48 83 ec 28 48 89 54 24 18 48 89 74 24
[   99.955753] RSP: 002b:00007ffd03292ee8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[   99.955771] RAX: ffffffffffffffda RBX: 0000000000000004 RCX: 00007f7d0fb018f7
[   99.955781] RDX: 0000000000000004 RSI: 00007ffd03292fd0 RDI: 0000000000000004
[   99.955790] RBP: 00007ffd03292fd0 R08: 000000000000c0fe R09: 0000000000000000
[   99.955799] R10: 00007f7d0fb85fb0 R11: 0000000000000246 R12: 0000000000000004
[   99.955808] R13: 000055df564173e0 R14: 0000000000000004 R15: 00007f7d0fbf49e0
[   99.955910]  </TASK>

Add speculation control MSRs to the MSR save/restore list only if their
corresponding feature bit is set. This prevents the MSR save/restore code
from attempting to access unsupported MSRs.

Reported-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Pawan Gupta <pawan.kumar.gupta@linux.intel.com>
---
 arch/x86/power/cpu.c | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

Comments

Pawan Gupta Nov. 8, 2022, 10:09 p.m. UTC | #1
On Tue, Nov 08, 2022 at 10:40:06AM -0800, Dave Hansen wrote:
>On 9/12/22 16:41, Pawan Gupta wrote:
>> On an Intel Atom N2600 (and presumably other Cedar Trail models)
>> MSR_IA32_TSX_CTRL can be read, causing saved_msr.valid to be set for it
>> by msr_build_context().
>
>This changelog needs some help.  Shouldn't it be something like this?
>
>pm_save_spec_msr() keeps a list of all the MSRs which _might_ need to be
>saved and restored at hibernate?? and resume??.  However, it has zero
>awareness of CPU support for these MSRs.  It mostly works by
>unconditionally attempting to manipulate these MSRs and relying on
>rdmsrl_safe() being able to handle a #GP on CPUs where the support is
>unavailable.
>
>However, it's possible for reads (RDMSR) to be supported for a given MSR
>while writes (WRMSR) are not.  In this case, msr_build_context() sees a
>successful read (RDMSR) and marks the MSR as 'valid'.  Then, later, a
>write (WRMSR) fails, producing a nasty (but harmless) error message.
>
>To fix this, add the corresponding X86_FEATURE bit for each MSR.  Avoid
>trying to manipulate the MSR when the feature bit is clear.  This
>required adding a X86_FEATURE bit for MSRs that do not have one already,
>but it's a small price to pay.

Yes, that's a lot better. Thanks.
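
(For reference, the X86_FEATURE bits mentioned above that do not exist yet
are synthetic, software-defined bits. A rough illustration of how one could
be defined and forced on when the corresponding MSR is enumerated -- assuming
the TSX_CTRL case, and not the exact code added elsewhere in this series:)

	/*
	 * arch/x86/include/asm/cpufeatures.h -- illustrative bit position
	 * only; a real definition must take a free bit in a Linux-defined
	 * word.
	 */
	#define X86_FEATURE_MSR_TSX_CTRL (11*32+20) /* MSR IA32_TSX_CTRL (Intel) implemented */

	/*
	 * Early CPU setup (e.g. in tsx_init()): force the synthetic bit only
	 * when IA32_ARCH_CAPABILITIES enumerates the TSX_CTRL MSR.
	 */
	if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) {
		u64 ia32_cap = x86_read_arch_cap_msr();

		if (ia32_cap & ARCH_CAP_TSX_CTRL_MSR)
			setup_force_cpu_cap(X86_FEATURE_MSR_TSX_CTRL);
	}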

Patch

diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index bb176c72891c..b3492dd55e61 100644
--- a/arch/x86/power/cpu.c
+++ b/arch/x86/power/cpu.c
@@ -511,17 +511,26 @@  static int pm_cpu_check(const struct x86_cpu_id *c)
 	return ret;
 }
 
+struct msr_enumeration {
+	u32 msr_no;
+	u32 feature;
+};
+
 static void pm_save_spec_msr(void)
 {
-	u32 spec_msr_id[] = {
-		MSR_IA32_SPEC_CTRL,
-		MSR_IA32_TSX_CTRL,
-		MSR_TSX_FORCE_ABORT,
-		MSR_IA32_MCU_OPT_CTRL,
-		MSR_AMD64_LS_CFG,
+	struct msr_enumeration msr_enum[] = {
+		{MSR_IA32_SPEC_CTRL,	X86_FEATURE_MSR_SPEC_CTRL},
+		{MSR_IA32_TSX_CTRL,	X86_FEATURE_MSR_TSX_CTRL},
+		{MSR_TSX_FORCE_ABORT,	X86_FEATURE_TSX_FORCE_ABORT},
+		{MSR_IA32_MCU_OPT_CTRL,	X86_FEATURE_SRBDS_CTRL},
+		{MSR_AMD64_LS_CFG,	X86_FEATURE_MSR_LS_CFG},
 	};
+	int i;
 
-	msr_build_context(spec_msr_id, ARRAY_SIZE(spec_msr_id));
+	for (i = 0; i < ARRAY_SIZE(msr_enum); i++) {
+		if (boot_cpu_has(msr_enum[i].feature))
+			msr_build_context(&msr_enum[i].msr_no, 1);
+	}
 }
 
 static int pm_check_save_msr(void)