diff mbox series

i2c: designware: Revert recent changes to i2c_dw_probe_lock_support()

Message ID 20240111125658.921083-1-jarkko.nikula@linux.intel.com
State New
Headers show
Series i2c: designware: Revert recent changes to i2c_dw_probe_lock_support() | expand

Commit Message

Jarkko Nikula Jan. 11, 2024, 12:56 p.m. UTC
Borislav Petkov reported a regression below on an AMD system and it
appeared on linux-next only after late December 2023. V, Narasimhan and
Kim Phillips helped to track down regression to commit 2f571a725434
("i2c: designware: Fix lock probe call order in dw_i2c_plat_probe()").

Unfortuately above commit is not directly revertible so revert it by
reverting also two other related commits on top of it. So this patch
reverts following commits:

f9b51f600217 ("i2c: designware: Save pointer to semaphore callbacks instead of index")
b8034c7d28a9 ("i2c: designware: Replace a while-loop by for-loop")
2f571a725434 ("i2c: designware: Fix lock probe call order in dw_i2c_plat_probe()")

[    6.245173] i2c_designware AMDI0010:00: Unknown Synopsys component type: 0xffffffff
[    6.252683] BUG: kernel NULL pointer dereference, address: 00000000000001fc
[    6.256551] #PF: supervisor read access in kernel mode
[    6.256551] #PF: error_code(0x0000) - not-present page
[    6.256551] PGD 0
[    6.256551] Oops: 0000 [#1] PREEMPT SMP NOPTI
[    6.256551] CPU: 32 PID: 211 Comm: kworker/32:0 Not tainted 6.7.0-rc6-next-20231222-1703820640818 #1
[    6.256551] Workqueue: pm pm_runtime_work
[    6.256551] RIP: 0010:regmap_read+0x12/0x70
[    6.256551] Code: 00 00 00 00 0f 1f 40 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 0f 1f 44 00 00 55 48 89 e5 41 55 41 54 53 <8b> 87 fc 01 00 00 83 e8 01 85 f0 75 42 48 89 fb 41 89 f4 49 89 d5
[    6.256551] RSP: 0018:ff7fa5c740bcbc98 EFLAGS: 00010246
[    6.256551] RAX: 0000000000000000 RBX: ff38ff5c159f1028 RCX: 0000000000000008
[    6.256551] RDX: ff7fa5c740bcbcc4 RSI: 0000000000000034 RDI: 0000000000000000
[    6.256551] RBP: ff7fa5c740bcbcb0 R08: ff38ff5c02ceb8b0 R09: ff38ff5c002a4500
[    6.256551] R10: 0000000000000003 R11: 0000000000000003 R12: ff38ff5c159f1028
[    6.256551] R13: 0000000000000000 R14: 0000000000000000 R15: ff38ff5c159ed8f4
[    6.256551] FS:  0000000000000000(0000) GS:ff38ff6b0d200000(0000) knlGS:0000000000000000
[    6.256551] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    6.256551] CR2: 00000000000001fc CR3: 000000007403c001 CR4: 0000000000771ef0
[    6.256551] PKRU: 55555554
[    6.256551] Call Trace:
[    6.256551]  <TASK>
[    6.256551]  ? show_regs+0x6d/0x80
[    6.256551]  ? __die+0x29/0x70
[    6.256551]  ? page_fault_oops+0x153/0x4a0
[    6.256551]  ? do_user_addr_fault+0x30f/0x6c0
[    6.256551]  ? exc_page_fault+0x7c/0x190
[    6.256551]  ? asm_exc_page_fault+0x2b/0x30
[    6.256551]  ? regmap_read+0x12/0x70
[    6.256551]  ? update_load_avg+0x82/0x7d0
[    6.256551]  __i2c_dw_disable+0x38/0x180
[    6.256551]  i2c_dw_disable+0x3f/0xb0
[    6.256551]  i2c_dw_runtime_suspend+0x33/0x50
[    6.256551]  ? __pfx_pm_generic_runtime_suspend+0x10/0x10
[    6.256551]  pm_generic_runtime_suspend+0x2f/0x40
[    6.256551]  __rpm_callback+0x48/0x120
[    6.256551]  ? __pfx_pm_generic_runtime_suspend+0x10/0x10
[    6.256551]  rpm_callback+0x66/0x70
[    6.256551]  ? __pfx_pm_generic_runtime_suspend+0x10/0x10
[    6.256551]  rpm_suspend+0x166/0x700
[    6.256551]  ? srso_alias_return_thunk+0x5/0xfbef5
[    6.256551]  ? __schedule+0x3df/0x1720
[    6.256551]  pm_runtime_work+0xb2/0xd0
[    6.256551]  process_one_work+0x178/0x350
[    6.256551]  worker_thread+0x2f5/0x420
[    6.256551]  ? __pfx_worker_thread+0x10/0x10
[    6.256551]  kthread+0xf5/0x130
[    6.256551]  ? __pfx_kthread+0x10/0x10
[    6.256551]  ret_from_fork+0x3d/0x60
[    6.256551]  ? __pfx_kthread+0x10/0x10
[    6.256551]  ret_from_fork_asm+0x1a/0x30
[    6.256551]  </TASK>
[    6.256551] Modules linked in:
[    6.256551] CR2: 00000000000001fc
[    6.256551] ---[ end trace 0000000000000000 ]---
[    6.256551] RIP: 0010:regmap_read+0x12/0x70
[    6.256551] Code: 00 00 00 00 0f 1f 40 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 0f 1f 44 00 00 55 48 89 e5 41 55 41 54 53 <8b> 87 fc 01 00 00 83 e8 01 85 f0 75 42 48 89 fb 41 89 f4 49 89 d5
[    6.256551] RSP: 0018:ff7fa5c740bcbc98 EFLAGS: 00010246
[    6.256551] RAX: 0000000000000000 RBX: ff38ff5c159f1028 RCX: 0000000000000008
[    6.256551] RDX: ff7fa5c740bcbcc4 RSI: 0000000000000034 RDI: 0000000000000000
[    6.256551] RBP: ff7fa5c740bcbcb0 R08: ff38ff5c02ceb8b0 R09: ff38ff5c002a4500
[    6.256551] R10: 0000000000000003 R11: 0000000000000003 R12: ff38ff5c159f1028
[    6.256551] R13: 0000000000000000 R14: 0000000000000000 R15: ff38ff5c159ed8f4
[    6.256551] FS:  0000000000000000(0000) GS:ff38ff6b0d200000(0000) knlGS:0000000000000000
[    6.256551] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[    6.256551] CR2: 00000000000001fc CR3: 000000007403c001 CR4: 0000000000771ef0
[    6.256551] PKRU: 55555554
[    6.256551] note: kworker/32:0[211] exited with irqs disabled

Reported-by: Borislav Petkov <bp@alien8.de>
Reported-by: V Narasimhan <Narasimhan.V@amd.com>
Reported-by: Kim Phillips <kim.phillips@amd.com>
Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
---
 drivers/i2c/busses/i2c-designware-core.h    |  4 +--
 drivers/i2c/busses/i2c-designware-platdrv.c | 35 ++++++++++++---------
 2 files changed, 22 insertions(+), 17 deletions(-)

Comments

Kim Phillips Jan. 11, 2024, 5:56 p.m. UTC | #1
On 1/11/24 6:56 AM, Jarkko Nikula wrote:
> Borislav Petkov reported a regression below on an AMD system and it
> appeared on linux-next only after late December 2023. V, Narasimhan and
> Kim Phillips helped to track down regression to commit 2f571a725434
> ("i2c: designware: Fix lock probe call order in dw_i2c_plat_probe()").
> 
> Unfortuately above commit is not directly revertible so revert it by
> reverting also two other related commits on top of it. So this patch
> reverts following commits:
> 
> f9b51f600217 ("i2c: designware: Save pointer to semaphore callbacks instead of index")
> b8034c7d28a9 ("i2c: designware: Replace a while-loop by for-loop")
> 2f571a725434 ("i2c: designware: Fix lock probe call order in dw_i2c_plat_probe()")
> 
> [    6.245173] i2c_designware AMDI0010:00: Unknown Synopsys component type: 0xffffffff
> [    6.252683] BUG: kernel NULL pointer dereference, address: 00000000000001fc
> [    6.256551] #PF: supervisor read access in kernel mode
> [    6.256551] #PF: error_code(0x0000) - not-present page
> [    6.256551] PGD 0
> [    6.256551] Oops: 0000 [#1] PREEMPT SMP NOPTI
> [    6.256551] CPU: 32 PID: 211 Comm: kworker/32:0 Not tainted 6.7.0-rc6-next-20231222-1703820640818 #1
> [    6.256551] Workqueue: pm pm_runtime_work
> [    6.256551] RIP: 0010:regmap_read+0x12/0x70
> [    6.256551] Code: 00 00 00 00 0f 1f 40 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 0f 1f 44 00 00 55 48 89 e5 41 55 41 54 53 <8b> 87 fc 01 00 00 83 e8 01 85 f0 75 42 48 89 fb 41 89 f4 49 89 d5
> [    6.256551] RSP: 0018:ff7fa5c740bcbc98 EFLAGS: 00010246
> [    6.256551] RAX: 0000000000000000 RBX: ff38ff5c159f1028 RCX: 0000000000000008
> [    6.256551] RDX: ff7fa5c740bcbcc4 RSI: 0000000000000034 RDI: 0000000000000000
> [    6.256551] RBP: ff7fa5c740bcbcb0 R08: ff38ff5c02ceb8b0 R09: ff38ff5c002a4500
> [    6.256551] R10: 0000000000000003 R11: 0000000000000003 R12: ff38ff5c159f1028
> [    6.256551] R13: 0000000000000000 R14: 0000000000000000 R15: ff38ff5c159ed8f4
> [    6.256551] FS:  0000000000000000(0000) GS:ff38ff6b0d200000(0000) knlGS:0000000000000000
> [    6.256551] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [    6.256551] CR2: 00000000000001fc CR3: 000000007403c001 CR4: 0000000000771ef0
> [    6.256551] PKRU: 55555554
> [    6.256551] Call Trace:
> [    6.256551]  <TASK>
> [    6.256551]  ? show_regs+0x6d/0x80
> [    6.256551]  ? __die+0x29/0x70
> [    6.256551]  ? page_fault_oops+0x153/0x4a0
> [    6.256551]  ? do_user_addr_fault+0x30f/0x6c0
> [    6.256551]  ? exc_page_fault+0x7c/0x190
> [    6.256551]  ? asm_exc_page_fault+0x2b/0x30
> [    6.256551]  ? regmap_read+0x12/0x70
> [    6.256551]  ? update_load_avg+0x82/0x7d0
> [    6.256551]  __i2c_dw_disable+0x38/0x180
> [    6.256551]  i2c_dw_disable+0x3f/0xb0
> [    6.256551]  i2c_dw_runtime_suspend+0x33/0x50
> [    6.256551]  ? __pfx_pm_generic_runtime_suspend+0x10/0x10
> [    6.256551]  pm_generic_runtime_suspend+0x2f/0x40
> [    6.256551]  __rpm_callback+0x48/0x120
> [    6.256551]  ? __pfx_pm_generic_runtime_suspend+0x10/0x10
> [    6.256551]  rpm_callback+0x66/0x70
> [    6.256551]  ? __pfx_pm_generic_runtime_suspend+0x10/0x10
> [    6.256551]  rpm_suspend+0x166/0x700
> [    6.256551]  ? srso_alias_return_thunk+0x5/0xfbef5
> [    6.256551]  ? __schedule+0x3df/0x1720
> [    6.256551]  pm_runtime_work+0xb2/0xd0
> [    6.256551]  process_one_work+0x178/0x350
> [    6.256551]  worker_thread+0x2f5/0x420
> [    6.256551]  ? __pfx_worker_thread+0x10/0x10
> [    6.256551]  kthread+0xf5/0x130
> [    6.256551]  ? __pfx_kthread+0x10/0x10
> [    6.256551]  ret_from_fork+0x3d/0x60
> [    6.256551]  ? __pfx_kthread+0x10/0x10
> [    6.256551]  ret_from_fork_asm+0x1a/0x30
> [    6.256551]  </TASK>
> [    6.256551] Modules linked in:
> [    6.256551] CR2: 00000000000001fc
> [    6.256551] ---[ end trace 0000000000000000 ]---
> [    6.256551] RIP: 0010:regmap_read+0x12/0x70
> [    6.256551] Code: 00 00 00 00 0f 1f 40 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 0f 1f 44 00 00 55 48 89 e5 41 55 41 54 53 <8b> 87 fc 01 00 00 83 e8 01 85 f0 75 42 48 89 fb 41 89 f4 49 89 d5
> [    6.256551] RSP: 0018:ff7fa5c740bcbc98 EFLAGS: 00010246
> [    6.256551] RAX: 0000000000000000 RBX: ff38ff5c159f1028 RCX: 0000000000000008
> [    6.256551] RDX: ff7fa5c740bcbcc4 RSI: 0000000000000034 RDI: 0000000000000000
> [    6.256551] RBP: ff7fa5c740bcbcb0 R08: ff38ff5c02ceb8b0 R09: ff38ff5c002a4500
> [    6.256551] R10: 0000000000000003 R11: 0000000000000003 R12: ff38ff5c159f1028
> [    6.256551] R13: 0000000000000000 R14: 0000000000000000 R15: ff38ff5c159ed8f4
> [    6.256551] FS:  0000000000000000(0000) GS:ff38ff6b0d200000(0000) knlGS:0000000000000000
> [    6.256551] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [    6.256551] CR2: 00000000000001fc CR3: 000000007403c001 CR4: 0000000000771ef0
> [    6.256551] PKRU: 55555554
> [    6.256551] note: kworker/32:0[211] exited with irqs disabled
> 
> Reported-by: Borislav Petkov <bp@alien8.de>
> Reported-by: V Narasimhan <Narasimhan.V@amd.com>
> Reported-by: Kim Phillips <kim.phillips@amd.com>
> Signed-off-by: Jarkko Nikula <jarkko.nikula@linux.intel.com>
> ---

Hold on, I'm testing this on top of next-20240111 and still seeing the splat...

[   18.822681][    T1] usbcore: registered new device driver usb
[   18.863839][    T1] i2c_designware AMDI0010:00: Unknown Synopsys component type: 0xffffffff
[   18.882449][    T1] i2c_designware AMDI0010:01: Unknown Synopsys component type: 0xffffffff
[   18.890568][ T3175] BUG: kernel NULL pointer dereference, address: 0000000000000384
[   18.894399][ T3175] #PF: supervisor read access in kernel mode
[   18.894399][ T3175] #PF: error_code(0x0000) - not-present page
[   18.894399][ T3175] PGD 0
[   18.894399][ T3175] Oops: 0000 [#1] SMP NOPTI
[   18.894399][ T3175] CPU: 386 PID: 3175 Comm: kworker/386:1 Not tainted 6.7.0-next-20240111+ #7 ad2022c7b217b1e9ec5a9b3b4ecf4603a3c9a2e0
[   18.894399][ T3175] Workqueue: pm pm_runtime_work
[   18.894399][ T3175] RIP: 0010:regmap_read+0x12/0x80
[   18.894399][ T3175] Code: ff ff 66 0f 1f 44 00 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 0f 1f 44 00 00 55 48 89 e5 41 55 41 54 53 <8b> 87 84 03 00 00 83 e8 01 85 f0 75 54 48 89 fb 41 89 f4 49 89 d5
[   18.894399][ T3175] RSP: 0018:ff5a6e8852503c70 EFLAGS: 00010246
[   18.894399][ T3175] RAX: 0000000000000000 RBX: ff367ab45a32e028 RCX: 0000000000000000
[   18.894399][ T3175] RDX: ff5a6e8852503c9c RSI: 0000000000000034 RDI: 0000000000000000
[   18.894399][ T3175] RBP: ff5a6e8852503c88 R08: 0000000000000000 R09: 0000000000000000
[   18.894399][ T3175] R10: 0000000000000000 R11: 0000000000000000 R12: ff367ab45a32e028
[   18.894399][ T3175] R13: 0000000000000000 R14: 0000000000000000 R15: ff367aa50c2a9178
[   18.894399][ T3175] FS:  0000000000000000(0000) GS:ff367ac3ed600000(0000) knlGS:0000000000000000
[   18.894399][ T3175] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   18.894399][ T3175] CR2: 0000000000000384 CR3: 0000001813a64001 CR4: 0000000000771ef0
[   18.894399][ T3175] PKRU: 55555554
[   18.894399][ T3175] Call Trace:
[   18.894399][ T3175]  <TASK>
[   18.894399][ T3175]  ? show_regs+0x75/0x90
[   18.894399][ T3175]  ? __die+0x29/0x80
[   18.894399][ T3175]  ? page_fault_oops+0x153/0x4b0
[   18.894399][ T3175]  ? do_user_addr_fault+0x345/0x710
[   18.894399][ T3175]  ? exc_page_fault+0x87/0x1f0
[   18.894399][ T3175]  ? asm_exc_page_fault+0x2b/0x30
[   18.894399][ T3175]  ? regmap_read+0x12/0x80
[   18.894399][ T3175]  __i2c_dw_disable+0x38/0x190
[   18.894399][ T3175]  ? local_clock+0x12/0x20
[   18.894399][ T3175]  i2c_dw_disable+0x3f/0xb0
[   18.894399][ T3175]  ? __pfx_pm_generic_runtime_suspend+0x10/0x10
[   18.894399][ T3175]  ? __pfx_pm_generic_runtime_suspend+0x10/0x10
[   18.894399][ T3175]  i2c_dw_runtime_suspend+0x26/0x40
[   18.894399][ T3175]  pm_generic_runtime_suspend+0x2f/0x50
[   18.894399][ T3175]  __rpm_callback+0x48/0x130
[   18.894399][ T3175]  ? __pfx_pm_generic_runtime_suspend+0x10/0x10
[   18.894399][ T3175]  rpm_callback+0x6c/0x80
[   18.894399][ T3175]  ? __pfx_pm_generic_runtime_suspend+0x10/0x10
[   18.894399][ T3175]  rpm_suspend+0x17a/0x730
[   18.894399][ T3175]  ? lock_acquired+0xc2/0x350
[   18.894399][ T3175]  pm_runtime_work+0xd6/0xf0
[   18.894399][ T3175]  process_one_work+0x215/0x4f0
[   18.894399][ T3175]  ? process_one_work+0x1b5/0x4f0
[   18.894399][ T3175]  worker_thread+0x1d5/0x3f0
[   18.894399][ T3175]  ? __pfx_worker_thread+0x10/0x10
[   18.894399][ T3175]  kthread+0xd8/0x110
[   18.894399][ T3175]  ? __pfx_kthread+0x10/0x10
[   18.894399][ T3175]  ret_from_fork+0x47/0x70
[   18.894399][ T3175]  ? __pfx_kthread+0x10/0x10
[   18.894399][ T3175]  ret_from_fork_asm+0x1a/0x30
[   18.894399][ T3175]  </TASK>
[   18.894399][ T3175] Modules linked in:
[   18.894399][ T3175] CR2: 0000000000000384
[   18.894399][ T3175] ---[ end trace 0000000000000000 ]---
[   18.894399][ T3175] RIP: 0010:regmap_read+0x12/0x80
[   18.894399][ T3175] Code: ff ff 66 0f 1f 44 00 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 f3 0f 1e fa 0f 1f 44 00 00 55 48 89 e5 41 55 41 54 53 <8b> 87 84 03 00 00 83 e8 01 85 f0 75 54 48 89 fb 41 89 f4 49 89 d5
[   18.894399][ T3175] RSP: 0018:ff5a6e8852503c70 EFLAGS: 00010246
[   18.894399][ T3175] RAX: 0000000000000000 RBX: ff367ab45a32e028 RCX: 0000000000000000
[   18.894399][ T3175] RDX: ff5a6e8852503c9c RSI: 0000000000000034 RDI: 0000000000000000
[   18.894399][ T3175] RBP: ff5a6e8852503c88 R08: 0000000000000000 R09: 0000000000000000
[   18.894399][ T3175] R10: 0000000000000000 R11: 0000000000000000 R12: ff367ab45a32e028
[   18.894399][ T3175] R13: 0000000000000000 R14: 0000000000000000 R15: ff367aa50c2a9178
[   18.894399][ T3175] FS:  0000000000000000(0000) GS:ff367ac3ed600000(0000) knlGS:0000000000000000
[   18.894399][ T3175] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[   18.894399][ T3175] CR2: 0000000000000384 CR3: 0000001813a64001 CR4: 0000000000771ef0
[   18.894399][ T3175] PKRU: 55555554
[   18.894399][ T3175] note: kworker/386:1[3175] exited with irqs disabled

Thanks,

Kim
Wolfram Sang Jan. 13, 2024, 7:26 p.m. UTC | #2
> > Hold on, I'm testing this on top of next-20240111 and still seeing the
> > splat...
> > 
> Btw, does this reproduce always? Can we be mislead if it happens somewhat
> randomly? Happens to boot once we revert some commits and then at another
> Andy's nearby commit does not and we make the wrong conclusion?

Thanks for all the work trying to find the regression so far. As I want
to send out my pull request soon, I think it is safest if I revert the
whole series and we start with a clean new version.

All the best!
Andy Shevchenko Jan. 14, 2024, 5:06 p.m. UTC | #3
On Sat, Jan 13, 2024 at 08:26:28PM +0100, Wolfram Sang wrote:
> > > Hold on, I'm testing this on top of next-20240111 and still seeing the
> > > splat...
> > > 
> > Btw, does this reproduce always? Can we be mislead if it happens somewhat
> > randomly? Happens to boot once we revert some commits and then at another
> > Andy's nearby commit does not and we make the wrong conclusion?
> 
> Thanks for all the work trying to find the regression so far. As I want
> to send out my pull request soon, I think it is safest if I revert the
> whole series and we start with a clean new version.

Oh, but true. Let's start over later on. I will rearrange patches and Cc to AMD
in the next version, so we will have unquestionable ones first.
Jarkko Nikula Jan. 15, 2024, 6:58 a.m. UTC | #4
On 1/14/24 19:06, Andy Shevchenko wrote:
> On Sat, Jan 13, 2024 at 08:26:28PM +0100, Wolfram Sang wrote:
>>>> Hold on, I'm testing this on top of next-20240111 and still seeing the
>>>> splat...
>>>>
>>> Btw, does this reproduce always? Can we be mislead if it happens somewhat
>>> randomly? Happens to boot once we revert some commits and then at another
>>> Andy's nearby commit does not and we make the wrong conclusion?
>>
>> Thanks for all the work trying to find the regression so far. As I want
>> to send out my pull request soon, I think it is safest if I revert the
>> whole series and we start with a clean new version.
> 
> Oh, but true. Let's start over later on. I will rearrange patches and Cc to AMD
> in the next version, so we will have unquestionable ones first.
> 
I think everybody also likes sentence below from 
Documentation/process/submitting-patches.rst:

"If you cannot condense your patch set into a smaller set of patches,
then only post say 15 or so at a time and wait for review and integration."
Kim Phillips Jan. 15, 2024, 9:16 p.m. UTC | #5
On 1/12/24 2:13 AM, Jarkko Nikula wrote:
> Hi

Hi,

> On 1/11/24 19:56, Kim Phillips wrote:
>>> [    6.245173] i2c_designware AMDI0010:00: Unknown Synopsys component type: 0xffffffff
> 
> This has puzzled me all the time since I'm unable to see which one of Andy's patches could cause it. However controller is clearly powered down since DW_IC_COMP_TYPE register reads 0xffffffff.
> 
> That I'd call as a regression one. Second regression is the Oops and I was speculating if commit bd466a892612 ("i2c: designware: Fix PM calls order in dw_i2c_plat_probe()") can cause it.
> 
>>
>> Hold on, I'm testing this on top of next-20240111 and still seeing the splat...
>>
> Btw, does this reproduce always? Can we be mislead if it happens somewhat randomly? Happens to boot once we revert some commits and then at another Andy's nearby commit does not and we make the wrong conclusion?

It's possible, yes, since we initially blamed commit 2f571a725434
("i2c: designware: Fix lock probe call order in dw_i2c_plat_probe()")
and now testing this patch produces the failure: I'm pretty sure
that those two boots were using the same exact kernel code base,
yet one didn't produce the stacktrace, and the other did.

I've since updated my BIOS to the latest, and did a factory reset
on both the host and BMC to try to be more stably reproducible, but
we shall see.
  
> Does bisecting between v6.7-rc1 and next-20240111 lead anywhere?

Not really, unfortunately:

# bad: [9e21984d62c56a0f6d1fc6f76b646212cfd7fe88] Add linux-next specific files for 20240111
# good: [b85ea95d086471afb4ad062012a4d73cd328fa86] Linux 6.7-rc1
git bisect start 'next-20240111' 'v6.7-rc1'
# good: [0e7cc4233dafe9474ab32825bda3a8fed92b08bb] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid.git
git bisect good 0e7cc4233dafe9474ab32825bda3a8fed92b08bb
# bad: [627690dd85803b0ac9861751c663bad0d5ff6c1a] Merge branch 'drm-next' of git://git.freedesktop.org/git/drm/drm.git
git bisect bad 627690dd85803b0ac9861751c663bad0d5ff6c1a
# good: [cdf1b6bad35cbc1e2be672982cb0dc7825dafe14] Merge branch 'main' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
git bisect good cdf1b6bad35cbc1e2be672982cb0dc7825dafe14
# good: [04194a4f780895799cf83c86d5bb8bc11560a536] drm/xe: Fix lockdep warning from xe_vm_madvise
git bisect good 04194a4f780895799cf83c86d5bb8bc11560a536
# good: [b6e1b708176846248c87318786d22465ac96dd2c] drm/xe: Remove uninitialized variable from warning
git bisect good b6e1b708176846248c87318786d22465ac96dd2c
# good: [0f35b0a7b8fa402adbffa2565047cdcc4c480153] Revert "drm/amdkfd: Relocate TBA/TMA to opposite side of VM hole"
git bisect good 0f35b0a7b8fa402adbffa2565047cdcc4c480153
# good: [d4ca26ac4be0d9aea7005c40df75e6775749671b] drm/msm/dp: call dp_display_get_next_bridge() during probe
git bisect good d4ca26ac4be0d9aea7005c40df75e6775749671b
# good: [6aaff21547a08e5a151fbf7a3f7be5a68877d9e3] Merge tag 'drm-intel-next-2023-12-18' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
git bisect good 6aaff21547a08e5a151fbf7a3f7be5a68877d9e3
# good: [3c064aea46d071ccf95a142be5532768a7fa6f02] Merge tag 'drm-misc-next-fixes-2024-01-04' of git://anongit.freedesktop.org/drm/drm-misc into drm-next
git bisect good 3c064aea46d071ccf95a142be5532768a7fa6f02
# good: [b76c01f1d950425924ee1c1377760de3c024ef78] Merge tag 'drm-intel-gt-next-2023-12-15' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
git bisect good b76c01f1d950425924ee1c1377760de3c024ef78
# good: [f48705f473cea37efeeaa6a197ae12730c112863] Bluetooth: Remove HCI_POWER_OFF_TIMEOUT
git bisect good f48705f473cea37efeeaa6a197ae12730c112863
# good: [7974b2128489d062c9d21419633eebde07f07032] Bluetooth: hci_event: Fix wrongly recorded wakeup BD_ADDR
git bisect good 7974b2128489d062c9d21419633eebde07f07032
# good: [f8c47ee39e6dc6170da06865b84e8c8b08e87ab0] Bluetooth: hci_event: Use HCI error defines instead of magic values
git bisect good f8c47ee39e6dc6170da06865b84e8c8b08e87ab0
# good: [ba0bc076e90fa6ae1284fc0b34d7460531c45ab7] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth-next.git
git bisect good ba0bc076e90fa6ae1284fc0b34d7460531c45ab7
# first bad commit: [627690dd85803b0ac9861751c663bad0d5ff6c1a] Merge branch 'drm-next' of git://git.freedesktop.org/git/drm/drm.git

Let me know if there are other ideas, otherwise I'll wait for the
series with the rearranged series from Andy.

Thanks,

Kim
Kim Phillips Jan. 15, 2024, 10:44 p.m. UTC | #6
On 1/15/24 3:16 PM, Kim Phillips wrote:
> On 1/12/24 2:13 AM, Jarkko Nikula wrote:
>> Hi
> 
> Hi,
> 
>> On 1/11/24 19:56, Kim Phillips wrote:
>>>> [    6.245173] i2c_designware AMDI0010:00: Unknown Synopsys component type: 0xffffffff
>>
>> This has puzzled me all the time since I'm unable to see which one of Andy's patches could cause it. However controller is clearly powered down since DW_IC_COMP_TYPE register reads 0xffffffff.

Just FYI, that message is apparently 'normal' as, e.g., a stable v6.4
based tree emits it, but it doesn't crash because of it:

[    7.640335] usbcore: registered new device driver usb
[    7.663651] i2c_designware AMDI0010:00: Unknown Synopsys component type: 0xffffffff
[    7.677362] i2c_designware AMDI0010:01: Unknown Synopsys component type: 0xffffffff
[    7.738163] pps_core: LinuxPPS API ver. 1 registered

>> That I'd call as a regression one. Second regression is the Oops and I was speculating if commit bd466a892612 ("i2c: designware: Fix PM calls order in dw_i2c_plat_probe()") can cause it.

So I just tested checking out bd466a892612, and indeed it produces
the stacktrace.  Prior to that commit is v6.7-rc3, which boots fine.
So right now I'm suspecting bd466a892612 is to blame for the stacktrace.

Thanks,

Kim

>>> Hold on, I'm testing this on top of next-20240111 and still seeing the splat...
>>>
>> Btw, does this reproduce always? Can we be mislead if it happens somewhat randomly? Happens to boot once we revert some commits and then at another Andy's nearby commit does not and we make the wrong conclusion?
> 
> It's possible, yes, since we initially blamed commit 2f571a725434
> ("i2c: designware: Fix lock probe call order in dw_i2c_plat_probe()")
> and now testing this patch produces the failure: I'm pretty sure
> that those two boots were using the same exact kernel code base,
> yet one didn't produce the stacktrace, and the other did.
> 
> I've since updated my BIOS to the latest, and did a factory reset
> on both the host and BMC to try to be more stably reproducible, but
> we shall see.
> 
>> Does bisecting between v6.7-rc1 and next-20240111 lead anywhere?
> 
> Not really, unfortunately:
> 
> # bad: [9e21984d62c56a0f6d1fc6f76b646212cfd7fe88] Add linux-next specific files for 20240111
> # good: [b85ea95d086471afb4ad062012a4d73cd328fa86] Linux 6.7-rc1
> git bisect start 'next-20240111' 'v6.7-rc1'
> # good: [0e7cc4233dafe9474ab32825bda3a8fed92b08bb] Merge branch 'for-next' of git://git.kernel.org/pub/scm/linux/kernel/git/hid/hid.git
> git bisect good 0e7cc4233dafe9474ab32825bda3a8fed92b08bb
> # bad: [627690dd85803b0ac9861751c663bad0d5ff6c1a] Merge branch 'drm-next' of git://git.freedesktop.org/git/drm/drm.git
> git bisect bad 627690dd85803b0ac9861751c663bad0d5ff6c1a
> # good: [cdf1b6bad35cbc1e2be672982cb0dc7825dafe14] Merge branch 'main' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next.git
> git bisect good cdf1b6bad35cbc1e2be672982cb0dc7825dafe14
> # good: [04194a4f780895799cf83c86d5bb8bc11560a536] drm/xe: Fix lockdep warning from xe_vm_madvise
> git bisect good 04194a4f780895799cf83c86d5bb8bc11560a536
> # good: [b6e1b708176846248c87318786d22465ac96dd2c] drm/xe: Remove uninitialized variable from warning
> git bisect good b6e1b708176846248c87318786d22465ac96dd2c
> # good: [0f35b0a7b8fa402adbffa2565047cdcc4c480153] Revert "drm/amdkfd: Relocate TBA/TMA to opposite side of VM hole"
> git bisect good 0f35b0a7b8fa402adbffa2565047cdcc4c480153
> # good: [d4ca26ac4be0d9aea7005c40df75e6775749671b] drm/msm/dp: call dp_display_get_next_bridge() during probe
> git bisect good d4ca26ac4be0d9aea7005c40df75e6775749671b
> # good: [6aaff21547a08e5a151fbf7a3f7be5a68877d9e3] Merge tag 'drm-intel-next-2023-12-18' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
> git bisect good 6aaff21547a08e5a151fbf7a3f7be5a68877d9e3
> # good: [3c064aea46d071ccf95a142be5532768a7fa6f02] Merge tag 'drm-misc-next-fixes-2024-01-04' of git://anongit.freedesktop.org/drm/drm-misc into drm-next
> git bisect good 3c064aea46d071ccf95a142be5532768a7fa6f02
> # good: [b76c01f1d950425924ee1c1377760de3c024ef78] Merge tag 'drm-intel-gt-next-2023-12-15' of git://anongit.freedesktop.org/drm/drm-intel into drm-next
> git bisect good b76c01f1d950425924ee1c1377760de3c024ef78
> # good: [f48705f473cea37efeeaa6a197ae12730c112863] Bluetooth: Remove HCI_POWER_OFF_TIMEOUT
> git bisect good f48705f473cea37efeeaa6a197ae12730c112863
> # good: [7974b2128489d062c9d21419633eebde07f07032] Bluetooth: hci_event: Fix wrongly recorded wakeup BD_ADDR
> git bisect good 7974b2128489d062c9d21419633eebde07f07032
> # good: [f8c47ee39e6dc6170da06865b84e8c8b08e87ab0] Bluetooth: hci_event: Use HCI error defines instead of magic values
> git bisect good f8c47ee39e6dc6170da06865b84e8c8b08e87ab0
> # good: [ba0bc076e90fa6ae1284fc0b34d7460531c45ab7] Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/bluetooth/bluetooth-next.git
> git bisect good ba0bc076e90fa6ae1284fc0b34d7460531c45ab7
> # first bad commit: [627690dd85803b0ac9861751c663bad0d5ff6c1a] Merge branch 'drm-next' of git://git.freedesktop.org/git/drm/drm.git
> 
> Let me know if there are other ideas, otherwise I'll wait for the
> series with the rearranged series from Andy.
> 
> Thanks,
> 
> Kim
diff mbox series

Patch

diff --git a/drivers/i2c/busses/i2c-designware-core.h b/drivers/i2c/busses/i2c-designware-core.h
index 5405d4da2b7d..efec9ed305a9 100644
--- a/drivers/i2c/busses/i2c-designware-core.h
+++ b/drivers/i2c/busses/i2c-designware-core.h
@@ -186,8 +186,6 @@  struct clk;
 struct device;
 struct reset_control;
 
-struct i2c_dw_semaphore_callbacks;
-
 /**
  * struct dw_i2c_dev - private i2c-designware data
  * @dev: driver model device node
@@ -291,7 +289,7 @@  struct dw_i2c_dev {
 	u16			hs_lcnt;
 	int			(*acquire_lock)(void);
 	void			(*release_lock)(void);
-	const struct i2c_dw_semaphore_callbacks *semaphore_cb;
+	int			semaphore_idx;
 	bool			shared_with_punit;
 	int			(*init)(struct dw_i2c_dev *dev);
 	int			(*set_sda_hold_time)(struct dw_i2c_dev *dev);
diff --git a/drivers/i2c/busses/i2c-designware-platdrv.c b/drivers/i2c/busses/i2c-designware-platdrv.c
index fa9c0c56b11e..e523df18bb0d 100644
--- a/drivers/i2c/busses/i2c-designware-platdrv.c
+++ b/drivers/i2c/busses/i2c-designware-platdrv.c
@@ -176,23 +176,17 @@  static const struct i2c_dw_semaphore_callbacks i2c_dw_semaphore_cb_table[] = {
 	{}
 };
 
-static void i2c_dw_remove_lock_support(void *data)
-{
-	struct dw_i2c_dev *dev = data;
-
-	if (!dev->semaphore_cb)
-		return;
-
-	if (dev->semaphore_cb->remove)
-		dev->semaphore_cb->remove(dev);
-}
-
 static int i2c_dw_probe_lock_support(struct dw_i2c_dev *dev)
 {
 	const struct i2c_dw_semaphore_callbacks *ptr;
+	int i = 0;
 	int ret;
 
-	for (ptr = i2c_dw_semaphore_cb_table; ptr->probe; ptr++) {
+	ptr = i2c_dw_semaphore_cb_table;
+
+	dev->semaphore_idx = -1;
+
+	while (ptr->probe) {
 		ret = ptr->probe(dev);
 		if (ret) {
 			/*
@@ -203,14 +197,25 @@  static int i2c_dw_probe_lock_support(struct dw_i2c_dev *dev)
 			if (ret != -ENODEV)
 				return ret;
 
+			i++;
+			ptr++;
 			continue;
 		}
 
-		dev->semaphore_cb = ptr;
+		dev->semaphore_idx = i;
 		break;
 	}
 
-	return devm_add_action_or_reset(dev->dev, i2c_dw_remove_lock_support, dev);
+	return 0;
+}
+
+static void i2c_dw_remove_lock_support(struct dw_i2c_dev *dev)
+{
+	if (dev->semaphore_idx < 0)
+		return;
+
+	if (i2c_dw_semaphore_cb_table[dev->semaphore_idx].remove)
+		i2c_dw_semaphore_cb_table[dev->semaphore_idx].remove(dev);
 }
 
 static void dw_i2c_plat_assert_reset(void *data)
@@ -340,6 +345,8 @@  static void dw_i2c_plat_remove(struct platform_device *pdev)
 
 	pm_runtime_dont_use_autosuspend(&pdev->dev);
 	pm_runtime_put_sync(&pdev->dev);
+
+	i2c_dw_remove_lock_support(dev);
 }
 
 static const struct of_device_id dw_i2c_of_match[] = {