Message ID | 20250314225206.1487838-1-bvanassche@acm.org |
---|---|
State | New |
Headers | show |
Series | [v2] scsi: ufs: core: Fix a race condition related to device commands | expand |
On 3/15/25 1:46 AM, Avri Altman wrote: > Shouldn't you now call for reinit_completion now? > before wait_for_dev? Or at ufshcd_dev_cmd_completion ? complete() increments the counter in struct completion and wait_for_complete() decrements it, isn't it? From kernel/sched/completion.c: void complete(struct completion *x) { complete_with_flags(x, 0); } EXPORT_SYMBOL(complete); static void complete_with_flags(struct completion *x, int wake_flags) { unsigned long flags; raw_spin_lock_irqsave(&x->wait.lock, flags); if (x->done != UINT_MAX) x->done++; swake_up_locked(&x->wait, wake_flags); raw_spin_unlock_irqrestore(&x->wait.lock, flags); } As one can see complete() increments x->done if it is less than UINT_MAX, which should be the case in the UFS driver. From the same file: void __sched wait_for_completion(struct completion *x) { wait_for_common(x, MAX_SCHEDULE_TIMEOUT, TASK_UNINTERRUPTIBLE); } EXPORT_SYMBOL(wait_for_completion); static long __sched wait_for_common(struct completion *x, long timeout, int state) { return __wait_for_common(x, schedule_timeout, timeout, state); } static inline long __sched __wait_for_common(struct completion *x, long (*action)(long), long timeout, int state) { might_sleep(); complete_acquire(x); raw_spin_lock_irq(&x->wait.lock); timeout = do_wait_for_common(x, action, timeout, state); raw_spin_unlock_irq(&x->wait.lock); complete_release(x); return timeout; } static inline long __sched do_wait_for_common(struct completion *x, long (*action)(long), long timeout, int state) { if (!x->done) { DECLARE_SWAITQUEUE(wait); do { if (signal_pending_state(state, current)) { timeout = -ERESTARTSYS; break; } __prepare_to_swait(&x->wait, &wait); __set_current_state(state); raw_spin_unlock_irq(&x->wait.lock); timeout = action(timeout); raw_spin_lock_irq(&x->wait.lock); } while (!x->done && timeout); __finish_swait(&x->wait, &wait); if (!x->done) return timeout; } if (x->done != UINT_MAX) x->done--; return timeout ?: 1; } If I read the above code correctly, it waits until x->done != 0 or the timeout has been reached. x->done is decremented if a strictly positive value is returned. do_wait_for_common() ignores pending signals because state == TASK_UNINTERRUPTIBLE. Bart.
On Fri, 2025-03-14 at 15:51 -0700, Bart Van Assche wrote: > > External email : Please do not click links or open attachments until > you have verified the sender or the content. > > > There is a TOCTOU race in ufshcd_compl_one_cqe(): hba- > >dev_cmd.complete > may be cleared from another thread after it has been checked and > before > it is used. Fix this race by moving the device command completion > from > the stack of the device command submitter into struct ufs_hba. This > patch fixes the following kernel crash: > > Unable to handle kernel NULL pointer dereference at virtual address > 0000000000000008 > Call trace: > _raw_spin_lock_irqsave+0x34/0x80 > complete+0x24/0xb8 > ufshcd_compl_one_cqe+0x13c/0x4f0 > ufshcd_mcq_poll_cqe_lock+0xb4/0x108 > ufshcd_intr+0x2f4/0x444 > __handle_irq_event_percpu+0xbc/0x250 > handle_irq_event+0x48/0xb0 > > Fixes: 5a0b0cb9bee7 ("[SCSI] ufs: Add support for sending NOP OUT > UPIU") > Signed-off-by: Bart Van Assche <bvanassche@acm.org> > --- > > Changes compared to v1: > - Call init_completion() once instead of every time a device > management > command is submitted. > Reviewed-by: Peter Wang <peter.wang@mediatek.com>
Bart, > There is a TOCTOU race in ufshcd_compl_one_cqe(): > hba->dev_cmd.complete may be cleared from another thread after it has > been checked and before it is used. Fix this race by moving the device > command completion from the stack of the device command submitter into > struct ufs_hba. This patch fixes the following kernel crash: Applied to 6.15/scsi-staging, thanks!
diff --git a/drivers/ufs/core/ufshcd.c b/drivers/ufs/core/ufshcd.c index 4e1e214fc5a2..3288a7da73dc 100644 --- a/drivers/ufs/core/ufshcd.c +++ b/drivers/ufs/core/ufshcd.c @@ -3176,16 +3176,10 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba, int err; retry: - time_left = wait_for_completion_timeout(hba->dev_cmd.complete, + time_left = wait_for_completion_timeout(&hba->dev_cmd.complete, time_left); if (likely(time_left)) { - /* - * The completion handler called complete() and the caller of - * this function still owns the @lrbp tag so the code below does - * not trigger any race conditions. - */ - hba->dev_cmd.complete = NULL; err = ufshcd_get_tr_ocs(lrbp, NULL); if (!err) err = ufshcd_dev_cmd_completion(hba, lrbp); @@ -3199,7 +3193,6 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba, /* successfully cleared the command, retry if needed */ if (ufshcd_clear_cmd(hba, lrbp->task_tag) == 0) err = -EAGAIN; - hba->dev_cmd.complete = NULL; return err; } @@ -3215,11 +3208,9 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba, spin_lock_irqsave(&hba->outstanding_lock, flags); pending = test_bit(lrbp->task_tag, &hba->outstanding_reqs); - if (pending) { - hba->dev_cmd.complete = NULL; + if (pending) __clear_bit(lrbp->task_tag, &hba->outstanding_reqs); - } spin_unlock_irqrestore(&hba->outstanding_lock, flags); if (!pending) { @@ -3237,8 +3228,6 @@ static int ufshcd_wait_for_dev_cmd(struct ufs_hba *hba, spin_lock_irqsave(&hba->outstanding_lock, flags); pending = test_bit(lrbp->task_tag, &hba->outstanding_reqs); - if (pending) - hba->dev_cmd.complete = NULL; spin_unlock_irqrestore(&hba->outstanding_lock, flags); if (!pending) { @@ -3272,13 +3261,9 @@ static void ufshcd_dev_man_unlock(struct ufs_hba *hba) static int ufshcd_issue_dev_cmd(struct ufs_hba *hba, struct ufshcd_lrb *lrbp, const u32 tag, int timeout) { - DECLARE_COMPLETION_ONSTACK(wait); int err; - hba->dev_cmd.complete = &wait; - ufshcd_add_query_upiu_trace(hba, UFS_QUERY_SEND, lrbp->ucd_req_ptr); - ufshcd_send_command(hba, tag, hba->dev_cmd_queue); err = ufshcd_wait_for_dev_cmd(hba, lrbp, timeout); @@ -5585,12 +5570,12 @@ void ufshcd_compl_one_cqe(struct ufs_hba *hba, int task_tag, ufshcd_release_scsi_cmd(hba, lrbp); /* Do not touch lrbp after scsi done */ scsi_done(cmd); - } else if (hba->dev_cmd.complete) { + } else { if (cqe) { ocs = le32_to_cpu(cqe->status) & MASK_OCS; lrbp->utr_descriptor_ptr->header.ocs = ocs; } - complete(hba->dev_cmd.complete); + complete(&hba->dev_cmd.complete); } } @@ -10475,6 +10460,8 @@ int ufshcd_init(struct ufs_hba *hba, void __iomem *mmio_base, unsigned int irq) */ spin_lock_init(&hba->clk_gating.lock); + init_completion(&hba->dev_cmd.complete); + err = ufshcd_hba_init(hba); if (err) goto out_error; diff --git a/include/ufs/ufshcd.h b/include/ufs/ufshcd.h index e3909cc691b2..f56050ce9445 100644 --- a/include/ufs/ufshcd.h +++ b/include/ufs/ufshcd.h @@ -246,7 +246,7 @@ struct ufs_query { struct ufs_dev_cmd { enum dev_cmd_type type; struct mutex lock; - struct completion *complete; + struct completion complete; struct ufs_query query; };
There is a TOCTOU race in ufshcd_compl_one_cqe(): hba->dev_cmd.complete may be cleared from another thread after it has been checked and before it is used. Fix this race by moving the device command completion from the stack of the device command submitter into struct ufs_hba. This patch fixes the following kernel crash: Unable to handle kernel NULL pointer dereference at virtual address 0000000000000008 Call trace: _raw_spin_lock_irqsave+0x34/0x80 complete+0x24/0xb8 ufshcd_compl_one_cqe+0x13c/0x4f0 ufshcd_mcq_poll_cqe_lock+0xb4/0x108 ufshcd_intr+0x2f4/0x444 __handle_irq_event_percpu+0xbc/0x250 handle_irq_event+0x48/0xb0 Fixes: 5a0b0cb9bee7 ("[SCSI] ufs: Add support for sending NOP OUT UPIU") Signed-off-by: Bart Van Assche <bvanassche@acm.org> --- Changes compared to v1: - Call init_completion() once instead of every time a device management command is submitted. drivers/ufs/core/ufshcd.c | 25 ++++++------------------- include/ufs/ufshcd.h | 2 +- 2 files changed, 7 insertions(+), 20 deletions(-)