diff mbox series

[RFC] scsi:libiscsi:Fix possible NULL dereference in iscsi_eh_cmd_timed_out

Message ID 1607935317-263599-1-git-send-email-wubo40@huawei.com
State New
Headers show
Series [RFC] scsi:libiscsi:Fix possible NULL dereference in iscsi_eh_cmd_timed_out | expand

Commit Message

Wu Bo Dec. 14, 2020, 8:41 a.m. UTC
When testing kernel 4.18 version, NULL pointer dereference problem occurs
in iscsi_eh_cmd_timed_out function.

I think this bug in the upstream is still exists.

The analysis reasons are as follows:
1)  For some reason, I/O command did not complete within 
    the timeout period. The block layer timer works, 
    call scsi_times_out() to handle I/O timeout logic. 
    At the same time the command just completes.

2)  scsi_times_out() call iscsi_eh_cmd_timed_out() 
    to processing timeout logic.  although there is an NULL judgment 
	for the task, the task has not been released yet now.    

3)  iscsi_complete_task() call __iscsi_put_task(), 
    The task reference count reaches zero, the conditions for free task 
    is met, then iscsi_free_task () free the task, 
    and let sc->SCp.ptr = NULL. After iscsi_eh_cmd_timed_out passes 
    the task judgment check, there may be NULL dereference scenarios
    later.
	
   CPU0                                       	       CPU3

    |- scsi_times_out()                        		|- iscsi_complete_task()
    |                                       		|
    |- iscsi_eh_cmd_timed_out()                 	|- __iscsi_put_task()
    |                                       		|
    |- task=sc->SCp.ptr, task is not NUL, check passed  |- iscsi_free_task(task) 
    |                                       		|
    | 							|-> sc->SCp.ptr = NULL
    |                                                   |
    |- task is NULL now, NULL pointer dereference       |
    |                                           	| 
   \|/                                     	       \|/

Calltrace:
[380751.840862] BUG: unable to handle kernel NULL pointer dereference at 0000000000000138
[380751.843709] PGD 0 P4D 0
[380751.844770] Oops: 0000 [#1] SMP PTI
[380751.846283] CPU: 0 PID: 403 Comm: kworker/0:1H Kdump: loaded Tainted: G
[380751.851467] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996)
[380751.856521] Workqueue: kblockd blk_mq_timeout_work
[380751.858527] RIP: 0010:iscsi_eh_cmd_timed_out+0x15e/0x2e0 [libiscsi]
[380751.861129] Code: 83 ea 01 48 8d 74 d0 08 48 8b 10 48 8b 4a 50 48 85 c9 74 2c 48 39 d5 74
[380751.868811] RSP: 0018:ffffc1e280a5fd58 EFLAGS: 00010246
[380751.870978] RAX: ffff9fd1e84e15e0 RBX: ffff9fd1e84e6dd0 RCX: 0000000116acc580
[380751.873791] RDX: ffff9fd1f97a9400 RSI: ffff9fd1e84e1800 RDI: ffff9fd1e4d6d420
[380751.876059] RBP: ffff9fd1e4d49000 R08: 0000000116acc580 R09: 0000000116acc580
[380751.878284] R10: 0000000000000000 R11: 0000000000000000 R12: ffff9fd1e6e931e8
[380751.880500] R13: ffff9fd1e84e6ee0 R14: 0000000000000010 R15: 0000000000000003
[380751.882687] FS:  0000000000000000(0000) GS:ffff9fd1fac00000(0000) knlGS:0000000000000000
[380751.885236] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[380751.887059] CR2: 0000000000000138 CR3: 000000011860a001 CR4: 00000000003606f0
[380751.889308] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000
[380751.891523] DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400
[380751.893738] Call Trace:
[380751.894639]  scsi_times_out+0x60/0x1c0
[380751.895861]  blk_mq_check_expired+0x144/0x200
[380751.897302]  ? __switch_to_asm+0x35/0x70
[380751.898551]  blk_mq_queue_tag_busy_iter+0x195/0x2e0
[380751.900091]  ? __blk_mq_requeue_request+0x100/0x100
[380751.901611]  ? __switch_to_asm+0x41/0x70
[380751.902853]  ? __blk_mq_requeue_request+0x100/0x100
[380751.904398]  blk_mq_timeout_work+0x54/0x130
[380751.905740]  process_one_work+0x195/0x390
[380751.907228]  worker_thread+0x30/0x390
[380751.908713]  ? process_one_work+0x390/0x390
[380751.910350]  kthread+0x10d/0x130
[380751.911470]  ? kthread_flush_work_fn+0x10/0x10
[380751.913007]  ret_from_fork+0x35/0x40

crash> dis -l iscsi_eh_cmd_timed_out+0x15e
xxxxx/drivers/scsi/libiscsi.c: 2062

1970 enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
{
...
1984         spin_lock_bh(&session->frwd_lock);
1985         task = (struct iscsi_task *)sc->SCp.ptr;
1986         if (!task) {    
1987                 /*
1988                  * Raced with completion. Blk layer has taken ownership
1989                  * so let timeout code complete it now.
1990                  */     
1991                 rc = BLK_EH_DONE;
1992                 goto done;
1993         }

...

2052         for (i = 0; i < conn->session->cmds_max; i++) {
2053                 running_task = conn->session->cmds[i];
2054                 if (!running_task->sc || running_task == task ||
2055                      running_task->state != ISCSI_TASK_RUNNING)
2056                         continue;
2057
2058                 /*
2059                  * Only check if cmds started before this one have made
2060                  * progress, or this could never fail
2061                  */
2062                 if (time_after(running_task->sc->jiffies_at_alloc, 
2063                                task->sc->jiffies_at_alloc))    <---
2064                         continue;
2065
...
}

carsh> struct scsi_cmnd ffff9fd1e6e931e8
struct scsi_cmnd {
  ...
  SCp = {
    ptr = 0x0,   <--- iscsi_task
    this_residual = 0,
    ...
  },
}

Fixes: 3e5c28ad03 ("libiscsi: merge iscsi_mgmt_task and iscsi_cmd_task")
Signed-off-by: Wu Bo <wubo40@huawei.com>
---
 drivers/scsi/libiscsi.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

Comments

Wu Bo Dec. 16, 2020, 6:05 a.m. UTC | #1
On 2020/12/15 1:36, Mike Christie wrote:
> On 12/14/20 2:41 AM, Wu Bo wrote:

>> When testing kernel 4.18 version, NULL pointer dereference problem occurs

>> in iscsi_eh_cmd_timed_out function.

>>

>> I think this bug in the upstream is still exists.

>>

>> The analysis reasons are as follows:

>> 1)  For some reason, I/O command did not complete within

>>      the timeout period. The block layer timer works,

>>      call scsi_times_out() to handle I/O timeout logic.

>>      At the same time the command just completes.

>>

>> 2)  scsi_times_out() call iscsi_eh_cmd_timed_out()

>>      to processing timeout logic.  although there is an NULL judgment

>> 	for the task, the task has not been released yet now.

>>

>> 3)  iscsi_complete_task() call __iscsi_put_task(),

>>      The task reference count reaches zero, the conditions for free task

>>      is met, then iscsi_free_task () free the task,

>>      and let sc->SCp.ptr = NULL. After iscsi_eh_cmd_timed_out passes

>>      the task judgment check, there may be NULL dereference scenarios

>>      later.

>> 	

> 

> I have a patch for this I think. This is broken out of patchset I was

> trying to fixup the back lock usage for offload drivers, so I have only

> compile tested it.

> 

> There is another issue where the for lun reset cleanup we could race. The

> comments mention suspending the rx side, but we only do that for session level

> cleaup.

>  > The basic idea is we don't want to add more frwd lock uses in the 

completion
> patch like in your patch. In these non perf paths, like the tmf/timeout case

> we can just take a ref to the cmd so it's not freed from under us.

> 


You are right, add more frwd lock does affect performance in the completion.

> 

> 

> diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c

> index f9314f1..f07f8c1 100644

> --- a/drivers/scsi/libiscsi.c

> +++ b/drivers/scsi/libiscsi.c

> @@ -573,18 +573,9 @@ void iscsi_complete_scsi_task(struct iscsi_task *task,

>   static void fail_scsi_task(struct iscsi_task *task, int err)

>   {

>   	struct iscsi_conn *conn = task->conn;

> -	struct scsi_cmnd *sc;

> +	struct scsi_cmnd *sc = task->sc;

>   	int state;

>   

> -	/*

> -	 * if a command completes and we get a successful tmf response

> -	 * we will hit this because the scsi eh abort code does not take

> -	 * a ref to the task.

> -	 */

> -	sc = task->sc;

> -	if (!sc)

> -		return;

> -

>   	if (task->state == ISCSI_TASK_PENDING) {

>   		/*

>   		 * cmd never made it to the xmit thread, so we should not count

> @@ -1855,26 +1846,34 @@ static int iscsi_exec_task_mgmt_fn(struct iscsi_conn *conn,

>   }

>   

>   /*

> - * Fail commands. session lock held and recv side suspended and xmit

> - * thread flushed

> + * Fail commands. session frwd lock held and and xmit thread flushed.

>    */

>   static void fail_scsi_tasks(struct iscsi_conn *conn, u64 lun, int error)

>   {

> +	struct iscsi_session *session = conn->session;

>   	struct iscsi_task *task;

>   	int i;

>   

> -	for (i = 0; i < conn->session->cmds_max; i++) {

> -		task = conn->session->cmds[i];

> -		if (!task->sc || task->state == ISCSI_TASK_FREE)

> +	for (i = 0; i < session->cmds_max; i++) {

> +		spin_lock_bh(&session->back_lock);

> +		task = session->cmds[i];

> +		if (!task->sc || task->state == ISCSI_TASK_FREE) {

> +			spin_unlock_bh(&session->back_lock);

>   			continue;

> +		}

>   

> -		if (lun != -1 && lun != task->sc->device->lun)

> +		if (lun != -1 && lun != task->sc->device->lun) {

> +			spin_unlock_bh(&session->back_lock);

>   			continue;

> +		}

> +		__iscsi_get_task(task);

> +		spin_unlock_bh(&session->back_lock);

>   

> -		ISCSI_DBG_SESSION(conn->session,

> +		ISCSI_DBG_SESSION(session,

>   				  "failing sc %p itt 0x%x state %d\n",

>   				  task->sc, task->itt, task->state);

>   		fail_scsi_task(task, error);

> +		iscsi_put_task(task);

>   	}

>   }

>   

> @@ -1953,6 +1952,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)

>   	ISCSI_DBG_EH(session, "scsi cmd %p timedout\n", sc);

>   

>   	spin_lock_bh(&session->frwd_lock);

> +	spin_lock(&session->back_lock);

>   	task = (struct iscsi_task *)sc->SCp.ptr;

>   	if (!task) {

>   		/*

> @@ -1960,8 +1960,11 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)

>   		 * so let timeout code complete it now.

>   		 */

>   		rc = BLK_EH_DONE;

> +		spin_unlock(&session->back_lock);

>   		goto done;

>   	}

> +	__iscsi_get_task(task);

> +	spin_unlock(&session->back_lock);

>   

>   	if (session->state != ISCSI_STATE_LOGGED_IN) {

>   		/*

> @@ -2077,9 +2080,12 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)

>   	rc = BLK_EH_RESET_TIMER;

>   

>   done:

> -	if (task)

> -		task->last_timeout = jiffies;

>   	spin_unlock_bh(&session->frwd_lock);

> +

> +	if (task) {

> +		task->last_timeout = jiffies;

> +		iscsi_put_task(task);

> +	}

>   	ISCSI_DBG_EH(session, "return %s\n", rc == BLK_EH_RESET_TIMER ?

>   		     "timer reset" : "shutdown or nh");

>   	return rc;

> @@ -2187,15 +2193,20 @@ int iscsi_eh_abort(struct scsi_cmnd *sc)

>   	conn->eh_abort_cnt++;

>   	age = session->age;

>   

> -	task = (struct iscsi_task *)sc->SCp.ptr;

> -	ISCSI_DBG_EH(session, "aborting [sc %p itt 0x%x]\n",

> -		     sc, task->itt);

> -

> -	/* task completed before time out */

> -	if (!task->sc) {

> +	spin_lock(&session->back_lock);

> +	task = (struct iscsi_task *)sc->SCp.ptr;	

> +	if (!task || !task->sc) {

> +		/* task completed before time out */

>   		ISCSI_DBG_EH(session, "sc completed while abort in progress\n");

> -		goto success;

> +

> +		spin_unlock(&session->back_lock);

> +		spin_unlock_bh(&session->frwd_lock);

> +		mutex_unlock(&session->eh_mutex);

> +		return SUCCESS;

>   	}

> +	ISCSI_DBG_EH(session, "aborting [sc %p itt 0x%x]\n", sc, task->itt);

> +	__iscsi_get_task(task);

> +	spin_unlock(&session->back_lock);

>   

>   	if (task->state == ISCSI_TASK_PENDING) {

>   		fail_scsi_task(task, DID_ABORT);

> @@ -2258,6 +2269,8 @@ int iscsi_eh_abort(struct scsi_cmnd *sc)

>   	ISCSI_DBG_EH(session, "abort success [sc %p itt 0x%x]\n",

>   		     sc, task->itt);

>   	mutex_unlock(&session->eh_mutex);

> +

> +	iscsi_put_task(task);

>   	return SUCCESS;

>   

>   failed:

> @@ -2266,6 +2279,8 @@ int iscsi_eh_abort(struct scsi_cmnd *sc)

>   	ISCSI_DBG_EH(session, "abort failed [sc %p itt 0x%x]\n", sc,

>   		     task ? task->itt : 0);

>   	mutex_unlock(&session->eh_mutex);

> +

> +	iscsi_put_task(task);

>   	return FAILED;

>   }

>   EXPORT_SYMBOL_GPL(iscsi_eh_abort);

> 


I have tested this patch, covering IO timeout and IO abort error 
handling scenarios, it is works well.

It is lgtm, Thanks
diff mbox series

Patch

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 0bb5d76..e2cacdd 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -876,7 +876,9 @@  static void iscsi_scsi_cmd_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 	ISCSI_DBG_SESSION(session, "cmd rsp done [sc %p res %d itt 0x%x]\n",
 			  sc, sc->result, task->itt);
 	conn->scsirsp_pdus_cnt++;
+	spin_lock_bh(&session->frwd_lock);
 	iscsi_complete_task(task, ISCSI_TASK_COMPLETED);
+	spin_unlock_bh(&session->frwd_lock);
 }
 
 /**
@@ -917,7 +919,9 @@  static void iscsi_scsi_cmd_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 			  "[sc %p res %d itt 0x%x]\n",
 			  sc, sc->result, task->itt);
 	conn->scsirsp_pdus_cnt++;
+	spin_lock_bh(&conn->session->frwd_lock);
 	iscsi_complete_task(task, ISCSI_TASK_COMPLETED);
+	spin_unlock_bh(&conn->session->frwd_lock);
 }
 
 static void iscsi_tmf_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
@@ -1001,7 +1005,10 @@  static int iscsi_nop_out_rsp(struct iscsi_task *task,
 			rc = ISCSI_ERR_CONN_FAILED;
 	} else
 		mod_timer(&conn->transport_timer, jiffies + conn->recv_timeout);
+	spin_lock_bh(&conn->session->frwd_lock);
 	iscsi_complete_task(task, ISCSI_TASK_COMPLETED);
+	spin_unlock_bh(&conn->session->frwd_lock);
+
 	return rc;
 }
 
@@ -1241,7 +1248,9 @@  int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 		}
 
 		iscsi_tmf_rsp(conn, hdr);
+		spin_lock_bh(&session->frwd_lock);
 		iscsi_complete_task(task, ISCSI_TASK_COMPLETED);
+		spin_unlock_bh(&session->frwd_lock);
 		break;
 	case ISCSI_OP_NOOP_IN:
 		iscsi_update_cmdsn(session, (struct iscsi_nopin*)hdr);
@@ -1264,7 +1273,10 @@  int __iscsi_complete_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
 recv_pdu:
 	if (iscsi_recv_pdu(conn->cls_conn, hdr, data, datalen))
 		rc = ISCSI_ERR_CONN_FAILED;
+	spin_lock_bh(&session->frwd_lock);
 	iscsi_complete_task(task, ISCSI_TASK_COMPLETED);
+	spin_unlock_bh(&session->frwd_lock);
+
 	return rc;
 }
 EXPORT_SYMBOL_GPL(__iscsi_complete_pdu);