Message ID | 20201106193317.16993-1-leeman.duncan@gmail.com |
---|---|
State | New |
Headers | show |
Series | [v2] SCSI: libiscsi: fix NOP race condition | expand |
On 11/6/20 1:33 PM, Lee Duncan wrote: > From: Lee Duncan <lduncan@suse.com> > > iSCSI NOPs are sometimes "lost", mistakenly sent to the > user-land iscsid daemon instead of handled in the kernel, > as they should be, resulting in a message from the daemon like: > >> iscsid: Got nop in, but kernel supports nop handling. > > This can occur because of the new forward- and back-locks, > and the fact that an iSCSI NOP response can occur before > processing of the NOP send is complete. This can result > in "conn->ping_task" being NULL in iscsi_nop_out_rsp(), > when the pointer is actually in the process of being set. > > To work around this, we add a new state to the "ping_task" > pointer. In addition to NULL (not assigned) and a pointer > (assigned), we add the state "being set", which is signaled > with an INVALID pointer (using "-1"). > > Changes since V1: > - expanded using READ_ONCE()/WRITE_ONCE() to the whole path > > Signed-off-by: Lee Duncan <lduncan@suse.com> > --- > drivers/scsi/libiscsi.c | 23 +++++++++++++++-------- > include/scsi/libiscsi.h | 3 +++ > 2 files changed, 18 insertions(+), 8 deletions(-) > > diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c > index 1e9c3171fa9f..f9314f1393fb 100644 > --- a/drivers/scsi/libiscsi.c > +++ b/drivers/scsi/libiscsi.c > @@ -533,8 +533,8 @@ static void iscsi_complete_task(struct iscsi_task *task, int state) > if (conn->task == task) > conn->task = NULL; > > - if (conn->ping_task == task) > - conn->ping_task = NULL; > + if (READ_ONCE(conn->ping_task) == task) > + WRITE_ONCE(conn->ping_task, NULL); > > /* release get from queueing */ > __iscsi_put_task(task); > @@ -738,6 +738,9 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, > task->conn->session->age); > } > > + if (unlikely(READ_ONCE(conn->ping_task) == INVALID_SCSI_TASK)) > + WRITE_ONCE(conn->ping_task, task); > + > if (!ihost->workq) { > if (iscsi_prep_mgmt_task(conn, task)) > goto free_task; > @@ -941,8 +944,11 @@ static int iscsi_send_nopout(struct iscsi_conn *conn, struct iscsi_nopin *rhdr) > struct iscsi_nopout hdr; > struct iscsi_task *task; > > - if (!rhdr && conn->ping_task) > - return -EINVAL; > + if (!rhdr) { > + if (READ_ONCE(conn->ping_task)) > + return -EINVAL; > + WRITE_ONCE(conn->ping_task, INVALID_SCSI_TASK); > + } > > memset(&hdr, 0, sizeof(struct iscsi_nopout)); > hdr.opcode = ISCSI_OP_NOOP_OUT | ISCSI_OP_IMMEDIATE; > @@ -957,11 +963,12 @@ static int iscsi_send_nopout(struct iscsi_conn *conn, struct iscsi_nopin *rhdr) > > task = __iscsi_conn_send_pdu(conn, (struct iscsi_hdr *)&hdr, NULL, 0); > if (!task) { > + if (!rhdr) > + WRITE_ONCE(conn->ping_task, NULL); > iscsi_conn_printk(KERN_ERR, conn, "Could not send nopout\n"); > return -EIO; > } else if (!rhdr) { > /* only track our nops */ > - conn->ping_task = task; > conn->last_ping = jiffies; > } > > @@ -984,7 +991,7 @@ static int iscsi_nop_out_rsp(struct iscsi_task *task, > struct iscsi_conn *conn = task->conn; > int rc = 0; > > - if (conn->ping_task != task) { > + if (READ_ONCE(conn->ping_task) != task) { > /* > * If this is not in response to one of our > * nops then it must be from userspace. > @@ -1923,7 +1930,7 @@ static void iscsi_start_tx(struct iscsi_conn *conn) > */ > static int iscsi_has_ping_timed_out(struct iscsi_conn *conn) > { > - if (conn->ping_task && > + if (READ_ONCE(conn->ping_task) && > time_before_eq(conn->last_recv + (conn->recv_timeout * HZ) + > (conn->ping_timeout * HZ), jiffies)) > return 1; > @@ -2058,7 +2065,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) > * Checking the transport already or nop from a cmd timeout still > * running > */ > - if (conn->ping_task) { > + if (READ_ONCE(conn->ping_task)) { > task->have_checked_conn = true; > rc = BLK_EH_RESET_TIMER; > goto done; > diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h > index c25fb86ffae9..b3bbd10eb3f0 100644 > --- a/include/scsi/libiscsi.h > +++ b/include/scsi/libiscsi.h > @@ -132,6 +132,9 @@ struct iscsi_task { > void *dd_data; /* driver/transport data */ > }; > > +/* invalid scsi_task pointer */ > +#define INVALID_SCSI_TASK (struct iscsi_task *)-1l > + > static inline int iscsi_task_has_unsol_data(struct iscsi_task *task) > { > return task->unsol_r2t.data_length > task->unsol_r2t.sent; > Reviewed-by: Mike Christie <michael.christie@oracle.com>
On Fri, 6 Nov 2020 11:33:17 -0800, Lee Duncan wrote: > iSCSI NOPs are sometimes "lost", mistakenly sent to the > user-land iscsid daemon instead of handled in the kernel, > as they should be, resulting in a message from the daemon like: > > > iscsid: Got nop in, but kernel supports nop handling. > > This can occur because of the new forward- and back-locks, > and the fact that an iSCSI NOP response can occur before > processing of the NOP send is complete. This can result > in "conn->ping_task" being NULL in iscsi_nop_out_rsp(), > when the pointer is actually in the process of being set. > > [...] Applied to 5.10/scsi-fixes, thanks! [1/1] scsi: libiscsi: Fix NOP race condition https://git.kernel.org/mkp/scsi/c/fe0a8a95e713 -- Martin K. Petersen Oracle Linux Engineering
diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 1e9c3171fa9f..f9314f1393fb 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -533,8 +533,8 @@ static void iscsi_complete_task(struct iscsi_task *task, int state) if (conn->task == task) conn->task = NULL; - if (conn->ping_task == task) - conn->ping_task = NULL; + if (READ_ONCE(conn->ping_task) == task) + WRITE_ONCE(conn->ping_task, NULL); /* release get from queueing */ __iscsi_put_task(task); @@ -738,6 +738,9 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr, task->conn->session->age); } + if (unlikely(READ_ONCE(conn->ping_task) == INVALID_SCSI_TASK)) + WRITE_ONCE(conn->ping_task, task); + if (!ihost->workq) { if (iscsi_prep_mgmt_task(conn, task)) goto free_task; @@ -941,8 +944,11 @@ static int iscsi_send_nopout(struct iscsi_conn *conn, struct iscsi_nopin *rhdr) struct iscsi_nopout hdr; struct iscsi_task *task; - if (!rhdr && conn->ping_task) - return -EINVAL; + if (!rhdr) { + if (READ_ONCE(conn->ping_task)) + return -EINVAL; + WRITE_ONCE(conn->ping_task, INVALID_SCSI_TASK); + } memset(&hdr, 0, sizeof(struct iscsi_nopout)); hdr.opcode = ISCSI_OP_NOOP_OUT | ISCSI_OP_IMMEDIATE; @@ -957,11 +963,12 @@ static int iscsi_send_nopout(struct iscsi_conn *conn, struct iscsi_nopin *rhdr) task = __iscsi_conn_send_pdu(conn, (struct iscsi_hdr *)&hdr, NULL, 0); if (!task) { + if (!rhdr) + WRITE_ONCE(conn->ping_task, NULL); iscsi_conn_printk(KERN_ERR, conn, "Could not send nopout\n"); return -EIO; } else if (!rhdr) { /* only track our nops */ - conn->ping_task = task; conn->last_ping = jiffies; } @@ -984,7 +991,7 @@ static int iscsi_nop_out_rsp(struct iscsi_task *task, struct iscsi_conn *conn = task->conn; int rc = 0; - if (conn->ping_task != task) { + if (READ_ONCE(conn->ping_task) != task) { /* * If this is not in response to one of our * nops then it must be from userspace. @@ -1923,7 +1930,7 @@ static void iscsi_start_tx(struct iscsi_conn *conn) */ static int iscsi_has_ping_timed_out(struct iscsi_conn *conn) { - if (conn->ping_task && + if (READ_ONCE(conn->ping_task) && time_before_eq(conn->last_recv + (conn->recv_timeout * HZ) + (conn->ping_timeout * HZ), jiffies)) return 1; @@ -2058,7 +2065,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc) * Checking the transport already or nop from a cmd timeout still * running */ - if (conn->ping_task) { + if (READ_ONCE(conn->ping_task)) { task->have_checked_conn = true; rc = BLK_EH_RESET_TIMER; goto done; diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index c25fb86ffae9..b3bbd10eb3f0 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -132,6 +132,9 @@ struct iscsi_task { void *dd_data; /* driver/transport data */ }; +/* invalid scsi_task pointer */ +#define INVALID_SCSI_TASK (struct iscsi_task *)-1l + static inline int iscsi_task_has_unsol_data(struct iscsi_task *task) { return task->unsol_r2t.data_length > task->unsol_r2t.sent;