diff mbox series

[v2,06/17] ibmvfc: add handlers to drain and complete Sub-CRQ responses

Message ID 20201202005329.4538-7-tyreld@linux.ibm.com
State Superseded
Headers show
Series None | expand

Commit Message

Tyrel Datwyler Dec. 2, 2020, 12:53 a.m. UTC
The logic for iterating over the Sub-CRQ responses is similiar to that
of the primary CRQ. Add the necessary handlers for processing those
responses.

Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
---
 drivers/scsi/ibmvscsi/ibmvfc.c | 77 ++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)

Comments

Brian King Dec. 2, 2020, 3:46 p.m. UTC | #1
On 12/1/20 6:53 PM, Tyrel Datwyler wrote:
> The logic for iterating over the Sub-CRQ responses is similiar to that
> of the primary CRQ. Add the necessary handlers for processing those
> responses.
> 
> Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
> ---
>  drivers/scsi/ibmvscsi/ibmvfc.c | 77 ++++++++++++++++++++++++++++++++++
>  1 file changed, 77 insertions(+)
> 
> diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
> index 97f00fefa809..e9da3f60c793 100644
> --- a/drivers/scsi/ibmvscsi/ibmvfc.c
> +++ b/drivers/scsi/ibmvscsi/ibmvfc.c
> @@ -3381,6 +3381,83 @@ static int ibmvfc_toggle_scrq_irq(struct ibmvfc_sub_queue *scrq, int enable)
>  	return rc;
>  }
>  
> +static void ibmvfc_handle_scrq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost)
> +{
> +	struct ibmvfc_event *evt = (struct ibmvfc_event *)be64_to_cpu(crq->ioba);
> +	unsigned long flags;
> +
> +	switch (crq->valid) {
> +	case IBMVFC_CRQ_CMD_RSP:
> +		break;
> +	case IBMVFC_CRQ_XPORT_EVENT:
> +		return;
> +	default:
> +		dev_err(vhost->dev, "Got and invalid message type 0x%02x\n", crq->valid);
> +		return;
> +	}
> +
> +	/* The only kind of payload CRQs we should get are responses to
> +	 * things we send. Make sure this response is to something we
> +	 * actually sent
> +	 */
> +	if (unlikely(!ibmvfc_valid_event(&vhost->pool, evt))) {
> +		dev_err(vhost->dev, "Returned correlation_token 0x%08llx is invalid!\n",
> +			crq->ioba);
> +		return;
> +	}
> +
> +	if (unlikely(atomic_read(&evt->free))) {
> +		dev_err(vhost->dev, "Received duplicate correlation_token 0x%08llx!\n",
> +			crq->ioba);
> +		return;
> +	}
> +
> +	spin_lock_irqsave(vhost->host->host_lock, flags);
> +	del_timer(&evt->timer);
> +	list_del(&evt->queue);
> +	ibmvfc_trc_end(evt);
> +	spin_unlock_irqrestore(vhost->host->host_lock, flags);
> +	evt->done(evt);
> +}
> +
> +static struct ibmvfc_crq *ibmvfc_next_scrq(struct ibmvfc_sub_queue *scrq)
> +{
> +	struct ibmvfc_crq *crq;
> +
> +	crq = &scrq->msgs[scrq->cur].crq;
> +	if (crq->valid & 0x80) {
> +		if (++scrq->cur == scrq->size)

You are incrementing the cur pointer without any locks held. Although
unlikely, could you also be in ibmvfc_reset_crq in another thread?
If so, you'd have a subtle race condition here where the cur pointer could
be read, then ibmvfc_reset_crq writes it to zero, then this thread
writes it to a non zero value, which would then cause you to be out of
sync with the VIOS as to where the cur pointer is.

> +			scrq->cur = 0;
> +		rmb();
> +	} else
> +		crq = NULL;
> +
> +	return crq;
> +}
> +
Brian King Dec. 2, 2020, 3:56 p.m. UTC | #2
On 12/1/20 6:53 PM, Tyrel Datwyler wrote:
> +static void ibmvfc_handle_scrq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost)
> +{
> +	struct ibmvfc_event *evt = (struct ibmvfc_event *)be64_to_cpu(crq->ioba);
> +	unsigned long flags;
> +
> +	switch (crq->valid) {
> +	case IBMVFC_CRQ_CMD_RSP:
> +		break;
> +	case IBMVFC_CRQ_XPORT_EVENT:
> +		return;
> +	default:
> +		dev_err(vhost->dev, "Got and invalid message type 0x%02x\n", crq->valid);
> +		return;
> +	}
> +
> +	/* The only kind of payload CRQs we should get are responses to
> +	 * things we send. Make sure this response is to something we
> +	 * actually sent
> +	 */
> +	if (unlikely(!ibmvfc_valid_event(&vhost->pool, evt))) {
> +		dev_err(vhost->dev, "Returned correlation_token 0x%08llx is invalid!\n",
> +			crq->ioba);
> +		return;
> +	}
> +
> +	if (unlikely(atomic_read(&evt->free))) {
> +		dev_err(vhost->dev, "Received duplicate correlation_token 0x%08llx!\n",
> +			crq->ioba);
> +		return;
> +	}
> +
> +	spin_lock_irqsave(vhost->host->host_lock, flags);
> +	del_timer(&evt->timer);
> +	list_del(&evt->queue);
> +	ibmvfc_trc_end(evt);

Another thought here... If you are going through ibmvfc_purge_requests at the same time
as this code, you could check the free bit above, then have ibmvfc_purge_requests
put the event on the free queue and call scsi_done, then you come down and get the host
lock here, remove the command from the free list, and call the done function again,
which could result in a double completion to the scsi layer.

I think you need to grab the host lock before you check the free bit to avoid this race.

> +	spin_unlock_irqrestore(vhost->host->host_lock, flags);
> +	evt->done(evt);
> +}
> +
Tyrel Datwyler Dec. 2, 2020, 10:28 p.m. UTC | #3
On 12/2/20 7:46 AM, Brian King wrote:
> On 12/1/20 6:53 PM, Tyrel Datwyler wrote:
>> The logic for iterating over the Sub-CRQ responses is similiar to that
>> of the primary CRQ. Add the necessary handlers for processing those
>> responses.
>>
>> Signed-off-by: Tyrel Datwyler <tyreld@linux.ibm.com>
>> ---
>>  drivers/scsi/ibmvscsi/ibmvfc.c | 77 ++++++++++++++++++++++++++++++++++
>>  1 file changed, 77 insertions(+)
>>
>> diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
>> index 97f00fefa809..e9da3f60c793 100644
>> --- a/drivers/scsi/ibmvscsi/ibmvfc.c
>> +++ b/drivers/scsi/ibmvscsi/ibmvfc.c
>> @@ -3381,6 +3381,83 @@ static int ibmvfc_toggle_scrq_irq(struct ibmvfc_sub_queue *scrq, int enable)
>>  	return rc;
>>  }
>>  
>> +static void ibmvfc_handle_scrq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost)
>> +{
>> +	struct ibmvfc_event *evt = (struct ibmvfc_event *)be64_to_cpu(crq->ioba);
>> +	unsigned long flags;
>> +
>> +	switch (crq->valid) {
>> +	case IBMVFC_CRQ_CMD_RSP:
>> +		break;
>> +	case IBMVFC_CRQ_XPORT_EVENT:
>> +		return;
>> +	default:
>> +		dev_err(vhost->dev, "Got and invalid message type 0x%02x\n", crq->valid);
>> +		return;
>> +	}
>> +
>> +	/* The only kind of payload CRQs we should get are responses to
>> +	 * things we send. Make sure this response is to something we
>> +	 * actually sent
>> +	 */
>> +	if (unlikely(!ibmvfc_valid_event(&vhost->pool, evt))) {
>> +		dev_err(vhost->dev, "Returned correlation_token 0x%08llx is invalid!\n",
>> +			crq->ioba);
>> +		return;
>> +	}
>> +
>> +	if (unlikely(atomic_read(&evt->free))) {
>> +		dev_err(vhost->dev, "Received duplicate correlation_token 0x%08llx!\n",
>> +			crq->ioba);
>> +		return;
>> +	}
>> +
>> +	spin_lock_irqsave(vhost->host->host_lock, flags);
>> +	del_timer(&evt->timer);
>> +	list_del(&evt->queue);
>> +	ibmvfc_trc_end(evt);
>> +	spin_unlock_irqrestore(vhost->host->host_lock, flags);
>> +	evt->done(evt);
>> +}
>> +
>> +static struct ibmvfc_crq *ibmvfc_next_scrq(struct ibmvfc_sub_queue *scrq)
>> +{
>> +	struct ibmvfc_crq *crq;
>> +
>> +	crq = &scrq->msgs[scrq->cur].crq;
>> +	if (crq->valid & 0x80) {
>> +		if (++scrq->cur == scrq->size)
> 
> You are incrementing the cur pointer without any locks held. Although
> unlikely, could you also be in ibmvfc_reset_crq in another thread?
> If so, you'd have a subtle race condition here where the cur pointer could
> be read, then ibmvfc_reset_crq writes it to zero, then this thread
> writes it to a non zero value, which would then cause you to be out of
> sync with the VIOS as to where the cur pointer is.

Oof, yeah I was previously holding the lock the whole time, but switched it up
once I realized I can't complete a scsi command with the lock held, and got a
little too loose with it.

-Tyrel
> 
>> +			scrq->cur = 0;
>> +		rmb();
>> +	} else
>> +		crq = NULL;
>> +
>> +	return crq;
>> +}
>> +
> 
> 
>
diff mbox series

Patch

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 97f00fefa809..e9da3f60c793 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -3381,6 +3381,83 @@  static int ibmvfc_toggle_scrq_irq(struct ibmvfc_sub_queue *scrq, int enable)
 	return rc;
 }
 
+static void ibmvfc_handle_scrq(struct ibmvfc_crq *crq, struct ibmvfc_host *vhost)
+{
+	struct ibmvfc_event *evt = (struct ibmvfc_event *)be64_to_cpu(crq->ioba);
+	unsigned long flags;
+
+	switch (crq->valid) {
+	case IBMVFC_CRQ_CMD_RSP:
+		break;
+	case IBMVFC_CRQ_XPORT_EVENT:
+		return;
+	default:
+		dev_err(vhost->dev, "Got and invalid message type 0x%02x\n", crq->valid);
+		return;
+	}
+
+	/* The only kind of payload CRQs we should get are responses to
+	 * things we send. Make sure this response is to something we
+	 * actually sent
+	 */
+	if (unlikely(!ibmvfc_valid_event(&vhost->pool, evt))) {
+		dev_err(vhost->dev, "Returned correlation_token 0x%08llx is invalid!\n",
+			crq->ioba);
+		return;
+	}
+
+	if (unlikely(atomic_read(&evt->free))) {
+		dev_err(vhost->dev, "Received duplicate correlation_token 0x%08llx!\n",
+			crq->ioba);
+		return;
+	}
+
+	spin_lock_irqsave(vhost->host->host_lock, flags);
+	del_timer(&evt->timer);
+	list_del(&evt->queue);
+	ibmvfc_trc_end(evt);
+	spin_unlock_irqrestore(vhost->host->host_lock, flags);
+	evt->done(evt);
+}
+
+static struct ibmvfc_crq *ibmvfc_next_scrq(struct ibmvfc_sub_queue *scrq)
+{
+	struct ibmvfc_crq *crq;
+
+	crq = &scrq->msgs[scrq->cur].crq;
+	if (crq->valid & 0x80) {
+		if (++scrq->cur == scrq->size)
+			scrq->cur = 0;
+		rmb();
+	} else
+		crq = NULL;
+
+	return crq;
+}
+
+static void ibmvfc_drain_sub_crq(struct ibmvfc_sub_queue *scrq)
+{
+	struct ibmvfc_crq *crq;
+	int done = 0;
+
+	while (!done) {
+		while ((crq = ibmvfc_next_scrq(scrq)) != NULL) {
+			ibmvfc_handle_scrq(crq, scrq->vhost);
+			crq->valid = 0;
+			wmb();
+		}
+
+		ibmvfc_toggle_scrq_irq(scrq, 1);
+		if ((crq = ibmvfc_next_scrq(scrq)) != NULL) {
+			ibmvfc_toggle_scrq_irq(scrq, 0);
+			ibmvfc_handle_scrq(crq, scrq->vhost);
+			crq->valid = 0;
+			wmb();
+		} else
+			done = 1;
+	}
+}
+
 /**
  * ibmvfc_init_tgt - Set the next init job step for the target
  * @tgt:		ibmvfc target struct