mailbox: fix completion order for blocking requests

Message ID 1490809381-28869-1-git-send-email-jaswinder.singh@linaro.org
State New
Headers show

Commit Message

Jassi Brar March 29, 2017, 5:43 p.m.
Currently two threads, wait on blocking requests, could wake up for
completion of request of each other as ...

        Thread#1(T1)               Thread#2(T2)
     mbox_send_message           mbox_send_message
            |                           |
            V                           |
        add_to_rbuf(M1)                 V
            |                     add_to_rbuf(M2)
            |                           |
            |                           V
            V                      msg_submit(picks M1)
        msg_submit                      |
            |                           V
            V                   wait_for_completion(on M2)
     wait_for_completion(on M1)         |  (1st in waitQ)
            |   (2nd in waitQ)          V
            V                   wake_up(on completion of M1)<--incorrect

 Fix this situaion by assigning completion structures to each queued
request, so that the threads could wait on their own completions.

Reported-by: Alexey Klimov <alexey.klimov@arm.com>
Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>

---
 drivers/mailbox/mailbox.c          | 15 +++++++++++----
 drivers/mailbox/omap-mailbox.c     |  2 +-
 drivers/mailbox/pcc.c              |  2 +-
 include/linux/mailbox_controller.h |  6 ++++--
 4 files changed, 17 insertions(+), 8 deletions(-)

-- 
2.7.4

Comments

Sudeep Holla March 29, 2017, 6:01 p.m. | #1
On 29/03/17 18:43, Jassi Brar wrote:
> Currently two threads, wait on blocking requests, could wake up for

> completion of request of each other as ...

> 

>         Thread#1(T1)               Thread#2(T2)

>      mbox_send_message           mbox_send_message

>             |                           |

>             V                           |

>         add_to_rbuf(M1)                 V

>             |                     add_to_rbuf(M2)

>             |                           |

>             |                           V

>             V                      msg_submit(picks M1)

>         msg_submit                      |

>             |                           V

>             V                   wait_for_completion(on M2)

>      wait_for_completion(on M1)         |  (1st in waitQ)

>             |   (2nd in waitQ)          V

>             V                   wake_up(on completion of M1)<--incorrect

> 

>  Fix this situaion by assigning completion structures to each queued

> request, so that the threads could wait on their own completions.

> 


Alexey came up with exact similar solution. I didn't like:

1. the static array just bloats the structure with equal no. of
   completion which may be useless for !TXDONE_BY_POLL

2. We have client drivers already doing something similar. I wanted
   to fix/move those along with this fix. Or at-least see the feasibiliy

> Reported-by: Alexey Klimov <alexey.klimov@arm.com>

> Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>

> ---

>  drivers/mailbox/mailbox.c          | 15 +++++++++++----

>  drivers/mailbox/omap-mailbox.c     |  2 +-

>  drivers/mailbox/pcc.c              |  2 +-

>  include/linux/mailbox_controller.h |  6 ++++--

>  4 files changed, 17 insertions(+), 8 deletions(-)

> 

> diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c

> index 9dfbf7e..e06c50c 100644

> --- a/drivers/mailbox/mailbox.c

> +++ b/drivers/mailbox/mailbox.c

> @@ -41,6 +41,7 @@ static int add_to_rbuf(struct mbox_chan *chan, void *mssg)

>  

>  	idx = chan->msg_free;

>  	chan->msg_data[idx] = mssg;

> +	init_completion(&chan->tx_cmpl[idx]);


reinit would be better.

>  	chan->msg_count++;

>  

>  	if (idx == MBOX_TX_QUEUE_LEN - 1)

> @@ -73,6 +74,7 @@ static void msg_submit(struct mbox_chan *chan)

>  		idx += MBOX_TX_QUEUE_LEN - count;

>  

>  	data = chan->msg_data[idx];

> +	chan->tx_complete = &chan->tx_cmpl[idx];

>  

>  	if (chan->cl->tx_prepare)

>  		chan->cl->tx_prepare(chan->cl, data);

> @@ -81,7 +83,8 @@ static void msg_submit(struct mbox_chan *chan)

>  	if (!err) {

>  		chan->active_req = data;

>  		chan->msg_count--;

> -	}

> +	} else

> +		chan->tx_complete = NULL;

>  exit:

>  	spin_unlock_irqrestore(&chan->lock, flags);

>  

> @@ -92,12 +95,15 @@ static void msg_submit(struct mbox_chan *chan)

>  

>  static void tx_tick(struct mbox_chan *chan, int r)

>  {

> +	struct completion *tx_complete;

>  	unsigned long flags;

>  	void *mssg;

>  

>  	spin_lock_irqsave(&chan->lock, flags);

>  	mssg = chan->active_req;

> +	tx_complete = chan->tx_complete;

>  	chan->active_req = NULL;

> +	chan->tx_complete = NULL;

>  	spin_unlock_irqrestore(&chan->lock, flags);

>  

>  	/* Submit next message */

> @@ -111,7 +117,7 @@ static void tx_tick(struct mbox_chan *chan, int r)

>  		chan->cl->tx_done(chan->cl, mssg, r);

>  

>  	if (r != -ETIME && chan->cl->tx_block)

> -		complete(&chan->tx_complete);

> +		complete(tx_complete);

>  }

>  

>  static enum hrtimer_restart txdone_hrtimer(struct hrtimer *hrtimer)

> @@ -272,7 +278,7 @@ int mbox_send_message(struct mbox_chan *chan, void *mssg)

>  		else

>  			wait = msecs_to_jiffies(chan->cl->tx_tout);

>  

> -		ret = wait_for_completion_timeout(&chan->tx_complete, wait);

> +		ret = wait_for_completion_timeout(&chan->tx_cmpl[t], wait);

>  		if (ret == 0) {

>  			t = -ETIME;

>  			tx_tick(chan, t);

> @@ -348,7 +354,7 @@ struct mbox_chan *mbox_request_channel(struct mbox_client *cl, int index)

>  	chan->msg_count = 0;

>  	chan->active_req = NULL;

>  	chan->cl = cl;

> -	init_completion(&chan->tx_complete);

> +	chan->tx_complete = NULL;

>  

>  	if (chan->txdone_method	== TXDONE_BY_POLL && cl->knows_txdone)

>  		chan->txdone_method |= TXDONE_BY_ACK;

> @@ -414,6 +420,7 @@ void mbox_free_channel(struct mbox_chan *chan)

>  	spin_lock_irqsave(&chan->lock, flags);

>  	chan->cl = NULL;

>  	chan->active_req = NULL;

> +	chan->tx_complete = NULL;

>  	if (chan->txdone_method == (TXDONE_BY_POLL | TXDONE_BY_ACK))

>  		chan->txdone_method = TXDONE_BY_POLL;

>  

> diff --git a/drivers/mailbox/omap-mailbox.c b/drivers/mailbox/omap-mailbox.c

> index c5e8b9c..99b0841 100644

> --- a/drivers/mailbox/omap-mailbox.c

> +++ b/drivers/mailbox/omap-mailbox.c

> @@ -449,7 +449,7 @@ struct mbox_chan *omap_mbox_request_channel(struct mbox_client *cl,

>  	chan->msg_count = 0;

>  	chan->active_req = NULL;

>  	chan->cl = cl;

> -	init_completion(&chan->tx_complete);

> +	chan->tx_complete = NULL;

>  	spin_unlock_irqrestore(&chan->lock, flags);

>  

>  	ret = chan->mbox->ops->startup(chan);

> diff --git a/drivers/mailbox/pcc.c b/drivers/mailbox/pcc.c

> index dd9ecd35..b26cc9c 100644

> --- a/drivers/mailbox/pcc.c

> +++ b/drivers/mailbox/pcc.c

> @@ -263,7 +263,7 @@ struct mbox_chan *pcc_mbox_request_channel(struct mbox_client *cl,

>  	chan->msg_count = 0;

>  	chan->active_req = NULL;

>  	chan->cl = cl;

> -	init_completion(&chan->tx_complete);

> +	chan->tx_complete = NULL;

>  

>  	if (chan->txdone_method == TXDONE_BY_POLL && cl->knows_txdone)

>  		chan->txdone_method |= TXDONE_BY_ACK;

> diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h

> index 74deadb..aac8659 100644

> --- a/include/linux/mailbox_controller.h

> +++ b/include/linux/mailbox_controller.h

> @@ -106,11 +106,12 @@ struct mbox_controller {

>   * @mbox:		Pointer to the parent/provider of this channel

>   * @txdone_method:	Way to detect TXDone chosen by the API

>   * @cl:			Pointer to the current owner of this channel

> - * @tx_complete:	Transmission completion

> + * @tx_complete:	Pointer to current transmission completion

>   * @active_req:		Currently active request hook

>   * @msg_count:		No. of mssg currently queued

>   * @msg_free:		Index of next available mssg slot

>   * @msg_data:		Hook for data packet

> + * @tx_cmpl:		Per-message completion structure

>   * @lock:		Serialise access to the channel

>   * @con_priv:		Hook for controller driver to attach private data

>   */

> @@ -118,10 +119,11 @@ struct mbox_chan {

>  	struct mbox_controller *mbox;

>  	unsigned txdone_method;

>  	struct mbox_client *cl;

> -	struct completion tx_complete;

> +	struct completion *tx_complete;

>  	void *active_req;

>  	unsigned msg_count, msg_free;

>  	void *msg_data[MBOX_TX_QUEUE_LEN];

> +	struct completion tx_cmpl[MBOX_TX_QUEUE_LEN];


May be better to encapsulate msg_data and tx_cmpl into structure so
that we just have one pointer to active_req and need not track
corresponding *tx_complete

-- 
Regards,
Sudeep
Alexey Klimov April 6, 2017, 4:58 p.m. | #2
Hi Jassi/Sudeep,

On Wed, Mar 29, 2017 at 07:01:09PM +0100, Sudeep Holla wrote:
> 

> 

> On 29/03/17 18:43, Jassi Brar wrote:

> > Currently two threads, wait on blocking requests, could wake up for

> > completion of request of each other as ...

> > 

> >         Thread#1(T1)               Thread#2(T2)

> >      mbox_send_message           mbox_send_message

> >             |                           |

> >             V                           |

> >         add_to_rbuf(M1)                 V

> >             |                     add_to_rbuf(M2)

> >             |                           |

> >             |                           V

> >             V                      msg_submit(picks M1)

> >         msg_submit                      |

> >             |                           V

> >             V                   wait_for_completion(on M2)

> >      wait_for_completion(on M1)         |  (1st in waitQ)

> >             |   (2nd in waitQ)          V

> >             V                   wake_up(on completion of M1)<--incorrect

> > 

> >  Fix this situaion by assigning completion structures to each queued

> > request, so that the threads could wait on their own completions.

> > 

> 

> Alexey came up with exact similar solution. I didn't like:


Sorry for delay.

Let me attach it, just in case. It's inserted in the of the email at [1].
It has some issues with naming of structure maybe and thing that
Sudeep pointed out.
-1 is used for active request field which doesn't look good too.
 
> 1. the static array just bloats the structure with equal no. of

>    completion which may be useless for !TXDONE_BY_POLL

> 

> 2. We have client drivers already doing something similar. I wanted

>    to fix/move those along with this fix. Or at-least see the feasibiliy

> 

> > Reported-by: Alexey Klimov <alexey.klimov@arm.com>

> > Signed-off-by: Jassi Brar <jaswinder.singh@linaro.org>

> > ---

> >  drivers/mailbox/mailbox.c          | 15 +++++++++++----

> >  drivers/mailbox/omap-mailbox.c     |  2 +-

> >  drivers/mailbox/pcc.c              |  2 +-

> >  include/linux/mailbox_controller.h |  6 ++++--

> >  4 files changed, 17 insertions(+), 8 deletions(-)

> > 

> > diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c

> > index 9dfbf7e..e06c50c 100644

> > --- a/drivers/mailbox/mailbox.c

> > +++ b/drivers/mailbox/mailbox.c

> > @@ -41,6 +41,7 @@ static int add_to_rbuf(struct mbox_chan *chan, void *mssg)

> >  

> >  	idx = chan->msg_free;

> >  	chan->msg_data[idx] = mssg;

> > +	init_completion(&chan->tx_cmpl[idx]);

> 

> reinit would be better.


Agree.
Also, reinit_completion can be moved to mbox_send_message() under
"if" that checks if it's a blocking request or not.
 
> >  	chan->msg_count++;

> >  

> >  	if (idx == MBOX_TX_QUEUE_LEN - 1)

> > @@ -73,6 +74,7 @@ static void msg_submit(struct mbox_chan *chan)

> >  		idx += MBOX_TX_QUEUE_LEN - count;

> >  

> >  	data = chan->msg_data[idx];

> > +	chan->tx_complete = &chan->tx_cmpl[idx];

> >  

> >  	if (chan->cl->tx_prepare)

> >  		chan->cl->tx_prepare(chan->cl, data);

> > @@ -81,7 +83,8 @@ static void msg_submit(struct mbox_chan *chan)

> >  	if (!err) {

> >  		chan->active_req = data;

> >  		chan->msg_count--;

> > -	}

> > +	} else

> > +		chan->tx_complete = NULL;

> >  exit:

> >  	spin_unlock_irqrestore(&chan->lock, flags);

> >  

> > @@ -92,12 +95,15 @@ static void msg_submit(struct mbox_chan *chan)

> >  

> >  static void tx_tick(struct mbox_chan *chan, int r)

> >  {

> > +	struct completion *tx_complete;

> >  	unsigned long flags;

> >  	void *mssg;

> >  

> >  	spin_lock_irqsave(&chan->lock, flags);

> >  	mssg = chan->active_req;

> > +	tx_complete = chan->tx_complete;

> >  	chan->active_req = NULL;

> > +	chan->tx_complete = NULL;

> >  	spin_unlock_irqrestore(&chan->lock, flags);

> >  

> >  	/* Submit next message */

> > @@ -111,7 +117,7 @@ static void tx_tick(struct mbox_chan *chan, int r)

> >  		chan->cl->tx_done(chan->cl, mssg, r);

> >  

> >  	if (r != -ETIME && chan->cl->tx_block)

> > -		complete(&chan->tx_complete);

> > +		complete(tx_complete);

> >  }

> >  

> >  static enum hrtimer_restart txdone_hrtimer(struct hrtimer *hrtimer)

> > @@ -272,7 +278,7 @@ int mbox_send_message(struct mbox_chan *chan, void *mssg)

> >  		else

> >  			wait = msecs_to_jiffies(chan->cl->tx_tout);

> >  

> > -		ret = wait_for_completion_timeout(&chan->tx_complete, wait);

> > +		ret = wait_for_completion_timeout(&chan->tx_cmpl[t], wait);

> >  		if (ret == 0) {

> >  			t = -ETIME;

> >  			tx_tick(chan, t);

> > @@ -348,7 +354,7 @@ struct mbox_chan *mbox_request_channel(struct mbox_client *cl, int index)

> >  	chan->msg_count = 0;

> >  	chan->active_req = NULL;

> >  	chan->cl = cl;

> > -	init_completion(&chan->tx_complete);

> > +	chan->tx_complete = NULL;

> >  

> >  	if (chan->txdone_method	== TXDONE_BY_POLL && cl->knows_txdone)

> >  		chan->txdone_method |= TXDONE_BY_ACK;

> > @@ -414,6 +420,7 @@ void mbox_free_channel(struct mbox_chan *chan)

> >  	spin_lock_irqsave(&chan->lock, flags);

> >  	chan->cl = NULL;

> >  	chan->active_req = NULL;

> > +	chan->tx_complete = NULL;

> >  	if (chan->txdone_method == (TXDONE_BY_POLL | TXDONE_BY_ACK))

> >  		chan->txdone_method = TXDONE_BY_POLL;

> >  

> > diff --git a/drivers/mailbox/omap-mailbox.c b/drivers/mailbox/omap-mailbox.c

> > index c5e8b9c..99b0841 100644

> > --- a/drivers/mailbox/omap-mailbox.c

> > +++ b/drivers/mailbox/omap-mailbox.c

> > @@ -449,7 +449,7 @@ struct mbox_chan *omap_mbox_request_channel(struct mbox_client *cl,

> >  	chan->msg_count = 0;

> >  	chan->active_req = NULL;

> >  	chan->cl = cl;

> > -	init_completion(&chan->tx_complete);

> > +	chan->tx_complete = NULL;

> >  	spin_unlock_irqrestore(&chan->lock, flags);

> >  

> >  	ret = chan->mbox->ops->startup(chan);

> > diff --git a/drivers/mailbox/pcc.c b/drivers/mailbox/pcc.c

> > index dd9ecd35..b26cc9c 100644

> > --- a/drivers/mailbox/pcc.c

> > +++ b/drivers/mailbox/pcc.c

> > @@ -263,7 +263,7 @@ struct mbox_chan *pcc_mbox_request_channel(struct mbox_client *cl,

> >  	chan->msg_count = 0;

> >  	chan->active_req = NULL;

> >  	chan->cl = cl;

> > -	init_completion(&chan->tx_complete);

> > +	chan->tx_complete = NULL;

> >  

> >  	if (chan->txdone_method == TXDONE_BY_POLL && cl->knows_txdone)

> >  		chan->txdone_method |= TXDONE_BY_ACK;

> > diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h

> > index 74deadb..aac8659 100644

> > --- a/include/linux/mailbox_controller.h

> > +++ b/include/linux/mailbox_controller.h

> > @@ -106,11 +106,12 @@ struct mbox_controller {

> >   * @mbox:		Pointer to the parent/provider of this channel

> >   * @txdone_method:	Way to detect TXDone chosen by the API

> >   * @cl:			Pointer to the current owner of this channel

> > - * @tx_complete:	Transmission completion

> > + * @tx_complete:	Pointer to current transmission completion

> >   * @active_req:		Currently active request hook

> >   * @msg_count:		No. of mssg currently queued

> >   * @msg_free:		Index of next available mssg slot

> >   * @msg_data:		Hook for data packet

> > + * @tx_cmpl:		Per-message completion structure

> >   * @lock:		Serialise access to the channel

> >   * @con_priv:		Hook for controller driver to attach private data

> >   */

> > @@ -118,10 +119,11 @@ struct mbox_chan {

> >  	struct mbox_controller *mbox;

> >  	unsigned txdone_method;

> >  	struct mbox_client *cl;

> > -	struct completion tx_complete;

> > +	struct completion *tx_complete;

> >  	void *active_req;

> >  	unsigned msg_count, msg_free;

> >  	void *msg_data[MBOX_TX_QUEUE_LEN];

> > +	struct completion tx_cmpl[MBOX_TX_QUEUE_LEN];

> 

> May be better to encapsulate msg_data and tx_cmpl into structure so

> that we just have one pointer to active_req and need not track

> corresponding *tx_complete

> 

> -- 

> Regards,

> Sudeep



Best regards,
Alexey

[1]:

From dc702c9ba7c92c9204385e7d99d586a5afea936a Mon Sep 17 00:00:00 2001
From: Alexey Klimov <alexey.klimov@arm.com>

Date: Thu, 6 Apr 2017 13:57:02 +0100
Subject: [RFC][PATCH] mailbox: per-channel arrays with msg data and completion
 structures

When a mailbox client doesn't serialize sending of the message itself,
and asks mailbox framework to block on mbox_send_message(), one
completion structure per channel is not enough. Client can make a few
mbox_send_message() calls at the same time, and there is no guaranteed
order of going to sleep on completion.

If mailbox controller acks a message transfer, then tx_tick() wakes up
the first thread that waits on completion.
If mailbox controller doesn't ack the transfer and timeout happens, then
tx_tick() calls complete, and the next caller trying to sleep on
completion wakes up immediately.

This patch fixes this by changing completion structures to be inserted
into an array that contains a) pointer to data provided by client and
b) the completion structure. Thus active_req field tracks the index of
the current running request that was submitted to mailbox controller.

Signed-off-by: Alexey Klimov <alexey.klimov@arm.com>

---
 drivers/mailbox/mailbox.c          | 40 +++++++++++++++++++++++---------------
 drivers/mailbox/pcc.c              | 10 +++++++---
 include/linux/mailbox_controller.h | 24 +++++++++++++++++------
 3 files changed, 49 insertions(+), 25 deletions(-)

-- 
1.9.1diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c
index 59b7221..d90e855 100644
--- a/drivers/mailbox/mailbox.c
+++ b/drivers/mailbox/mailbox.c
@@ -40,7 +40,7 @@ static int add_to_rbuf(struct mbox_chan *chan, void *mssg)
 	}
 
 	idx = chan->msg_free;
-	chan->msg_data[idx] = mssg;
+	chan->msg_data[idx].msg_data = mssg;
 	chan->msg_count++;
 
 	if (idx == MBOX_TX_QUEUE_LEN - 1)
@@ -62,24 +62,25 @@ static void msg_submit(struct mbox_chan *chan)
 
 	spin_lock_irqsave(&chan->lock, flags);
 
-	if (!chan->msg_count || chan->active_req)
+	if (!chan->msg_count || chan->active_req >= 0)
 		goto exit;
 
 	count = chan->msg_count;
 	idx = chan->msg_free;
+
 	if (idx >= count)
 		idx -= count;
 	else
 		idx += MBOX_TX_QUEUE_LEN - count;
 
-	data = chan->msg_data[idx];
+	data = chan->msg_data[idx].msg_data;
 
 	if (chan->cl->tx_prepare)
 		chan->cl->tx_prepare(chan->cl, data);
 	/* Try to submit a message to the MBOX controller */
 	err = chan->mbox->ops->send_data(chan, data);
 	if (!err) {
-		chan->active_req = data;
+		chan->active_req = idx;
 		chan->msg_count--;
 	}
 exit:
@@ -93,11 +94,15 @@ static void msg_submit(struct mbox_chan *chan)
 static void tx_tick(struct mbox_chan *chan, int r)
 {
 	unsigned long flags;
-	void *mssg;
+	void *mssg = NULL;
+	int idx = -1;
 
 	spin_lock_irqsave(&chan->lock, flags);
-	mssg = chan->active_req;
-	chan->active_req = NULL;
+	if (chan->active_req >= 0) {
+		mssg = chan->msg_data[chan->active_req].msg_data;
+		idx = chan->active_req;
+		chan->active_req = -1;
+	}
 	spin_unlock_irqrestore(&chan->lock, flags);
 
 	/* Submit next message */
@@ -107,8 +112,9 @@ static void tx_tick(struct mbox_chan *chan, int r)
 	if (mssg && chan->cl->tx_done)
 		chan->cl->tx_done(chan->cl, mssg, r);
 
-	if (chan->cl->tx_block)
-		complete(&chan->tx_complete);
+	if (chan->cl->tx_block && idx >= 0
+			&& !completion_done(&chan->msg_data[idx].tx_complete))
+		complete(&chan->msg_data[idx].tx_complete);
 }
 
 static enum hrtimer_restart txdone_hrtimer(struct hrtimer *hrtimer)
@@ -121,7 +127,7 @@ static enum hrtimer_restart txdone_hrtimer(struct hrtimer *hrtimer)
 	for (i = 0; i < mbox->num_chans; i++) {
 		struct mbox_chan *chan = &mbox->chans[i];
 
-		if (chan->active_req && chan->cl) {
+		if ((chan->active_req >= 0) && chan->cl) {
 			txdone = chan->mbox->ops->last_tx_done(chan);
 			if (txdone)
 				tx_tick(chan, 0);
@@ -260,7 +266,7 @@ int mbox_send_message(struct mbox_chan *chan, void *mssg)
 
 	msg_submit(chan);
 
-	if (chan->cl->tx_block && chan->active_req) {
+	if (chan->cl->tx_block) {
 		unsigned long wait;
 		int ret;
 
@@ -269,7 +275,8 @@ int mbox_send_message(struct mbox_chan *chan, void *mssg)
 		else
 			wait = msecs_to_jiffies(chan->cl->tx_tout);
 
-		ret = wait_for_completion_timeout(&chan->tx_complete, wait);
+		reinit_completion(&chan->msg_data[t].tx_complete);
+		ret = wait_for_completion_timeout(&chan->msg_data[t].tx_complete, wait);
 		if (ret == 0) {
 			t = -EIO;
 			tx_tick(chan, -EIO);
@@ -304,7 +311,7 @@ struct mbox_chan *mbox_request_channel(struct mbox_client *cl, int index)
 	struct of_phandle_args spec;
 	struct mbox_chan *chan;
 	unsigned long flags;
-	int ret;
+	int ret, i;
 
 	if (!dev || !dev->of_node) {
 		pr_debug("%s: No owner device node\n", __func__);
@@ -343,9 +350,10 @@ struct mbox_chan *mbox_request_channel(struct mbox_client *cl, int index)
 	spin_lock_irqsave(&chan->lock, flags);
 	chan->msg_free = 0;
 	chan->msg_count = 0;
-	chan->active_req = NULL;
+	chan->active_req = -1;
 	chan->cl = cl;
-	init_completion(&chan->tx_complete);
+	for (i = 0; i < MBOX_TX_QUEUE_LEN; i++)
+		init_completion(&chan->msg_data[i].tx_complete);
 
 	if (chan->txdone_method	== TXDONE_BY_POLL && cl->knows_txdone)
 		chan->txdone_method |= TXDONE_BY_ACK;
@@ -410,7 +418,7 @@ void mbox_free_channel(struct mbox_chan *chan)
 	/* The queued TX requests are simply aborted, no callbacks are made */
 	spin_lock_irqsave(&chan->lock, flags);
 	chan->cl = NULL;
-	chan->active_req = NULL;
+	chan->active_req = -1;
 	if (chan->txdone_method == (TXDONE_BY_POLL | TXDONE_BY_ACK))
 		chan->txdone_method = TXDONE_BY_POLL;
 
diff --git a/drivers/mailbox/pcc.c b/drivers/mailbox/pcc.c
index dd9ecd35..39481ec 100644
--- a/drivers/mailbox/pcc.c
+++ b/drivers/mailbox/pcc.c
@@ -243,6 +243,7 @@ struct mbox_chan *pcc_mbox_request_channel(struct mbox_client *cl,
 	struct device *dev = pcc_mbox_ctrl.dev;
 	struct mbox_chan *chan;
 	unsigned long flags;
+	int i;
 
 	/*
 	 * Each PCC Subspace is a Mailbox Channel.
@@ -261,9 +262,11 @@ struct mbox_chan *pcc_mbox_request_channel(struct mbox_client *cl,
 	spin_lock_irqsave(&chan->lock, flags);
 	chan->msg_free = 0;
 	chan->msg_count = 0;
-	chan->active_req = NULL;
+	chan->active_req = -1;
 	chan->cl = cl;
-	init_completion(&chan->tx_complete);
+
+	for (i = 0; i < MBOX_TX_QUEUE_LEN; i++)
+		init_completion(&chan->msg_data[i].tx_complete);
 
 	if (chan->txdone_method == TXDONE_BY_POLL && cl->knows_txdone)
 		chan->txdone_method |= TXDONE_BY_ACK;
@@ -311,7 +314,8 @@ void pcc_mbox_free_channel(struct mbox_chan *chan)
 
 	spin_lock_irqsave(&chan->lock, flags);
 	chan->cl = NULL;
-	chan->active_req = NULL;
+	chan->active_req = -1;
+
 	if (chan->txdone_method == (TXDONE_BY_POLL | TXDONE_BY_ACK))
 		chan->txdone_method = TXDONE_BY_POLL;
 
diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h
index 74deadb..52dd45e 100644
--- a/include/linux/mailbox_controller.h
+++ b/include/linux/mailbox_controller.h
@@ -101,29 +101,41 @@ struct mbox_controller {
  */
 #define MBOX_TX_QUEUE_LEN	20
 
+
+/**
+ * struct msg_data - elements of array associated with queued requests
+ * @msg_data:		Hook for data packet
+ * @tx_complete:	Transmission completion
+ */
+
+struct msg_data {
+	void *msg_data;
+	struct completion tx_complete;
+};
+
 /**
  * struct mbox_chan - s/w representation of a communication chan
  * @mbox:		Pointer to the parent/provider of this channel
  * @txdone_method:	Way to detect TXDone chosen by the API
  * @cl:			Pointer to the current owner of this channel
- * @tx_complete:	Transmission completion
- * @active_req:		Currently active request hook
+ * @active_req:		Index of currently active slot
  * @msg_count:		No. of mssg currently queued
  * @msg_free:		Index of next available mssg slot
- * @msg_data:		Hook for data packet
  * @lock:		Serialise access to the channel
  * @con_priv:		Hook for controller driver to attach private data
+ * @msg_data:		Array containing indexed data associated with queued
+ *			requests
  */
+
 struct mbox_chan {
 	struct mbox_controller *mbox;
 	unsigned txdone_method;
 	struct mbox_client *cl;
-	struct completion tx_complete;
-	void *active_req;
+	int active_req;
 	unsigned msg_count, msg_free;
-	void *msg_data[MBOX_TX_QUEUE_LEN];
 	spinlock_t lock; /* Serialise access to the channel */
 	void *con_priv;
+	struct msg_data msg_data[MBOX_TX_QUEUE_LEN];
 };
 
 int mbox_controller_register(struct mbox_controller *mbox); /* can sleep */

Jassi Brar April 6, 2017, 5:15 p.m. | #3
On 6 April 2017 at 22:28, Alexey Klimov <alexey.klimov@arm.com> wrote:
> Hi Jassi/Sudeep,

>

> On Wed, Mar 29, 2017 at 07:01:09PM +0100, Sudeep Holla wrote:

>>

>>

>> On 29/03/17 18:43, Jassi Brar wrote:

...

>> > diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c

>> > index 9dfbf7e..e06c50c 100644

>> > --- a/drivers/mailbox/mailbox.c

>> > +++ b/drivers/mailbox/mailbox.c

>> > @@ -41,6 +41,7 @@ static int add_to_rbuf(struct mbox_chan *chan, void *mssg)

>> >

>> >     idx = chan->msg_free;

>> >     chan->msg_data[idx] = mssg;

>> > +   init_completion(&chan->tx_cmpl[idx]);

>>

>> reinit would be better.

>

Of course.

....
> From: Alexey Klimov <alexey.klimov@arm.com>

> Date: Thu, 6 Apr 2017 13:57:02 +0100

> Subject: [RFC][PATCH] mailbox: per-channel arrays with msg data and completion

>  structures

>

> When a mailbox client doesn't serialize sending of the message itself,

> and asks mailbox framework to block on mbox_send_message(), one

> completion structure per channel is not enough. Client can make a few

> mbox_send_message() calls at the same time, and there is no guaranteed

> order of going to sleep on completion.

>

> If mailbox controller acks a message transfer, then tx_tick() wakes up

> the first thread that waits on completion.

> If mailbox controller doesn't ack the transfer and timeout happens, then

> tx_tick() calls complete, and the next caller trying to sleep on

> completion wakes up immediately.

>

> This patch fixes this by changing completion structures to be inserted

> into an array that contains a) pointer to data provided by client and

> b) the completion structure. Thus active_req field tracks the index of

> the current running request that was submitted to mailbox controller.

>

> Signed-off-by: Alexey Klimov <alexey.klimov@arm.com>

> ---

>  drivers/mailbox/mailbox.c          | 40 +++++++++++++++++++++++---------------

>  drivers/mailbox/pcc.c              | 10 +++++++---

>  include/linux/mailbox_controller.h | 24 +++++++++++++++++------

...
>  3 files changed, 49 insertions(+), 25 deletions(-)

>

 Versus   4 files changed, 17 insertions(+), 8 deletions(-)

I think we should just keep it simpler if it works just as fine.

Thanks.
Alexey Klimov April 11, 2017, 12:45 p.m. | #4
On Thu, Apr 06, 2017 at 10:45:26PM +0530, Jassi Brar wrote:
> On 6 April 2017 at 22:28, Alexey Klimov <alexey.klimov@arm.com> wrote:

> > Hi Jassi/Sudeep,

> >

> > On Wed, Mar 29, 2017 at 07:01:09PM +0100, Sudeep Holla wrote:

> >>

> >>

> >> On 29/03/17 18:43, Jassi Brar wrote:

> ...

> 

> >> > diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c

> >> > index 9dfbf7e..e06c50c 100644

> >> > --- a/drivers/mailbox/mailbox.c

> >> > +++ b/drivers/mailbox/mailbox.c

> >> > @@ -41,6 +41,7 @@ static int add_to_rbuf(struct mbox_chan *chan, void *mssg)

> >> >

> >> >     idx = chan->msg_free;

> >> >     chan->msg_data[idx] = mssg;

> >> > +   init_completion(&chan->tx_cmpl[idx]);

> >>

> >> reinit would be better.

> >

> Of course.

> 

> ....

> > From: Alexey Klimov <alexey.klimov@arm.com>

> > Date: Thu, 6 Apr 2017 13:57:02 +0100

> > Subject: [RFC][PATCH] mailbox: per-channel arrays with msg data and completion

> >  structures

> >

> > When a mailbox client doesn't serialize sending of the message itself,

> > and asks mailbox framework to block on mbox_send_message(), one

> > completion structure per channel is not enough. Client can make a few

> > mbox_send_message() calls at the same time, and there is no guaranteed

> > order of going to sleep on completion.

> >

> > If mailbox controller acks a message transfer, then tx_tick() wakes up

> > the first thread that waits on completion.

> > If mailbox controller doesn't ack the transfer and timeout happens, then

> > tx_tick() calls complete, and the next caller trying to sleep on

> > completion wakes up immediately.

> >

> > This patch fixes this by changing completion structures to be inserted

> > into an array that contains a) pointer to data provided by client and

> > b) the completion structure. Thus active_req field tracks the index of

> > the current running request that was submitted to mailbox controller.

> >

> > Signed-off-by: Alexey Klimov <alexey.klimov@arm.com>

> > ---

> >  drivers/mailbox/mailbox.c          | 40 +++++++++++++++++++++++---------------

> >  drivers/mailbox/pcc.c              | 10 +++++++---

> >  include/linux/mailbox_controller.h | 24 +++++++++++++++++------

> ...

> >  3 files changed, 49 insertions(+), 25 deletions(-)

> >

>  Versus   4 files changed, 17 insertions(+), 8 deletions(-)

> 

> I think we should just keep it simpler if it works just as fine.


Along with this patch you still need at least one patch from Sudeep with subject:
"[PATCH 1/3] mailbox: always wait in mbox_send_message for blocking Tx mode"

Decision to block or not shouldn't depend on racy reading of active_req field.

Best regards,
Alexey Klimov.
Jassi Brar April 23, 2017, 10:03 a.m. | #5
On Tue, Apr 11, 2017 at 6:15 PM, Alexey Klimov <alexey.klimov@arm.com> wrote:
> On Thu, Apr 06, 2017 at 10:45:26PM +0530, Jassi Brar wrote:

>> On 6 April 2017 at 22:28, Alexey Klimov <alexey.klimov@arm.com> wrote:

>> > Hi Jassi/Sudeep,

>> >

>> > On Wed, Mar 29, 2017 at 07:01:09PM +0100, Sudeep Holla wrote:

>> >>

>> >>

>> >> On 29/03/17 18:43, Jassi Brar wrote:

>> ...

>>

>> >> > diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c

>> >> > index 9dfbf7e..e06c50c 100644

>> >> > --- a/drivers/mailbox/mailbox.c

>> >> > +++ b/drivers/mailbox/mailbox.c

>> >> > @@ -41,6 +41,7 @@ static int add_to_rbuf(struct mbox_chan *chan, void *mssg)

>> >> >

>> >> >     idx = chan->msg_free;

>> >> >     chan->msg_data[idx] = mssg;

>> >> > +   init_completion(&chan->tx_cmpl[idx]);

>> >>

>> >> reinit would be better.

>> >

>> Of course.

>>

>> ....

>> > From: Alexey Klimov <alexey.klimov@arm.com>

>> > Date: Thu, 6 Apr 2017 13:57:02 +0100

>> > Subject: [RFC][PATCH] mailbox: per-channel arrays with msg data and completion

>> >  structures

>> >

>> > When a mailbox client doesn't serialize sending of the message itself,

>> > and asks mailbox framework to block on mbox_send_message(), one

>> > completion structure per channel is not enough. Client can make a few

>> > mbox_send_message() calls at the same time, and there is no guaranteed

>> > order of going to sleep on completion.

>> >

>> > If mailbox controller acks a message transfer, then tx_tick() wakes up

>> > the first thread that waits on completion.

>> > If mailbox controller doesn't ack the transfer and timeout happens, then

>> > tx_tick() calls complete, and the next caller trying to sleep on

>> > completion wakes up immediately.

>> >

>> > This patch fixes this by changing completion structures to be inserted

>> > into an array that contains a) pointer to data provided by client and

>> > b) the completion structure. Thus active_req field tracks the index of

>> > the current running request that was submitted to mailbox controller.

>> >

>> > Signed-off-by: Alexey Klimov <alexey.klimov@arm.com>

>> > ---

>> >  drivers/mailbox/mailbox.c          | 40 +++++++++++++++++++++++---------------

>> >  drivers/mailbox/pcc.c              | 10 +++++++---

>> >  include/linux/mailbox_controller.h | 24 +++++++++++++++++------

>> ...

>> >  3 files changed, 49 insertions(+), 25 deletions(-)

>> >

>>  Versus   4 files changed, 17 insertions(+), 8 deletions(-)

>>

>> I think we should just keep it simpler if it works just as fine.

>

> Along with this patch you still need at least one patch from Sudeep with subject:

> "[PATCH 1/3] mailbox: always wait in mbox_send_message for blocking Tx mode"

>

Yes. Just so we are on same page, can you please redo your tests and
see if this and Sudeep's patch-1/3 does the trick?

Thanks

Patch hide | download patch | download mbox

diff --git a/drivers/mailbox/mailbox.c b/drivers/mailbox/mailbox.c
index 9dfbf7e..e06c50c 100644
--- a/drivers/mailbox/mailbox.c
+++ b/drivers/mailbox/mailbox.c
@@ -41,6 +41,7 @@  static int add_to_rbuf(struct mbox_chan *chan, void *mssg)
 
 	idx = chan->msg_free;
 	chan->msg_data[idx] = mssg;
+	init_completion(&chan->tx_cmpl[idx]);
 	chan->msg_count++;
 
 	if (idx == MBOX_TX_QUEUE_LEN - 1)
@@ -73,6 +74,7 @@  static void msg_submit(struct mbox_chan *chan)
 		idx += MBOX_TX_QUEUE_LEN - count;
 
 	data = chan->msg_data[idx];
+	chan->tx_complete = &chan->tx_cmpl[idx];
 
 	if (chan->cl->tx_prepare)
 		chan->cl->tx_prepare(chan->cl, data);
@@ -81,7 +83,8 @@  static void msg_submit(struct mbox_chan *chan)
 	if (!err) {
 		chan->active_req = data;
 		chan->msg_count--;
-	}
+	} else
+		chan->tx_complete = NULL;
 exit:
 	spin_unlock_irqrestore(&chan->lock, flags);
 
@@ -92,12 +95,15 @@  static void msg_submit(struct mbox_chan *chan)
 
 static void tx_tick(struct mbox_chan *chan, int r)
 {
+	struct completion *tx_complete;
 	unsigned long flags;
 	void *mssg;
 
 	spin_lock_irqsave(&chan->lock, flags);
 	mssg = chan->active_req;
+	tx_complete = chan->tx_complete;
 	chan->active_req = NULL;
+	chan->tx_complete = NULL;
 	spin_unlock_irqrestore(&chan->lock, flags);
 
 	/* Submit next message */
@@ -111,7 +117,7 @@  static void tx_tick(struct mbox_chan *chan, int r)
 		chan->cl->tx_done(chan->cl, mssg, r);
 
 	if (r != -ETIME && chan->cl->tx_block)
-		complete(&chan->tx_complete);
+		complete(tx_complete);
 }
 
 static enum hrtimer_restart txdone_hrtimer(struct hrtimer *hrtimer)
@@ -272,7 +278,7 @@  int mbox_send_message(struct mbox_chan *chan, void *mssg)
 		else
 			wait = msecs_to_jiffies(chan->cl->tx_tout);
 
-		ret = wait_for_completion_timeout(&chan->tx_complete, wait);
+		ret = wait_for_completion_timeout(&chan->tx_cmpl[t], wait);
 		if (ret == 0) {
 			t = -ETIME;
 			tx_tick(chan, t);
@@ -348,7 +354,7 @@  struct mbox_chan *mbox_request_channel(struct mbox_client *cl, int index)
 	chan->msg_count = 0;
 	chan->active_req = NULL;
 	chan->cl = cl;
-	init_completion(&chan->tx_complete);
+	chan->tx_complete = NULL;
 
 	if (chan->txdone_method	== TXDONE_BY_POLL && cl->knows_txdone)
 		chan->txdone_method |= TXDONE_BY_ACK;
@@ -414,6 +420,7 @@  void mbox_free_channel(struct mbox_chan *chan)
 	spin_lock_irqsave(&chan->lock, flags);
 	chan->cl = NULL;
 	chan->active_req = NULL;
+	chan->tx_complete = NULL;
 	if (chan->txdone_method == (TXDONE_BY_POLL | TXDONE_BY_ACK))
 		chan->txdone_method = TXDONE_BY_POLL;
 
diff --git a/drivers/mailbox/omap-mailbox.c b/drivers/mailbox/omap-mailbox.c
index c5e8b9c..99b0841 100644
--- a/drivers/mailbox/omap-mailbox.c
+++ b/drivers/mailbox/omap-mailbox.c
@@ -449,7 +449,7 @@  struct mbox_chan *omap_mbox_request_channel(struct mbox_client *cl,
 	chan->msg_count = 0;
 	chan->active_req = NULL;
 	chan->cl = cl;
-	init_completion(&chan->tx_complete);
+	chan->tx_complete = NULL;
 	spin_unlock_irqrestore(&chan->lock, flags);
 
 	ret = chan->mbox->ops->startup(chan);
diff --git a/drivers/mailbox/pcc.c b/drivers/mailbox/pcc.c
index dd9ecd35..b26cc9c 100644
--- a/drivers/mailbox/pcc.c
+++ b/drivers/mailbox/pcc.c
@@ -263,7 +263,7 @@  struct mbox_chan *pcc_mbox_request_channel(struct mbox_client *cl,
 	chan->msg_count = 0;
 	chan->active_req = NULL;
 	chan->cl = cl;
-	init_completion(&chan->tx_complete);
+	chan->tx_complete = NULL;
 
 	if (chan->txdone_method == TXDONE_BY_POLL && cl->knows_txdone)
 		chan->txdone_method |= TXDONE_BY_ACK;
diff --git a/include/linux/mailbox_controller.h b/include/linux/mailbox_controller.h
index 74deadb..aac8659 100644
--- a/include/linux/mailbox_controller.h
+++ b/include/linux/mailbox_controller.h
@@ -106,11 +106,12 @@  struct mbox_controller {
  * @mbox:		Pointer to the parent/provider of this channel
  * @txdone_method:	Way to detect TXDone chosen by the API
  * @cl:			Pointer to the current owner of this channel
- * @tx_complete:	Transmission completion
+ * @tx_complete:	Pointer to current transmission completion
  * @active_req:		Currently active request hook
  * @msg_count:		No. of mssg currently queued
  * @msg_free:		Index of next available mssg slot
  * @msg_data:		Hook for data packet
+ * @tx_cmpl:		Per-message completion structure
  * @lock:		Serialise access to the channel
  * @con_priv:		Hook for controller driver to attach private data
  */
@@ -118,10 +119,11 @@  struct mbox_chan {
 	struct mbox_controller *mbox;
 	unsigned txdone_method;
 	struct mbox_client *cl;
-	struct completion tx_complete;
+	struct completion *tx_complete;
 	void *active_req;
 	unsigned msg_count, msg_free;
 	void *msg_data[MBOX_TX_QUEUE_LEN];
+	struct completion tx_cmpl[MBOX_TX_QUEUE_LEN];
 	spinlock_t lock; /* Serialise access to the channel */
 	void *con_priv;
 };