diff mbox

[v4,10/10] IB/mlx5: Simplify completion into a wait_event

Message ID 1477551554-30349-11-git-send-email-binoy.jayan@linaro.org
State New
Headers show

Commit Message

Binoy Jayan Oct. 27, 2016, 6:59 a.m. UTC
Convert the completion 'mlx5_ib_umr_context:done' to a wait_event as it
just waits for the return value to be filled.

Signed-off-by: Binoy Jayan <binoy.jayan@linaro.org>

---
 drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 +-
 drivers/infiniband/hw/mlx5/mr.c      | 9 +++++----
 include/rdma/ib_verbs.h              | 1 +
 3 files changed, 7 insertions(+), 5 deletions(-)

-- 
The Qualcomm Innovation Center, Inc. is a member of the Code Aurora Forum,
a Linux Foundation Collaborative Project

Comments

Sagi Grimberg Oct. 30, 2016, 9:17 p.m. UTC | #1
On 27/10/16 09:59, Binoy Jayan wrote:
> Convert the completion 'mlx5_ib_umr_context:done' to a wait_event as it

> just waits for the return value to be filled.

>

> Signed-off-by: Binoy Jayan <binoy.jayan@linaro.org>

> ---

>  drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 +-

>  drivers/infiniband/hw/mlx5/mr.c      | 9 +++++----

>  include/rdma/ib_verbs.h              | 1 +

>  3 files changed, 7 insertions(+), 5 deletions(-)

>

> diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h

> index de31b5f..cf496b5 100644

> --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h

> +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h

> @@ -524,7 +524,7 @@ struct mlx5_ib_mw {

>  struct mlx5_ib_umr_context {

>  	struct ib_cqe		cqe;

>  	enum ib_wc_status	status;

> -	struct completion	done;

> +	wait_queue_head_t	wq;

>  };

>

>  struct umr_common {

> diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c

> index dfaf6f6..49ff2af 100644

> --- a/drivers/infiniband/hw/mlx5/mr.c

> +++ b/drivers/infiniband/hw/mlx5/mr.c

> @@ -846,14 +846,14 @@ static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)

>  		container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);

>

>  	context->status = wc->status;

> -	complete(&context->done);

> +	wake_up(&context->wq);

>  }

>

>  static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)

>  {

>  	context->cqe.done = mlx5_ib_umr_done;

> -	context->status = -1;

> -	init_completion(&context->done);

> +	context->status = IB_WC_STATUS_NONE;

> +	init_waitqueue_head(&context->wq);

>  }

>

>  static inline int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,

> @@ -873,7 +873,8 @@ static inline int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,

>  	if (err) {

>  		mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);

>  	} else {

> -		wait_for_completion(&umr_context.done);

> +		wait_event(umr_context.wq,

> +			   umr_context.status != IB_WC_STATUS_NONE);


How is this simpler?


>  enum ib_wc_status {

> +	IB_WC_STATUS_NONE = -1,

>  	IB_WC_SUCCESS,

>  	IB_WC_LOC_LEN_ERR,

>  	IB_WC_LOC_QP_OP_ERR,

>


Huh? Where did this bogus status came from? IMHO, this is polluting
the verbs interface for no good reason at all, sorry.
Leon Romanovsky Nov. 2, 2016, 3:59 p.m. UTC | #2
On Sun, Oct 30, 2016 at 11:17:57PM +0200, Sagi Grimberg wrote:
>

>

> On 27/10/16 09:59, Binoy Jayan wrote:

> >Convert the completion 'mlx5_ib_umr_context:done' to a wait_event as it

> >just waits for the return value to be filled.


On top of Sagi's response, I'm failing to understand why it is needed.
Can you elaborate more about it?

> >

> >Signed-off-by: Binoy Jayan <binoy.jayan@linaro.org>

> >---

> > drivers/infiniband/hw/mlx5/mlx5_ib.h | 2 +-

> > drivers/infiniband/hw/mlx5/mr.c      | 9 +++++----

> > include/rdma/ib_verbs.h              | 1 +

> > 3 files changed, 7 insertions(+), 5 deletions(-)

> >

> >diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h

> >index de31b5f..cf496b5 100644

> >--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h

> >+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h

> >@@ -524,7 +524,7 @@ struct mlx5_ib_mw {

> > struct mlx5_ib_umr_context {

> > 	struct ib_cqe		cqe;

> > 	enum ib_wc_status	status;

> >-	struct completion	done;

> >+	wait_queue_head_t	wq;

> > };

> >

> > struct umr_common {

> >diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c

> >index dfaf6f6..49ff2af 100644

> >--- a/drivers/infiniband/hw/mlx5/mr.c

> >+++ b/drivers/infiniband/hw/mlx5/mr.c

> >@@ -846,14 +846,14 @@ static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)

> > 		container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);

> >

> > 	context->status = wc->status;

> >-	complete(&context->done);

> >+	wake_up(&context->wq);

> > }

> >

> > static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)

> > {

> > 	context->cqe.done = mlx5_ib_umr_done;

> >-	context->status = -1;

> >-	init_completion(&context->done);

> >+	context->status = IB_WC_STATUS_NONE;

> >+	init_waitqueue_head(&context->wq);

> > }

> >

> > static inline int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,

> >@@ -873,7 +873,8 @@ static inline int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,

> > 	if (err) {

> > 		mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);

> > 	} else {

> >-		wait_for_completion(&umr_context.done);

> >+		wait_event(umr_context.wq,

> >+			   umr_context.status != IB_WC_STATUS_NONE);

>

> How is this simpler?

>

>

> > enum ib_wc_status {

> >+	IB_WC_STATUS_NONE = -1,

> > 	IB_WC_SUCCESS,

> > 	IB_WC_LOC_LEN_ERR,

> > 	IB_WC_LOC_QP_OP_ERR,

> >

>

> Huh? Where did this bogus status came from? IMHO, this is polluting

> the verbs interface for no good reason at all, sorry.

> --

> To unsubscribe from this list: send the line "unsubscribe linux-rdma" in

> the body of a message to majordomo@vger.kernel.org

> More majordomo info at  http://vger.kernel.org/majordomo-info.html
Binoy Jayan Nov. 3, 2016, 6:03 a.m. UTC | #3
Hi,

On 31 October 2016 at 02:47, Sagi Grimberg <sagi@grimberg.me> wrote:

> How is this simpler?


It is simpler in the sense that it is a light weight primitive and that only
one thread waits on the event here. In our case since 'umr_context.done'
is an "on stack" variable, and has only one thread waiting on that event,
no race conditions occur. So, we do not need completions here which
are usually used to provide a race-free but easy-to-use solution involving
multiple threads waiting on an event.

>>  enum ib_wc_status {

>> +       IB_WC_STATUS_NONE = -1,

>>         IB_WC_SUCCESS,

>>         IB_WC_LOC_LEN_ERR,

>>         IB_WC_LOC_QP_OP_ERR,

>>

>

> Huh? Where did this bogus status came from? IMHO, this is polluting

> the verbs interface for no good reason at all, sorry.


context->status is initialized to -1 in the following code, so I just thought of
replacing it with a name.

static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
 {
        context->cqe.done = mlx5_ib_umr_done;
-       context->status = -1;
-       init_completion(&context->done);
+       context->status = IB_WC_STATUS_NONE;
+       init_waitqueue_head(&context->wq);
 }

Thanks,
Binoy
Linus Torvalds Nov. 3, 2016, 3:37 p.m. UTC | #4
On Wed, Oct 26, 2016 at 11:59 PM, Binoy Jayan <binoy.jayan@linaro.org> wrote:
> Convert the completion 'mlx5_ib_umr_context:done' to a wait_event as it

> just waits for the return value to be filled.


This is wrong.

Since that "umr_context" variable is on the stack, and you are waiting
for it to be fully done, it really should be a completion.

Why?

Because a "complete()" guarantees that the *last* access to the
variable is done by the thread that does "wait_for_completion()".

In contrast, doing a "wait_event()" can actually *race* with whoever
wakes it up, and the "wait_event()" may race with the wakeup, where
the wakeup() ends up removing the entry from the list, so that
"list_empty_careful()" sees that it's all done, but the "wakeup()" can
still be holding (and about to release) the waitqueue spinlock.

What's the problem?

When the waiter is on the stack, the umr_context" variable may be
about to be relased, and then the "wake_up()" may end up accessing the
umr_context waitqueue spinlock after it has already become something
else.

This is unlikely, but it's very much one of the things that a
"completion" is all about. A completion (along with a plain spinlock)
is guaranteed to be synchronous in a way that semaphores and
wait-queues are *not*, so that when somebody has done "complete()",
there is no access to the variable that can race.

So no. A wait-queue wakeup is NOT AT ALL the same thing as a completion.

There really is a very real reason why "complete()" exists, and why it
is used for one-time initializations like this. "complete()" along
with spinlocks are synchronous in ways that other concepts are not.

                  Linus
Binoy Jayan Nov. 7, 2016, 5:51 a.m. UTC | #5
Hi Linus,

On 3 November 2016 at 21:07, Linus Torvalds
<torvalds@linux-foundation.org> wrote:
> This is wrong.


Will change it back.

> Since that "umr_context" variable is on the stack, and you are waiting

> for it to be fully done, it really should be a completion.


Thank you for sharing your insight with wait_event and completion.

-Binoy
diff mbox

Patch

diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index de31b5f..cf496b5 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -524,7 +524,7 @@  struct mlx5_ib_mw {
 struct mlx5_ib_umr_context {
 	struct ib_cqe		cqe;
 	enum ib_wc_status	status;
-	struct completion	done;
+	wait_queue_head_t	wq;
 };
 
 struct umr_common {
diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c
index dfaf6f6..49ff2af 100644
--- a/drivers/infiniband/hw/mlx5/mr.c
+++ b/drivers/infiniband/hw/mlx5/mr.c
@@ -846,14 +846,14 @@  static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
 		container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);
 
 	context->status = wc->status;
-	complete(&context->done);
+	wake_up(&context->wq);
 }
 
 static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
 {
 	context->cqe.done = mlx5_ib_umr_done;
-	context->status = -1;
-	init_completion(&context->done);
+	context->status = IB_WC_STATUS_NONE;
+	init_waitqueue_head(&context->wq);
 }
 
 static inline int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
@@ -873,7 +873,8 @@  static inline int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
 	if (err) {
 		mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
 	} else {
-		wait_for_completion(&umr_context.done);
+		wait_event(umr_context.wq,
+			   umr_context.status != IB_WC_STATUS_NONE);
 		if (umr_context.status != IB_WC_SUCCESS) {
 			mlx5_ib_warn(dev, "reg umr failed (%u)\n",
 				     umr_context.status);
diff --git a/include/rdma/ib_verbs.h b/include/rdma/ib_verbs.h
index 5ad43a4..8b15b6f 100644
--- a/include/rdma/ib_verbs.h
+++ b/include/rdma/ib_verbs.h
@@ -823,6 +823,7 @@  struct ib_ah_attr {
 };
 
 enum ib_wc_status {
+	IB_WC_STATUS_NONE = -1,
 	IB_WC_SUCCESS,
 	IB_WC_LOC_LEN_ERR,
 	IB_WC_LOC_QP_OP_ERR,