diff mbox series

rdma/siw: avoid smp_store_mb() on a u64

Message ID 20190712085212.3901785-1-arnd@arndb.de
State New
Headers show
Series rdma/siw: avoid smp_store_mb() on a u64 | expand

Commit Message

Arnd Bergmann July 12, 2019, 8:51 a.m. UTC
The new siw driver fails to build on i386 with

drivers/infiniband/sw/siw/siw_qp.c:1025:3: error: invalid output size for constraint '+q'
                smp_store_mb(*cq->notify, SIW_NOTIFY_NOT);
                ^
include/asm-generic/barrier.h:141:35: note: expanded from macro 'smp_store_mb'
 #define smp_store_mb(var, value)  __smp_store_mb(var, value)
                                  ^
arch/x86/include/asm/barrier.h:65:47: note: expanded from macro '__smp_store_mb'
 #define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)
                                              ^
include/asm-generic/atomic-instrumented.h:1648:2: note: expanded from macro 'xchg'
        arch_xchg(__ai_ptr, __VA_ARGS__);                               \
        ^
arch/x86/include/asm/cmpxchg.h:78:27: note: expanded from macro 'arch_xchg'
 #define arch_xchg(ptr, v)       __xchg_op((ptr), (v), xchg, "")
                                ^
arch/x86/include/asm/cmpxchg.h:48:19: note: expanded from macro '__xchg_op'
                                      : "+q" (__ret), "+m" (*(ptr))     \
                                              ^
drivers/infiniband/sw/siw/siw_qp.o: In function `siw_sqe_complete':
siw_qp.c:(.text+0x1450): undefined reference to `__xchg_wrong_size'
drivers/infiniband/sw/siw/siw_qp.o: In function `siw_rqe_complete':
siw_qp.c:(.text+0x15b0): undefined reference to `__xchg_wrong_size'
drivers/infiniband/sw/siw/siw_verbs.o: In function `siw_req_notify_cq':
siw_verbs.c:(.text+0x18ff): undefined reference to `__xchg_wrong_size'

smp_store_mb() has to be an atomic store, but the architecture
can only do this on 32-bit quantities or smaller, while 'cq->notify'
is a 64-bit word.

Apparently the smp_store_mb() is paired with a READ_ONCE() here, which
seems like an odd choice because there is only a barrier on the writer
side and not the reader, and READ_ONCE() is already not atomic on
quantities larger than a CPU register.

I suspect it is sufficient to use the (possibly nonatomic) WRITE_ONCE()
and an SMP memory barrier here. If it does need to be atomic as well
as 64-bit quantities, using an atomic64_set_release()/atomic64_read_acquire()
may be a better choice.

Fixes: 303ae1cdfdf7 ("rdma/siw: application interface")
Fixes: f29dd55b0236 ("rdma/siw: queue pair methods")
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Arnd Bergmann <arnd@arndb.de>

---
 drivers/infiniband/sw/siw/siw_qp.c    | 4 +++-
 drivers/infiniband/sw/siw/siw_verbs.c | 5 +++--
 2 files changed, 6 insertions(+), 3 deletions(-)

-- 
2.20.0

Comments

Bernard Metzler July 12, 2019, 11:33 a.m. UTC | #1
-----"Arnd Bergmann" <arnd@arndb.de> wrote: -----

>To: "Bernard Metzler" <bmt@zurich.ibm.com>, "Doug Ledford"

><dledford@redhat.com>, "Jason Gunthorpe" <jgg@ziepe.ca>

>From: "Arnd Bergmann" <arnd@arndb.de>

>Date: 07/12/2019 10:52AM

>Cc: "Arnd Bergmann" <arnd@arndb.de>, "Peter Zijlstra"

><peterz@infradead.org>, "Jason Gunthorpe" <jgg@mellanox.com>,

>linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org

>Subject: [EXTERNAL] [PATCH] rdma/siw: avoid smp_store_mb() on a u64

>

>The new siw driver fails to build on i386 with

>

>drivers/infiniband/sw/siw/siw_qp.c:1025:3: error: invalid output size

>for constraint '+q'

>                smp_store_mb(*cq->notify, SIW_NOTIFY_NOT);

>                ^

>include/asm-generic/barrier.h:141:35: note: expanded from macro

>'smp_store_mb'

> #define smp_store_mb(var, value)  __smp_store_mb(var, value)

>                                  ^

>arch/x86/include/asm/barrier.h:65:47: note: expanded from macro

>'__smp_store_mb'

> #define __smp_store_mb(var, value) do { (void)xchg(&var, value); }

>while (0)

>                                              ^

>include/asm-generic/atomic-instrumented.h:1648:2: note: expanded from

>macro 'xchg'

>        arch_xchg(__ai_ptr, __VA_ARGS__);

>  \

>        ^

>arch/x86/include/asm/cmpxchg.h:78:27: note: expanded from macro

>'arch_xchg'

> #define arch_xchg(ptr, v)       __xchg_op((ptr), (v), xchg, "")

>                                ^

>arch/x86/include/asm/cmpxchg.h:48:19: note: expanded from macro

>'__xchg_op'

>                                      : "+q" (__ret), "+m" (*(ptr))

>  \

>                                              ^

>drivers/infiniband/sw/siw/siw_qp.o: In function `siw_sqe_complete':

>siw_qp.c:(.text+0x1450): undefined reference to `__xchg_wrong_size'

>drivers/infiniband/sw/siw/siw_qp.o: In function `siw_rqe_complete':

>siw_qp.c:(.text+0x15b0): undefined reference to `__xchg_wrong_size'

>drivers/infiniband/sw/siw/siw_verbs.o: In function

>`siw_req_notify_cq':

>siw_verbs.c:(.text+0x18ff): undefined reference to

>`__xchg_wrong_size'

>

>Since smp_store_mb() has to be an atomic store, but the architecture

>can only do this on 32-bit quantities or smaller, but 'cq->notify'

>is a 64-bit word.

>

>Apparently the smp_store_mb() is paired with a READ_ONCE() here,

>which

>seems like an odd choice because there is only a barrier on the

>writer

>side and not the reader, and READ_ONCE() is already not atomic on

>quantities larger than a CPU register.

>

>I suspect it is sufficient to use the (possibly nonatomic)

>WRITE_ONCE()

>and an SMP memory barrier here. If it does need to be atomic as well

>as 64-bit quantities, using an

>atomic64_set_release()/atomic64_read_acquire()

>may be a better choice.

>

>Fixes: 303ae1cdfdf7 ("rdma/siw: application interface")

>Fixes: f29dd55b0236 ("rdma/siw: queue pair methods")

>Cc: Peter Zijlstra <peterz@infradead.org>

>Signed-off-by: Arnd Bergmann <arnd@arndb.de>

>---

> drivers/infiniband/sw/siw/siw_qp.c    | 4 +++-

> drivers/infiniband/sw/siw/siw_verbs.c | 5 +++--

> 2 files changed, 6 insertions(+), 3 deletions(-)

>

>diff --git a/drivers/infiniband/sw/siw/siw_qp.c

>b/drivers/infiniband/sw/siw/siw_qp.c

>index 11383d9f95ef..a2c08f17f13d 100644

>--- a/drivers/infiniband/sw/siw/siw_qp.c

>+++ b/drivers/infiniband/sw/siw/siw_qp.c

>@@ -1016,13 +1016,15 @@ static bool siw_cq_notify_now(struct siw_cq

>*cq, u32 flags)

> 	if (!cq->base_cq.comp_handler)

> 		return false;

> 

>+	smp_rmb();

> 	cq_notify = READ_ONCE(*cq->notify);

> 

> 	if ((cq_notify & SIW_NOTIFY_NEXT_COMPLETION) ||

> 	    ((cq_notify & SIW_NOTIFY_SOLICITED) &&

> 	     (flags & SIW_WQE_SOLICITED))) {

> 		/* dis-arm CQ */

>-		smp_store_mb(*cq->notify, SIW_NOTIFY_NOT);

>+		WRITE_ONCE(*cq->notify, SIW_NOTIFY_NOT);

>+		smp_wmb();

> 

> 		return true;

> 	}

>diff --git a/drivers/infiniband/sw/siw/siw_verbs.c

>b/drivers/infiniband/sw/siw/siw_verbs.c

>index 32dc79d0e898..41c5ab293fe1 100644

>--- a/drivers/infiniband/sw/siw/siw_verbs.c

>+++ b/drivers/infiniband/sw/siw/siw_verbs.c

>@@ -1142,10 +1142,11 @@ int siw_req_notify_cq(struct ib_cq *base_cq,

>enum ib_cq_notify_flags flags)

> 

> 	if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)

> 		/* CQ event for next solicited completion */

>-		smp_store_mb(*cq->notify, SIW_NOTIFY_SOLICITED);

>+		WRITE_ONCE(*cq->notify, SIW_NOTIFY_SOLICITED);

> 	else

> 		/* CQ event for any signalled completion */

>-		smp_store_mb(*cq->notify, SIW_NOTIFY_ALL);

>+		WRITE_ONCE(*cq->notify, SIW_NOTIFY_ALL);

>+	smp_wmb();

> 

> 	if (flags & IB_CQ_REPORT_MISSED_EVENTS)

> 		return cq->cq_put - cq->cq_get;

>-- 

>2.20.0

>

>



Hi Arnd,
Many thanks for pointing that out! Indeed, this CQ notification
mechanism does not take 32 bit architectures into account.
Since we have only three flags to hold here, it's probably better
to make it a 32bit value. That would remove the issue w/o
introducing extra smp_wmb(). I'd prefer smp_store_mb(),
since on some architectures it shall be more efficient.
That would also make it sufficient to use READ_ONCE. 

That would be the proposed fix:

From c7c3e2dbc3555581be52cb5d76c15726dced0331 Mon Sep 17 00:00:00 2001
From: Bernard Metzler <bmt@zurich.ibm.com>

Date: Fri, 12 Jul 2019 13:19:27 +0200
Subject: [PATCH] Make shared CQ notification flags 32bit to respect 32bit
 architectures

Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>

---
 drivers/infiniband/sw/siw/siw.h       | 2 +-
 drivers/infiniband/sw/siw/siw_qp.c    | 6 +++---
 drivers/infiniband/sw/siw/siw_verbs.c | 6 +++---
 include/uapi/rdma/siw-abi.h           | 3 ++-
 4 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
index 409e2987cd45..d59d81f4d86b 100644
--- a/drivers/infiniband/sw/siw/siw.h
+++ b/drivers/infiniband/sw/siw/siw.h
@@ -216,7 +216,7 @@ struct siw_wqe {
 struct siw_cq {
 	struct ib_cq base_cq;
 	spinlock_t lock;
-	u64 *notify;
+	struct siw_cq_ctrl *notify;
 	struct siw_cqe *queue;
 	u32 cq_put;
 	u32 cq_get;
diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c
index 83e50fe8e48b..0fcc5002d2da 100644
--- a/drivers/infiniband/sw/siw/siw_qp.c
+++ b/drivers/infiniband/sw/siw/siw_qp.c
@@ -1011,18 +1011,18 @@ int siw_activate_tx(struct siw_qp *qp)
  */
 static bool siw_cq_notify_now(struct siw_cq *cq, u32 flags)
 {
-	u64 cq_notify;
+	u32 cq_notify;
 
 	if (!cq->base_cq.comp_handler)
 		return false;
 
-	cq_notify = READ_ONCE(*cq->notify);
+	cq_notify = READ_ONCE(cq->notify->flags);
 
 	if ((cq_notify & SIW_NOTIFY_NEXT_COMPLETION) ||
 	    ((cq_notify & SIW_NOTIFY_SOLICITED) &&
 	     (flags & SIW_WQE_SOLICITED))) {
 		/* dis-arm CQ */
-		smp_store_mb(*cq->notify, SIW_NOTIFY_NOT);
+		smp_store_mb(cq->notify->flags, SIW_NOTIFY_NOT);
 
 		return true;
 	}
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index d4fb78780765..bc6892229af0 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -1049,7 +1049,7 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,
 
 	spin_lock_init(&cq->lock);
 
-	cq->notify = &((struct siw_cq_ctrl *)&cq->queue[size])->notify;
+	cq->notify = (struct siw_cq_ctrl *)&cq->queue[size];
 
 	if (udata) {
 		struct siw_uresp_create_cq uresp = {};
@@ -1142,10 +1142,10 @@ int siw_req_notify_cq(struct ib_cq *base_cq, enum ib_cq_notify_flags flags)
 
 	if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)
 		/* CQ event for next solicited completion */
-		smp_store_mb(*cq->notify, SIW_NOTIFY_SOLICITED);
+		smp_store_mb(cq->notify->flags, SIW_NOTIFY_SOLICITED);
 	else
 		/* CQ event for any signalled completion */
-		smp_store_mb(*cq->notify, SIW_NOTIFY_ALL);
+	smp_store_mb(cq->notify->flags, SIW_NOTIFY_ALL);
 
 	if (flags & IB_CQ_REPORT_MISSED_EVENTS)
 		return cq->cq_put - cq->cq_get;
diff --git a/include/uapi/rdma/siw-abi.h b/include/uapi/rdma/siw-abi.h
index ba4d5315cb76..93298980d3a7 100644
--- a/include/uapi/rdma/siw-abi.h
+++ b/include/uapi/rdma/siw-abi.h
@@ -178,6 +178,7 @@ struct siw_cqe {
  * to control CQ arming.
  */
 struct siw_cq_ctrl {
-	__aligned_u64 notify;
+	__u32 flags;
+	__u32 pad;
 };
 #endif
-- 
2.17.2
Peter Zijlstra July 12, 2019, 11:47 a.m. UTC | #2
On Fri, Jul 12, 2019 at 11:33:46AM +0000, Bernard Metzler wrote:
> Many thanks for pointing that out! Indeed, this CQ notification

> mechanism does not take 32 bit architectures into account.

> Since we have only three flags to hold here, it's probably better

> to make it a 32bit value. That would remove the issue w/o

> introducing extra smp_wmb(). I'd prefer smp_store_mb(),

> since on some architectures it shall be more efficient.

> That would also make it sufficient to use READ_ONCE. 

> 


The below fails review due to a distinct lack of comments describing the
memory ordering.

Describe which variables (at least two) are ordered how and what
guarantees that provides and how that helps.

> From c7c3e2dbc3555581be52cb5d76c15726dced0331 Mon Sep 17 00:00:00 2001

> From: Bernard Metzler <bmt@zurich.ibm.com>

> Date: Fri, 12 Jul 2019 13:19:27 +0200

> Subject: [PATCH] Make shared CQ notification flags 32bit to respect 32bit

>  architectures

> 

> Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>

> ---

>  drivers/infiniband/sw/siw/siw.h       | 2 +-

>  drivers/infiniband/sw/siw/siw_qp.c    | 6 +++---

>  drivers/infiniband/sw/siw/siw_verbs.c | 6 +++---

>  include/uapi/rdma/siw-abi.h           | 3 ++-

>  4 files changed, 9 insertions(+), 8 deletions(-)

> 

> diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h

> index 409e2987cd45..d59d81f4d86b 100644

> --- a/drivers/infiniband/sw/siw/siw.h

> +++ b/drivers/infiniband/sw/siw/siw.h

> @@ -216,7 +216,7 @@ struct siw_wqe {

>  struct siw_cq {

>  	struct ib_cq base_cq;

>  	spinlock_t lock;

> -	u64 *notify;

> +	struct siw_cq_ctrl *notify;

>  	struct siw_cqe *queue;

>  	u32 cq_put;

>  	u32 cq_get;

> diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c

> index 83e50fe8e48b..0fcc5002d2da 100644

> --- a/drivers/infiniband/sw/siw/siw_qp.c

> +++ b/drivers/infiniband/sw/siw/siw_qp.c

> @@ -1011,18 +1011,18 @@ int siw_activate_tx(struct siw_qp *qp)

>   */

>  static bool siw_cq_notify_now(struct siw_cq *cq, u32 flags)

>  {

> -	u64 cq_notify;

> +	u32 cq_notify;

>  

>  	if (!cq->base_cq.comp_handler)

>  		return false;

>  

> -	cq_notify = READ_ONCE(*cq->notify);

> +	cq_notify = READ_ONCE(cq->notify->flags);

>  

>  	if ((cq_notify & SIW_NOTIFY_NEXT_COMPLETION) ||

>  	    ((cq_notify & SIW_NOTIFY_SOLICITED) &&

>  	     (flags & SIW_WQE_SOLICITED))) {

>  		/* dis-arm CQ */

> -		smp_store_mb(*cq->notify, SIW_NOTIFY_NOT);

> +		smp_store_mb(cq->notify->flags, SIW_NOTIFY_NOT);

>  

>  		return true;

>  	}

> diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c

> index d4fb78780765..bc6892229af0 100644

> --- a/drivers/infiniband/sw/siw/siw_verbs.c

> +++ b/drivers/infiniband/sw/siw/siw_verbs.c

> @@ -1049,7 +1049,7 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,

>  

>  	spin_lock_init(&cq->lock);

>  

> -	cq->notify = &((struct siw_cq_ctrl *)&cq->queue[size])->notify;

> +	cq->notify = (struct siw_cq_ctrl *)&cq->queue[size];

>  

>  	if (udata) {

>  		struct siw_uresp_create_cq uresp = {};

> @@ -1142,10 +1142,10 @@ int siw_req_notify_cq(struct ib_cq *base_cq, enum ib_cq_notify_flags flags)

>  

>  	if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)

>  		/* CQ event for next solicited completion */

> -		smp_store_mb(*cq->notify, SIW_NOTIFY_SOLICITED);

> +		smp_store_mb(cq->notify->flags, SIW_NOTIFY_SOLICITED);

>  	else

>  		/* CQ event for any signalled completion */

> -		smp_store_mb(*cq->notify, SIW_NOTIFY_ALL);

> +	smp_store_mb(cq->notify->flags, SIW_NOTIFY_ALL);

>  

>  	if (flags & IB_CQ_REPORT_MISSED_EVENTS)

>  		return cq->cq_put - cq->cq_get;

> diff --git a/include/uapi/rdma/siw-abi.h b/include/uapi/rdma/siw-abi.h

> index ba4d5315cb76..93298980d3a7 100644

> --- a/include/uapi/rdma/siw-abi.h

> +++ b/include/uapi/rdma/siw-abi.h

> @@ -178,6 +178,7 @@ struct siw_cqe {

>   * to control CQ arming.

>   */

>  struct siw_cq_ctrl {

> -	__aligned_u64 notify;

> +	__u32 flags;

> +	__u32 pad;

>  };

>  #endif

> -- 

> 2.17.2

> 

>
Jason Gunthorpe July 12, 2019, 12:03 p.m. UTC | #3
On Fri, Jul 12, 2019 at 11:33:46AM +0000, Bernard Metzler wrote:
> >diff --git a/drivers/infiniband/sw/siw/siw_verbs.c

> >b/drivers/infiniband/sw/siw/siw_verbs.c

> >index 32dc79d0e898..41c5ab293fe1 100644

> >+++ b/drivers/infiniband/sw/siw/siw_verbs.c

> >@@ -1142,10 +1142,11 @@ int siw_req_notify_cq(struct ib_cq *base_cq,

> >enum ib_cq_notify_flags flags)

> > 

> > 	if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)

> > 		/* CQ event for next solicited completion */

> >-		smp_store_mb(*cq->notify, SIW_NOTIFY_SOLICITED);

> >+		WRITE_ONCE(*cq->notify, SIW_NOTIFY_SOLICITED);

> > 	else

> > 		/* CQ event for any signalled completion */

> >-		smp_store_mb(*cq->notify, SIW_NOTIFY_ALL);

> >+		WRITE_ONCE(*cq->notify, SIW_NOTIFY_ALL);

> >+	smp_wmb();

> > 

> > 	if (flags & IB_CQ_REPORT_MISSED_EVENTS)

> > 		return cq->cq_put - cq->cq_get;

> 

> 

> Hi Arnd,

> Many thanks for pointing that out! Indeed, this CQ notification

> mechanism does not take 32 bit architectures into account.

> Since we have only three flags to hold here, it's probably better

> to make it a 32bit value. That would remove the issue w/o

> introducing extra smp_wmb(). 


I also prefer not to see smp_wmb() in drivers..

> I'd prefer smp_store_mb(), since on some architectures it shall be

> more efficient.  That would also make it sufficient to use

> READ_ONCE.


The READ_ONCE is confusing to me too, if you need store_release
semantics then the reader also needs to pair with load_acquite -
otherwise it doesn't work.

Still, we need to do something rapidly to fix the i386 build, please
revise right away..

Jason
Bernard Metzler July 12, 2019, 12:27 p.m. UTC | #4
-----"Jason Gunthorpe" <jgg@ziepe.ca> wrote: -----

>To: "Bernard Metzler" <BMT@zurich.ibm.com>

>From: "Jason Gunthorpe" <jgg@ziepe.ca>

>Date: 07/12/2019 02:03PM

>Cc: "Arnd Bergmann" <arnd@arndb.de>, "Doug Ledford"

><dledford@redhat.com>, "Peter Zijlstra" <peterz@infradead.org>,

>linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org

>Subject: [EXTERNAL] Re: [PATCH] rdma/siw: avoid smp_store_mb() on a

>u64

>

>On Fri, Jul 12, 2019 at 11:33:46AM +0000, Bernard Metzler wrote:

>> >diff --git a/drivers/infiniband/sw/siw/siw_verbs.c

>> >b/drivers/infiniband/sw/siw/siw_verbs.c

>> >index 32dc79d0e898..41c5ab293fe1 100644

>> >+++ b/drivers/infiniband/sw/siw/siw_verbs.c

>> >@@ -1142,10 +1142,11 @@ int siw_req_notify_cq(struct ib_cq

>*base_cq,

>> >enum ib_cq_notify_flags flags)

>> > 

>> > 	if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)

>> > 		/* CQ event for next solicited completion */

>> >-		smp_store_mb(*cq->notify, SIW_NOTIFY_SOLICITED);

>> >+		WRITE_ONCE(*cq->notify, SIW_NOTIFY_SOLICITED);

>> > 	else

>> > 		/* CQ event for any signalled completion */

>> >-		smp_store_mb(*cq->notify, SIW_NOTIFY_ALL);

>> >+		WRITE_ONCE(*cq->notify, SIW_NOTIFY_ALL);

>> >+	smp_wmb();

>> > 

>> > 	if (flags & IB_CQ_REPORT_MISSED_EVENTS)

>> > 		return cq->cq_put - cq->cq_get;

>> 

>> 

>> Hi Arnd,

>> Many thanks for pointing that out! Indeed, this CQ notification

>> mechanism does not take 32 bit architectures into account.

>> Since we have only three flags to hold here, it's probably better

>> to make it a 32bit value. That would remove the issue w/o

>> introducing extra smp_wmb(). 

>

>I also prefer not to see smp_wmb() in drivers..

>

>> I'd prefer smp_store_mb(), since on some architectures it shall be

>> more efficient.  That would also make it sufficient to use

>> READ_ONCE.

>

>The READ_ONCE is confusing to me too, if you need store_release

>semantics then the reader also needs to pair with load_acquite -

>otherwise it doesn't work.

>


I used READ_ONCE since it is a potentially user land shared
object. Since we are under spinlock, we might just do a matching
WRITE_ONCE instead of the smp_store_mb() ??

Many thanks!
Bernard.

>Still, we need to do something rapidly to fix the i386 build, please

>revise right away..

>

>Jason

>

>
Bernard Metzler July 12, 2019, 1:05 p.m. UTC | #5
-----"Jason Gunthorpe" <jgg@ziepe.ca> wrote: -----

>To: "Bernard Metzler" <BMT@zurich.ibm.com>

>From: "Jason Gunthorpe" <jgg@ziepe.ca>

>Date: 07/12/2019 02:03PM

>Cc: "Arnd Bergmann" <arnd@arndb.de>, "Doug Ledford"

><dledford@redhat.com>, "Peter Zijlstra" <peterz@infradead.org>,

>linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org

>Subject: [EXTERNAL] Re: [PATCH] rdma/siw: avoid smp_store_mb() on a

>u64

>

>On Fri, Jul 12, 2019 at 11:33:46AM +0000, Bernard Metzler wrote:

>> >diff --git a/drivers/infiniband/sw/siw/siw_verbs.c

>> >b/drivers/infiniband/sw/siw/siw_verbs.c

>> >index 32dc79d0e898..41c5ab293fe1 100644

>> >+++ b/drivers/infiniband/sw/siw/siw_verbs.c

>> >@@ -1142,10 +1142,11 @@ int siw_req_notify_cq(struct ib_cq

>*base_cq,

>> >enum ib_cq_notify_flags flags)

>> > 

>> > 	if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)

>> > 		/* CQ event for next solicited completion */

>> >-		smp_store_mb(*cq->notify, SIW_NOTIFY_SOLICITED);

>> >+		WRITE_ONCE(*cq->notify, SIW_NOTIFY_SOLICITED);

>> > 	else

>> > 		/* CQ event for any signalled completion */

>> >-		smp_store_mb(*cq->notify, SIW_NOTIFY_ALL);

>> >+		WRITE_ONCE(*cq->notify, SIW_NOTIFY_ALL);

>> >+	smp_wmb();

>> > 

>> > 	if (flags & IB_CQ_REPORT_MISSED_EVENTS)

>> > 		return cq->cq_put - cq->cq_get;

>> 

>> 

>> Hi Arnd,

>> Many thanks for pointing that out! Indeed, this CQ notification

>> mechanism does not take 32 bit architectures into account.

>> Since we have only three flags to hold here, it's probably better

>> to make it a 32bit value. That would remove the issue w/o

>> introducing extra smp_wmb(). 

>

>I also prefer not to see smp_wmb() in drivers..

>

>> I'd prefer smp_store_mb(), since on some architectures it shall be

>> more efficient.  That would also make it sufficient to use

>> READ_ONCE.

>

>The READ_ONCE is confusing to me too, if you need store_release

>semantics then the reader also needs to pair with load_acquite -

>otherwise it doesn't work.

>

>Still, we need to do something rapidly to fix the i386 build, please

>revise right away..

>

>Jason

>

>


We share CQ (completion queue) notification flags between application
(which may be user land) and producer (kernel QP's (queue pairs)).
Those flags can be written by both application and QP's. The application
writes those flags to let the driver know if it shall inform about new
work completions. It can write those flags at any time.
Only a kernel producer reads those flags to decide if
the CQ notification handler shall be kicked, if a new CQ element gets
added to the CQ. When kicking the completion handler, the driver resets the
notification flag, which must get re-armed by the application.

We use READ_ONCE() and WRITE_ONCE(), since the flags are potentially
shared (mmap'd) between user and kernel land.

siw_req_notify_cq() is being called only by kernel consumers to change
(write) the CQ notification state. We use smp_store_mb() to make sure
the new value becomes visible to all kernel producers (QP's) asap.


From cfb861a09dcfb24a98ba0f1e26bdaa1529d1b006 Mon Sep 17 00:00:00 2001
From: Bernard Metzler <bmt@zurich.ibm.com>

Date: Fri, 12 Jul 2019 13:19:27 +0200
Subject: [PATCH] Make shared CQ notification flags 32bit to respect 32bit
 architectures

Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>

---
 drivers/infiniband/sw/siw/siw.h       |  2 +-
 drivers/infiniband/sw/siw/siw_qp.c    | 12 ++++++++----
 drivers/infiniband/sw/siw/siw_verbs.c | 20 +++++++++++++++-----
 include/uapi/rdma/siw-abi.h           |  3 ++-
 4 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/drivers/infiniband/sw/siw/siw.h b/drivers/infiniband/sw/siw/siw.h
index 409e2987cd45..d59d81f4d86b 100644
--- a/drivers/infiniband/sw/siw/siw.h
+++ b/drivers/infiniband/sw/siw/siw.h
@@ -216,7 +216,7 @@ struct siw_wqe {
 struct siw_cq {
 	struct ib_cq base_cq;
 	spinlock_t lock;
-	u64 *notify;
+	struct siw_cq_ctrl *notify;
 	struct siw_cqe *queue;
 	u32 cq_put;
 	u32 cq_get;
diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c
index 83e50fe8e48b..f4b8d55839a7 100644
--- a/drivers/infiniband/sw/siw/siw_qp.c
+++ b/drivers/infiniband/sw/siw/siw_qp.c
@@ -1011,18 +1011,22 @@ int siw_activate_tx(struct siw_qp *qp)
  */
 static bool siw_cq_notify_now(struct siw_cq *cq, u32 flags)
 {
-	u64 cq_notify;
+	u32 cq_notify;
 
 	if (!cq->base_cq.comp_handler)
 		return false;
 
-	cq_notify = READ_ONCE(*cq->notify);
+	/* Read application shared notification state */
+	cq_notify = READ_ONCE(cq->notify->flags);
 
 	if ((cq_notify & SIW_NOTIFY_NEXT_COMPLETION) ||
 	    ((cq_notify & SIW_NOTIFY_SOLICITED) &&
 	     (flags & SIW_WQE_SOLICITED))) {
-		/* dis-arm CQ */
-		smp_store_mb(*cq->notify, SIW_NOTIFY_NOT);
+		/*
+		 * dis-arm CQ: write (potentially user land)
+		 * application shared notification state.
+		 */
+		WRITE_ONCE(cq->notify->flags, SIW_NOTIFY_NOT);
 
 		return true;
 	}
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index d4fb78780765..cda92e4c7cc9 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -1049,7 +1049,7 @@ int siw_create_cq(struct ib_cq *base_cq, const struct ib_cq_init_attr *attr,

 	spin_lock_init(&cq->lock);
 
-	cq->notify = &((struct siw_cq_ctrl *)&cq->queue[size])->notify;
+	cq->notify = (struct siw_cq_ctrl *)&cq->queue[size];

 	if (udata) {
 		struct siw_uresp_create_cq uresp = {};
@@ -1131,6 +1131,10 @@ int siw_poll_cq(struct ib_cq *base_cq, int num_cqe, struct ib_wc *wc)
  *   number of not reaped CQE's regardless of its notification
  *   type and current or new CQ notification settings.
  *
+ * This function gets called only by kernel consumers.
+ * Notification state must immediately become visible to all
+ * associated kernel producers (QP's).
+ *
  * @base_cq:   Base CQ contained in siw CQ.
  * @flags:     Requested notification flags.
  */
@@ -1141,11 +1145,17 @@ int siw_req_notify_cq(struct ib_cq *base_cq, enum ib_cq_notify_flags flags)
 	siw_dbg_cq(cq, "flags: 0x%02x\n", flags);
 
 	if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)
-		/* CQ event for next solicited completion */
-		smp_store_mb(*cq->notify, SIW_NOTIFY_SOLICITED);
+		/*
+		 * Enable CQ event for next solicited completion.
+		 * and make it visible to all associated producers.
+		 */
+		smp_store_mb(cq->notify->flags, SIW_NOTIFY_SOLICITED);
 	else
-		/* CQ event for any signalled completion */
-		smp_store_mb(*cq->notify, SIW_NOTIFY_ALL);
+		/*
+		 * Enable CQ event for any signalled completion.
+		 * and make it visible to all associated producers.
+		 */
+		smp_store_mb(cq->notify->flags, SIW_NOTIFY_ALL);
 
 	if (flags & IB_CQ_REPORT_MISSED_EVENTS)
 		return cq->cq_put - cq->cq_get;
diff --git a/include/uapi/rdma/siw-abi.h b/include/uapi/rdma/siw-abi.h
index ba4d5315cb76..93298980d3a7 100644
--- a/include/uapi/rdma/siw-abi.h
+++ b/include/uapi/rdma/siw-abi.h
@@ -178,6 +178,7 @@ struct siw_cqe {
  * to control CQ arming.
  */
 struct siw_cq_ctrl {
-	__aligned_u64 notify;
+	__u32 flags;
+	__u32 pad;
 };
 #endif
-- 
2.17.2
Peter Zijlstra July 12, 2019, 1:19 p.m. UTC | #6
On Fri, Jul 12, 2019 at 01:05:14PM +0000, Bernard Metzler wrote:
> @@ -1131,6 +1131,10 @@ int siw_poll_cq(struct ib_cq *base_cq, int num_cqe, struct ib_wc *wc)

>   *   number of not reaped CQE's regardless of its notification

>   *   type and current or new CQ notification settings.

>   *

> + * This function gets called only by kernel consumers.

> + * Notification state must immediately become visible to all

> + * associated kernel producers (QP's).


No amount of memory barriers can achieve that goal.
Arnd Bergmann July 12, 2019, 1:22 p.m. UTC | #7
On Fri, Jul 12, 2019 at 3:05 PM Bernard Metzler <BMT@zurich.ibm.com> wrote:

>

> We share CQ (completion queue) notification flags between application

> (which may be user land) and producer (kernel QP's (queue pairs)).

> Those flags can be written by both application and QP's. The application

> writes those flags to let the driver know if it shall inform about new

> work completions. It can write those flags at any time.

> Only a kernel producer reads those flags to decide if

> the CQ notification handler shall be kicked, if a new CQ element gets

> added to the CQ. When kicking the completion handler, the driver resets the

> notification flag, which must get re-armed by the application.

>

> We use READ_ONCE() and WRITE_ONCE(), since the flags are potentially

> shared (mmap'd) between user and kernel land.

>

> siw_req_notify_cq() is being called only by kernel consumers to change

> (write) the CQ notification state. We use smp_store_mb() to make sure

> the new value becomes visible to all kernel producers (QP's) asap.

>

>

> From cfb861a09dcfb24a98ba0f1e26bdaa1529d1b006 Mon Sep 17 00:00:00 2001

> From: Bernard Metzler <bmt@zurich.ibm.com>

> Date: Fri, 12 Jul 2019 13:19:27 +0200

> Subject: [PATCH] Make shared CQ notification flags 32bit to respect 32bit

>  architectures

>

> Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>


This fixes the build for me, thanks!

Tested-by: Arnd Bergmann <arnd@arndb.de>
Bernard Metzler July 12, 2019, 1:35 p.m. UTC | #8
-----"Peter Zijlstra" <peterz@infradead.org> wrote: -----

>To: "Bernard Metzler" <BMT@zurich.ibm.com>

>From: "Peter Zijlstra" <peterz@infradead.org>

>Date: 07/12/2019 03:19PM

>Cc: "Jason Gunthorpe" <jgg@ziepe.ca>, "Arnd Bergmann"

><arnd@arndb.de>, "Doug Ledford" <dledford@redhat.com>,

>linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org

>Subject: [EXTERNAL] Re: Re: [PATCH] rdma/siw: avoid smp_store_mb() on

>a u64

>

>On Fri, Jul 12, 2019 at 01:05:14PM +0000, Bernard Metzler wrote:

>> @@ -1131,6 +1131,10 @@ int siw_poll_cq(struct ib_cq *base_cq, int

>num_cqe, struct ib_wc *wc)

>>   *   number of not reaped CQE's regardless of its notification

>>   *   type and current or new CQ notification settings.

>>   *

>> + * This function gets called only by kernel consumers.

>> + * Notification state must immediately become visible to all

>> + * associated kernel producers (QP's).

>

>No amount of memory barriers can achieve that goal.

>

>

Oh right, that is correct. And it is not needed. This statement
is too bold. We simply want it asap. Actually, per API semantics, there is
no ordering guarantee between creating a completion queue event and
concurrently arming/disarming the completion queue, since that is
not possible.
I use the barrier to minimize the likelihood a CQ event is missed
since the CQ is not yet visible to be re-armed to all producers.
But it can happen. This is what this optional
IB_CQ_REPORT_MISSED_EVENTS right below the re-arming is for.

Would it be OK if we just adapted the comment from 'must immediately become visible'
to 'shall become visible asap'? That would reflect the intention.

Thanks very much!
Bernard.

/*
 * siw_req_notify_cq() - (Re-)arm the CQ for completion notification.
 *
 * Called by kernel CQ consumers via the ib_cq verbs interface to request
 * a notification on the next completion. Writes the shared notification
 * flags word (cq->notify->flags), which producers (QPs) read when placing
 * new CQEs to decide whether to kick the completion handler.
 *
 * NOTE(review): smp_store_mb() is used here so the new arm state becomes
 * visible to concurrently running producers; the thread discussion above
 * questions whether a full barrier is the right tool (no second datum is
 * ordered against the store) and suggests an atomic bitop instead —
 * confirm intended pairing with the producer-side read.
 *
 * Returns the number of CQEs not yet reaped (cq_put - cq_get) if the
 * caller asked for IB_CQ_REPORT_MISSED_EVENTS, otherwise 0.
 */
int siw_req_notify_cq(struct ib_cq *base_cq, enum ib_cq_notify_flags flags)
{
        struct siw_cq *cq = to_siw_cq(base_cq);

        siw_dbg_cq(cq, "flags: 0x%02x\n", flags);

        if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)
                /*
                 * Enable CQ event for next solicited completion.
                 * and make it visible to all associated producers.
                 */
                smp_store_mb(cq->notify->flags, SIW_NOTIFY_SOLICITED);
        else
                /*
                 * Enable CQ event for any signalled completion.
                 * and make it visible to all associated producers.
                 */
                smp_store_mb(cq->notify->flags, SIW_NOTIFY_ALL);

        /* Optionally report completions that may have raced the re-arm. */
        if (flags & IB_CQ_REPORT_MISSED_EVENTS)
                return cq->cq_put - cq->cq_get;

        return 0;
}
Jason Gunthorpe July 12, 2019, 1:53 p.m. UTC | #9
On Fri, Jul 12, 2019 at 01:05:14PM +0000, Bernard Metzler wrote:
> 

> >To: "Bernard Metzler" <BMT@zurich.ibm.com>

> >From: "Jason Gunthorpe" <jgg@ziepe.ca>

> >Date: 07/12/2019 02:03PM

> >Cc: "Arnd Bergmann" <arnd@arndb.de>, "Doug Ledford"

> ><dledford@redhat.com>, "Peter Zijlstra" <peterz@infradead.org>,

> >linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org

> >Subject: [EXTERNAL] Re: [PATCH] rdma/siw: avoid smp_store_mb() on a

> >u64

> >

> >On Fri, Jul 12, 2019 at 11:33:46AM +0000, Bernard Metzler wrote:

> >> >diff --git a/drivers/infiniband/sw/siw/siw_verbs.c

> >> >b/drivers/infiniband/sw/siw/siw_verbs.c

> >> >index 32dc79d0e898..41c5ab293fe1 100644

> >> >+++ b/drivers/infiniband/sw/siw/siw_verbs.c

> >> >@@ -1142,10 +1142,11 @@ int siw_req_notify_cq(struct ib_cq

> >*base_cq,

> >> >enum ib_cq_notify_flags flags)

> >> > 

> >> > 	if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)

> >> > 		/* CQ event for next solicited completion */

> >> >-		smp_store_mb(*cq->notify, SIW_NOTIFY_SOLICITED);

> >> >+		WRITE_ONCE(*cq->notify, SIW_NOTIFY_SOLICITED);

> >> > 	else

> >> > 		/* CQ event for any signalled completion */

> >> >-		smp_store_mb(*cq->notify, SIW_NOTIFY_ALL);

> >> >+		WRITE_ONCE(*cq->notify, SIW_NOTIFY_ALL);

> >> >+	smp_wmb();

> >> > 

> >> > 	if (flags & IB_CQ_REPORT_MISSED_EVENTS)

> >> > 		return cq->cq_put - cq->cq_get;

> >> 

> >> 

> >> Hi Arnd,

> >> Many thanks for pointing that out! Indeed, this CQ notification

> >> mechanism does not take 32 bit architectures into account.

> >> Since we have only three flags to hold here, it's probably better

> >> to make it a 32bit value. That would remove the issue w/o

> >> introducing extra smp_wmb(). 

> >

> >I also prefer not to see smp_wmb() in drivers..

> >

> >> I'd prefer smp_store_mb(), since on some architectures it shall be

> >> more efficient.  That would also make it sufficient to use

> >> READ_ONCE.

> >

> >The READ_ONCE is confusing to me too, if you need store_release

> >semantics then the reader also needs to pair with load_acquite -

> >otherwise it doesn't work.

> >

> >Still, we need to do something rapidly to fix the i386 build, please

> >revise right away..

> >

> >Jason

> >

> >

> 

> We share CQ (completion queue) notification flags between application

> (which may be user land) and producer (kernel QP's (queue pairs)).

> Those flags can be written by both application and QP's. The application

> writes those flags to let the driver know if it shall inform about new

> work completions. It can write those flags at any time.

> Only a kernel producer reads those flags to decide if

> the CQ notification handler shall be kicked, if a new CQ element gets

> added to the CQ. When kicking the completion handler, the driver resets the

> notification flag, which must get re-armed by the application.


This looks wrong to me.. a userspace notification re-arm cannot be
lost, so having a split READ/TEST/WRITE sequence can't possibly work?

I'd expect an atomic test and clear here?


> @@ -1141,11 +1145,17 @@ int siw_req_notify_cq(struct ib_cq *base_cq, enum ib_cq_notify_flags flags)

>  	siw_dbg_cq(cq, "flags: 0x%02x\n", flags);

>  

>  	if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)

> -		/* CQ event for next solicited completion */

> -		smp_store_mb(*cq->notify, SIW_NOTIFY_SOLICITED);

> +		/*

> +		 * Enable CQ event for next solicited completion.

> +		 * and make it visible to all associated producers.

> +		 */

> +		smp_store_mb(cq->notify->flags, SIW_NOTIFY_SOLICITED);


But what is the 2nd piece of data to motivate the smp_store_mb?

Jason
Bernard Metzler July 12, 2019, 2:35 p.m. UTC | #10
-----"Jason Gunthorpe" <jgg@ziepe.ca> wrote: -----

>To: "Bernard Metzler" <BMT@zurich.ibm.com>

>From: "Jason Gunthorpe" <jgg@ziepe.ca>

>Date: 07/12/2019 03:53PM

>Cc: "Arnd Bergmann" <arnd@arndb.de>, "Doug Ledford"

><dledford@redhat.com>, "Peter Zijlstra" <peterz@infradead.org>,

>linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org

>Subject: [EXTERNAL] Re: Re: [PATCH] rdma/siw: avoid smp_store_mb() on

>a u64

>

>On Fri, Jul 12, 2019 at 01:05:14PM +0000, Bernard Metzler wrote:

>> 

>> >To: "Bernard Metzler" <BMT@zurich.ibm.com>

>> >From: "Jason Gunthorpe" <jgg@ziepe.ca>

>> >Date: 07/12/2019 02:03PM

>> >Cc: "Arnd Bergmann" <arnd@arndb.de>, "Doug Ledford"

>> ><dledford@redhat.com>, "Peter Zijlstra" <peterz@infradead.org>,

>> >linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org

>> >Subject: [EXTERNAL] Re: [PATCH] rdma/siw: avoid smp_store_mb() on

>a

>> >u64

>> >

>> >On Fri, Jul 12, 2019 at 11:33:46AM +0000, Bernard Metzler wrote:

>> >> >diff --git a/drivers/infiniband/sw/siw/siw_verbs.c

>> >> >b/drivers/infiniband/sw/siw/siw_verbs.c

>> >> >index 32dc79d0e898..41c5ab293fe1 100644

>> >> >+++ b/drivers/infiniband/sw/siw/siw_verbs.c

>> >> >@@ -1142,10 +1142,11 @@ int siw_req_notify_cq(struct ib_cq

>> >*base_cq,

>> >> >enum ib_cq_notify_flags flags)

>> >> > 

>> >> > 	if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)

>> >> > 		/* CQ event for next solicited completion */

>> >> >-		smp_store_mb(*cq->notify, SIW_NOTIFY_SOLICITED);

>> >> >+		WRITE_ONCE(*cq->notify, SIW_NOTIFY_SOLICITED);

>> >> > 	else

>> >> > 		/* CQ event for any signalled completion */

>> >> >-		smp_store_mb(*cq->notify, SIW_NOTIFY_ALL);

>> >> >+		WRITE_ONCE(*cq->notify, SIW_NOTIFY_ALL);

>> >> >+	smp_wmb();

>> >> > 

>> >> > 	if (flags & IB_CQ_REPORT_MISSED_EVENTS)

>> >> > 		return cq->cq_put - cq->cq_get;

>> >> 

>> >> 

>> >> Hi Arnd,

>> >> Many thanks for pointing that out! Indeed, this CQ notification

>> >> mechanism does not take 32 bit architectures into account.

>> >> Since we have only three flags to hold here, it's probably

>better

>> >> to make it a 32bit value. That would remove the issue w/o

>> >> introducing extra smp_wmb(). 

>> >

>> >I also prefer not to see smp_wmb() in drivers..

>> >

>> >> I'd prefer smp_store_mb(), since on some architectures it shall

>be

>> >> more efficient.  That would also make it sufficient to use

>> >> READ_ONCE.

>> >

>> >The READ_ONCE is confusing to me too, if you need store_release

>> >semantics then the reader also needs to pair with load_acquite -

>> >otherwise it doesn't work.

>> >

>> >Still, we need to do something rapidly to fix the i386 build,

>please

>> >revise right away..

>> >

>> >Jason

>> >

>> >

>> 

>> We share CQ (completion queue) notification flags between

>application

>> (which may be user land) and producer (kernel QP's (queue pairs)).

>> Those flags can be written by both application and QP's. The

>application

>> writes those flags to let the driver know if it shall inform about

>new

>> work completions. It can write those flags at any time.

>> Only a kernel producer reads those flags to decide if

>> the CQ notification handler shall be kicked, if a new CQ element

>gets

>> added to the CQ. When kicking the completion handler, the driver

>resets the

>> notification flag, which must get re-armed by the application.

>

>This looks wrong to me.. a userspace notification re-arm cannot be

>lost, so have a split READ/TEST/WRITE sequence can't possibly work?

>

>I'd expect an atomic test and clear here?


We cannot avoid the case that the application re-arms the
CQ only after a CQE got placed. That is why folks are polling the
CQ once after re-arming it - to make sure they do not miss the
very last and single CQE which would have produced a CQ event.

I do not think an atomic test and clear would change that picture.

Also, per RDMA verbs semantics, if the user would re-arm the CQ
more then once before it gets a CQ event, it will still get only one
CQ event if a new CQ element becomes ready. 
>

>

>> @@ -1141,11 +1145,17 @@ int siw_req_notify_cq(struct ib_cq

>*base_cq, enum ib_cq_notify_flags flags)

>>  	siw_dbg_cq(cq, "flags: 0x%02x\n", flags);

>>  

>>  	if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)

>> -		/* CQ event for next solicited completion */

>> -		smp_store_mb(*cq->notify, SIW_NOTIFY_SOLICITED);

>> +		/*

>> +		 * Enable CQ event for next solicited completion.

>> +		 * and make it visible to all associated producers.

>> +		 */

>> +		smp_store_mb(cq->notify->flags, SIW_NOTIFY_SOLICITED);

>

>But what is the 2nd piece of data to motivate the smp_store_mb?


Another core (such as a concurrent RX operation) shall see this
CQ being re-armed asap.

Best,
Bernard.
>

>Jason

>

>
Jason Gunthorpe July 12, 2019, 2:42 p.m. UTC | #11
On Fri, Jul 12, 2019 at 02:35:50PM +0000, Bernard Metzler wrote:

> >This looks wrong to me.. a userspace notification re-arm cannot be

> >lost, so have a split READ/TEST/WRITE sequence can't possibly work?

> >

> >I'd expect an atomic test and clear here?

> 

> We cannot avoid the case that the application re-arms the

> CQ only after a CQE got placed. That is why folks are polling the

> CQ once after re-arming it - to make sure they do not miss the

> very last and single CQE which would have produced a CQ event.


That is different, that is re-arm happening after a CQE placement and
this can't be fixed.

What I said is that a re-arm from userspace cannot be lost. So you
can't blindly clear the arm flag with the WRITE_ONCE. It might be OK
because of the if, but...

It is just goofy to write it without a 'test and clear' atomic. If the
writer side consumes the notify it should always be done atomically.

And then I think all the weird barriers go away

> >> @@ -1141,11 +1145,17 @@ int siw_req_notify_cq(struct ib_cq

> >*base_cq, enum ib_cq_notify_flags flags)

> >>  	siw_dbg_cq(cq, "flags: 0x%02x\n", flags);

> >>  

> >>  	if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)

> >> -		/* CQ event for next solicited completion */

> >> -		smp_store_mb(*cq->notify, SIW_NOTIFY_SOLICITED);

> >> +		/*

> >> +		 * Enable CQ event for next solicited completion.

> >> +		 * and make it visible to all associated producers.

> >> +		 */

> >> +		smp_store_mb(cq->notify->flags, SIW_NOTIFY_SOLICITED);

> >

> >But what is the 2nd piece of data to motivate the smp_store_mb?

> 

> Another core (such as a concurrent RX operation) shall see this

> CQ being re-armed asap.


'ASAP' is not a '2nd piece of data'. 

AFAICT this requirement is just a normal atomic set_bit which does
also expedite making the change visible?
 
Jason
Jason Gunthorpe July 12, 2019, 3:14 p.m. UTC | #12
On Fri, Jul 12, 2019 at 03:22:35PM +0200, Arnd Bergmann wrote:
> On Fri, Jul 12, 2019 at 3:05 PM Bernard Metzler <BMT@zurich.ibm.com> wrote:

> 

> >

> > We share CQ (completion queue) notification flags between application

> > (which may be user land) and producer (kernel QP's (queue pairs)).

> > Those flags can be written by both application and QP's. The application

> > writes those flags to let the driver know if it shall inform about new

> > work completions. It can write those flags at any time.

> > Only a kernel producer reads those flags to decide if

> > the CQ notification handler shall be kicked, if a new CQ element gets

> > added to the CQ. When kicking the completion handler, the driver resets the

> > notification flag, which must get re-armed by the application.

> >

> > We use READ_ONCE() and WRITE_ONCE(), since the flags are potentially

> > shared (mmap'd) between user and kernel land.

> >

> > siw_req_notify_cq() is being called only by kernel consumers to change

> > (write) the CQ notification state. We use smp_store_mb() to make sure

> > the new value becomes visible to all kernel producers (QP's) asap.

> >

> >

> > From cfb861a09dcfb24a98ba0f1e26bdaa1529d1b006 Mon Sep 17 00:00:00 2001

> > From: Bernard Metzler <bmt@zurich.ibm.com>

> > Date: Fri, 12 Jul 2019 13:19:27 +0200

> > Subject: [PATCH] Make shared CQ notification flags 32bit to respect 32bit

> >  architectures

> >

> > Signed-off-by: Bernard Metzler <bmt@zurich.ibm.com>

> 

> This fixes the build for me, thanks!

> 

> Tested-by: Arnd Bergmann <arnd@arndb.de>


Since this is coming up so late in the merge window, I'm inclined to
take the simple path while Bernard makes a complete solution
here. What do you think Arnd?

From 0b043644c0ca601cb19943a81aa1f1455dbe9461 Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@mellanox.com>

Date: Fri, 12 Jul 2019 12:12:06 -0300
Subject: [PATCH] RMDA/siw: Require a 64 bit arch

The new siw driver fails to build on i386 with

drivers/infiniband/sw/siw/siw_qp.c:1025:3: error: invalid output size for constraint '+q'
                smp_store_mb(*cq->notify, SIW_NOTIFY_NOT);

As it is using 64 bit values with the smp_store_mb.

Since the entire scheme here seems questionable, and we are in the merge
window, fix the compile failures by disabling 32 bit support on this
driver.

A proper fix will be reviewed post merge window.

Fixes: c0cf5bdde46c ("rdma/siw: addition to kernel build environment")
Reported-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>

---
 drivers/infiniband/sw/siw/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/infiniband/sw/siw/Kconfig b/drivers/infiniband/sw/siw/Kconfig
index b622fc62f2cd6d..dace276aea1413 100644
--- a/drivers/infiniband/sw/siw/Kconfig
+++ b/drivers/infiniband/sw/siw/Kconfig
@@ -1,6 +1,6 @@
 config RDMA_SIW
 	tristate "Software RDMA over TCP/IP (iWARP) driver"
-	depends on INET && INFINIBAND && LIBCRC32C
+	depends on INET && INFINIBAND && LIBCRC32C && 64BIT
 	select DMA_VIRT_OPS
 	help
 	This driver implements the iWARP RDMA transport over
-- 
2.21.0
Bernard Metzler July 12, 2019, 3:24 p.m. UTC | #13
-----"Jason Gunthorpe" <jgg@ziepe.ca> wrote: -----

>To: "Bernard Metzler" <BMT@zurich.ibm.com>

>From: "Jason Gunthorpe" <jgg@ziepe.ca>

>Date: 07/12/2019 04:43PM

>Cc: "Arnd Bergmann" <arnd@arndb.de>, "Doug Ledford"

><dledford@redhat.com>, "Peter Zijlstra" <peterz@infradead.org>,

>linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org

>Subject: [EXTERNAL] Re: Re: Re: [PATCH] rdma/siw: avoid

>smp_store_mb() on a u64

>

>On Fri, Jul 12, 2019 at 02:35:50PM +0000, Bernard Metzler wrote:

>

>> >This looks wrong to me.. a userspace notification re-arm cannot be

>> >lost, so have a split READ/TEST/WRITE sequence can't possibly

>work?

>> >

>> >I'd expect an atomic test and clear here?

>> 

>> We cannot avoid the case that the application re-arms the

>> CQ only after a CQE got placed. That is why folks are polling the

>> CQ once after re-arming it - to make sure they do not miss the

>> very last and single CQE which would have produced a CQ event.

>

>That is different, that is re-arm happing after a CQE placement and

>this can't be fixed.

>

>What I said is that a re-arm from userspace cannot be lost. So you

>can't blindly clear the arm flag with the WRITE_ONCE. It might be OK

>beacuse of the if, but...

>

>It is just goofy to write it without a 'test and clear' atomic. If

>the

>writer side consumes the notify it should always be done atomically.

>

Hmmm, I don't yet get why we should test and clear atomically, if we
clear anyway - is it because we want to avoid clearing a re-arm which
happens just after testing and before clearing?
(1) If the test was positive, we will call the CQ event handler,
and per RDMA verbs spec, the application MUST re-arm the CQ after it
got a CQ event, to get another one. So clearing it sometimes before
calling the handler is right.
(2) If the test was negative, a test and reset would not change
anything.

Another complication -- test_and_set_bit() operates on a single
bit, but we have to test two bits, and reset both, if one is
set. Can we do that atomically, if we test the bits conditionally?
I didn't find anything appropriate.

>And then I think all the weird barriers go away

>

>> >> @@ -1141,11 +1145,17 @@ int siw_req_notify_cq(struct ib_cq

>> >*base_cq, enum ib_cq_notify_flags flags)

>> >>  	siw_dbg_cq(cq, "flags: 0x%02x\n", flags);

>> >>  

>> >>  	if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)

>> >> -		/* CQ event for next solicited completion */

>> >> -		smp_store_mb(*cq->notify, SIW_NOTIFY_SOLICITED);

>> >> +		/*

>> >> +		 * Enable CQ event for next solicited completion.

>> >> +		 * and make it visible to all associated producers.

>> >> +		 */

>> >> +		smp_store_mb(cq->notify->flags, SIW_NOTIFY_SOLICITED);

>> >

>> >But what is the 2nd piece of data to motivate the smp_store_mb?

>> 

>> Another core (such as a concurrent RX operation) shall see this

>> CQ being re-armed asap.

>

>'ASAP' is not a '2nd piece of data'. 

>

>AFAICT this requirement is just a normal atomic set_bit which does

>also expedite making the change visible?


Absolutely!!
good point....this is just a single flag we are operating on.
And the weird barrier goes away ;)

Many thanks!
Bernard.
Jason Gunthorpe July 12, 2019, 3:32 p.m. UTC | #14
On Fri, Jul 12, 2019 at 03:24:09PM +0000, Bernard Metzler wrote:
> 

> >To: "Bernard Metzler" <BMT@zurich.ibm.com>

> >From: "Jason Gunthorpe" <jgg@ziepe.ca>

> >Date: 07/12/2019 04:43PM

> >Cc: "Arnd Bergmann" <arnd@arndb.de>, "Doug Ledford"

> ><dledford@redhat.com>, "Peter Zijlstra" <peterz@infradead.org>,

> >linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org

> >Subject: [EXTERNAL] Re: Re: Re: [PATCH] rdma/siw: avoid

> >smp_store_mb() on a u64

> >

> >On Fri, Jul 12, 2019 at 02:35:50PM +0000, Bernard Metzler wrote:

> >

> >> >This looks wrong to me.. a userspace notification re-arm cannot be

> >> >lost, so have a split READ/TEST/WRITE sequence can't possibly

> >work?

> >> >

> >> >I'd expect an atomic test and clear here?

> >> 

> >> We cannot avoid the case that the application re-arms the

> >> CQ only after a CQE got placed. That is why folks are polling the

> >> CQ once after re-arming it - to make sure they do not miss the

> >> very last and single CQE which would have produced a CQ event.

> >

> >That is different, that is re-arm happing after a CQE placement and

> >this can't be fixed.

> >

> >What I said is that a re-arm from userspace cannot be lost. So you

> >can't blindly clear the arm flag with the WRITE_ONCE. It might be OK

> >beacuse of the if, but...

> >

> >It is just goofy to write it without a 'test and clear' atomic. If

> >the

> >writer side consumes the notify it should always be done atomically.

> >

> Hmmm, I don't yet get why we should test and clear atomically, if we

> clear anyway - is it because we want to avoid clearing a re-arm which

> happens just after testing and before clearing?


It is just clearer as to the intent.. 

Are you trying to optimize away an atomic or something? That might
work better as a dual counter scheme.

> Another complication -- test_and_set_bit() operates on a single

> bit, but we have to test two bits, and reset both, if one is

> set.


Why are two bits needed to represent armed and !armed?

Jason
Peter Zijlstra July 12, 2019, 4:12 p.m. UTC | #15
On Fri, Jul 12, 2019 at 03:24:09PM +0000, Bernard Metzler wrote:
> -----"Jason Gunthorpe" <jgg@ziepe.ca> wrote: -----


> Hmmm, I don't yet get why we should test and clear atomically, if we

> clear anyway - is it because we want to avoid clearing a re-arm which

> happens just after testing and before clearing?

> (1) If the test was positive, we will call the CQ event handler,

> and per RDMA verbs spec, the application MUST re-arm the CQ after it

> got a CQ event, to get another one. So clearing it sometimes before

> calling the handler is right.

> (2) If the test was negative, a test and reset would not change

> anything.

> 

> Another complication -- test_and_set_bit() operates on a single

> bit, but we have to test two bits, and reset both, if one is

> set. Can we do that atomically, if we test the bits conditionally?

> I didn't find anything appropriate.


There's cmpxchg() loops that can do that.

	unsigned int new, val = atomic_read(&var);
	do {
		if (!(val & MY_TWO_BITS))
			break;

		new = val | MY_TWO_BITS;
	} while (!atomic_try_cmpxchg(&var, &val, new));

only problem is you probably shouldn't share atomic_t with userspace,
unless you put conditions on what archs you support.

> >And then I think all the weird barriers go away

> >

> >> >> @@ -1141,11 +1145,17 @@ int siw_req_notify_cq(struct ib_cq

> >> >*base_cq, enum ib_cq_notify_flags flags)

> >> >>  	siw_dbg_cq(cq, "flags: 0x%02x\n", flags);

> >> >>  

> >> >>  	if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)

> >> >> -		/* CQ event for next solicited completion */

> >> >> -		smp_store_mb(*cq->notify, SIW_NOTIFY_SOLICITED);

> >> >> +		/*

> >> >> +		 * Enable CQ event for next solicited completion.

> >> >> +		 * and make it visible to all associated producers.

> >> >> +		 */

> >> >> +		smp_store_mb(cq->notify->flags, SIW_NOTIFY_SOLICITED);

> >> >

> >> >But what is the 2nd piece of data to motivate the smp_store_mb?

> >> 

> >> Another core (such as a concurrent RX operation) shall see this

> >> CQ being re-armed asap.

> >

> >'ASAP' is not a '2nd piece of data'. 

> >

> >AFAICT this requirement is just a normal atomic set_bit which does

> >also expedite making the change visible?

> 

> Absolutely!!

> good point....this is just a single flag we are operating on.

> And the weird barrier goes away ;)


atomic ops don't expedite anything, and memory barriers don't make
things happen asap.

That is; the stores from atomic ops can stay in store buffers just like
any other store, and memory barriers don't flush store buffers, they
only impose order between memops.
Bernard Metzler July 12, 2019, 5:40 p.m. UTC | #16
-----"Jason Gunthorpe" <jgg@ziepe.ca> wrote: -----

>To: "Bernard Metzler" <BMT@zurich.ibm.com>

>From: "Jason Gunthorpe" <jgg@ziepe.ca>

>Date: 07/12/2019 05:33PM

>Cc: "Arnd Bergmann" <arnd@arndb.de>, "Doug Ledford"

><dledford@redhat.com>, "Peter Zijlstra" <peterz@infradead.org>,

>linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org

>Subject: [EXTERNAL] Re: Re: Re: Re: [PATCH] rdma/siw: avoid

>smp_store_mb() on a u64

>

>On Fri, Jul 12, 2019 at 03:24:09PM +0000, Bernard Metzler wrote:

>> 

>> >To: "Bernard Metzler" <BMT@zurich.ibm.com>

>> >From: "Jason Gunthorpe" <jgg@ziepe.ca>

>> >Date: 07/12/2019 04:43PM

>> >Cc: "Arnd Bergmann" <arnd@arndb.de>, "Doug Ledford"

>> ><dledford@redhat.com>, "Peter Zijlstra" <peterz@infradead.org>,

>> >linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org

>> >Subject: [EXTERNAL] Re: Re: Re: [PATCH] rdma/siw: avoid

>> >smp_store_mb() on a u64

>> >

>> >On Fri, Jul 12, 2019 at 02:35:50PM +0000, Bernard Metzler wrote:

>> >

>> >> >This looks wrong to me.. a userspace notification re-arm cannot

>be

>> >> >lost, so have a split READ/TEST/WRITE sequence can't possibly

>> >work?

>> >> >

>> >> >I'd expect an atomic test and clear here?

>> >> 

>> >> We cannot avoid the case that the application re-arms the

>> >> CQ only after a CQE got placed. That is why folks are polling

>the

>> >> CQ once after re-arming it - to make sure they do not miss the

>> >> very last and single CQE which would have produced a CQ event.

>> >

>> >That is different, that is re-arm happing after a CQE placement

>and

>> >this can't be fixed.

>> >

>> >What I said is that a re-arm from userspace cannot be lost. So you

>> >can't blindly clear the arm flag with the WRITE_ONCE. It might be

>OK

>> >beacuse of the if, but...

>> >

>> >It is just goofy to write it without a 'test and clear' atomic. If

>> >the

>> >writer side consumes the notify it should always be done

>atomically.

>> >

>> Hmmm, I don't yet get why we should test and clear atomically, if

>we

>> clear anyway - is it because we want to avoid clearing a re-arm

>which

>> happens just after testing and before clearing?

>

>It is just clearer as to the intent.. 

>

>Are you trying to optimize away an atomic or something? That might

>work better as a dual counter scheme.

>

>> Another complication -- test_and_set_bit() operates on a single

>> bit, but we have to test two bits, and reset both, if one is

>> set.

>

>Why are two bits needed to represent armed and !armed?

>

>Jason


It is because there are two levels a CQ can be armed:

       #include <infiniband/verbs.h>

       int ibv_req_notify_cq(struct ibv_cq *cq, int solicited_only);

If we kick the CQ handler, we have to clear the whole
thing. The user later again decides how he wants to get it
re-armed...SOLICITED completions only, or ALL signaled.

Best,
Bernard.

>

>
Jason Gunthorpe July 12, 2019, 5:45 p.m. UTC | #17
On Fri, Jul 12, 2019 at 05:40:43PM +0000, Bernard Metzler wrote:
 
> It is because there are two levels a CQ can be armed:

> 

>        #include <infiniband/verbs.h>

> 

>        int ibv_req_notify_cq(struct ibv_cq *cq, int solicited_only);

> 

> If we kick the CQ handler, we have to clear the whole

> thing. The user later again decides how he wants to get it

> re-armed...SOLICITED completions only, or ALL signaled.


Arrange it so only one of the two bits is ever set and do two
test-and-set bits when a SOLICITED CQE comes in?

Jason
Bernard Metzler July 12, 2019, 6:06 p.m. UTC | #18
-----"Jason Gunthorpe" <jgg@ziepe.ca> wrote: -----

>To: "Bernard Metzler" <BMT@zurich.ibm.com>

>From: "Jason Gunthorpe" <jgg@ziepe.ca>

>Date: 07/12/2019 07:45PM

>Cc: "Arnd Bergmann" <arnd@arndb.de>, "Doug Ledford"

><dledford@redhat.com>, "Peter Zijlstra" <peterz@infradead.org>,

>linux-rdma@vger.kernel.org, linux-kernel@vger.kernel.org

>Subject: [EXTERNAL] Re: Re: Re: Re: Re: [PATCH] rdma/siw: avoid

>smp_store_mb() on a u64

>

>On Fri, Jul 12, 2019 at 05:40:43PM +0000, Bernard Metzler wrote:

> 

>> It is because there are two levels a CQ can be armed:

>> 

>>        #include <infiniband/verbs.h>

>> 

>>        int ibv_req_notify_cq(struct ibv_cq *cq, int

>solicited_only);

>> 

>> If we kick the CQ handler, we have to clear the whole

>> thing. The user later again decides how he wants to get it

>> re-armed...SOLICITED completions only, or ALL signaled.

>

>Arrange it so only one of the two bits is ever set and do two

>test-and-set bits when a SOLICITED CQE comes in?

>

right, but that's too easy ;)
I'll probably do it along those lines.



Many thanks,
Bernard.
Arnd Bergmann July 12, 2019, 8:24 p.m. UTC | #19
On Fri, Jul 12, 2019 at 5:14 PM Jason Gunthorpe <jgg@ziepe.ca> wrote:
> On Fri, Jul 12, 2019 at 03:22:35PM +0200, Arnd Bergmann wrote:

> > On Fri, Jul 12, 2019 at 3:05 PM Bernard Metzler <BMT@zurich.ibm.com> wrote:

>

> Since this is coming up so late in the merge window, I'm inclined to

> take the simple path while Bernard makes a complete solution

> here. What do you think Arnd?


I don't see a problem either way, since it's a new driver, so both
disabling it on 32-bit and applying a fix that might cause something
to break is not going to make it worse for anyone compared to the
linux-5.2 release.

      Arnd
Jason Gunthorpe July 25, 2019, 5:23 p.m. UTC | #20
On Fri, Jul 12, 2019 at 10:51:23AM +0200, Arnd Bergmann wrote:
> The new siw driver fails to build on i386 with

> 

> drivers/infiniband/sw/siw/siw_qp.c:1025:3: error: invalid output size for constraint '+q'

>                 smp_store_mb(*cq->notify, SIW_NOTIFY_NOT);

>                 ^

> include/asm-generic/barrier.h:141:35: note: expanded from macro 'smp_store_mb'

>  #define smp_store_mb(var, value)  __smp_store_mb(var, value)

>                                   ^

> arch/x86/include/asm/barrier.h:65:47: note: expanded from macro '__smp_store_mb'

>  #define __smp_store_mb(var, value) do { (void)xchg(&var, value); } while (0)

>                                               ^

> include/asm-generic/atomic-instrumented.h:1648:2: note: expanded from macro 'xchg'

>         arch_xchg(__ai_ptr, __VA_ARGS__);                               \

>         ^

> arch/x86/include/asm/cmpxchg.h:78:27: note: expanded from macro 'arch_xchg'

>  #define arch_xchg(ptr, v)       __xchg_op((ptr), (v), xchg, "")

>                                 ^

> arch/x86/include/asm/cmpxchg.h:48:19: note: expanded from macro '__xchg_op'

>                                       : "+q" (__ret), "+m" (*(ptr))     \

>                                               ^

> drivers/infiniband/sw/siw/siw_qp.o: In function `siw_sqe_complete':

> siw_qp.c:(.text+0x1450): undefined reference to `__xchg_wrong_size'

> drivers/infiniband/sw/siw/siw_qp.o: In function `siw_rqe_complete':

> siw_qp.c:(.text+0x15b0): undefined reference to `__xchg_wrong_size'

> drivers/infiniband/sw/siw/siw_verbs.o: In function `siw_req_notify_cq':

> siw_verbs.c:(.text+0x18ff): undefined reference to `__xchg_wrong_size'

> 

> Since smp_store_mb() has to be an atomic store, but the architecture

> can only do this on 32-bit quantities or smaller, but 'cq->notify'

> is a 64-bit word.

> 

> Apparently the smp_store_mb() is paired with a READ_ONCE() here, which

> seems like an odd choice because there is only a barrier on the writer

> side and not the reader, and READ_ONCE() is already not atomic on

> quantities larger than a CPU register.

> 

> I suspect it is sufficient to use the (possibly nonatomic) WRITE_ONCE()

> and an SMP memory barrier here. If it does need to be atomic as well

> as 64-bit quantities, using an atomic64_set_release()/atomic64_read_acquire()

> may be a better choice.

> 

> Fixes: 303ae1cdfdf7 ("rdma/siw: application interface")

> Fixes: f29dd55b0236 ("rdma/siw: queue pair methods")

> Cc: Peter Zijlstra <peterz@infradead.org>

> Signed-off-by: Arnd Bergmann <arnd@arndb.de>

> ---

>  drivers/infiniband/sw/siw/siw_qp.c    | 4 +++-

>  drivers/infiniband/sw/siw/siw_verbs.c | 5 +++--

>  2 files changed, 6 insertions(+), 3 deletions(-)


Bernard, please send at patch for whatever solution we settled on
against 5.3-rc1

Thanks,
Jason
diff mbox series

Patch

diff --git a/drivers/infiniband/sw/siw/siw_qp.c b/drivers/infiniband/sw/siw/siw_qp.c
index 11383d9f95ef..a2c08f17f13d 100644
--- a/drivers/infiniband/sw/siw/siw_qp.c
+++ b/drivers/infiniband/sw/siw/siw_qp.c
@@ -1016,13 +1016,15 @@  static bool siw_cq_notify_now(struct siw_cq *cq, u32 flags)
 	if (!cq->base_cq.comp_handler)
 		return false;
 
+	smp_rmb();
 	cq_notify = READ_ONCE(*cq->notify);
 
 	if ((cq_notify & SIW_NOTIFY_NEXT_COMPLETION) ||
 	    ((cq_notify & SIW_NOTIFY_SOLICITED) &&
 	     (flags & SIW_WQE_SOLICITED))) {
 		/* dis-arm CQ */
-		smp_store_mb(*cq->notify, SIW_NOTIFY_NOT);
+		WRITE_ONCE(*cq->notify, SIW_NOTIFY_NOT);
+		smp_wmb();
 
 		return true;
 	}
diff --git a/drivers/infiniband/sw/siw/siw_verbs.c b/drivers/infiniband/sw/siw/siw_verbs.c
index 32dc79d0e898..41c5ab293fe1 100644
--- a/drivers/infiniband/sw/siw/siw_verbs.c
+++ b/drivers/infiniband/sw/siw/siw_verbs.c
@@ -1142,10 +1142,11 @@  int siw_req_notify_cq(struct ib_cq *base_cq, enum ib_cq_notify_flags flags)
 
 	if ((flags & IB_CQ_SOLICITED_MASK) == IB_CQ_SOLICITED)
 		/* CQ event for next solicited completion */
-		smp_store_mb(*cq->notify, SIW_NOTIFY_SOLICITED);
+		WRITE_ONCE(*cq->notify, SIW_NOTIFY_SOLICITED);
 	else
 		/* CQ event for any signalled completion */
-		smp_store_mb(*cq->notify, SIW_NOTIFY_ALL);
+		WRITE_ONCE(*cq->notify, SIW_NOTIFY_ALL);
+	smp_wmb();
 
 	if (flags & IB_CQ_REPORT_MISSED_EVENTS)
 		return cq->cq_put - cq->cq_get;