diff mbox series

Trial of conflict resolution of Alan's patch

Message ID fa35db31-7a73-4865-1505-5378dcb2a0f8@gmail.com
State New
Headers show
Series Trial of conflict resolution of Alan's patch | expand

Commit Message

Akira Yokosawa Feb. 15, 2018, 3:58 p.m. UTC
From 81bd6374057f0d89894ead482b870e2f001d2998 Mon Sep 17 00:00:00 2001
From: Alan Stern <stern@rowland.harvard.edu>

Date: Fri, 16 Feb 2018 00:29:56 +0900
Subject: [PATCH] [TENTATIVE] Trial conflict resolution of Alan's patch

This is a trial of conflict resolution of the patch Alan provided.

Alan's message and original patch:

    Here's the relevant patch.  It may need some manual adjustment, because
    it was not made against the files currently in the repository.

    Alan

    diff --git a/linux-kernel-hardware.cat b/linux-kernel-hardware.cat
    Index: memory-model/linux-kernel-hardware.cat
    ===================================================================
    --- memory-model.orig/linux-kernel-hardware.cat
    +++ memory-model/linux-kernel-hardware.cat
    @@ -31,7 +31,7 @@ let strong-fence = mb | gp
     (* Release Acquire *)
     let acq-po = [Acquire] ; po ; [M]
     let po-rel = [M] ; po ; [Release]
    -let rfi-rel-acq = [Release] ; rfi ; [Acquire]
    +let rel-rf-acq-po = [Release] ; rf ; [Acquire] ; po

     (**********************************)
     (* Fundamental coherence ordering *)
    @@ -57,13 +57,13 @@ let to-w = rwdep | addrpo | (overwrite &
     let rdw = po-loc & (fre ; rfe)
     let detour = po-loc & (coe ; rfe)
     let rrdep = addr | (dep ; rfi)
    -let to-r = rrdep | rfi-rel-acq
    -let fence = strong-fence | wmb | po-rel | rmb | acq-po
    +let to-r = rrdep
    +let fence = strong-fence | wmb | po-rel | rmb | acq-po | (rel-rf-acq-po & int)
     let ppo = to-r | to-w | fence | rdw | detour

     (* Happens Before *)
     let A-cumul(r) = rfe? ; r
    -let cumul-fence = A-cumul(strong-fence | po-rel) | wmb
    +let cumul-fence = A-cumul(strong-fence | po-rel) | wmb | rel-rf-acq-po

     let rec prop = (overwrite & ext)? ; cumul-fence ; hb*
         and hb = ppo | rfe | ((hb* ; prop) & int)
    Index: memory-model/linux-kernel.cat
    ===================================================================
    --- memory-model.orig/linux-kernel.cat
    +++ memory-model/linux-kernel.cat
    @@ -31,7 +31,7 @@ let strong-fence = mb | gp
     (* Release Acquire *)
     let acq-po = [Acquire] ; po ; [M]
     let po-rel = [M] ; po ; [Release]
    -let rfi-rel-acq = [Release] ; rfi ; [Acquire]
    +let rel-rf-acq-po = [Release] ; rf ; [Acquire] ; po

     (**********************************)
     (* Fundamental coherence ordering *)
    @@ -54,13 +54,13 @@ let rwdep = (dep | ctrl) ; [W]
     let overwrite = co | fr
     let to-w = rwdep | (overwrite & int)
     let rrdep = addr | (dep ; rfi)
    -let to-r = rrdep | rfi-rel-acq
    -let fence = strong-fence | wmb | po-rel | rmb | acq-po
    +let to-r = rrdep
    +let fence = strong-fence | wmb | po-rel | rmb | acq-po | (rel-rf-acq-po & int)
     let ppo = to-r | to-w | fence

     (* Propagation: Ordering from release operations and strong fences. *)
     let A-cumul(r) = rfe? ; r
    -let cumul-fence = A-cumul(strong-fence | po-rel) | wmb
    +let cumul-fence = A-cumul(strong-fence | po-rel) | wmb | rel-rf-acq-po
     let prop = (overwrite & ext)? ; cumul-fence* ; rfe?

     (*

Signed-off-by: Alan Stern <stern@rowland.harvard.edu>

[akiyks: Rebased to current master]
Signed-off-by: Akira Yokosawa <akiyks@gmail.com>

---
So, I attempted to rebase the patch to current (somewhat old) master of
https://github.com/aparri/memory-model. Why? Because the lkmm branch
in Paul's -rcu tree doesn't have linux-kernel-hardware.cat.

However, after this change, Z6.0+pooncelock+pooncelock+pombonce still
has the result "Sometimes". I must have done something wrong in the
conflict resolution.

Note: I have almost no idea what this patch is doing. I'm just hoping
to give a starting point of a discussion.

    Thanks, Akira
--
 linux-kernel-hardware.cat | 9 ++++-----
 linux-kernel.cat          | 9 ++++-----
 2 files changed, 8 insertions(+), 10 deletions(-)

-- 
2.7.4

Comments

Alan Stern Feb. 15, 2018, 5:51 p.m. UTC | #1
On Fri, 16 Feb 2018, Akira Yokosawa wrote:

> So, I attempted to rebase the patch to current (somewhat old) master of

> https://github.com/aparri/memory-model. Why? Because the lkmm branch

> in Paul's -rcu tree doesn't have linux-kernel-hardware.cat.

> 

> However, after this change, Z6.0+pooncelock+pooncelock+pombonce still

> has the result "Sometimes". I must have done something wrong in the

> conflict resolution.

> 

> Note: I have almost no idea what this patch is doing. I'm just hoping

> to give a starting point of a discussion.


Yes, that litmus test gives "Sometimes" both with and without the 
patch.  But consider instead this slightly changed version of that 
test, in which P2 reads Z instead of writing it:

C Z6.0-variant

{}

P0(int *x, int *y, spinlock_t *mylock)
{
	spin_lock(mylock);
	WRITE_ONCE(*x, 1);
	WRITE_ONCE(*y, 1);
	spin_unlock(mylock);
}

P1(int *y, int *z, spinlock_t *mylock)
{
	int r0;

	spin_lock(mylock);
	r0 = READ_ONCE(*y);
	WRITE_ONCE(*z, 1);
	spin_unlock(mylock);
}

P2(int *x, int *z)
{
	int r1;
	int r2;

	r2 = READ_ONCE(*z);
	smp_mb();
	r1 = READ_ONCE(*x);
}

exists (1:r0=1 /\ 2:r2=1 /\ 2:r1=0)

Without the patch, this test gives "Sometimes"; with the patch it gives 
"Never".  That is what I thought Paul was talking about originally.  

Sorry if my misunderstanding caused too much confusion for other 
people.

Alan
Paul E. McKenney Feb. 15, 2018, 7:29 p.m. UTC | #2
On Thu, Feb 15, 2018 at 12:51:56PM -0500, Alan Stern wrote:
> On Fri, 16 Feb 2018, Akira Yokosawa wrote:

> 

> > So, I attempted to rebase the patch to current (somewhat old) master of

> > https://github.com/aparri/memory-model. Why? Because the lkmm branch

> > in Paul's -rcu tree doesn't have linux-kernel-hardware.cat.

> > 

> > However, after this change, Z6.0+pooncelock+pooncelock+pombonce still

> > has the result "Sometimes". I must have done something wrong in the

> > conflict resolution.

> > 

> > Note: I have almost no idea what this patch is doing. I'm just hoping

> > to give a starting point of a discussion.

> 

> Yes, that litmus test gives "Sometimes" both with and without the 

> patch.  But consider instead this slightly changed version of that 

> test, in which P2 reads Z instead of writing it:

> 

> C Z6.0-variant

> 

> {}

> 

> P0(int *x, int *y, spinlock_t *mylock)

> {

> 	spin_lock(mylock);

> 	WRITE_ONCE(*x, 1);

> 	WRITE_ONCE(*y, 1);

> 	spin_unlock(mylock);

> }

> 

> P1(int *y, int *z, spinlock_t *mylock)

> {

> 	int r0;

> 

> 	spin_lock(mylock);

> 	r0 = READ_ONCE(*y);

> 	WRITE_ONCE(*z, 1);

> 	spin_unlock(mylock);

> }

> 

> P2(int *x, int *z)

> {

> 	int r1;

> 	int r2;

> 

> 	r2 = READ_ONCE(*z);

> 	smp_mb();

> 	r1 = READ_ONCE(*x);

> }

> 

> exists (1:r0=1 /\ 2:r2=1 /\ 2:r1=0)

> 

> Without the patch, this test gives "Sometimes"; with the patch it gives 

> "Never".  That is what I thought Paul was talking about originally.  

> 

> Sorry if my misunderstanding caused too much confusion for other 

> people.


Ah, I did indeed get confused.  I have changed the "Result:" for
Z6.0+pooncelock+pooncelock+pombonce.litmus back to "Never", as in
the patch below (which I merged into the patch adding all the
comments).

I have added the above test as ISA2+pooncelock+pooncelock+pombonce.litmus,
with the Result: of Sometimes with you (Alan) as author and with your
Signed-off-by -- please let me know if you would prefer some other
approach.

Please change the Result: when sending the proposed patch.  Or please let
me know if you would like me to apply the forward-port that Akira sent,
in which case I will add the Result: change to that patch.  Or for that
matter, Akira might repost his forward-port of your patch with this change.

							Thanx, Paul

------------------------------------------------------------------------

commit b2950420e1154131c0667f1ac58666bad3a06a69
Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Date:   Thu Feb 15 10:35:25 2018 -0800

    fixup! EXP litmus_tests:  Add comments explaining tests' purposes
    
    Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>


diff --git a/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus b/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus
index fad47258a3e3..95890669859b 100644
--- a/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus
+++ b/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus
@@ -1,7 +1,7 @@
 C Z6.0+pooncelock+pooncelock+pombonce
 
 (*
- * Result: Never
+ * Result: Somtimes
  *
  * This example demonstrates that a pair of accesses made by different
  * processes each while holding a given lock will not necessarily be
Akira Yokosawa Feb. 15, 2018, 9:51 p.m. UTC | #3
On 2018/02/15 11:29:14 -0800, Paul E. McKenney wrote:
> On Thu, Feb 15, 2018 at 12:51:56PM -0500, Alan Stern wrote:

>> On Fri, 16 Feb 2018, Akira Yokosawa wrote:

>>

>>> So, I attempted to rebase the patch to current (somewhat old) master of

>>> https://github.com/aparri/memory-model. Why? Because the lkmm branch

>>> in Paul's -rcu tree doesn't have linux-kernel-hardware.cat.

>>>

>>> However, after this change, Z6.0+pooncelock+pooncelock+pombonce still

>>> has the result "Sometimes". I must have done something wrong in the

>>> conflict resolution.

>>>

>>> Note: I have almost no idea what this patch is doing. I'm just hoping

>>> to give a starting point of a discussion.

>>

>> Yes, that litmus test gives "Sometimes" both with and without the 

>> patch.  But consider instead this slightly changed version of that 

>> test, in which P2 reads Z instead of writing it:

>>

>> C Z6.0-variant

>>

>> {}

>>

>> P0(int *x, int *y, spinlock_t *mylock)

>> {

>> 	spin_lock(mylock);

>> 	WRITE_ONCE(*x, 1);

>> 	WRITE_ONCE(*y, 1);

>> 	spin_unlock(mylock);

>> }

>>

>> P1(int *y, int *z, spinlock_t *mylock)

>> {

>> 	int r0;

>>

>> 	spin_lock(mylock);

>> 	r0 = READ_ONCE(*y);

>> 	WRITE_ONCE(*z, 1);

>> 	spin_unlock(mylock);

>> }

>>

>> P2(int *x, int *z)

>> {

>> 	int r1;

>> 	int r2;

>>

>> 	r2 = READ_ONCE(*z);

>> 	smp_mb();

>> 	r1 = READ_ONCE(*x);

>> }

>>

>> exists (1:r0=1 /\ 2:r2=1 /\ 2:r1=0)

>>

>> Without the patch, this test gives "Sometimes"; with the patch it gives 

>> "Never".  That is what I thought Paul was talking about originally.  

>>

>> Sorry if my misunderstanding caused too much confusion for other 

>> people.

> 

> Ah, I did indeed get confused.  I have changed the "Result:" for

> Z6.0+pooncelock+pooncelock+pombonce.litmus back to "Never", as in

> the patch below (which I merged into the patch adding all the

> comments).

> 

> I have added the above test as ISA2+pooncelock+pooncelock+pombonce.litmus,

> with the Result: of Sometimes with you (Alan) as author and with your

> Signed-off-by -- please let me know if you would prefer some other

> approach.

> 

> Please change the Result: when sending the proposed patch.  Or please let

> me know if you would like me to apply the forward-port that Akira sent,

> in which case I will add the Result: change to that patch.  Or for that

> matter, Akira might repost his forward-port of your patch with this change.

> 


Hi,

My forward-port patch doesn't apply to the "lkmm" branch.
It looks like "linux-kernel-hardware.cat" is intentionally omitted there.
Am I guessing right?

If this is the case, I can prepare a patch to be applied to "lkmm".
But I can't compose a proper change log. So I'd like Alan to post
a patch with my SOB appended. Does this approach sound reasonable?

      Thanks, Akira

> 							Thanx, Paul

> 

> ------------------------------------------------------------------------

> 

> commit b2950420e1154131c0667f1ac58666bad3a06a69

> Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

> Date:   Thu Feb 15 10:35:25 2018 -0800

> 

>     fixup! EXP litmus_tests:  Add comments explaining tests' purposes

>     

>     Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

> 

> diff --git a/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus b/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus

> index fad47258a3e3..95890669859b 100644

> --- a/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus

> +++ b/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus

> @@ -1,7 +1,7 @@

>  C Z6.0+pooncelock+pooncelock+pombonce

>  

>  (*

> - * Result: Never

> + * Result: Somtimes

>   *

>   * This example demonstrates that a pair of accesses made by different

>   * processes each while holding a given lock will not necessarily be

>
Andrea Parri Feb. 15, 2018, 10:05 p.m. UTC | #4
On Thu, Feb 15, 2018 at 11:29:14AM -0800, Paul E. McKenney wrote:
> On Thu, Feb 15, 2018 at 12:51:56PM -0500, Alan Stern wrote:

> > On Fri, 16 Feb 2018, Akira Yokosawa wrote:

> > 

> > > So, I attempted to rebase the patch to current (somewhat old) master of

> > > https://github.com/aparri/memory-model. Why? Because the lkmm branch

> > > in Paul's -rcu tree doesn't have linux-kernel-hardware.cat.

> > > 

> > > However, after this change, Z6.0+pooncelock+pooncelock+pombonce still

> > > has the result "Sometimes". I must have done something wrong in the

> > > conflict resolution.

> > > 

> > > Note: I have almost no idea what this patch is doing. I'm just hoping

> > > to give a starting point of a discussion.

> > 

> > Yes, that litmus test gives "Sometimes" both with and without the 

> > patch.  But consider instead this slightly changed version of that 

> > test, in which P2 reads Z instead of writing it:

> > 

> > C Z6.0-variant

> > 

> > {}

> > 

> > P0(int *x, int *y, spinlock_t *mylock)

> > {

> > 	spin_lock(mylock);

> > 	WRITE_ONCE(*x, 1);

> > 	WRITE_ONCE(*y, 1);

> > 	spin_unlock(mylock);

> > }

> > 

> > P1(int *y, int *z, spinlock_t *mylock)

> > {

> > 	int r0;

> > 

> > 	spin_lock(mylock);

> > 	r0 = READ_ONCE(*y);

> > 	WRITE_ONCE(*z, 1);

> > 	spin_unlock(mylock);

> > }

> > 

> > P2(int *x, int *z)

> > {

> > 	int r1;

> > 	int r2;

> > 

> > 	r2 = READ_ONCE(*z);

> > 	smp_mb();

> > 	r1 = READ_ONCE(*x);

> > }

> > 

> > exists (1:r0=1 /\ 2:r2=1 /\ 2:r1=0)

> > 

> > Without the patch, this test gives "Sometimes"; with the patch it gives 

> > "Never".  That is what I thought Paul was talking about originally.  

> > 

> > Sorry if my misunderstanding caused too much confusion for other 

> > people.

> 

> Ah, I did indeed get confused.  I have changed the "Result:" for

> Z6.0+pooncelock+pooncelock+pombonce.litmus back to "Never", as in

> the patch below (which I merged into the patch adding all the

> comments).

> 

> I have added the above test as ISA2+pooncelock+pooncelock+pombonce.litmus,

> with the Result: of Sometimes with you (Alan) as author and with your

> Signed-off-by -- please let me know if you would prefer some other

> approach.

> 

> Please change the Result: when sending the proposed patch.  Or please let

> me know if you would like me to apply the forward-port that Akira sent,

> in which case I will add the Result: change to that patch.  Or for that

> matter, Akira might repost his forward-port of your patch with this change.

> 

> 							Thanx, Paul

> 

> ------------------------------------------------------------------------

> 

> commit b2950420e1154131c0667f1ac58666bad3a06a69

> Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

> Date:   Thu Feb 15 10:35:25 2018 -0800

> 

>     fixup! EXP litmus_tests:  Add comments explaining tests' purposes

>     

>     Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

> 

> diff --git a/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus b/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus

> index fad47258a3e3..95890669859b 100644

> --- a/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus

> +++ b/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus

> @@ -1,7 +1,7 @@

>  C Z6.0+pooncelock+pooncelock+pombonce

>  

>  (*

> - * Result: Never

> + * Result: Somtimes


nit: s/Somtimes/Sometimes

  Andrea


>   *

>   * This example demonstrates that a pair of accesses made by different

>   * processes each while holding a given lock will not necessarily be

>
Paul E. McKenney Feb. 15, 2018, 10:41 p.m. UTC | #5
On Thu, Feb 15, 2018 at 11:05:39PM +0100, Andrea Parri wrote:
> On Thu, Feb 15, 2018 at 11:29:14AM -0800, Paul E. McKenney wrote:

> > On Thu, Feb 15, 2018 at 12:51:56PM -0500, Alan Stern wrote:

> > > On Fri, 16 Feb 2018, Akira Yokosawa wrote:

> > > 

> > > > So, I attempted to rebase the patch to current (somewhat old) master of

> > > > https://github.com/aparri/memory-model. Why? Because the lkmm branch

> > > > in Paul's -rcu tree doesn't have linux-kernel-hardware.cat.

> > > > 

> > > > However, after this change, Z6.0+pooncelock+pooncelock+pombonce still

> > > > has the result "Sometimes". I must have done something wrong in the

> > > > conflict resolution.

> > > > 

> > > > Note: I have almost no idea what this patch is doing. I'm just hoping

> > > > to give a starting point of a discussion.

> > > 

> > > Yes, that litmus test gives "Sometimes" both with and without the 

> > > patch.  But consider instead this slightly changed version of that 

> > > test, in which P2 reads Z instead of writing it:

> > > 

> > > C Z6.0-variant

> > > 

> > > {}

> > > 

> > > P0(int *x, int *y, spinlock_t *mylock)

> > > {

> > > 	spin_lock(mylock);

> > > 	WRITE_ONCE(*x, 1);

> > > 	WRITE_ONCE(*y, 1);

> > > 	spin_unlock(mylock);

> > > }

> > > 

> > > P1(int *y, int *z, spinlock_t *mylock)

> > > {

> > > 	int r0;

> > > 

> > > 	spin_lock(mylock);

> > > 	r0 = READ_ONCE(*y);

> > > 	WRITE_ONCE(*z, 1);

> > > 	spin_unlock(mylock);

> > > }

> > > 

> > > P2(int *x, int *z)

> > > {

> > > 	int r1;

> > > 	int r2;

> > > 

> > > 	r2 = READ_ONCE(*z);

> > > 	smp_mb();

> > > 	r1 = READ_ONCE(*x);

> > > }

> > > 

> > > exists (1:r0=1 /\ 2:r2=1 /\ 2:r1=0)

> > > 

> > > Without the patch, this test gives "Sometimes"; with the patch it gives 

> > > "Never".  That is what I thought Paul was talking about originally.  

> > > 

> > > Sorry if my misunderstanding caused too much confusion for other 

> > > people.

> > 

> > Ah, I did indeed get confused.  I have changed the "Result:" for

> > Z6.0+pooncelock+pooncelock+pombonce.litmus back to "Never", as in

> > the patch below (which I merged into the patch adding all the

> > comments).

> > 

> > I have added the above test as ISA2+pooncelock+pooncelock+pombonce.litmus,

> > with the Result: of Sometimes with you (Alan) as author and with your

> > Signed-off-by -- please let me know if you would prefer some other

> > approach.

> > 

> > Please change the Result: when sending the proposed patch.  Or please let

> > me know if you would like me to apply the forward-port that Akira sent,

> > in which case I will add the Result: change to that patch.  Or for that

> > matter, Akira might repost his forward-port of your patch with this change.

> > 

> > 							Thanx, Paul

> > 

> > ------------------------------------------------------------------------

> > 

> > commit b2950420e1154131c0667f1ac58666bad3a06a69

> > Author: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

> > Date:   Thu Feb 15 10:35:25 2018 -0800

> > 

> >     fixup! EXP litmus_tests:  Add comments explaining tests' purposes

> >     

> >     Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>

> > 

> > diff --git a/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus b/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus

> > index fad47258a3e3..95890669859b 100644

> > --- a/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus

> > +++ b/tools/memory-model/litmus-tests/Z6.0+pooncelock+pooncelock+pombonce.litmus

> > @@ -1,7 +1,7 @@

> >  C Z6.0+pooncelock+pooncelock+pombonce

> >  

> >  (*

> > - * Result: Never

> > + * Result: Somtimes

> 

> nit: s/Somtimes/Sometimes


Good catch, fixed!

							Thanx, Paul

>   Andrea

> 

> 

> >   *

> >   * This example demonstrates that a pair of accesses made by different

> >   * processes each while holding a given lock will not necessarily be

> > 

>
Alan Stern Feb. 16, 2018, 3:18 p.m. UTC | #6
On Fri, 16 Feb 2018, Akira Yokosawa wrote:

> Hi,

> 

> My forward-port patch doesn't apply to the "lkmm" branch.

> It looks like "linux-kernel-hardware.cat" is intentionally omitted there.

> Am I guessing right?

> 

> If this is the case, I can prepare a patch to be applied to "lkmm".

> But I can't compose a proper change log. So I'd like Alan to post

> a patch with my SOB appended. Does this approach sound reasonable?


The patch is not yet ready to be merged.  At the very least, I need to
include an update to explanation.txt along with it.  When it is all 
ready, I will rebase it on Paul's repository and post it.

Which reminds me: Now that the material has been accepted into the 
kernel, do we need to keep the github repository?  It has the 
linux-kernel-hardware.cat file, but otherwise it seems to be redundant.

Alan
Andrea Parri Feb. 16, 2018, 3:47 p.m. UTC | #7
On Fri, Feb 16, 2018 at 10:18:34AM -0500, Alan Stern wrote:
> On Fri, 16 Feb 2018, Akira Yokosawa wrote:

> 

> > Hi,

> > 

> > My forward-port patch doesn't apply to the "lkmm" branch.

> > It looks like "linux-kernel-hardware.cat" is intentionally omitted there.

> > Am I guessing right?

> > 

> > If this is the case, I can prepare a patch to be applied to "lkmm".

> > But I can't compose a proper change log. So I'd like Alan to post

> > a patch with my SOB appended. Does this approach sound reasonable?

> 

> The patch is not yet ready to be merged.  At the very least, I need to

> include an update to explanation.txt along with it.  When it is all 

> ready, I will rebase it on Paul's repository and post it.

> 

> Which reminds me: Now that the material has been accepted into the 

> kernel, do we need to keep the github repository?  It has the 

> linux-kernel-hardware.cat file, but otherwise it seems to be redundant.


If you mean "to keep up-to-date", I'd say "No, we don't..."  ;-)

My plan/hope is to add such a disclaimer together with a pointer
to Linus's tree ASAP...

  Andrea


> 

> Alan

>
Paul E. McKenney Feb. 16, 2018, 4:53 p.m. UTC | #8
On Fri, Feb 16, 2018 at 04:47:04PM +0100, Andrea Parri wrote:
> On Fri, Feb 16, 2018 at 10:18:34AM -0500, Alan Stern wrote:

> > On Fri, 16 Feb 2018, Akira Yokosawa wrote:

> > 

> > > Hi,

> > > 

> > > My forward-port patch doesn't apply to the "lkmm" branch.

> > > It looks like "linux-kernel-hardware.cat" is intentionally omitted there.

> > > Am I guessing right?

> > > 

> > > If this is the case, I can prepare a patch to be applied to "lkmm".

> > > But I can't compose a proper change log. So I'd like Alan to post

> > > a patch with my SOB appended. Does this approach sound reasonable?

> > 

> > The patch is not yet ready to be merged.  At the very least, I need to

> > include an update to explanation.txt along with it.  When it is all 

> > ready, I will rebase it on Paul's repository and post it.

> > 

> > Which reminds me: Now that the material has been accepted into the 

> > kernel, do we need to keep the github repository?  It has the 

> > linux-kernel-hardware.cat file, but otherwise it seems to be redundant.

> 

> If you mean "to keep up-to-date", I'd say "No, we don't..."  ;-)

> 

> My plan/hope is to add such a disclaimer together with a pointer

> to Linus's tree ASAP...


I agree that the github repository is useful as a historical reference
but that we should not try to keep it up to date.  The -rc1 release
came out last Sunday, so in 7-8 weeks we will hopefully have this in
the Linux kernel.  Fingers firmly crossed and all that...

							Thanx, Paul
Paul E. McKenney Feb. 17, 2018, 12:39 a.m. UTC | #9
On Fri, Feb 16, 2018 at 05:22:55PM -0500, Alan Stern wrote:
> Since commit 76ebbe78f739 ("locking/barriers: Add implicit

> smp_read_barrier_depends() to READ_ONCE()") was merged for the 4.15

> kernel, it has not been necessary to use smp_read_barrier_depends().

> Similarly, commit 59ecbbe7b31c ("locking/barriers: Kill

> lockless_dereference()") removed lockless_dereference() from the

> kernel.

> 

> Since these primitives are no longer part of the kernel, they do not

> belong in the Linux Kernel Memory Consistency Model.  This patch

> removes them, along with the internal rb-dep relation, and updates the

> revelant documentation.

> 

> Signed-off-by: Alan Stern <stern@rowland.harvard.edu>


I queued this, but would welcome an update that addressed Akira's
feedback as appropriate.

							Thanx, Paul

> ---

> 

> Index: usb-4.x/tools/memory-model/linux-kernel.cat

> ===================================================================

> --- usb-4.x/tools/memory-model.orig/linux-kernel.cat

> +++ usb-4.x/tools/memory-model/linux-kernel.cat

> @@ -25,7 +25,6 @@ include "lock.cat"

>  (*******************)

> 

>  (* Fences *)

> -let rb-dep = [R] ; fencerel(Rb_dep) ; [R]

>  let rmb = [R \ Noreturn] ; fencerel(Rmb) ; [R \ Noreturn]

>  let wmb = [W] ; fencerel(Wmb) ; [W]

>  let mb = ([M] ; fencerel(Mb) ; [M]) |

> @@ -61,11 +60,9 @@ let dep = addr | data

>  let rwdep = (dep | ctrl) ; [W]

>  let overwrite = co | fr

>  let to-w = rwdep | (overwrite & int)

> -let rrdep = addr | (dep ; rfi)

> -let strong-rrdep = rrdep+ & rb-dep

> -let to-r = strong-rrdep | rfi-rel-acq

> +let to-r = addr | (dep ; rfi) | rfi-rel-acq

>  let fence = strong-fence | wmb | po-rel | rmb | acq-po

> -let ppo = rrdep* ; (to-r | to-w | fence)

> +let ppo = to-r | to-w | fence

> 

>  (* Propagation: Ordering from release operations and strong fences. *)

>  let A-cumul(r) = rfe? ; r

> Index: usb-4.x/tools/memory-model/Documentation/explanation.txt

> ===================================================================

> --- usb-4.x/tools/memory-model.orig/Documentation/explanation.txt

> +++ usb-4.x/tools/memory-model/Documentation/explanation.txt

> @@ -1,5 +1,5 @@

> -Explanation of the Linux-Kernel Memory Model

> -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

> +Explanation of the Linux-Kernel Memory Consistency Model

> +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

> 

>  :Author: Alan Stern <stern@rowland.harvard.edu>

>  :Created: October 2017

> @@ -35,25 +35,24 @@ Explanation of the Linux-Kernel Memory M

>  INTRODUCTION

>  ------------

> 

> -The Linux-kernel memory model (LKMM) is rather complex and obscure.

> -This is particularly evident if you read through the linux-kernel.bell

> -and linux-kernel.cat files that make up the formal version of the

> -memory model; they are extremely terse and their meanings are far from

> -clear.

> +The Linux-kernel memory consistency model (LKMM) is rather complex and

> +obscure.  This is particularly evident if you read through the

> +linux-kernel.bell and linux-kernel.cat files that make up the formal

> +version of the model; they are extremely terse and their meanings are

> +far from clear.

> 

>  This document describes the ideas underlying the LKMM.  It is meant

> -for people who want to understand how the memory model was designed.

> -It does not go into the details of the code in the .bell and .cat

> -files; rather, it explains in English what the code expresses

> -symbolically.

> +for people who want to understand how the model was designed.  It does

> +not go into the details of the code in the .bell and .cat files;

> +rather, it explains in English what the code expresses symbolically.

> 

>  Sections 2 (BACKGROUND) through 5 (ORDERING AND CYCLES) are aimed

> -toward beginners; they explain what memory models are and the basic

> -notions shared by all such models.  People already familiar with these

> -concepts can skim or skip over them.  Sections 6 (EVENTS) through 12

> -(THE FROM_READS RELATION) describe the fundamental relations used in

> -many memory models.  Starting in Section 13 (AN OPERATIONAL MODEL),

> -the workings of the LKMM itself are covered.

> +toward beginners; they explain what memory consistency models are and

> +the basic notions shared by all such models.  People already familiar

> +with these concepts can skim or skip over them.  Sections 6 (EVENTS)

> +through 12 (THE FROM_READS RELATION) describe the fundamental

> +relations used in many models.  Starting in Section 13 (AN OPERATIONAL

> +MODEL), the workings of the LKMM itself are covered.

> 

>  Warning: The code examples in this document are not written in the

>  proper format for litmus tests.  They don't include a header line, the

> @@ -827,8 +826,8 @@ A-cumulative; they only affect the propa

>  executed on C before the fence (i.e., those which precede the fence in

>  program order).

> 

> -smp_read_barrier_depends(), rcu_read_lock(), rcu_read_unlock(), and

> -synchronize_rcu() fences have other properties which we discuss later.

> +read_lock(), rcu_read_unlock(), and synchronize_rcu() fences have

> +other properties which we discuss later.

> 

> 

>  PROPAGATION ORDER RELATION: cumul-fence

> @@ -988,8 +987,8 @@ Another possibility, not mentioned earli

>  section, is:

> 

>  	X and Y are both loads, X ->addr Y (i.e., there is an address

> -	dependency from X to Y), and an smp_read_barrier_depends()

> -	fence occurs between them.

> +	dependency from X to Y), and X is a READ_ONCE() or an atomic

> +	access.

> 

>  Dependencies can also cause instructions to be executed in program

>  order.  This is uncontroversial when the second instruction is a

> @@ -1015,9 +1014,9 @@ After all, a CPU cannot ask the memory s

>  a particular location before it knows what that location is.  However,

>  the split-cache design used by Alpha can cause it to behave in a way

>  that looks as if the loads were executed out of order (see the next

> -section for more details).  For this reason, the LKMM does not include

> -address dependencies between read events in the ppo relation unless an

> -smp_read_barrier_depends() fence is present.

> +section for more details).  The kernel includes a workaround for this

> +problem when the loads come from READ_ONCE(), and therefore the LKMM

> +includes address dependencies to loads in the ppo relation.

> 

>  On the other hand, dependencies can indirectly affect the ordering of

>  two loads.  This happens when there is a dependency from a load to a

> @@ -1114,11 +1113,12 @@ code such as the following:

>  		int *r1;

>  		int r2;

> 

> -		r1 = READ_ONCE(ptr);

> +		r1 = ptr;

>  		r2 = READ_ONCE(*r1);

>  	}

> 

> -can malfunction on Alpha systems.  It is quite possible that r1 = &x

> +can malfunction on Alpha systems (notice that P1 uses an ordinary load

> +to read ptr instead of READ_ONCE()).  It is quite possible that r1 = &x

>  and r2 = 0 at the end, in spite of the address dependency.

> 

>  At first glance this doesn't seem to make sense.  We know that the

> @@ -1141,11 +1141,15 @@ This could not have happened if the loca

>  incoming stores in FIFO order.  In constrast, other architectures

>  maintain at least the appearance of FIFO order.

> 

> -In practice, this difficulty is solved by inserting an

> -smp_read_barrier_depends() fence between P1's two loads.  The effect

> -of this fence is to cause the CPU not to execute any po-later

> -instructions until after the local cache has finished processing all

> -the stores it has already received.  Thus, if the code was changed to:

> +In practice, this difficulty is solved by inserting a special fence

> +between P1's two loads when the kernel is compiled for the Alpha

> +architecture.  In fact, as of version 4.15, the kernel automatically

> +adds this fence (called smp_read_barrier_depends() and defined as

> +nothing at all on non-Alpha builds) after every READ_ONCE() and atomic

> +load.  The effect of the fence is to cause the CPU not to execute any

> +po-later instructions until after the local cache has finished

> +processing all the stores it has already received.  Thus, if the code

> +was changed to:

> 

>  	P1()

>  	{

> @@ -1153,13 +1157,15 @@ the stores it has already received.  Thu

>  		int r2;

> 

>  		r1 = READ_ONCE(ptr);

> -		smp_read_barrier_depends();

>  		r2 = READ_ONCE(*r1);

>  	}

> 

>  then we would never get r1 = &x and r2 = 0.  By the time P1 executed

>  its second load, the x = 1 store would already be fully processed by

> -the local cache and available for satisfying the read request.

> +the local cache and available for satisfying the read request.  Thus

> +we have yet another reason why shared data should always be read with

> +READ_ONCE() or another synchronization primitive rather than accessed

> +directly.

> 

>  The LKMM requires that smp_rmb(), acquire fences, and strong fences

>  share this property with smp_read_barrier_depends(): They do not allow

> @@ -1751,11 +1757,10 @@ no further involvement from the CPU.  Si

>  the value of x, there is nothing for the smp_rmb() fence to act on.

> 

>  The LKMM defines a few extra synchronization operations in terms of

> -things we have already covered.  In particular, rcu_dereference() and

> -lockless_dereference() are both treated as a READ_ONCE() followed by

> -smp_read_barrier_depends() -- which also happens to be how they are

> -defined in include/linux/rcupdate.h and include/linux/compiler.h,

> -respectively.

> +things we have already covered.  In particular, rcu_dereference() is

> +treated as READ_ONCE() and rcu_assign_pointer() is treated as

> +smp_store_release() -- which is basically how the Linux kernel treats

> +them.

> 

>  There are a few oddball fences which need special treatment:

>  smp_mb__before_atomic(), smp_mb__after_atomic(), and

> Index: usb-4.x/tools/memory-model/linux-kernel.bell

> ===================================================================

> --- usb-4.x/tools/memory-model.orig/linux-kernel.bell

> +++ usb-4.x/tools/memory-model/linux-kernel.bell

> @@ -24,7 +24,6 @@ instructions RMW[{'once,'acquire,'releas

>  enum Barriers = 'wmb (*smp_wmb*) ||

>  		'rmb (*smp_rmb*) ||

>  		'mb (*smp_mb*) ||

> -		'rb_dep (*smp_read_barrier_depends*) ||

>  		'rcu-lock (*rcu_read_lock*)  ||

>  		'rcu-unlock (*rcu_read_unlock*) ||

>  		'sync-rcu (*synchronize_rcu*) ||

> Index: usb-4.x/tools/memory-model/linux-kernel.def

> ===================================================================

> --- usb-4.x/tools/memory-model.orig/linux-kernel.def

> +++ usb-4.x/tools/memory-model/linux-kernel.def

> @@ -13,14 +13,12 @@ WRITE_ONCE(X,V) { __store{once}(X,V); }

>  smp_store_release(X,V) { __store{release}(*X,V); }

>  smp_load_acquire(X) __load{acquire}(*X)

>  rcu_assign_pointer(X,V) { __store{release}(X,V); }

> -lockless_dereference(X) __load{lderef}(X)

>  rcu_dereference(X) __load{deref}(X)

> 

>  // Fences

>  smp_mb() { __fence{mb} ; }

>  smp_rmb() { __fence{rmb} ; }

>  smp_wmb() { __fence{wmb} ; }

> -smp_read_barrier_depends() { __fence{rb_dep}; }

>  smp_mb__before_atomic() { __fence{before-atomic} ; }

>  smp_mb__after_atomic() { __fence{after-atomic} ; }

>  smp_mb__after_spinlock() { __fence{after-spinlock} ; }

> Index: usb-4.x/tools/memory-model/Documentation/cheatsheet.txt

> ===================================================================

> --- usb-4.x/tools/memory-model.orig/Documentation/cheatsheet.txt

> +++ usb-4.x/tools/memory-model/Documentation/cheatsheet.txt

> @@ -6,8 +6,7 @@

>  Store, e.g., WRITE_ONCE()            Y                                       Y

>  Load, e.g., READ_ONCE()              Y                              Y        Y

>  Unsuccessful RMW operation           Y                              Y        Y

> -smp_read_barrier_depends()              Y                       Y   Y

> -*_dereference()                      Y                          Y   Y        Y

> +rcu_dereference()                    Y                          Y   Y        Y

>  Successful *_acquire()               R                   Y  Y   Y   Y    Y   Y

>  Successful *_release()         C        Y  Y    Y     W                      Y

>  smp_rmb()                               Y       R        Y      Y        R

> 

>
Andrea Parri Feb. 17, 2018, 3:25 a.m. UTC | #10
On Fri, Feb 16, 2018 at 05:22:55PM -0500, Alan Stern wrote:
> Since commit 76ebbe78f739 ("locking/barriers: Add implicit

> smp_read_barrier_depends() to READ_ONCE()") was merged for the 4.15

> kernel, it has not been necessary to use smp_read_barrier_depends().

> Similarly, commit 59ecbbe7b31c ("locking/barriers: Kill

> lockless_dereference()") removed lockless_dereference() from the

> kernel.

> 

> Since these primitives are no longer part of the kernel, they do not

> belong in the Linux Kernel Memory Consistency Model.  This patch

> removes them, along with the internal rb-dep relation, and updates the

> relevant documentation.

> 

> Signed-off-by: Alan Stern <stern@rowland.harvard.edu>

> 

> ---


[...]


> Index: usb-4.x/tools/memory-model/linux-kernel.def

> ===================================================================

> --- usb-4.x/tools/memory-model.orig/linux-kernel.def

> +++ usb-4.x/tools/memory-model/linux-kernel.def

> @@ -13,14 +13,12 @@ WRITE_ONCE(X,V) { __store{once}(X,V); }

>  smp_store_release(X,V) { __store{release}(*X,V); }

>  smp_load_acquire(X) __load{acquire}(*X)

>  rcu_assign_pointer(X,V) { __store{release}(X,V); }

> -lockless_dereference(X) __load{lderef}(X)

>  rcu_dereference(X) __load{deref}(X)


^^^ __load{once}


>  

>  // Fences

>  smp_mb() { __fence{mb} ; }

>  smp_rmb() { __fence{rmb} ; }

>  smp_wmb() { __fence{wmb} ; }

> -smp_read_barrier_depends() { __fence{rb_dep}; }

>  smp_mb__before_atomic() { __fence{before-atomic} ; }

>  smp_mb__after_atomic() { __fence{after-atomic} ; }

>  smp_mb__after_spinlock() { __fence{after-spinlock} ; }

> Index: usb-4.x/tools/memory-model/Documentation/cheatsheet.txt

> ===================================================================

> --- usb-4.x/tools/memory-model.orig/Documentation/cheatsheet.txt

> +++ usb-4.x/tools/memory-model/Documentation/cheatsheet.txt

> @@ -6,8 +6,7 @@

>  Store, e.g., WRITE_ONCE()            Y                                       Y

>  Load, e.g., READ_ONCE()              Y                              Y        Y

>  Unsuccessful RMW operation           Y                              Y        Y

> -smp_read_barrier_depends()              Y                       Y   Y

> -*_dereference()                      Y                          Y   Y        Y

> +rcu_dereference()                    Y                          Y   Y        Y

>  Successful *_acquire()               R                   Y  Y   Y   Y    Y   Y

>  Successful *_release()         C        Y  Y    Y     W                      Y

>  smp_rmb()                               Y       R        Y      Y        R


Akira's observation about READ_ONCE extends to all (annotated) loads.  In
fact, it also applies to loads corresponding to unsuccessful RMW operations;
consider, for example, the following variation of MP+onceassign+derefonce:

C T

{
y=z;
z=0;
}

P0(int *x, int **y)
{
	WRITE_ONCE(*x, 1);
	smp_store_release(y, x);
}

P1(int **y, int *z)
{
	int *r0;
	int r1;

	r0 = cmpxchg_relaxed(y, z, z);
	r1 = READ_ONCE(*r0);
}

exists (1:r0=x /\ 1:r1=0)

The final state is allowed w/o the patch, and forbidden w/ the patch.

This also reminds me of

   5a8897cc7631fa544d079c443800f4420d1b173f
   ("locking/atomics/alpha: Add smp_read_barrier_depends() to _release()/_relaxed() atomics")

(that we probably want to mention in the commit message).

  Andrea


> 

>
Andrea Parri Feb. 17, 2018, 3:14 p.m. UTC | #11
On Sat, Feb 17, 2018 at 04:25:35AM +0100, Andrea Parri wrote:
> On Fri, Feb 16, 2018 at 05:22:55PM -0500, Alan Stern wrote:

> > Since commit 76ebbe78f739 ("locking/barriers: Add implicit

> > smp_read_barrier_depends() to READ_ONCE()") was merged for the 4.15

> > kernel, it has not been necessary to use smp_read_barrier_depends().

> > Similarly, commit 59ecbbe7b31c ("locking/barriers: Kill

> > lockless_dereference()") removed lockless_dereference() from the

> > kernel.

> > 

> > Since these primitives are no longer part of the kernel, they do not

> > belong in the Linux Kernel Memory Consistency Model.  This patch

> > removes them, along with the internal rb-dep relation, and updates the

> > relevant documentation.

> > 

> > Signed-off-by: Alan Stern <stern@rowland.harvard.edu>

> > 

> > ---

> 

> [...]

> 

> 

> > Index: usb-4.x/tools/memory-model/linux-kernel.def

> > ===================================================================

> > --- usb-4.x/tools/memory-model.orig/linux-kernel.def

> > +++ usb-4.x/tools/memory-model/linux-kernel.def

> > @@ -13,14 +13,12 @@ WRITE_ONCE(X,V) { __store{once}(X,V); }

> >  smp_store_release(X,V) { __store{release}(*X,V); }

> >  smp_load_acquire(X) __load{acquire}(*X)

> >  rcu_assign_pointer(X,V) { __store{release}(X,V); }

> > -lockless_dereference(X) __load{lderef}(X)

> >  rcu_dereference(X) __load{deref}(X)

> 

> ^^^ __load{once}

> 

> 

> >  

> >  // Fences

> >  smp_mb() { __fence{mb} ; }

> >  smp_rmb() { __fence{rmb} ; }

> >  smp_wmb() { __fence{wmb} ; }

> > -smp_read_barrier_depends() { __fence{rb_dep}; }

> >  smp_mb__before_atomic() { __fence{before-atomic} ; }

> >  smp_mb__after_atomic() { __fence{after-atomic} ; }

> >  smp_mb__after_spinlock() { __fence{after-spinlock} ; }

> > Index: usb-4.x/tools/memory-model/Documentation/cheatsheet.txt

> > ===================================================================

> > --- usb-4.x/tools/memory-model.orig/Documentation/cheatsheet.txt

> > +++ usb-4.x/tools/memory-model/Documentation/cheatsheet.txt

> > @@ -6,8 +6,7 @@

> >  Store, e.g., WRITE_ONCE()            Y                                       Y

> >  Load, e.g., READ_ONCE()              Y                              Y        Y

> >  Unsuccessful RMW operation           Y                              Y        Y

> > -smp_read_barrier_depends()              Y                       Y   Y

> > -*_dereference()                      Y                          Y   Y        Y

> > +rcu_dereference()                    Y                          Y   Y        Y

> >  Successful *_acquire()               R                   Y  Y   Y   Y    Y   Y

> >  Successful *_release()         C        Y  Y    Y     W                      Y

> >  smp_rmb()                               Y       R        Y      Y        R

> 

> Akira's observation about READ_ONCE extends to all (annotated) loads.  In

> fact, it also applies to loads corresponding to unsuccessful RMW operations;

> consider, for example, the following variation of MP+onceassign+derefonce:

> 

> C T

> 

> {

> y=z;

> z=0;

> }

> 

> P0(int *x, int **y)

> {

> 	WRITE_ONCE(*x, 1);

> 	smp_store_release(y, x);

> }

> 

> P1(int **y, int *z)

> {

> 	int *r0;

> 	int r1;

> 

> 	r0 = cmpxchg_relaxed(y, z, z);

> 	r1 = READ_ONCE(*r0);

> }

> 

> exists (1:r0=x /\ 1:r1=0)

> 

> The final state is allowed w/o the patch, and forbidden w/ the patch.

> 

> This also reminds me of

> 

>    5a8897cc7631fa544d079c443800f4420d1b173f

>    ("locking/atomics/alpha: Add smp_read_barrier_depends() to _release()/_relaxed() atomics")

> 

> (that we probably want to mention in the commit message).


Please also notice that 5a8897cc7631f only touched alpha's atomic.h:
I see no corresponding commit/change on {,cmp}xchg.h (where the "mb"
is currently conditionally executed).

  Andrea


> 

>   Andrea

> 

> 

> > 

> >
Alan Stern Feb. 19, 2018, 5:14 p.m. UTC | #12
On Sat, 17 Feb 2018, Andrea Parri wrote:

> > Akira's observation about READ_ONCE extends to all (annotated) loads.  In

> > fact, it also applies to loads corresponding to unsuccessful RMW operations;

> > consider, for example, the following variation of MP+onceassign+derefonce:

> > 

> > C T

> > 

> > {

> > y=z;

> > z=0;

> > }

> > 

> > P0(int *x, int **y)

> > {

> > 	WRITE_ONCE(*x, 1);

> > 	smp_store_release(y, x);

> > }

> > 

> > P1(int **y, int *z)

> > {

> > 	int *r0;

> > 	int r1;

> > 

> > 	r0 = cmpxchg_relaxed(y, z, z);

> > 	r1 = READ_ONCE(*r0);

> > }

> > 

> > exists (1:r0=x /\ 1:r1=0)

> > 

> > The final state is allowed w/o the patch, and forbidden w/ the patch.

> > 

> > This also reminds me of

> > 

> >    5a8897cc7631fa544d079c443800f4420d1b173f

> >    ("locking/atomics/alpha: Add smp_read_barrier_depends() to _release()/_relaxed() atomics")

> > 

> > (that we probably want to mention in the commit message).

> 

> Please also notice that 5a8897cc7631f only touched alpha's atomic.h:

> I see no corresponding commit/change on {,cmp}xchg.h (where the "mb"

> is currently conditionally executed).


This leaves us with a question: Do we want to change the kernel by
adding memory barriers after unsuccessful RMW operations on Alpha, or
do we want to change the model by excluding such operations from
address dependencies?

Note that operations like atomic_add_unless() already include memory 
barriers.

Alan
Peter Zijlstra Feb. 19, 2018, 5:43 p.m. UTC | #13
On Mon, Feb 19, 2018 at 12:14:45PM -0500, Alan Stern wrote:
> This leaves us with a question: Do we want to change the kernel by

> adding memory barriers after unsuccessful RMW operations on Alpha, or

> do we want to change the model by excluding such operations from

> address dependencies?


I think we want to put all the 'pain and weirdness' in Alpha.
Peter Zijlstra Feb. 19, 2018, 5:44 p.m. UTC | #14
On Mon, Feb 19, 2018 at 12:14:45PM -0500, Alan Stern wrote:
> Note that operations like atomic_add_unless() already include memory 

> barriers.


It is valid for atomic_add_unless() to not imply any barriers when the
addition doesn't happen.
Paul E. McKenney Feb. 19, 2018, 7:41 p.m. UTC | #15
On Mon, Feb 19, 2018 at 12:14:45PM -0500, Alan Stern wrote:
> On Sat, 17 Feb 2018, Andrea Parri wrote:

> 

> > > Akira's observation about READ_ONCE extends to all (annotated) loads.  In

> > > fact, it also applies to loads corresponding to unsuccessful RMW operations;

> > > consider, for example, the following variation of MP+onceassign+derefonce:

> > > 

> > > C T

> > > 

> > > {

> > > y=z;

> > > z=0;

> > > }

> > > 

> > > P0(int *x, int **y)

> > > {

> > > 	WRITE_ONCE(*x, 1);

> > > 	smp_store_release(y, x);

> > > }

> > > 

> > > P1(int **y, int *z)

> > > {

> > > 	int *r0;

> > > 	int r1;

> > > 

> > > 	r0 = cmpxchg_relaxed(y, z, z);

> > > 	r1 = READ_ONCE(*r0);

> > > }

> > > 

> > > exists (1:r0=x /\ 1:r1=0)

> > > 

> > > The final state is allowed w/o the patch, and forbidden w/ the patch.

> > > 

> > > This also reminds me of

> > > 

> > >    5a8897cc7631fa544d079c443800f4420d1b173f

> > >    ("locking/atomics/alpha: Add smp_read_barrier_depends() to _release()/_relaxed() atomics")

> > > 

> > > (that we probably want to mention in the commit message).

> > 

> > Please also notice that 5a8897cc7631f only touched alpha's atomic.h:

> > I see no corresponding commit/change on {,cmp}xchg.h (where the "mb"

> > is currently conditionally executed).

> 

> This leaves us with a question: Do we want to change the kernel by

> adding memory barriers after unsuccessful RMW operations on Alpha, or

> do we want to change the model by excluding such operations from

> address dependencies?


I vote for adding the barrier on Alpha.  However, I don't know of any
code in the Linux kernel that relies on read-to-read address dependency
ordering headed by a failing RMW operation, so I don't feel all that
strongly about this.

> Note that operations like atomic_add_unless() already include memory 

> barriers.


And I don't see an atomic_add_unless_relaxed(), so we are good on this
one.  So far, anyway!  ;-)

							Thanx, Paul
Peter Zijlstra Feb. 19, 2018, 8:28 p.m. UTC | #16
On Mon, Feb 19, 2018 at 11:41:23AM -0800, Paul E. McKenney wrote:
> On Mon, Feb 19, 2018 at 12:14:45PM -0500, Alan Stern wrote:

> > This leaves us with a question: Do we want to change the kernel by

> > adding memory barriers after unsuccessful RMW operations on Alpha, or

> > do we want to change the model by excluding such operations from

> > address dependencies?

> 

> I vote for adding the barrier on Alpha.  However, I don't know of any

> code in the Linux kernel that relies on read-to-read address dependency

> ordering headed by a failing RMW operation, so I don't feel all that

> strongly about this.


Right, but not knowing doesn't mean doesn't exist, and most certainly
doesn't mean will never exist.

> > Note that operations like atomic_add_unless() already include memory 

> > barriers.

> 

> And I don't see an atomic_add_unless_relaxed(), so we are good on this

> one.  So far, anyway!  ;-)


Not the point, add_unless() is a conditional operation, and therefore
doesn't need to imply anything when failing.
Andrea Parri Feb. 20, 2018, 9:33 a.m. UTC | #17
On Mon, Feb 19, 2018 at 12:14:45PM -0500, Alan Stern wrote:
> On Sat, 17 Feb 2018, Andrea Parri wrote:

> 

> > > Akira's observation about READ_ONCE extends to all (annotated) loads.  In

> > > fact, it also applies to loads corresponding to unsuccessful RMW operations;

> > > consider, for example, the following variation of MP+onceassign+derefonce:

> > > 

> > > C T

> > > 

> > > {

> > > y=z;

> > > z=0;

> > > }

> > > 

> > > P0(int *x, int **y)

> > > {

> > > 	WRITE_ONCE(*x, 1);

> > > 	smp_store_release(y, x);

> > > }

> > > 

> > > P1(int **y, int *z)

> > > {

> > > 	int *r0;

> > > 	int r1;

> > > 

> > > 	r0 = cmpxchg_relaxed(y, z, z);

> > > 	r1 = READ_ONCE(*r0);

> > > }

> > > 

> > > exists (1:r0=x /\ 1:r1=0)

> > > 

> > > The final state is allowed w/o the patch, and forbidden w/ the patch.

> > > 

> > > This also reminds me of

> > > 

> > >    5a8897cc7631fa544d079c443800f4420d1b173f

> > >    ("locking/atomics/alpha: Add smp_read_barrier_depends() to _release()/_relaxed() atomics")

> > > 

> > > (that we probably want to mention in the commit message).

> > 

> > Please also notice that 5a8897cc7631f only touched alpha's atomic.h:

> > I see no corresponding commit/change on {,cmp}xchg.h (where the "mb"

> > is currently conditionally executed).

> 

> This leaves us with a question: Do we want to change the kernel by

> adding memory barriers after unsuccessful RMW operations on Alpha, or

> do we want to change the model by excluding such operations from

> address dependencies?


I'd like to continue to treat R[once] and R*[once] equally if possible.
Given the (unconditional) smp_read_barrier_depends in READ_ONCE and in
atomics, it seems reasonable to have it unconditionally in cmpxchg.

As with the following patch?

  Andrea

---
diff --git a/arch/alpha/include/asm/xchg.h b/arch/alpha/include/asm/xchg.h
index 68dfb3cb71454..e2660866ce972 100644
--- a/arch/alpha/include/asm/xchg.h
+++ b/arch/alpha/include/asm/xchg.h
@@ -128,10 +128,9 @@ ____xchg(, volatile void *ptr, unsigned long x, int size)
  * store NEW in MEM.  Return the initial value in MEM.  Success is
  * indicated by comparing RETURN with OLD.
  *
- * The memory barrier should be placed in SMP only when we actually
- * make the change. If we don't change anything (so if the returned
- * prev is equal to old) then we aren't acquiring anything new and
- * we don't need any memory barrier as far I can tell.
+ * The memory barrier is placed in SMP unconditionally, in order to
+ * guarantee that dependency ordering is preserved when a dependency
+ * is headed by an unsuccessful operation.
  */
 
 static inline unsigned long
@@ -150,8 +149,8 @@ ____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new)
 	"	or	%1,%2,%2\n"
 	"	stq_c	%2,0(%4)\n"
 	"	beq	%2,3f\n"
-		__ASM__MB
 	"2:\n"
+		__ASM__MB
 	".subsection 2\n"
 	"3:	br	1b\n"
 	".previous"
@@ -177,8 +176,8 @@ ____cmpxchg(_u16, volatile short *m, unsigned short old, unsigned short new)
 	"	or	%1,%2,%2\n"
 	"	stq_c	%2,0(%4)\n"
 	"	beq	%2,3f\n"
-		__ASM__MB
 	"2:\n"
+		__ASM__MB
 	".subsection 2\n"
 	"3:	br	1b\n"
 	".previous"
@@ -200,8 +199,8 @@ ____cmpxchg(_u32, volatile int *m, int old, int new)
 	"	mov %4,%1\n"
 	"	stl_c %1,%2\n"
 	"	beq %1,3f\n"
-		__ASM__MB
 	"2:\n"
+		__ASM__MB
 	".subsection 2\n"
 	"3:	br 1b\n"
 	".previous"
@@ -223,8 +222,8 @@ ____cmpxchg(_u64, volatile long *m, unsigned long old, unsigned long new)
 	"	mov %4,%1\n"
 	"	stq_c %1,%2\n"
 	"	beq %1,3f\n"
-		__ASM__MB
 	"2:\n"
+		__ASM__MB
 	".subsection 2\n"
 	"3:	br 1b\n"
 	".previous"


> 

> Note that operations like atomic_add_unless() already include memory 

> barriers.

> 

> Alan

>
Peter Zijlstra Feb. 20, 2018, 9:51 a.m. UTC | #18
On Tue, Feb 20, 2018 at 10:33:47AM +0100, Andrea Parri wrote:
> I'd like to continue to treat R[once] and R*[once] equally if possible.

> Given the (unconditional) smp_read_barrier_depends in READ_ONCE and in

> atomics, it seems reasonable to have it unconditionally in cmpxchg.

> 

> As with the following patch?

> 

>   Andrea

> 

> ---

> diff --git a/arch/alpha/include/asm/xchg.h b/arch/alpha/include/asm/xchg.h

> index 68dfb3cb71454..e2660866ce972 100644

> --- a/arch/alpha/include/asm/xchg.h

> +++ b/arch/alpha/include/asm/xchg.h

> @@ -128,10 +128,9 @@ ____xchg(, volatile void *ptr, unsigned long x, int size)

>   * store NEW in MEM.  Return the initial value in MEM.  Success is

>   * indicated by comparing RETURN with OLD.

>   *

> - * The memory barrier should be placed in SMP only when we actually

> - * make the change. If we don't change anything (so if the returned

> - * prev is equal to old) then we aren't acquiring anything new and

> - * we don't need any memory barrier as far I can tell.

> + * The memory barrier is placed in SMP unconditionally, in order to

> + * guarantee that dependency ordering is preserved when a dependency

> + * is headed by an unsuccessful operation.

>   */

>  

>  static inline unsigned long

> @@ -150,8 +149,8 @@ ____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new)

>  	"	or	%1,%2,%2\n"

>  	"	stq_c	%2,0(%4)\n"

>  	"	beq	%2,3f\n"

> -		__ASM__MB

>  	"2:\n"

> +		__ASM__MB

>  	".subsection 2\n"

>  	"3:	br	1b\n"

>  	".previous"

> @@ -177,8 +176,8 @@ ____cmpxchg(_u16, volatile short *m, unsigned short old, unsigned short new)

>  	"	or	%1,%2,%2\n"

>  	"	stq_c	%2,0(%4)\n"

>  	"	beq	%2,3f\n"

> -		__ASM__MB

>  	"2:\n"

> +		__ASM__MB

>  	".subsection 2\n"

>  	"3:	br	1b\n"

>  	".previous"

> @@ -200,8 +199,8 @@ ____cmpxchg(_u32, volatile int *m, int old, int new)

>  	"	mov %4,%1\n"

>  	"	stl_c %1,%2\n"

>  	"	beq %1,3f\n"

> -		__ASM__MB

>  	"2:\n"

> +		__ASM__MB

>  	".subsection 2\n"

>  	"3:	br 1b\n"

>  	".previous"

> @@ -223,8 +222,8 @@ ____cmpxchg(_u64, volatile long *m, unsigned long old, unsigned long new)

>  	"	mov %4,%1\n"

>  	"	stq_c %1,%2\n"

>  	"	beq %1,3f\n"

> -		__ASM__MB

>  	"2:\n"

> +		__ASM__MB

>  	".subsection 2\n"

>  	"3:	br 1b\n"

>  	".previous"


ACK
Paul E. McKenney Feb. 20, 2018, 2:48 p.m. UTC | #19
On Mon, Feb 19, 2018 at 06:44:13PM +0100, Peter Zijlstra wrote:
> On Mon, Feb 19, 2018 at 12:14:45PM -0500, Alan Stern wrote:

> > Note that operations like atomic_add_unless() already include memory 

> > barriers.

> 

> It is valid for atomic_add_unless() to not imply any barriers when the

> addition doesn't happen.


Agreed, given that atomic_add_unless() just returns 0 or 1, not the
pointer being added.  Of course, the __atomic_add_unless() function
that it calls is another story, as it does return the old value.  Sigh.
And __atomic_add_unless() is called directly from some code.  All of
which looks to be counters rather than pointers, thankfully.

So, do we want to rely on atomic_add_unless() always being
invoked on counters rather than pointers, or does it need an
smp_read_barrier_depends()?

							Thanx, Paul
Paul E. McKenney Feb. 20, 2018, 2:49 p.m. UTC | #20
On Mon, Feb 19, 2018 at 09:28:44PM +0100, Peter Zijlstra wrote:
> On Mon, Feb 19, 2018 at 11:41:23AM -0800, Paul E. McKenney wrote:

> > On Mon, Feb 19, 2018 at 12:14:45PM -0500, Alan Stern wrote:

> > > This leaves us with a question: Do we want to change the kernel by

> > > adding memory barriers after unsuccessful RMW operations on Alpha, or

> > > do we want to change the model by excluding such operations from

> > > address dependencies?

> > 

> > I vote for adding the barrier on Alpha.  However, I don't know of any

> > code in the Linux kernel that relies on read-to-read address dependency

> > ordering headed by a failing RMW operation, so I don't feel all that

> > strongly about this.

> 

> Right, but not knowing doesn't mean doesn't exist, and most certainly

> doesn't mean will never exist.


Fair enough, safety first!

> > > Note that operations like atomic_add_unless() already include memory 

> > > barriers.

> > 

> > And I don't see an atomic_add_unless_relaxed(), so we are good on this

> > one.  So far, anyway!  ;-)

> 

> Not the point, add_unless() is a conditional operation, and therefore

> doesn't need to imply anything when failing.


Plus it doesn't return a pointer, so there is no problem with dereferences.
Unless someone wants to use its return value as an array index and rely
on dependency ordering to the array, but I would NAK that use case.

							Thanx, Paul
Alan Stern Feb. 20, 2018, 3:11 p.m. UTC | #21
On Tue, 20 Feb 2018, Paul E. McKenney wrote:

> On Mon, Feb 19, 2018 at 09:28:44PM +0100, Peter Zijlstra wrote:

> > On Mon, Feb 19, 2018 at 11:41:23AM -0800, Paul E. McKenney wrote:

> > > On Mon, Feb 19, 2018 at 12:14:45PM -0500, Alan Stern wrote:

> > > > This leaves us with a question: Do we want to change the kernel by

> > > > adding memory barriers after unsuccessful RMW operations on Alpha, or

> > > > do we want to change the model by excluding such operations from

> > > > address dependencies?

> > > 

> > > I vote for adding the barrier on Alpha.  However, I don't know of any

> > > code in the Linux kernel that relies on read-to-read address dependency

> > > ordering headed by a failing RMW operation, so I don't feel all that

> > > strongly about this.

> > 

> > Right, but not knowing doesn't mean doesn't exist, and most certainly

> > doesn't mean will never exist.

> 

> Fair enough, safety first!

> 

> > > > Note that operations like atomic_add_unless() already include memory 

> > > > barriers.

> > > 

> > > And I don't see an atomic_add_unless_relaxed(), so we are good on this

> > > one.  So far, anyway!  ;-)

> > 

> > Not the point, add_unless() is a conditional operation, and therefore

> > doesn't need to imply anything when failing.

> 

> Plus it doesn't return a pointer, so there is no problem with dereferences.

> Unless someone wants to use its return value as an array index and rely

> on dependency ordering to the array, but I would NAK that use case.


You may not get the chance to NAK it.

We need to be consistent.  Array indexing is indeed a form of address
dependency, so either we assert that the dependency is enforced when
the array index is derived from a failed atomic operation, or else we
assert that failed atomic operations do not create address
dependencies.

Alan
Andrea Parri Feb. 20, 2018, 3:17 p.m. UTC | #22
On Tue, Feb 20, 2018 at 06:48:13AM -0800, Paul E. McKenney wrote:
> On Mon, Feb 19, 2018 at 06:44:13PM +0100, Peter Zijlstra wrote:

> > On Mon, Feb 19, 2018 at 12:14:45PM -0500, Alan Stern wrote:

> > > Note that operations like atomic_add_unless() already include memory 

> > > barriers.

> > 

> > It is valid for atomic_add_unless() to not imply any barriers when the

> > addition doesn't happen.

> 

> Agreed, given that atomic_add_unless() just returns 0 or 1, not the

> pointer being added.  Of course, the __atomic_add_unless() function

> that it calls is another story, as it does return the old value.  Sigh.

> And __atomic_add_unless() is called directly from some code.  All of

> which looks to be counters rather than pointers, thankfully.

> 

> So, do we want to rely on atomic_add_unless() always being

> invoked on counters rather than pointers, or does it need an

> smp_read_barrier_depends()?


alpha's implementation of __atomic_add_unless() has an unconditional smp_mb()
before returning so, as far as dependencies are concerned, these seem fine.

  Andrea


> 

> 							Thanx, Paul

>
Alan Stern Feb. 20, 2018, 3:38 p.m. UTC | #23
On Tue, 20 Feb 2018, Andrea Parri wrote:

> > This leaves us with a question: Do we want to change the kernel by

> > adding memory barriers after unsuccessful RMW operations on Alpha, or

> > do we want to change the model by excluding such operations from

> > address dependencies?

> 

> I'd like to continue to treat R[once] and R*[once] equally if possible.

> Given the (unconditional) smp_read_barrier_depends in READ_ONCE and in

> atomics, it seems reasonable to have it unconditionally in cmpxchg.

> 

> As with the following patch?


Yes, this seems reasonable to me.  If Will gives it his "Acked-by"  
to go with Peter's, you should submit it to Ingo Molnar.

And once this is made, there shouldn't be any trouble with the proposed 
patch for the memory model.

Alan

>   Andrea

> 

> ---

> diff --git a/arch/alpha/include/asm/xchg.h b/arch/alpha/include/asm/xchg.h

> index 68dfb3cb71454..e2660866ce972 100644

> --- a/arch/alpha/include/asm/xchg.h

> +++ b/arch/alpha/include/asm/xchg.h

> @@ -128,10 +128,9 @@ ____xchg(, volatile void *ptr, unsigned long x, int size)

>   * store NEW in MEM.  Return the initial value in MEM.  Success is

>   * indicated by comparing RETURN with OLD.

>   *

> - * The memory barrier should be placed in SMP only when we actually

> - * make the change. If we don't change anything (so if the returned

> - * prev is equal to old) then we aren't acquiring anything new and

> - * we don't need any memory barrier as far I can tell.

> + * The memory barrier is placed in SMP unconditionally, in order to

> + * guarantee that dependency ordering is preserved when a dependency

> + * is headed by an unsuccessful operation.

>   */

>  

>  static inline unsigned long

> @@ -150,8 +149,8 @@ ____cmpxchg(_u8, volatile char *m, unsigned char old, unsigned char new)

>  	"	or	%1,%2,%2\n"

>  	"	stq_c	%2,0(%4)\n"

>  	"	beq	%2,3f\n"

> -		__ASM__MB

>  	"2:\n"

> +		__ASM__MB

>  	".subsection 2\n"

>  	"3:	br	1b\n"

>  	".previous"

> @@ -177,8 +176,8 @@ ____cmpxchg(_u16, volatile short *m, unsigned short old, unsigned short new)

>  	"	or	%1,%2,%2\n"

>  	"	stq_c	%2,0(%4)\n"

>  	"	beq	%2,3f\n"

> -		__ASM__MB

>  	"2:\n"

> +		__ASM__MB

>  	".subsection 2\n"

>  	"3:	br	1b\n"

>  	".previous"

> @@ -200,8 +199,8 @@ ____cmpxchg(_u32, volatile int *m, int old, int new)

>  	"	mov %4,%1\n"

>  	"	stl_c %1,%2\n"

>  	"	beq %1,3f\n"

> -		__ASM__MB

>  	"2:\n"

> +		__ASM__MB

>  	".subsection 2\n"

>  	"3:	br 1b\n"

>  	".previous"

> @@ -223,8 +222,8 @@ ____cmpxchg(_u64, volatile long *m, unsigned long old, unsigned long new)

>  	"	mov %4,%1\n"

>  	"	stq_c %1,%2\n"

>  	"	beq %1,3f\n"

> -		__ASM__MB

>  	"2:\n"

> +		__ASM__MB

>  	".subsection 2\n"

>  	"3:	br 1b\n"

>  	".previous"

> 

> 

> > 

> > Note that operations like atomic_add_unless() already include memory 

> > barriers.

> > 

> > Alan
Paul E. McKenney Feb. 20, 2018, 4:10 p.m. UTC | #24
On Tue, Feb 20, 2018 at 10:11:06AM -0500, Alan Stern wrote:
> On Tue, 20 Feb 2018, Paul E. McKenney wrote:

> 

> > On Mon, Feb 19, 2018 at 09:28:44PM +0100, Peter Zijlstra wrote:

> > > On Mon, Feb 19, 2018 at 11:41:23AM -0800, Paul E. McKenney wrote:

> > > > On Mon, Feb 19, 2018 at 12:14:45PM -0500, Alan Stern wrote:

> > > > > This leaves us with a question: Do we want to change the kernel by

> > > > > adding memory barriers after unsuccessful RMW operations on Alpha, or

> > > > > do we want to change the model by excluding such operations from

> > > > > address dependencies?

> > > > 

> > > > I vote for adding the barrier on Alpha.  However, I don't know of any

> > > > code in the Linux kernel that relies on read-to-read address dependency

> > > > ordering headed by a failing RMW operation, so I don't feel all that

> > > > strongly about this.

> > > 

> > > Right, but not knowing doesn't mean doesn't exist, and most certainly

> > > doesn't mean will never exist.

> > 

> > Fair enough, safety first!

> > 

> > > > > Note that operations like atomic_add_unless() already include memory 

> > > > > barriers.

> > > > 

> > > > And I don't see an atomic_add_unless_relaxed(), so we are good on this

> > > > one.  So far, anyway!  ;-)

> > > 

> > > Not the point, add_unless() is a conditional operation, and therefore

> > > doesn't need to imply anything when failing.

> > 

> > Plus it doesn't return a pointer, so there is no problem with dereferences.

> > Unless someone wants to use its return value as an array index and rely

> > on dependency ordering to the array, but I would NAK that use case.

> 

> You may not get the chance to NAK it.

> 

> We need to be consistent.  Array indexing is indeed a form of address

> dependency, so either we assert that the dependency is enforced when

> the array index is derived from a failed atomic operation, or else we

> assert that failed atomic operations do not create address

> dependencies.


The problem is that the compiler can get up to a great deal more mischief
with integers than it can with pointers.  I agree that it would be quite
challenging to represent this distinction between integers and pointers in
the model, but then again herd does not yet support array indexing anyway.

So for the time being, anyway, I must stand by my NAK.

							Thanx, Paul
Paul E. McKenney Feb. 20, 2018, 4:11 p.m. UTC | #25
On Tue, Feb 20, 2018 at 04:17:52PM +0100, Andrea Parri wrote:
> On Tue, Feb 20, 2018 at 06:48:13AM -0800, Paul E. McKenney wrote:

> > On Mon, Feb 19, 2018 at 06:44:13PM +0100, Peter Zijlstra wrote:

> > > On Mon, Feb 19, 2018 at 12:14:45PM -0500, Alan Stern wrote:

> > > > Note that operations like atomic_add_unless() already include memory 

> > > > barriers.

> > > 

> > > It is valid for atomic_add_unless() to not imply any barriers when the

> > > addition doesn't happen.

> > 

> > Agreed, given that atomic_add_unless() just returns 0 or 1, not the

> > pointer being added.  Of course, the __atomic_add_unless() function

> > that it calls is another story, as it does return the old value.  Sigh.

> > And __atomic_add_unless() is called directly from some code.  All of

> > which looks to be counters rather than pointers, thankfully.

> > 

> > So, do we want to rely on atomic_add_unless() always being

> > invoked on counters rather than pointers, or does it need an

> > smp_read_barrier_depends()?

> 

> alpha's implementation of __atomic_add_unless() has an unconditional smp_mb()

> before returning so, as far as dependencies are concerned, these seem fine.


Very good!

							Thanx, Paul
Alan Stern Feb. 21, 2018, 3 p.m. UTC | #26
On Fri, 16 Feb 2018, Paul E. McKenney wrote:

> On Fri, Feb 16, 2018 at 05:22:55PM -0500, Alan Stern wrote:

> > Since commit 76ebbe78f739 ("locking/barriers: Add implicit

> > smp_read_barrier_depends() to READ_ONCE()") was merged for the 4.15

> > kernel, it has not been necessary to use smp_read_barrier_depends().

> > Similarly, commit 59ecbbe7b31c ("locking/barriers: Kill

> > lockless_dereference()") removed lockless_dereference() from the

> > kernel.

> > 

> > Since these primitives are no longer part of the kernel, they do not

> > belong in the Linux Kernel Memory Consistency Model.  This patch

> > removes them, along with the internal rb-dep relation, and updates the

> > revelant documentation.

> > 

> > Signed-off-by: Alan Stern <stern@rowland.harvard.edu>

> 

> I queued this, but would welcome an update that addressed Akira's

> feedback as appropriate.


Is it too late to send a v2 of this patch?  I didn't want to do it 
before now because the issue raised by Andrea wasn't settled.  (Some 
could claim that it still isn't fully settled...)

Alan
Paul E. McKenney Feb. 21, 2018, 4:06 p.m. UTC | #27
On Wed, Feb 21, 2018 at 10:00:20AM -0500, Alan Stern wrote:
> On Fri, 16 Feb 2018, Paul E. McKenney wrote:

> 

> > On Fri, Feb 16, 2018 at 05:22:55PM -0500, Alan Stern wrote:

> > > Since commit 76ebbe78f739 ("locking/barriers: Add implicit

> > > smp_read_barrier_depends() to READ_ONCE()") was merged for the 4.15

> > > kernel, it has not been necessary to use smp_read_barrier_depends().

> > > Similarly, commit 59ecbbe7b31c ("locking/barriers: Kill

> > > lockless_dereference()") removed lockless_dereference() from the

> > > kernel.

> > > 

> > > Since these primitives are no longer part of the kernel, they do not

> > > belong in the Linux Kernel Memory Consistency Model.  This patch

> > > removes them, along with the internal rb-dep relation, and updates the

> > > revelant documentation.

> > > 

> > > Signed-off-by: Alan Stern <stern@rowland.harvard.edu>

> > 

> > I queued this, but would welcome an update that addressed Akira's

> > feedback as appropriate.

> 

> Is it too late to send a v2 of this patch?  I didn't want to do it 

> before now because the issue raised by Andrea wasn't settled.  (Some 

> could claim that it still isn't fully settled...)


Would you be willing to send a delta patch?  I can then place it directly
on top of your original patch, and once it settles out, I can ask Ingo
if he is willing to update the patch in -tip.

							Thanx, Paul
Ingo Molnar Feb. 21, 2018, 4:51 p.m. UTC | #28
* Paul E. McKenney <paulmck@linux.vnet.ibm.com> wrote:

> On Wed, Feb 21, 2018 at 10:00:20AM -0500, Alan Stern wrote:

> > On Fri, 16 Feb 2018, Paul E. McKenney wrote:

> > 

> > > On Fri, Feb 16, 2018 at 05:22:55PM -0500, Alan Stern wrote:

> > > > Since commit 76ebbe78f739 ("locking/barriers: Add implicit

> > > > smp_read_barrier_depends() to READ_ONCE()") was merged for the 4.15

> > > > kernel, it has not been necessary to use smp_read_barrier_depends().

> > > > Similarly, commit 59ecbbe7b31c ("locking/barriers: Kill

> > > > lockless_dereference()") removed lockless_dereference() from the

> > > > kernel.

> > > > 

> > > > Since these primitives are no longer part of the kernel, they do not

> > > > belong in the Linux Kernel Memory Consistency Model.  This patch

> > > > removes them, along with the internal rb-dep relation, and updates the

> > > > revelant documentation.

> > > > 

> > > > Signed-off-by: Alan Stern <stern@rowland.harvard.edu>

> > > 

> > > I queued this, but would welcome an update that addressed Akira's

> > > feedback as appropriate.

> > 

> > Is it too late to send a v2 of this patch?  I didn't want to do it 

> > before now because the issue raised by Andrea wasn't settled.  (Some 

> > could claim that it still isn't fully settled...)

> 

> Would you be willing to send a delta patch?  I can then place it directly

> on top of your original patch, and once it settles out, I can ask Ingo

> if he is willing to update the patch in -tip.


It would be better to have followup fixes as separate patches. I applied the 
current set of fixes/improvements today to help move things along - it's all 
advancing very nicely!

Thanks,

	Ingo
Paul E. McKenney Feb. 21, 2018, 5:53 p.m. UTC | #29
On Wed, Feb 21, 2018 at 05:51:43PM +0100, Ingo Molnar wrote:
> 

> * Paul E. McKenney <paulmck@linux.vnet.ibm.com> wrote:

> 

> > On Wed, Feb 21, 2018 at 10:00:20AM -0500, Alan Stern wrote:

> > > On Fri, 16 Feb 2018, Paul E. McKenney wrote:

> > > 

> > > > On Fri, Feb 16, 2018 at 05:22:55PM -0500, Alan Stern wrote:

> > > > > Since commit 76ebbe78f739 ("locking/barriers: Add implicit

> > > > > smp_read_barrier_depends() to READ_ONCE()") was merged for the 4.15

> > > > > kernel, it has not been necessary to use smp_read_barrier_depends().

> > > > > Similarly, commit 59ecbbe7b31c ("locking/barriers: Kill

> > > > > lockless_dereference()") removed lockless_dereference() from the

> > > > > kernel.

> > > > > 

> > > > > Since these primitives are no longer part of the kernel, they do not

> > > > > belong in the Linux Kernel Memory Consistency Model.  This patch

> > > > > removes them, along with the internal rb-dep relation, and updates the

> > > > > revelant documentation.

> > > > > 

> > > > > Signed-off-by: Alan Stern <stern@rowland.harvard.edu>

> > > > 

> > > > I queued this, but would welcome an update that addressed Akira's

> > > > feedback as appropriate.

> > > 

> > > Is it too late to send a v2 of this patch?  I didn't want to do it 

> > > before now because the issue raised by Andrea wasn't settled.  (Some 

> > > could claim that it still isn't fully settled...)

> > 

> > Would you be willing to send a delta patch?  I can then place it directly

> > on top of your original patch, and once it settles out, I can ask Ingo

> > if he is willing to update the patch in -tip.

> 

> It would be better to have followup fixes as separate patches. I applied the 

> current set of fixes/improvements today to help move things along - it's all 

> advancing very nicely!


Will do, and glad you like it!

								Thanx, Paul
Andrea Parri Feb. 21, 2018, 5:58 p.m. UTC | #30
On Wed, Feb 21, 2018 at 12:15:56PM -0500, Alan Stern wrote:
> Commit bf28ae562744 ("tools/memory-model: Remove rb-dep,

> smp_read_barrier_depends, and lockless_dereference") was accidentally

> merged too early, while it was still in RFC form.  This patch adds in

> the missing pieces.

> 

> Akira pointed out some typos in the original patch, and he noted that

> cheatsheet.txt should be updated to indicate how unsuccessful RMW

> operations relate to address dependencies.

> 

> Andrea pointed out that the macro for rcu_dereference() in linux.def

> should now use the "once" annotation instead of "deref".  He also

> suggested that the comments should mention commit 5a8897cc7631

> ("locking/atomics/alpha: Add smp_read_barrier_depends() to

> _release()/_relaxed() atomics") as an important precursor, and he

> contributed commit cb13b424e986 ("locking/xchg/alpha: Add

> unconditional memory barrier to cmpxchg()"), another prerequisite.

> 

> Signed-off-by: Alan Stern <stern@rowland.harvard.edu>

> Suggested-by: Akira Yokosawa <akiyks@gmail.com>

> Suggested-by: Andrea Parri <parri.andrea@gmail.com>

> Fixes: bf28ae562744 ("tools/memory-model: Remove rb-dep, smp_read_barrier_depends, and lockless_dereference")


Acked-by: Andrea Parri <parri.andrea@gmail.com>


Thanks,
  Andrea


> 

> ---

> 

>  tools/memory-model/Documentation/cheatsheet.txt  |    6 +++---

>  tools/memory-model/Documentation/explanation.txt |    4 ++--

>  tools/memory-model/linux-kernel.def              |    2 +-

>  3 files changed, 6 insertions(+), 6 deletions(-)

> 

> Index: usb-4.x/tools/memory-model/Documentation/cheatsheet.txt

> ===================================================================

> --- usb-4.x.orig/tools/memory-model/Documentation/cheatsheet.txt

> +++ usb-4.x/tools/memory-model/Documentation/cheatsheet.txt

> @@ -1,11 +1,11 @@

>                                    Prior Operation     Subsequent Operation

>                                    ---------------  ---------------------------

>                                 C  Self  R  W  RWM  Self  R  W  DR  DW  RMW  SV

> -                              __  ----  -  -  ---  ----  -  -  --  --  ---  --

> +                              --  ----  -  -  ---  ----  -  -  --  --  ---  --

>  

>  Store, e.g., WRITE_ONCE()            Y                                       Y

> -Load, e.g., READ_ONCE()              Y                              Y        Y

> -Unsuccessful RMW operation           Y                              Y        Y

> +Load, e.g., READ_ONCE()              Y                          Y   Y        Y

> +Unsuccessful RMW operation           Y                          Y   Y        Y

>  rcu_dereference()                    Y                          Y   Y        Y

>  Successful *_acquire()               R                   Y  Y   Y   Y    Y   Y

>  Successful *_release()         C        Y  Y    Y     W                      Y

> Index: usb-4.x/tools/memory-model/Documentation/explanation.txt

> ===================================================================

> --- usb-4.x.orig/tools/memory-model/Documentation/explanation.txt

> +++ usb-4.x/tools/memory-model/Documentation/explanation.txt

> @@ -826,7 +826,7 @@ A-cumulative; they only affect the propa

>  executed on C before the fence (i.e., those which precede the fence in

>  program order).

>  

> -read_lock(), rcu_read_unlock(), and synchronize_rcu() fences have

> +read_read_lock(), rcu_read_unlock(), and synchronize_rcu() fences have

>  other properties which we discuss later.

>  

>  

> @@ -1138,7 +1138,7 @@ final effect is that even though the two

>  program order, it appears that they aren't.

>  

>  This could not have happened if the local cache had processed the

> -incoming stores in FIFO order.  In constrast, other architectures

> +incoming stores in FIFO order.  By contrast, other architectures

>  maintain at least the appearance of FIFO order.

>  

>  In practice, this difficulty is solved by inserting a special fence

> Index: usb-4.x/tools/memory-model/linux-kernel.def

> ===================================================================

> --- usb-4.x.orig/tools/memory-model/linux-kernel.def

> +++ usb-4.x/tools/memory-model/linux-kernel.def

> @@ -13,7 +13,7 @@ WRITE_ONCE(X,V) { __store{once}(X,V); }

>  smp_store_release(X,V) { __store{release}(*X,V); }

>  smp_load_acquire(X) __load{acquire}(*X)

>  rcu_assign_pointer(X,V) { __store{release}(X,V); }

> -rcu_dereference(X) __load{deref}(X)

> +rcu_dereference(X) __load{once}(X)

>  

>  // Fences

>  smp_mb() { __fence{mb} ; }

>
Paul E. McKenney Feb. 21, 2018, 6 p.m. UTC | #31
On Wed, Feb 21, 2018 at 12:15:56PM -0500, Alan Stern wrote:
> Commit bf28ae562744 ("tools/memory-model: Remove rb-dep,

> smp_read_barrier_depends, and lockless_dereference") was accidentally

> merged too early, while it was still in RFC form.  This patch adds in

> the missing pieces.

> 

> Akira pointed out some typos in the original patch, and he noted that

> cheatsheet.txt should be updated to indicate how unsuccessful RMW

> operations relate to address dependencies.

> 

> Andrea pointed out that the macro for rcu_dereference() in linux.def

> should now use the "once" annotation instead of "deref".  He also

> suggested that the comments should mention commit 5a8897cc7631

> ("locking/atomics/alpha: Add smp_read_barrier_depends() to

> _release()/_relaxed() atomics") as an important precursor, and he

> contributed commit cb13b424e986 ("locking/xchg/alpha: Add

> unconditional memory barrier to cmpxchg()"), another prerequisite.

> 

> Signed-off-by: Alan Stern <stern@rowland.harvard.edu>

> Suggested-by: Akira Yokosawa <akiyks@gmail.com>

> Suggested-by: Andrea Parri <parri.andrea@gmail.com>

> Fixes: bf28ae562744 ("tools/memory-model: Remove rb-dep, smp_read_barrier_depends, and lockless_dereference")


Queued, thank you!

							Thanx, Paul

> ---

> 

>  tools/memory-model/Documentation/cheatsheet.txt  |    6 +++---

>  tools/memory-model/Documentation/explanation.txt |    4 ++--

>  tools/memory-model/linux-kernel.def              |    2 +-

>  3 files changed, 6 insertions(+), 6 deletions(-)

> 

> Index: usb-4.x/tools/memory-model/Documentation/cheatsheet.txt

> ===================================================================

> --- usb-4.x.orig/tools/memory-model/Documentation/cheatsheet.txt

> +++ usb-4.x/tools/memory-model/Documentation/cheatsheet.txt

> @@ -1,11 +1,11 @@

>                                    Prior Operation     Subsequent Operation

>                                    ---------------  ---------------------------

>                                 C  Self  R  W  RWM  Self  R  W  DR  DW  RMW  SV

> -                              __  ----  -  -  ---  ----  -  -  --  --  ---  --

> +                              --  ----  -  -  ---  ----  -  -  --  --  ---  --

> 

>  Store, e.g., WRITE_ONCE()            Y                                       Y

> -Load, e.g., READ_ONCE()              Y                              Y        Y

> -Unsuccessful RMW operation           Y                              Y        Y

> +Load, e.g., READ_ONCE()              Y                          Y   Y        Y

> +Unsuccessful RMW operation           Y                          Y   Y        Y

>  rcu_dereference()                    Y                          Y   Y        Y

>  Successful *_acquire()               R                   Y  Y   Y   Y    Y   Y

>  Successful *_release()         C        Y  Y    Y     W                      Y

> Index: usb-4.x/tools/memory-model/Documentation/explanation.txt

> ===================================================================

> --- usb-4.x.orig/tools/memory-model/Documentation/explanation.txt

> +++ usb-4.x/tools/memory-model/Documentation/explanation.txt

> @@ -826,7 +826,7 @@ A-cumulative; they only affect the propa

>  executed on C before the fence (i.e., those which precede the fence in

>  program order).

> 

> -read_lock(), rcu_read_unlock(), and synchronize_rcu() fences have

> +read_read_lock(), rcu_read_unlock(), and synchronize_rcu() fences have

>  other properties which we discuss later.

> 

> 

> @@ -1138,7 +1138,7 @@ final effect is that even though the two

>  program order, it appears that they aren't.

> 

>  This could not have happened if the local cache had processed the

> -incoming stores in FIFO order.  In constrast, other architectures

> +incoming stores in FIFO order.  By contrast, other architectures

>  maintain at least the appearance of FIFO order.

> 

>  In practice, this difficulty is solved by inserting a special fence

> Index: usb-4.x/tools/memory-model/linux-kernel.def

> ===================================================================

> --- usb-4.x.orig/tools/memory-model/linux-kernel.def

> +++ usb-4.x/tools/memory-model/linux-kernel.def

> @@ -13,7 +13,7 @@ WRITE_ONCE(X,V) { __store{once}(X,V); }

>  smp_store_release(X,V) { __store{release}(*X,V); }

>  smp_load_acquire(X) __load{acquire}(*X)

>  rcu_assign_pointer(X,V) { __store{release}(X,V); }

> -rcu_dereference(X) __load{deref}(X)

> +rcu_dereference(X) __load{once}(X)

> 

>  // Fences

>  smp_mb() { __fence{mb} ; }

>
Paul E. McKenney Feb. 21, 2018, 6:14 p.m. UTC | #32
On Wed, Feb 21, 2018 at 06:58:57PM +0100, Andrea Parri wrote:
> On Wed, Feb 21, 2018 at 12:15:56PM -0500, Alan Stern wrote:

> > Commit bf28ae562744 ("tools/memory-model: Remove rb-dep,

> > smp_read_barrier_depends, and lockless_dereference") was accidentally

> > merged too early, while it was still in RFC form.  This patch adds in

> > the missing pieces.

> > 

> > Akira pointed out some typos in the original patch, and he noted that

> > cheatsheet.txt should be updated to indicate how unsuccessful RMW

> > operations relate to address dependencies.

> > 

> > Andrea pointed out that the macro for rcu_dereference() in linux.def

> > should now use the "once" annotation instead of "deref".  He also

> > suggested that the comments should mention commit 5a8897cc7631

> > ("locking/atomics/alpha: Add smp_read_barrier_depends() to

> > _release()/_relaxed() atomics") as an important precursor, and he

> > contributed commit cb13b424e986 ("locking/xchg/alpha: Add

> > unconditional memory barrier to cmpxchg()"), another prerequisite.

> > 

> > Signed-off-by: Alan Stern <stern@rowland.harvard.edu>

> > Suggested-by: Akira Yokosawa <akiyks@gmail.com>

> > Suggested-by: Andrea Parri <parri.andrea@gmail.com>

> > Fixes: bf28ae562744 ("tools/memory-model: Remove rb-dep, smp_read_barrier_depends, and lockless_dereference")

> 

> Acked-by: Andrea Parri <parri.andrea@gmail.com>


Applied, thank you!

							Thanx, Paul

> Thanks,

>   Andrea

> 

> 

> > 

> > ---

> > 

> >  tools/memory-model/Documentation/cheatsheet.txt  |    6 +++---

> >  tools/memory-model/Documentation/explanation.txt |    4 ++--

> >  tools/memory-model/linux-kernel.def              |    2 +-

> >  3 files changed, 6 insertions(+), 6 deletions(-)

> > 

> > Index: usb-4.x/tools/memory-model/Documentation/cheatsheet.txt

> > ===================================================================

> > --- usb-4.x.orig/tools/memory-model/Documentation/cheatsheet.txt

> > +++ usb-4.x/tools/memory-model/Documentation/cheatsheet.txt

> > @@ -1,11 +1,11 @@

> >                                    Prior Operation     Subsequent Operation

> >                                    ---------------  ---------------------------

> >                                 C  Self  R  W  RWM  Self  R  W  DR  DW  RMW  SV

> > -                              __  ----  -  -  ---  ----  -  -  --  --  ---  --

> > +                              --  ----  -  -  ---  ----  -  -  --  --  ---  --

> >  

> >  Store, e.g., WRITE_ONCE()            Y                                       Y

> > -Load, e.g., READ_ONCE()              Y                              Y        Y

> > -Unsuccessful RMW operation           Y                              Y        Y

> > +Load, e.g., READ_ONCE()              Y                          Y   Y        Y

> > +Unsuccessful RMW operation           Y                          Y   Y        Y

> >  rcu_dereference()                    Y                          Y   Y        Y

> >  Successful *_acquire()               R                   Y  Y   Y   Y    Y   Y

> >  Successful *_release()         C        Y  Y    Y     W                      Y

> > Index: usb-4.x/tools/memory-model/Documentation/explanation.txt

> > ===================================================================

> > --- usb-4.x.orig/tools/memory-model/Documentation/explanation.txt

> > +++ usb-4.x/tools/memory-model/Documentation/explanation.txt

> > @@ -826,7 +826,7 @@ A-cumulative; they only affect the propa

> >  executed on C before the fence (i.e., those which precede the fence in

> >  program order).

> >  

> > -read_lock(), rcu_read_unlock(), and synchronize_rcu() fences have

> > +read_read_lock(), rcu_read_unlock(), and synchronize_rcu() fences have

> >  other properties which we discuss later.

> >  

> >  

> > @@ -1138,7 +1138,7 @@ final effect is that even though the two

> >  program order, it appears that they aren't.

> >  

> >  This could not have happened if the local cache had processed the

> > -incoming stores in FIFO order.  In constrast, other architectures

> > +incoming stores in FIFO order.  By contrast, other architectures

> >  maintain at least the appearance of FIFO order.

> >  

> >  In practice, this difficulty is solved by inserting a special fence

> > Index: usb-4.x/tools/memory-model/linux-kernel.def

> > ===================================================================

> > --- usb-4.x.orig/tools/memory-model/linux-kernel.def

> > +++ usb-4.x/tools/memory-model/linux-kernel.def

> > @@ -13,7 +13,7 @@ WRITE_ONCE(X,V) { __store{once}(X,V); }

> >  smp_store_release(X,V) { __store{release}(*X,V); }

> >  smp_load_acquire(X) __load{acquire}(*X)

> >  rcu_assign_pointer(X,V) { __store{release}(X,V); }

> > -rcu_dereference(X) __load{deref}(X)

> > +rcu_dereference(X) __load{once}(X)

> >  

> >  // Fences

> >  smp_mb() { __fence{mb} ; }

> > 

>
diff mbox series

Patch

diff --git a/linux-kernel-hardware.cat b/linux-kernel-hardware.cat
index 7768bf7..6c35655 100644
--- a/linux-kernel-hardware.cat
+++ b/linux-kernel-hardware.cat
@@ -34,7 +34,7 @@  let strong-fence = mb | gp
 (* Release Acquire *)
 let acq-po = [Acquire] ; po ; [M]
 let po-rel = [M] ; po ; [Release]
-let rfi-rel-acq = [Release] ; rfi ; [Acquire]
+let rel-rf-acq-po = [Release] ; rf ; [Acquire] ; po
 
 (**********************************)
 (* Fundamental coherence ordering *)
@@ -60,14 +60,13 @@  let to-w = rwdep | addrpo | (overwrite & int)
 let rdw = po-loc & (fre ; rfe)
 let detour = po-loc & (coe ; rfe)
 let rrdep = addr | (dep ; rfi) | rdw
-let strong-rrdep = rrdep+ & rb-dep
-let to-r = strong-rrdep | rfi-rel-acq
-let fence = strong-fence | wmb | po-rel | rmb | acq-po
+let to-r = rrdep
+let fence = strong-fence | wmb | po-rel | rmb | acq-po | (rel-rf-acq-po & int)
 let ppo = (rrdep* ; (to-r | to-w | fence)) | rdw | detour
 
 (* Happens Before *)
 let A-cumul(r) = rfe? ; r
-let cumul-fence = A-cumul(strong-fence | po-rel) | wmb
+let cumul-fence = A-cumul(strong-fence | po-rel) | wmb | rel-rf-acq-po
 
 let rec prop = (overwrite & ext)? ; cumul-fence ; hb*
     and hb = ppo | rfe | ((hb* ; prop) & int)
diff --git a/linux-kernel.cat b/linux-kernel.cat
index 15b7a5d..c6b0752 100644
--- a/linux-kernel.cat
+++ b/linux-kernel.cat
@@ -39,7 +39,7 @@  let strong-fence = mb | gp
 (* Release Acquire *)
 let acq-po = [Acquire] ; po ; [M]
 let po-rel = [M] ; po ; [Release]
-let rfi-rel-acq = [Release] ; rfi ; [Acquire]
+let rel-rf-acq-po = [Release] ; rf ; [Acquire] ; po
 
 (**********************************)
 (* Fundamental coherence ordering *)
@@ -62,14 +62,13 @@  let rwdep = (dep | ctrl) ; [W]
 let overwrite = co | fr
 let to-w = rwdep | (overwrite & int)
 let rrdep = addr | (dep ; rfi)
-let strong-rrdep = rrdep+ & rb-dep
-let to-r = strong-rrdep | rfi-rel-acq
-let fence = strong-fence | wmb | po-rel | rmb | acq-po
+let to-r = rrdep
+let fence = strong-fence | wmb | po-rel | rmb | acq-po | (rel-rf-acq-po & int)
 let ppo = rrdep* ; (to-r | to-w | fence)
 
 (* Propagation: Ordering from release operations and strong fences. *)
 let A-cumul(r) = rfe? ; r
-let cumul-fence = A-cumul(strong-fence | po-rel) | wmb
+let cumul-fence = A-cumul(strong-fence | po-rel) | wmb | rel-rf-acq-po
 let prop = (overwrite & ext)? ; cumul-fence* ; rfe?
 
 (*