[AArch64,05/11] aarch64: Emit LSE st<op> instructions

Message ID 20180926050355.32746-6-richard.henderson@linaro.org
State New
Headers show
Series
  • LSE atomics out-of-line
Related show

Commit Message

Richard Henderson Sept. 26, 2018, 5:03 a.m.
From: Richard Henderson <richard.henderson@linaro.org>


When the result of an operation is not used, we can ignore the
result by storing to XZR.  For two of the memory models, using
XZR with LD<op> has a preferred assembler alias, ST<op>.

	* config/aarch64/atomics.md (aarch64_atomic_<ATOMIC_LDOP><ALLI>_lse):
	Use ST<op> for relaxed and release models; load to XZR otherwise;
	remove the now unnecessary scratch register.

	* gcc.target/aarch64/atomic-inst-ldadd.c: Expect stadd{,l}.
	* gcc.target/aarch64/atomic-inst-ldlogic.c: Similarly.
---
 .../gcc.target/aarch64/atomic-inst-ldadd.c    | 18 ++++---
 .../gcc.target/aarch64/atomic-inst-ldlogic.c  | 54 ++++++++++++-------
 gcc/config/aarch64/atomics.md                 | 15 +++---
 3 files changed, 57 insertions(+), 30 deletions(-)

-- 
2.17.1

Patch

diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldadd.c b/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldadd.c
index 4b2282c6861..db2206186b4 100644
--- a/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldadd.c
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldadd.c
@@ -67,20 +67,26 @@  TEST (add_load_notreturn, ADD_LOAD_NORETURN)
 TEST (sub_load, SUB_LOAD)
 TEST (sub_load_notreturn, SUB_LOAD_NORETURN)
 
-/* { dg-final { scan-assembler-times "ldaddb\t" 16} } */
+/* { dg-final { scan-assembler-times "ldaddb\t" 8} } */
 /* { dg-final { scan-assembler-times "ldaddab\t" 32} } */
-/* { dg-final { scan-assembler-times "ldaddlb\t" 16} } */
+/* { dg-final { scan-assembler-times "ldaddlb\t" 8} } */
 /* { dg-final { scan-assembler-times "ldaddalb\t" 32} } */
+/* { dg-final { scan-assembler-times "staddb\t" 8} } */
+/* { dg-final { scan-assembler-times "staddlb\t" 8} } */
 
-/* { dg-final { scan-assembler-times "ldaddh\t" 16} } */
+/* { dg-final { scan-assembler-times "ldaddh\t" 8} } */
 /* { dg-final { scan-assembler-times "ldaddah\t" 32} } */
-/* { dg-final { scan-assembler-times "ldaddlh\t" 16} } */
+/* { dg-final { scan-assembler-times "ldaddlh\t" 8} } */
 /* { dg-final { scan-assembler-times "ldaddalh\t" 32} } */
+/* { dg-final { scan-assembler-times "staddh\t" 8} } */
+/* { dg-final { scan-assembler-times "staddlh\t" 8} } */
 
-/* { dg-final { scan-assembler-times "ldadd\t" 32} } */
+/* { dg-final { scan-assembler-times "ldadd\t" 16} } */
 /* { dg-final { scan-assembler-times "ldadda\t" 64} } */
-/* { dg-final { scan-assembler-times "ldaddl\t" 32} } */
+/* { dg-final { scan-assembler-times "ldaddl\t" 16} } */
 /* { dg-final { scan-assembler-times "ldaddal\t" 64} } */
+/* { dg-final { scan-assembler-times "stadd\t" 16} } */
+/* { dg-final { scan-assembler-times "staddl\t" 16} } */
 
 /* { dg-final { scan-assembler-not "ldaxr\t" } } */
 /* { dg-final { scan-assembler-not "stlxr\t" } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldlogic.c b/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldlogic.c
index 4879d52b9b4..b8a53e0a676 100644
--- a/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldlogic.c
+++ b/gcc/testsuite/gcc.target/aarch64/atomic-inst-ldlogic.c
@@ -101,54 +101,72 @@  TEST (xor_load_notreturn, XOR_LOAD_NORETURN)
 
 /* Load-OR.  */
 
-/* { dg-final { scan-assembler-times "ldsetb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldsetb\t" 4} } */
 /* { dg-final { scan-assembler-times "ldsetab\t" 16} } */
-/* { dg-final { scan-assembler-times "ldsetlb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldsetlb\t" 4} } */
 /* { dg-final { scan-assembler-times "ldsetalb\t" 16} } */
+/* { dg-final { scan-assembler-times "stsetb\t" 4} } */
+/* { dg-final { scan-assembler-times "stsetlb\t" 4} } */
 
-/* { dg-final { scan-assembler-times "ldseth\t" 8} } */
+/* { dg-final { scan-assembler-times "ldseth\t" 4} } */
 /* { dg-final { scan-assembler-times "ldsetah\t" 16} } */
-/* { dg-final { scan-assembler-times "ldsetlh\t" 8} } */
+/* { dg-final { scan-assembler-times "ldsetlh\t" 4} } */
 /* { dg-final { scan-assembler-times "ldsetalh\t" 16} } */
+/* { dg-final { scan-assembler-times "stseth\t" 4} } */
+/* { dg-final { scan-assembler-times "stsetlh\t" 4} } */
 
-/* { dg-final { scan-assembler-times "ldset\t" 16} } */
+/* { dg-final { scan-assembler-times "ldset\t" 8} } */
 /* { dg-final { scan-assembler-times "ldseta\t" 32} } */
-/* { dg-final { scan-assembler-times "ldsetl\t" 16} } */
+/* { dg-final { scan-assembler-times "ldsetl\t" 8} } */
 /* { dg-final { scan-assembler-times "ldsetal\t" 32} } */
+/* { dg-final { scan-assembler-times "stset\t" 8} } */
+/* { dg-final { scan-assembler-times "stsetl\t" 8} } */
 
 /* Load-AND.  */
 
-/* { dg-final { scan-assembler-times "ldclrb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldclrb\t" 4} } */
 /* { dg-final { scan-assembler-times "ldclrab\t" 16} } */
-/* { dg-final { scan-assembler-times "ldclrlb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldclrlb\t" 4} } */
 /* { dg-final { scan-assembler-times "ldclralb\t" 16} } */
+/* { dg-final { scan-assembler-times "stclrb\t" 4} } */
+/* { dg-final { scan-assembler-times "stclrlb\t" 4} } */
 
-/* { dg-final { scan-assembler-times "ldclrh\t" 8} } */
+/* { dg-final { scan-assembler-times "ldclrh\t" 4} } */
 /* { dg-final { scan-assembler-times "ldclrah\t" 16} } */
-/* { dg-final { scan-assembler-times "ldclrlh\t" 8} } */
+/* { dg-final { scan-assembler-times "ldclrlh\t" 4} } */
 /* { dg-final { scan-assembler-times "ldclralh\t" 16} } */
+/* { dg-final { scan-assembler-times "stclrh\t" 4} } */
+/* { dg-final { scan-assembler-times "stclrlh\t" 4} } */
 
-/* { dg-final { scan-assembler-times "ldclr\t" 16} */
+/* { dg-final { scan-assembler-times "ldclr\t" 8} */
 /* { dg-final { scan-assembler-times "ldclra\t" 32} } */
-/* { dg-final { scan-assembler-times "ldclrl\t" 16} } */
+/* { dg-final { scan-assembler-times "ldclrl\t" 8} } */
 /* { dg-final { scan-assembler-times "ldclral\t" 32} } */
+/* { dg-final { scan-assembler-times "stclr\t" 8} */
+/* { dg-final { scan-assembler-times "stclrl\t" 8} } */
 
 /* Load-XOR.  */
 
-/* { dg-final { scan-assembler-times "ldeorb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldeorb\t" 4} } */
 /* { dg-final { scan-assembler-times "ldeorab\t" 16} } */
-/* { dg-final { scan-assembler-times "ldeorlb\t" 8} } */
+/* { dg-final { scan-assembler-times "ldeorlb\t" 4} } */
 /* { dg-final { scan-assembler-times "ldeoralb\t" 16} } */
+/* { dg-final { scan-assembler-times "steorb\t" 4} } */
+/* { dg-final { scan-assembler-times "steorlb\t" 4} } */
 
-/* { dg-final { scan-assembler-times "ldeorh\t" 8} } */
+/* { dg-final { scan-assembler-times "ldeorh\t" 4} } */
 /* { dg-final { scan-assembler-times "ldeorah\t" 16} } */
-/* { dg-final { scan-assembler-times "ldeorlh\t" 8} } */
+/* { dg-final { scan-assembler-times "ldeorlh\t" 4} } */
 /* { dg-final { scan-assembler-times "ldeoralh\t" 16} } */
+/* { dg-final { scan-assembler-times "steorh\t" 4} } */
+/* { dg-final { scan-assembler-times "steorlh\t" 4} } */
 
-/* { dg-final { scan-assembler-times "ldeor\t" 16} */
+/* { dg-final { scan-assembler-times "ldeor\t" 8} */
 /* { dg-final { scan-assembler-times "ldeora\t" 32} } */
-/* { dg-final { scan-assembler-times "ldeorl\t" 16} } */
+/* { dg-final { scan-assembler-times "ldeorl\t" 8} } */
 /* { dg-final { scan-assembler-times "ldeoral\t" 32} } */
+/* { dg-final { scan-assembler-times "steor\t" 8} */
+/* { dg-final { scan-assembler-times "steorl\t" 8} } */
 
 /* { dg-final { scan-assembler-not "ldaxr\t" } } */
 /* { dg-final { scan-assembler-not "stlxr\t" } } */
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
index 05ac6232fe5..f74521f885d 100644
--- a/gcc/config/aarch64/atomics.md
+++ b/gcc/config/aarch64/atomics.md
@@ -269,19 +269,22 @@ 
 	  [(match_dup 0)
 	   (match_operand:ALLI 1 "register_operand" "r")
 	   (match_operand:SI 2 "const_int_operand")]
-      ATOMIC_LDOP))
-   (clobber (match_scratch:ALLI 3 "=&r"))]
+      ATOMIC_LDOP))]
   "TARGET_LSE"
   {
    enum memmodel model = memmodel_from_int (INTVAL (operands[2]));
+   /* Note that while ST<op> is an alias for LD<op> with the second
+      operand as XZR, the assember only defines them for the RELAXED
+      and REL models.  But there's nothing that prevents us from explicitly
+      using XZR with LD<op> for the ACQ and ACQ_REL models.  */
    if (is_mm_relaxed (model))
-     return "ld<atomic_ldop><atomic_sfx>\t%<w>1, %<w>3, %0";
+     return "st<atomic_ldop><atomic_sfx>\t%<w>1, %0";
    else if (is_mm_release (model))
-     return "ld<atomic_ldop>l<atomic_sfx>\t%<w>1, %<w>3, %0";
+     return "st<atomic_ldop>l<atomic_sfx>\t%<w>1, %0";
    else if (is_mm_acquire (model) || is_mm_consume (model))
-     return "ld<atomic_ldop>a<atomic_sfx>\t%<w>1, %<w>3, %0";
+     return "ld<atomic_ldop>a<atomic_sfx>\t%<w>1, <w>zr, %0";
    else
-     return "ld<atomic_ldop>al<atomic_sfx>\t%<w>1, %<w>3, %0";
+     return "ld<atomic_ldop>al<atomic_sfx>\t%<w>1, <w>zr, %0";
   }
 )