[AArch64] Add a tlsdesc call pattern for SVE

Message ID 87h8qry9v3.fsf@linaro.org
State New
Headers show
Series
  • [AArch64] Add a tlsdesc call pattern for SVE
Related show

Commit Message

Richard Sandiford Feb. 8, 2018, 11:54 a.m.
tlsdesc calls are guaranteed to preserve all Advanced SIMD registers,
but are not guaranteed to preserve the SVE extension of them.
The calls also don't preserve the SVE predicate registers.

The long-term plan for handling the SVE vector registers is CLOBBER_HIGH,
which adds a clobber equivalent of TARGET_HARD_REGNO_CALL_PART_CLOBBERED.
The pattern can then directly model the fact that the low 128 bits are
preserved and the upper bits are clobbered.

However, it's too late now for that to be included in GCC 8, so this
patch conservatively treats the whole vector register as being clobbered.
This has the obvious disadvantage that compiling for SVE can make NEON
code worse, but I don't think there's much we can do about that until
CLOBBER_HIGH is in.

Tested on aarch64-linux-gnu and aarch64_be-elf.  OK to install?

Richard


2018-02-08  Richard Sandiford  <richard.sandiford@linaro.org>

gcc/
	* config/aarch64/aarch64.md (V4_REGNUM, V8_REGNUM, V12_REGNUM)
	(V16_REGNUM, V20_REGNUM, V24_REGNUM, V28_REGNUM, P1_REGNUM, P2_REGNUM)
	(P3_REGNUM, P4_REGNUM, P5_REGNUM, P6_REGNUM, P8_REGNUM, P9_REGNUM)
	(P10_REGNUM, P11_REGNUM, P12_REGNUM, P13_REGNUM, P14_REGNUM): New
	define_constants.
	(tlsdesc_small_<mode>): Turn into a define_expand and use
	tlsdesc_small_sve_<mode> for SVE.  Rename original define_insn to...
	(tlsdesc_small_advsimd_<mode>): ...this.
	(tlsdesc_small_sve_<mode>): New pattern.

gcc/testsuite/
	* gcc.target/aarch64/sve/tls_1.c: New test.
	* gcc.target/aarch64/sve/tls_2.C: Likewise.

Patch

Index: gcc/config/aarch64/aarch64.md
===================================================================
--- gcc/config/aarch64/aarch64.md	2018-02-01 11:04:16.726191903 +0000
+++ gcc/config/aarch64/aarch64.md	2018-02-08 11:51:37.226675644 +0000
@@ -57,7 +57,14 @@  (define_constants
     (LR_REGNUM		30)
     (SP_REGNUM		31)
     (V0_REGNUM		32)
+    (V4_REGNUM		36)
+    (V8_REGNUM		40)
+    (V12_REGNUM		44)
     (V15_REGNUM		47)
+    (V16_REGNUM		48)
+    (V20_REGNUM		52)
+    (V24_REGNUM		56)
+    (V28_REGNUM		60)
     (V31_REGNUM		63)
     (LAST_SAVED_REGNUM	63)
     (SFP_REGNUM		64)
@@ -66,7 +73,20 @@  (define_constants
     ;; Defined only to make the DWARF description simpler.
     (VG_REGNUM		67)
     (P0_REGNUM		68)
+    (P1_REGNUM		69)
+    (P2_REGNUM		70)
+    (P3_REGNUM		71)
+    (P4_REGNUM		72)
+    (P5_REGNUM		73)
+    (P6_REGNUM		74)
     (P7_REGNUM		75)
+    (P8_REGNUM		76)
+    (P9_REGNUM		77)
+    (P10_REGNUM		78)
+    (P11_REGNUM		79)
+    (P12_REGNUM		80)
+    (P13_REGNUM		81)
+    (P14_REGNUM		82)
     (P15_REGNUM		83)
   ]
 )
@@ -5786,14 +5806,68 @@  (define_insn "tlsle48_<mode>"
    (set_attr "length" "12")]
 )
 
-(define_insn "tlsdesc_small_<mode>"
+(define_expand "tlsdesc_small_<mode>"
+  [(unspec:PTR [(match_operand 0 "aarch64_valid_symref")] UNSPEC_TLSDESC)]
+  "TARGET_TLS_DESC"
+  {
+    if (TARGET_SVE)
+      emit_insn (gen_tlsdesc_small_sve_<mode> (operands[0]));
+    else
+      emit_insn (gen_tlsdesc_small_advsimd_<mode> (operands[0]));
+    DONE;
+  }
+)
+
+;; tlsdesc calls preserve all core and Advanced SIMD registers except
+;; R0 and LR.
+(define_insn "tlsdesc_small_advsimd_<mode>"
   [(set (reg:PTR R0_REGNUM)
         (unspec:PTR [(match_operand 0 "aarch64_valid_symref" "S")]
-		   UNSPEC_TLSDESC))
+		    UNSPEC_TLSDESC))
    (clobber (reg:DI LR_REGNUM))
    (clobber (reg:CC CC_REGNUM))
    (clobber (match_scratch:DI 1 "=r"))]
-  "TARGET_TLS_DESC"
+  "TARGET_TLS_DESC && !TARGET_SVE"
+  "adrp\\tx0, %A0\;ldr\\t%<w>1, [x0, #%L0]\;add\\t<w>0, <w>0, %L0\;.tlsdesccall\\t%0\;blr\\t%1"
+  [(set_attr "type" "call")
+   (set_attr "length" "16")])
+
+;; For SVE, model tlsdesc calls as clobbering all vector and predicate
+;; registers, on top of the usual R0 and LR.  In reality the calls
+;; preserve the low 128 bits of the vector registers, but we don't
+;; yet have a way of representing that in the instruction pattern.
+(define_insn "tlsdesc_small_sve_<mode>"
+  [(set (reg:PTR R0_REGNUM)
+        (unspec:PTR [(match_operand 0 "aarch64_valid_symref" "S")]
+		    UNSPEC_TLSDESC))
+   (clobber (reg:DI LR_REGNUM))
+   (clobber (reg:CC CC_REGNUM))
+   (clobber (reg:XI V0_REGNUM))
+   (clobber (reg:XI V4_REGNUM))
+   (clobber (reg:XI V8_REGNUM))
+   (clobber (reg:XI V12_REGNUM))
+   (clobber (reg:XI V16_REGNUM))
+   (clobber (reg:XI V20_REGNUM))
+   (clobber (reg:XI V24_REGNUM))
+   (clobber (reg:XI V28_REGNUM))
+   (clobber (reg:VNx2BI P0_REGNUM))
+   (clobber (reg:VNx2BI P1_REGNUM))
+   (clobber (reg:VNx2BI P2_REGNUM))
+   (clobber (reg:VNx2BI P3_REGNUM))
+   (clobber (reg:VNx2BI P4_REGNUM))
+   (clobber (reg:VNx2BI P5_REGNUM))
+   (clobber (reg:VNx2BI P6_REGNUM))
+   (clobber (reg:VNx2BI P7_REGNUM))
+   (clobber (reg:VNx2BI P8_REGNUM))
+   (clobber (reg:VNx2BI P9_REGNUM))
+   (clobber (reg:VNx2BI P10_REGNUM))
+   (clobber (reg:VNx2BI P11_REGNUM))
+   (clobber (reg:VNx2BI P12_REGNUM))
+   (clobber (reg:VNx2BI P13_REGNUM))
+   (clobber (reg:VNx2BI P14_REGNUM))
+   (clobber (reg:VNx2BI P15_REGNUM))
+   (clobber (match_scratch:DI 1 "=r"))]
+  "TARGET_TLS_DESC && TARGET_SVE"
   "adrp\\tx0, %A0\;ldr\\t%<w>1, [x0, #%L0]\;add\\t<w>0, <w>0, %L0\;.tlsdesccall\\t%0\;blr\\t%1"
   [(set_attr "type" "call")
    (set_attr "length" "16")])
Index: gcc/testsuite/gcc.target/aarch64/sve/tls_1.c
===================================================================
--- /dev/null	2018-02-08 11:17:10.862716283 +0000
+++ gcc/testsuite/gcc.target/aarch64/sve/tls_1.c	2018-02-08 11:51:37.229675401 +0000
@@ -0,0 +1,17 @@ 
+/* { dg-options "-O2 -fPIC -msve-vector-bits=256" } */
+
+typedef unsigned int v8si __attribute__((vector_size(32)));
+
+extern __thread int y;
+
+void
+f (int *a)
+{
+  v8si x;
+  asm volatile ("dup %0.s, #0x11" : "=w" (x) :: "memory");
+  if (*a)
+    asm volatile ("insr %0.s, %w2" : "=w" (x) : "0" (x), "r" (y));
+}
+
+/* { dg-final { scan-assembler {\tst(r|1.)\tz[0-9]} } } */
+/* { dg-final { scan-assembler {\tld(r|1.)\tz[0-9]} } } */
Index: gcc/testsuite/gcc.target/aarch64/sve/tls_2.C
===================================================================
--- /dev/null	2018-02-08 11:17:10.862716283 +0000
+++ gcc/testsuite/gcc.target/aarch64/sve/tls_2.C	2018-02-08 11:51:37.229675401 +0000
@@ -0,0 +1,30 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -fPIC -msve-vector-bits=256" } */
+
+#include <stdint.h>
+
+typedef int8_t v32qi __attribute__((vector_size (32)));
+
+extern __thread int z;
+
+void
+foo (v32qi *a, int *b)
+{
+  v32qi x = a[0], y = a[1];
+  asm volatile ("" :: "w" ((v32qi) { -1, 0, 0, -1, -1, -1, 0, 0,
+				     -1, -1, -1, -1, 0, 0, 0, 0,
+				     -1, -1, -1, -1, -1, -1, -1, -1,
+				     0, 0, 0, 0, 0, 0, 0, 0 } ? x : y)
+		: "memory");
+  if (*b)
+    {
+      x = a[2], y = a[3];
+      asm volatile ("" :: "w" ((v32qi) { -1, 0, 0, -1, -1, -1, 0, 0,
+					 -1, -1, -1, -1, 0, 0, 0, 0,
+					 -1, -1, -1, -1, -1, -1, -1, -1,
+					 0, 0, 0, 0, 0, 0, 0, 0 } ? x : y),
+		    "r" (z));
+    }
+}
+
+/* { dg-final { scan-assembler-times {\tldr\tp[0-9]} 2 } } */