[AArch64,v2,10/11] aarch64: Implement TImode compare-and-swap

Message ID	20181002161915.18843-11-richard.henderson@linaro.org
State	New
Headers	show Delivered-To: patch@linaro.org Received-SPF: pass (google.com: domain of gcc-patches-return-486826-patch=linaro.org@gcc.gnu.org designates 209.132.180.131 as permitted sender) client-ip=209.132.180.131; DomainKey-Signature: a=rsa-sha1; c=nofws; d=gcc.gnu.org; h=list-id :list-unsubscribe:list-archive:list-post:list-help:sender:from :to:cc:subject:date:message-id:in-reply-to:references; q=dns; s= default; b=cbKfoaL1pOKFgu51ohpwzSZ+BoeNU4SMWvI56NNjyMIk/GouI4F/b 5TFRqavQdgcQMg0MrUH54pAeuaGvSkHG2+7ErFYVeB1hybr7KtMsIZDjzqhOUat7 OlbsCA9BTIi/sYyRI4A9UIZ7bM7sELtoXA3JOE5ABU7sLdtsELOrN8= Mailing-List: contact gcc-patches-help@gcc.gnu.org; run by ezmlm Precedence: bulk Sender: gcc-patches-owner@gcc.gnu.org From: Richard Henderson <richard.henderson@linaro.org> To: gcc-patches@gcc.gnu.org Cc: ramana.radhakrishnan@arm.com, agraf@suse.de, marcus.shawcroft@arm.com, james.greenhalgh@arm.com, richard.earnshaw@arm.com Subject: [PATCH, AArch64 v2 10/11] aarch64: Implement TImode compare-and-swap Date: Tue, 2 Oct 2018 11:19:14 -0500 Message-Id: <20181002161915.18843-11-richard.henderson@linaro.org> In-Reply-To: <20181002161915.18843-1-richard.henderson@linaro.org> References: <20181002161915.18843-1-richard.henderson@linaro.org>
Series	LSE atomics out-of-line \| expand [AArch64,v2,00/11] LSE atomics out-of-line [AArch64,v2,01/11] aarch64: Simplify LSE cas generation [AArch64,v2,02/11] aarch64: Improve cas generation [AArch64,v2,03/11] aarch64: Improve swp generation [AArch64,v2,04/11] aarch64: Improve atomic-op lse generation [AArch64,v2,05/11] aarch64: Emit LSE st<op> instructions [AArch64,v2,06/11] Add visibility to libfunc constructors [AArch64,v2,07/11] aarch64: Add out-of-line functions for LSE atomics [AArch64,v2,08/11] aarch64: Implement -matomic-ool [AArch64,v2,09/11] aarch64: Force TImode values into even registers [AArch64,v2,10/11] aarch64: Implement TImode compare-and-swap [AArch64,v2,11/11] Enable -matomic-ool by default

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h index c7b96b12bbe..f735c4e5ad8 100644 --- a/gcc/config/aarch64/aarch64-protos.h +++ b/gcc/config/aarch64/aarch64-protos.h @@ -626,7 +626,7 @@ bool aarch64_high_bits_all_ones_p (HOST_WIDE_INT); struct atomic_ool_names { - const char *str[4][4]; + const char *str[5][4]; }; rtx aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx, diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c index ce4d7e51d00..ac2f055a09e 100644 --- a/gcc/config/aarch64/aarch64.c +++ b/gcc/config/aarch64/aarch64.c @@ -1610,10 +1610,33 @@ emit_set_insn (rtx x, rtx y) rtx aarch64_gen_compare_reg (RTX_CODE code, rtx x, rtx y) { - machine_mode mode = SELECT_CC_MODE (code, x, y); - rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM); + machine_mode cmp_mode = GET_MODE (x); + machine_mode cc_mode; + rtx cc_reg; - emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y)); + if (cmp_mode == E_TImode) + { + gcc_assert (code == NE); + + cc_mode = E_CCmode; + cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM); + + rtx x_lo = operand_subword (x, 0, 0, TImode); + rtx y_lo = operand_subword (y, 0, 0, TImode); + emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x_lo, y_lo)); + + rtx x_hi = operand_subword (x, 1, 0, TImode); + rtx y_hi = operand_subword (y, 1, 0, TImode); + emit_insn (gen_ccmpdi (cc_reg, cc_reg, x_hi, y_hi, + gen_rtx_EQ (cc_mode, cc_reg, const0_rtx), + GEN_INT (AARCH64_EQ))); + } + else + { + cc_mode = SELECT_CC_MODE (code, x, y); + cc_reg = gen_rtx_REG (cc_mode, CC_REGNUM); + emit_set_insn (cc_reg, gen_rtx_COMPARE (cc_mode, x, y)); + } return cc_reg; } @@ -6693,7 +6716,7 @@ sizetochar (int size) 'S/T/U/V': Print a FP/SIMD register name for a register list. The register printed is the FP/SIMD register name of X + 0/1/2/3 for S/T/U/V. - 'R': Print a scalar FP/SIMD register name + 1. + 'R': Print a scalar Integer/FP/SIMD register name + 1. 'X': Print bottom 16 bits of integer constant in hex. 'w/x': Print a general register name or the zero register (32-bit or 64-bit). @@ -6885,12 +6908,13 @@ aarch64_print_operand (FILE *f, rtx x, int code) break; case 'R': - if (!REG_P (x) || !FP_REGNUM_P (REGNO (x))) - { - output_operand_lossage ("incompatible floating point / vector register operand for '%%%c'", code); - return; - } - asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1); + if (REG_P (x) && FP_REGNUM_P (REGNO (x))) + asm_fprintf (f, "q%d", REGNO (x) - V0_REGNUM + 1); + else if (REG_P (x) && GP_REGNUM_P (REGNO (x))) + asm_fprintf (f, "x%d", REGNO (x) - R0_REGNUM + 1); + else + output_operand_lossage ("incompatible register operand for '%%%c'", + code); break; case 'X': @@ -14143,16 +14167,26 @@ static void aarch64_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, rtx model_rtx) { - emit_insn (gen_aarch64_load_exclusive (mode, rval, mem, model_rtx)); + if (mode == E_TImode) + emit_insn (gen_aarch64_load_exclusive_pair (gen_lowpart (DImode, rval), + gen_highpart (DImode, rval), + mem, model_rtx)); + else + emit_insn (gen_aarch64_load_exclusive (mode, rval, mem, model_rtx)); } /* Emit store exclusive. */ static void aarch64_emit_store_exclusive (machine_mode mode, rtx bval, - rtx rval, rtx mem, rtx model_rtx) + rtx mem, rtx val, rtx model_rtx) { - emit_insn (gen_aarch64_store_exclusive (mode, bval, rval, mem, model_rtx)); + if (mode == E_TImode) + emit_insn (gen_aarch64_store_exclusive_pair + (bval, mem, operand_subword (val, 0, 0, TImode), + operand_subword (val, 1, 0, TImode), model_rtx)); + else + emit_insn (gen_aarch64_store_exclusive (mode, bval, mem, val, model_rtx)); } /* Mark the previous jump instruction as unlikely. */ @@ -14164,7 +14198,7 @@ aarch64_emit_unlikely_jump (rtx insn) add_reg_br_prob_note (jump, profile_probability::very_unlikely ()); } -/* We store the names of the various atomic helpers in a 4x4 array. +/* We store the names of the various atomic helpers in a 5x4 array. Return the libcall function given MODE, MODEL and NAMES. */ rtx @@ -14188,6 +14222,9 @@ aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx, case E_DImode: mode_idx = 3; break; + case E_TImode: + mode_idx = 4; + break; default: gcc_unreachable (); } @@ -14222,9 +14259,11 @@ aarch64_atomic_ool_func(machine_mode mode, rtx model_rtx, "__aa64_" #B #N "_rel", \ "__aa64_" #B #N "_acq_rel" } -#define DEF4(B) DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8) +#define DEF4(B) DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), \ + { NULL, NULL, NULL, NULL } +#define DEF5(B) DEF0(B, 1), DEF0(B, 2), DEF0(B, 4), DEF0(B, 8), DEF0(B, 16) -static const atomic_ool_names aarch64_ool_cas_names = { { DEF4(cas) } }; +static const atomic_ool_names aarch64_ool_cas_names = { { DEF5(cas) } }; const atomic_ool_names aarch64_ool_swp_names = { { DEF4(swp) } }; const atomic_ool_names aarch64_ool_ldadd_names = { { DEF4(ldadd) } }; const atomic_ool_names aarch64_ool_ldset_names = { { DEF4(ldset) } }; @@ -14247,6 +14286,7 @@ const atomic_ool_names aarch64_ool_steor_names = { { DEF4(eor) } }; #undef DEF0 #undef DEF4 +#undef DEF5 /* Expand a compare and swap pattern. */ diff --git a/libgcc/config/aarch64/lse.c b/libgcc/config/aarch64/lse.c index 68ca7df667b..f6114add71a 100644 --- a/libgcc/config/aarch64/lse.c +++ b/libgcc/config/aarch64/lse.c @@ -91,6 +91,7 @@ asm(".arch armv8-a+lse"); #elif SIZE == 4 || SIZE == 8 # define S "" # define MASK "" +#elif SIZE == 16 #else # error #endif @@ -98,9 +99,11 @@ asm(".arch armv8-a+lse"); #if SIZE < 8 # define T unsigned int # define W "w" -#else +#elif SIZE == 8 # define T unsigned long long # define W "" +#else +# define T unsigned __int128 #endif #if MODEL == 1 @@ -138,19 +141,38 @@ T NAME(cas) (T cmp, T new, T *ptr) unsigned tmp; if (have_atomics) - __asm__("cas" A L S " %"W"0, %"W"2, %1" - : "=r"(old), "+m"(*ptr) : "r"(new), "0"(cmp)); + { +#if SIZE == 16 + __asm__("casp" A L " %0, %R0, %2, %R2, %1" + : "=r"(old), "+m"(*ptr) : "r"(new), "0"(cmp)); +#else + __asm__("cas" A L S " %"W"0, %"W"2, %1" + : "=r"(old), "+m"(*ptr) : "r"(new), "0"(cmp)); +#endif + } else - __asm__( - "0: " - "ld" A "xr"S" %"W"0, %1\n\t" - "cmp %"W"0, %"W"4" MASK "\n\t" - "bne 1f\n\t" - "st" L "xr"S" %w2, %"W"3, %1\n\t" - "cbnz %w2, 0b\n" - "1:" - : "=&r"(old), "+m"(*ptr), "=&r"(tmp) : "r"(new), "r"(cmp)); - + { +#if SIZE == 16 + __asm__("0: " + "ld" A "xp %0, %R0, %1\n\t" + "cmp %0, %4\n\t" + "ccmp %R0, %R4, #0, eq\n\t" + "bne 1f\n\t" + "st" L "xp %w2, %3, %R3, %1\n\t" + "cbnz %w2, 0b\n" + "1:" + : "=&r"(old), "+m"(*ptr), "=&r"(tmp) : "r"(new), "r"(cmp)); +#else + __asm__("0: " + "ld" A "xr"S" %"W"0, %1\n\t" + "cmp %"W"0, %"W"4" MASK "\n\t" + "bne 1f\n\t" + "st" L "xr"S" %w2, %"W"3, %1\n\t" + "cbnz %w2, 0b\n" + "1:" + : "=&r"(old), "+m"(*ptr), "=&r"(tmp) : "r"(new), "r"(cmp)); +#endif + } return old; } #endif diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md index 72f9962fe55..fe604606bdd 100644 --- a/gcc/config/aarch64/atomics.md +++ b/gcc/config/aarch64/atomics.md @@ -22,10 +22,10 @@ (define_expand "@atomic_compare_and_swap<mode>" [(match_operand:SI 0 "register_operand" "") ;; bool out - (match_operand:ALLI 1 "register_operand" "") ;; val out - (match_operand:ALLI 2 "aarch64_sync_memory_operand" "") ;; memory - (match_operand:ALLI 3 "nonmemory_operand" "") ;; expected - (match_operand:ALLI 4 "aarch64_reg_or_zero" "") ;; desired + (match_operand:ALLI_TI 1 "register_operand" "") ;; val out + (match_operand:ALLI_TI 2 "aarch64_sync_memory_operand" "") ;; memory + (match_operand:ALLI_TI 3 "nonmemory_operand" "") ;; expected + (match_operand:ALLI_TI 4 "aarch64_reg_or_zero" "") ;; desired (match_operand:SI 5 "const_int_operand") ;; is_weak (match_operand:SI 6 "const_int_operand") ;; mod_s (match_operand:SI 7 "const_int_operand")] ;; mod_f @@ -88,6 +88,30 @@ } ) +(define_insn_and_split "@aarch64_compare_and_swap<mode>" + [(set (reg:CC CC_REGNUM) ;; bool out + (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW)) + (set (match_operand:JUST_TI 0 "register_operand" "=&r") ;; val out + (match_operand:JUST_TI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory + (set (match_dup 1) + (unspec_volatile:JUST_TI + [(match_operand:JUST_TI 2 "register_operand" "r") ;; expect + (match_operand:JUST_TI 3 "aarch64_reg_or_zero" "rZ") ;; desired + (match_operand:SI 4 "const_int_operand") ;; is_weak + (match_operand:SI 5 "const_int_operand") ;; mod_s + (match_operand:SI 6 "const_int_operand")] ;; mod_f + UNSPECV_ATOMIC_CMPSW)) + (clobber (match_scratch:SI 7 "=&r"))] + "" + "#" + "&& reload_completed" + [(const_int 0)] + { + aarch64_split_compare_and_swap (operands); + DONE; + } +) + (define_insn "@aarch64_compare_and_swap<mode>_lse" [(set (match_operand:SI 0 "register_operand" "+r") ;; val out (zero_extend:SI @@ -133,6 +157,28 @@ return "casal<atomic_sfx>\t%<w>0, %<w>2, %1"; }) +(define_insn "@aarch64_compare_and_swap<mode>_lse" + [(set (match_operand:JUST_TI 0 "register_operand" "+r") ;; val out + (match_operand:JUST_TI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory + (set (match_dup 1) + (unspec_volatile:JUST_TI + [(match_dup 0) ;; expect + (match_operand:JUST_TI 2 "register_operand" "r") ;; desired + (match_operand:SI 3 "const_int_operand")] ;; mod_s + UNSPECV_ATOMIC_CMPSW))] + "TARGET_LSE" +{ + enum memmodel model = memmodel_from_int (INTVAL (operands[3])); + if (is_mm_relaxed (model)) + return "casp\t%0, %R0, %2, %R2, %1"; + else if (is_mm_acquire (model) || is_mm_consume (model)) + return "caspa\t%0, %R0, %2, %R2, %1"; + else if (is_mm_release (model)) + return "caspl\t%0, %R0, %2, %R2, %1"; + else + return "caspal\t%0, %R0, %2, %R2, %1"; +}) + (define_expand "atomic_exchange<mode>" [(match_operand:ALLI 0 "register_operand" "") (match_operand:ALLI 1 "aarch64_sync_memory_operand" "") @@ -650,6 +696,24 @@ } ) +(define_insn "aarch64_load_exclusive_pair" + [(set (match_operand:DI 0 "register_operand" "=r") + (unspec_volatile:DI + [(match_operand:TI 2 "aarch64_sync_memory_operand" "Q") + (match_operand:SI 3 "const_int_operand")] + UNSPECV_LX)) + (set (match_operand:DI 1 "register_operand" "=r") + (unspec_volatile:DI [(match_dup 2) (match_dup 3)] UNSPECV_LX))] + "" + { + enum memmodel model = memmodel_from_int (INTVAL (operands[3])); + if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_release (model)) + return "ldxp\t%0, %1, %2"; + else + return "ldaxp\t%0, %1, %2"; + } +) + (define_insn "@aarch64_store_exclusive<mode>" [(set (match_operand:SI 0 "register_operand" "=&r") (unspec_volatile:SI [(const_int 0)] UNSPECV_SX)) @@ -668,6 +732,25 @@ } ) +(define_insn "aarch64_store_exclusive_pair" + [(set (match_operand:SI 0 "register_operand" "=&r") + (unspec_volatile:SI [(const_int 0)] UNSPECV_SX)) + (set (match_operand:TI 1 "aarch64_sync_memory_operand" "=Q") + (unspec_volatile:TI + [(match_operand:DI 2 "aarch64_reg_or_zero" "rZ") + (match_operand:DI 3 "aarch64_reg_or_zero" "rZ") + (match_operand:SI 4 "const_int_operand")] + UNSPECV_SX))] + "" + { + enum memmodel model = memmodel_from_int (INTVAL (operands[3])); + if (is_mm_relaxed (model) || is_mm_consume (model) || is_mm_acquire (model)) + return "stxp\t%w0, %x2, %x3, %1"; + else + return "stlxp\t%w0, %x2, %x3, %1"; + } +) + (define_expand "mem_thread_fence" [(match_operand:SI 0 "const_int_operand" "")] "" diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md index 524e4e6929b..dd26bdbbc6b 100644 --- a/gcc/config/aarch64/iterators.md +++ b/gcc/config/aarch64/iterators.md @@ -29,6 +29,9 @@ ;; Iterator for HI, SI, DI, some instructions can only work on these modes. (define_mode_iterator GPI_I16 [(HI "AARCH64_ISA_F16") SI DI]) +;; "Iterator" for just TI -- features like @pattern only work with iterators. +(define_mode_iterator JUST_TI [TI]) + ;; Iterator for QI and HI modes (define_mode_iterator SHORT [QI HI]) diff --git a/libgcc/config/aarch64/t-lse b/libgcc/config/aarch64/t-lse index e862b0c2448..534ff6efea8 100644 --- a/libgcc/config/aarch64/t-lse +++ b/libgcc/config/aarch64/t-lse @@ -18,15 +18,19 @@ # along with GCC; see the file COPYING3. If not see # <http://www.gnu.org/licenses/>. -# CAS, Swap, Load-and-operate have 4 sizes and 4 memory models -S1 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), cas swp ldadd ldclr ldeor ldset)) +# Compare-and-swap has 5 sizes and 4 memory models. +S0 := $(foreach s, 1 2 4 8 16, $(addsuffix _$(s), cas)) +O0 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S0))) + +# Swap, Load-and-operate have 4 sizes and 4 memory models +S1 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), swp ldadd ldclr ldeor ldset)) O1 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S1))) # Store-and-operate has 4 sizes but only 2 memory models (relaxed, release). S2 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), stadd stclr steor stset)) O2 := $(foreach m, 1 3, $(addsuffix _$(m)$(objext), $(S2))) -LSE_OBJS := $(O1) $(O2) +LSE_OBJS := $(O0) $(O1) $(O2) libgcc-objects += $(LSE_OBJS) have_atomic$(objext)

[AArch64,v2,10/11] aarch64: Implement TImode compare-and-swap

Commit Message

Patch