diff mbox series

[AArch64,v4,4/6] aarch64: Add out-of-line functions for LSE atomics

Message ID 20190918015817.24408-5-richard.henderson@linaro.org
State New
Headers show
Series LSE atomics out-of-line | expand

Commit Message

Richard Henderson Sept. 18, 2019, 1:58 a.m. UTC
This is the libgcc part of the interface -- providing the functions.
Rationale is provided at the top of libgcc/config/aarch64/lse.S.

	* config/aarch64/lse-init.c: New file.
	* config/aarch64/lse.S: New file.
	* config/aarch64/t-lse: New file.
	* config.host: Add t-lse to all aarch64 tuples.
---
 libgcc/config/aarch64/lse-init.c |  45 ++++++
 libgcc/config.host               |   4 +
 libgcc/config/aarch64/lse.S      | 235 +++++++++++++++++++++++++++++++
 libgcc/config/aarch64/t-lse      |  44 ++++++
 4 files changed, 328 insertions(+)
 create mode 100644 libgcc/config/aarch64/lse-init.c
 create mode 100644 libgcc/config/aarch64/lse.S
 create mode 100644 libgcc/config/aarch64/t-lse

-- 
2.17.1

Comments

Kyrill Tkachov Sept. 18, 2019, 12:58 p.m. UTC | #1
On 9/18/19 2:58 AM, Richard Henderson wrote:
> This is the libgcc part of the interface -- providing the functions.

> Rationale is provided at the top of libgcc/config/aarch64/lse.S.

>

> 	* config/aarch64/lse-init.c: New file.

> 	* config/aarch64/lse.S: New file.

> 	* config/aarch64/t-lse: New file.

> 	* config.host: Add t-lse to all aarch64 tuples.

> ---

>   libgcc/config/aarch64/lse-init.c |  45 ++++++

>   libgcc/config.host               |   4 +

>   libgcc/config/aarch64/lse.S      | 235 +++++++++++++++++++++++++++++++

>   libgcc/config/aarch64/t-lse      |  44 ++++++

>   4 files changed, 328 insertions(+)

>   create mode 100644 libgcc/config/aarch64/lse-init.c

>   create mode 100644 libgcc/config/aarch64/lse.S

>   create mode 100644 libgcc/config/aarch64/t-lse

>

> diff --git a/libgcc/config/aarch64/lse-init.c b/libgcc/config/aarch64/lse-init.c

> new file mode 100644

> index 00000000000..51fb21d45c9

> --- /dev/null

> +++ b/libgcc/config/aarch64/lse-init.c

> @@ -0,0 +1,45 @@

> +/* Out-of-line LSE atomics for AArch64 architecture, Init.

> +   Copyright (C) 2018 Free Software Foundation, Inc.

> +   Contributed by Linaro Ltd.

> +



This, and the other new files, will need an updated copyright date now.

Thanks,

Kyrill


> +This file is part of GCC.

> +

> +GCC is free software; you can redistribute it and/or modify it under

> +the terms of the GNU General Public License as published by the Free

> +Software Foundation; either version 3, or (at your option) any later

> +version.

> +

> +GCC is distributed in the hope that it will be useful, but WITHOUT ANY

> +WARRANTY; without even the implied warranty of MERCHANTABILITY or

> +FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

> +for more details.

> +

> +Under Section 7 of GPL version 3, you are granted additional

> +permissions described in the GCC Runtime Library Exception, version

> +3.1, as published by the Free Software Foundation.

> +

> +You should have received a copy of the GNU General Public License and

> +a copy of the GCC Runtime Library Exception along with this program;

> +see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see

> +<http://www.gnu.org/licenses/>.  */

> +

> +/* Define the symbol gating the LSE implementations.  */

> +_Bool __aarch64_have_lse_atomics

> +  __attribute__((visibility("hidden"), nocommon));

> +

> +/* Disable initialization of __aarch64_have_lse_atomics during bootstrap.  */

> +#ifndef inhibit_libc

> +# include <sys/auxv.h>

> +

> +/* Disable initialization if the system headers are too old.  */

> +# if defined(AT_HWCAP) && defined(HWCAP_ATOMICS)

> +

> +static void __attribute__((constructor))

> +init_have_lse_atomics (void)

> +{

> +  unsigned long hwcap = getauxval (AT_HWCAP);

> +  __aarch64_have_lse_atomics = (hwcap & HWCAP_ATOMICS) != 0;

> +}

> +

> +# endif /* HWCAP */

> +#endif /* inhibit_libc */

> diff --git a/libgcc/config.host b/libgcc/config.host

> index 728e543ea39..122113fc519 100644

> --- a/libgcc/config.host

> +++ b/libgcc/config.host

> @@ -350,12 +350,14 @@ aarch64*-*-elf | aarch64*-*-rtems*)

>   	extra_parts="$extra_parts crtbegin.o crtend.o crti.o crtn.o"

>   	extra_parts="$extra_parts crtfastmath.o"

>   	tmake_file="${tmake_file} ${cpu_type}/t-aarch64"

> +	tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc"

>   	tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm"

>   	md_unwind_header=aarch64/aarch64-unwind.h

>   	;;

>   aarch64*-*-freebsd*)

>   	extra_parts="$extra_parts crtfastmath.o"

>   	tmake_file="${tmake_file} ${cpu_type}/t-aarch64"

> +	tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc"

>   	tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm"

>   	md_unwind_header=aarch64/freebsd-unwind.h

>   	;;

> @@ -367,12 +369,14 @@ aarch64*-*-netbsd*)

>   	;;

>   aarch64*-*-fuchsia*)

>   	tmake_file="${tmake_file} ${cpu_type}/t-aarch64"

> +	tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc"

>   	tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp"

>   	;;

>   aarch64*-*-linux*)

>   	extra_parts="$extra_parts crtfastmath.o"

>   	md_unwind_header=aarch64/linux-unwind.h

>   	tmake_file="${tmake_file} ${cpu_type}/t-aarch64"

> +	tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc"

>   	tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm"

>   	;;

>   alpha*-*-linux*)

> diff --git a/libgcc/config/aarch64/lse.S b/libgcc/config/aarch64/lse.S

> new file mode 100644

> index 00000000000..c24a39242ca

> --- /dev/null

> +++ b/libgcc/config/aarch64/lse.S

> @@ -0,0 +1,235 @@

> +/* Out-of-line LSE atomics for AArch64 architecture.

> +   Copyright (C) 2018 Free Software Foundation, Inc.

> +   Contributed by Linaro Ltd.

> +

> +This file is part of GCC.

> +

> +GCC is free software; you can redistribute it and/or modify it under

> +the terms of the GNU General Public License as published by the Free

> +Software Foundation; either version 3, or (at your option) any later

> +version.

> +

> +GCC is distributed in the hope that it will be useful, but WITHOUT ANY

> +WARRANTY; without even the implied warranty of MERCHANTABILITY or

> +FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

> +for more details.

> +

> +Under Section 7 of GPL version 3, you are granted additional

> +permissions described in the GCC Runtime Library Exception, version

> +3.1, as published by the Free Software Foundation.

> +

> +You should have received a copy of the GNU General Public License and

> +a copy of the GCC Runtime Library Exception along with this program;

> +see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see

> +<http://www.gnu.org/licenses/>.  */

> +

> +/*

> + * The problem that we are trying to solve is operating system deployment

> + * of ARMv8.1-Atomics, also known as Large System Extensions (LSE).

> + *

> + * There are a number of potential solutions for this problem which have

> + * been proposed and rejected for various reasons.  To recap:

> + *

> + * (1) Multiple builds.  The dynamic linker will examine /lib64/atomics/

> + * if HWCAP_ATOMICS is set, allowing entire libraries to be overwritten.

> + * However, not all Linux distributions are happy with multiple builds,

> + * and anyway it has no effect on main applications.

> + *

> + * (2) IFUNC.  We could put these functions into libgcc_s.so, and have

> + * a single copy of each function for all DSOs.  However, ARM is concerned

> + * that the branch-to-indirect-branch that is implied by using a PLT,

> + * as required by IFUNC, is too much overhead for smaller cpus.

> + *

> + * (3) Statically predicted direct branches.  This is the approach that

> + * is taken here.  These functions are linked into every DSO that uses them.

> + * All of the symbols are hidden, so that the functions are called via a

> + * direct branch.  The choice of LSE vs non-LSE is done via one byte load

> + * followed by a well-predicted direct branch.  The functions are compiled

> + * separately to minimize code size.

> + */

> +

> +/* Tell the assembler to accept LSE instructions.  */

> +	.arch armv8-a+lse

> +

> +/* Declare the symbol gating the LSE implementations.  */

> +	.hidden	__aarch64_have_lse_atomics

> +

> +/* Turn size and memory model defines into mnemonic fragments.  */

> +#if SIZE == 1

> +# define S     b

> +# define UXT   uxtb

> +#elif SIZE == 2

> +# define S     h

> +# define UXT   uxth

> +#elif SIZE == 4 || SIZE == 8 || SIZE == 16

> +# define S

> +# define UXT   mov

> +#else

> +# error

> +#endif

> +

> +#if MODEL == 1

> +# define SUFF  _relax

> +# define A

> +# define L

> +#elif MODEL == 2

> +# define SUFF  _acq

> +# define A     a

> +# define L

> +#elif MODEL == 3

> +# define SUFF  _rel

> +# define A

> +# define L     l

> +#elif MODEL == 4

> +# define SUFF  _acq_rel

> +# define A     a

> +# define L     l

> +#else

> +# error

> +#endif

> +

> +/* Concatenate symbols.  */

> +#define glue2_(A, B)		A ## B

> +#define glue2(A, B)		glue2_(A, B)

> +#define glue3_(A, B, C)		A ## B ## C

> +#define glue3(A, B, C)		glue3_(A, B, C)

> +#define glue4_(A, B, C, D)	A ## B ## C ## D

> +#define glue4(A, B, C, D)	glue4_(A, B, C, D)

> +

> +/* Select the size of a register, given a regno.  */

> +#define x(N)			glue2(x, N)

> +#define w(N)			glue2(w, N)

> +#if SIZE < 8

> +# define s(N)			w(N)

> +#else

> +# define s(N)			x(N)

> +#endif

> +

> +#define NAME(BASE)		glue4(__aarch64_, BASE, SIZE, SUFF)

> +#define LDXR			glue4(ld, A, xr, S)

> +#define STXR			glue4(st, L, xr, S)

> +

> +/* Temporary registers used.  Other than these, only the return value

> +   register (x0) and the flags are modified.  */

> +#define tmp0	16

> +#define tmp1	17

> +#define tmp2	15

> +

> +/* Start and end a function.  */

> +.macro	STARTFN name

> +	.text

> +	.balign	16

> +	.globl	\name

> +	.hidden	\name

> +	.type	\name, %function

> +	.cfi_startproc

> +\name:

> +.endm

> +

> +.macro	ENDFN name

> +	.cfi_endproc

> +	.size	\name, . - \name

> +.endm

> +

> +/* Branch to LABEL if LSE is disabled.  */

> +.macro	JUMP_IF_NOT_LSE label

> +	adrp	x(tmp0), __aarch64_have_lse_atomics

> +	ldrb	w(tmp0), [x(tmp0), :lo12:__aarch64_have_lse_atomics]

> +	cbz	w(tmp0), \label

> +.endm

> +

> +#ifdef L_cas

> +

> +STARTFN	NAME(cas)

> +	JUMP_IF_NOT_LSE	8f

> +

> +#if SIZE < 16

> +#define CAS	glue4(cas, A, L, S)

> +

> +	CAS		s(0), s(1), [x2]	/* LSE path: single compare-and-swap instruction.  */

> +	ret

> +

> +8:	UXT		s(tmp0), s(0)	/* Non-LSE: keep the expected value, zero-extended for sizes 1 and 2.  */

> +0:	LDXR		s(0), [x2]

> +	cmp		s(0), s(tmp0)

> +	bne		1f	/* Mismatch: return the loaded value without storing.  */

> +	STXR		w(tmp1), s(1), [x2]

> +	cbnz		w(tmp1), 0b	/* Exclusive store failed: retry.  */

> +1:	ret

> +

> +#else

> +#define LDXP	glue3(ld, A, xp)

> +#define STXP	glue3(st, L, xp)

> +#define CASP	glue3(casp, A, L)

> +

> +	CASP		x0, x1, x2, x3, [x4]

> +	ret

> +

> +8:	mov		x(tmp0), x0	/* Non-LSE: keep the expected pair in tmp0:tmp1.  */

> +	mov		x(tmp1), x1

> +0:	LDXP		x0, x1, [x4]

> +	cmp		x0, x(tmp0)

> +	ccmp		x1, x(tmp1), #0, eq

> +	bne		1f

> +	STXP		w(tmp2), x2, x3, [x4]	/* Store the desired pair x2:x3, matching CASP, not the expected pair.  */

> +	cbnz		w(tmp2), 0b	/* Exclusive store failed: retry.  */

> +1:	ret

> +

> +#endif

> +

> +ENDFN	NAME(cas)

> +#endif

> +

> +#ifdef L_swp

> +#define SWP	glue4(swp, A, L, S)

> +

> +STARTFN	NAME(swp)

> +	JUMP_IF_NOT_LSE	8f

> +

> +	SWP		s(0), s(0), [x1]

> +	ret

> +

> +8:	mov		s(tmp0), s(0)

> +0:	LDXR		s(0), [x1]

> +	STXR		w(tmp1), s(tmp0), [x1]

> +	cbnz		w(tmp1), 0b

> +	ret

> +

> +ENDFN	NAME(swp)

> +#endif

> +

> +#if defined(L_ldadd) || defined(L_ldclr) \

> +    || defined(L_ldeor) || defined(L_ldset)

> +

> +#ifdef L_ldadd

> +#define LDNM	ldadd

> +#define OP	add

> +#elif defined(L_ldclr)

> +#define LDNM	ldclr

> +#define OP	bic

> +#elif defined(L_ldeor)

> +#define LDNM	ldeor

> +#define OP	eor

> +#elif defined(L_ldset)

> +#define LDNM	ldset

> +#define OP	orr

> +#else

> +#error

> +#endif

> +#define LDOP	glue4(LDNM, A, L, S)

> +

> +STARTFN	NAME(LDNM)

> +	JUMP_IF_NOT_LSE	8f

> +

> +	LDOP		s(0), s(0), [x1]	/* LSE path: single load-and-operate instruction.  */

> +	ret

> +

> +8:	mov		s(tmp0), s(0)	/* Non-LSE: keep the operand, x0 is reused for the loaded value.  */

> +0:	LDXR		s(0), [x1]

> +	OP		s(tmp1), s(0), s(tmp0)

> +	STXR		w(tmp2), s(tmp1), [x1]	/* Status goes in tmp2 so it does not alias the data register tmp1.  */

> +	cbnz		w(tmp2), 0b	/* Exclusive store failed: retry.  */

> +	ret

> +

> +ENDFN	NAME(LDNM)

> +#endif

> diff --git a/libgcc/config/aarch64/t-lse b/libgcc/config/aarch64/t-lse

> new file mode 100644

> index 00000000000..c7f4223cd45

> --- /dev/null

> +++ b/libgcc/config/aarch64/t-lse

> @@ -0,0 +1,44 @@

> +# Out-of-line LSE atomics for AArch64 architecture.

> +# Copyright (C) 2018 Free Software Foundation, Inc.

> +# Contributed by Linaro Ltd.

> +#

> +# This file is part of GCC.

> +#

> +# GCC is free software; you can redistribute it and/or modify it

> +# under the terms of the GNU General Public License as published by

> +# the Free Software Foundation; either version 3, or (at your option)

> +# any later version.

> +#

> +# GCC is distributed in the hope that it will be useful, but

> +# WITHOUT ANY WARRANTY; without even the implied warranty of

> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

> +# General Public License for more details.

> +#

> +# You should have received a copy of the GNU General Public License

> +# along with GCC; see the file COPYING3.  If not see

> +# <http://www.gnu.org/licenses/>.

> +

> +# Compare-and-swap has 5 sizes and 4 memory models.

> +S0 := $(foreach s, 1 2 4 8 16, $(addsuffix _$(s), cas))

> +O0 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S0)))

> +

> +# Swap, Load-and-operate have 4 sizes and 4 memory models

> +S1 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), swp ldadd ldclr ldeor ldset))

> +O1 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S1)))

> +

> +LSE_OBJS := $(O0) $(O1)

> +

> +libgcc-objects += $(LSE_OBJS) lse-init$(objext)

> +

> +empty      =

> +space      = $(empty) $(empty)

> +PAT_SPLIT  = $(subst _,$(space),$(*F))

> +PAT_BASE   = $(word 1,$(PAT_SPLIT))

> +PAT_N      = $(word 2,$(PAT_SPLIT))

> +PAT_M      = $(word 3,$(PAT_SPLIT))

> +

> +lse-init$(objext): $(srcdir)/config/aarch64/lse-init.c

> +	$(gcc_compile) -c $<

> +

> +$(LSE_OBJS): $(srcdir)/config/aarch64/lse.S

> +	$(gcc_compile) -DL_$(PAT_BASE) -DSIZE=$(PAT_N) -DMODEL=$(PAT_M) -c $<
Roman Zhuykov Dec. 23, 2019, 3:38 p.m. UTC | #2
This caused:

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=93053

--
Roman

Richard Henderson wrote 18.09.2019 04:58:
> This is the libgcc part of the interface -- providing the functions.

> Rationale is provided at the top of libgcc/config/aarch64/lse.S.

> 

> 	* config/aarch64/lse-init.c: New file.

> 	* config/aarch64/lse.S: New file.

> 	* config/aarch64/t-lse: New file.

> 	* config.host: Add t-lse to all aarch64 tuples.

> ---

>  libgcc/config/aarch64/lse-init.c |  45 ++++++

>  libgcc/config.host               |   4 +

>  libgcc/config/aarch64/lse.S      | 235 +++++++++++++++++++++++++++++++

>  libgcc/config/aarch64/t-lse      |  44 ++++++

>  4 files changed, 328 insertions(+)

>  create mode 100644 libgcc/config/aarch64/lse-init.c

>  create mode 100644 libgcc/config/aarch64/lse.S

>  create mode 100644 libgcc/config/aarch64/t-lse

> 

> diff --git a/libgcc/config/aarch64/lse-init.c 

> b/libgcc/config/aarch64/lse-init.c

> new file mode 100644

> index 00000000000..51fb21d45c9

> --- /dev/null

> +++ b/libgcc/config/aarch64/lse-init.c

> @@ -0,0 +1,45 @@

> +/* Out-of-line LSE atomics for AArch64 architecture, Init.

> +   Copyright (C) 2018 Free Software Foundation, Inc.

> +   Contributed by Linaro Ltd.

> +

> +This file is part of GCC.

> +

> +GCC is free software; you can redistribute it and/or modify it under

> +the terms of the GNU General Public License as published by the Free

> +Software Foundation; either version 3, or (at your option) any later

> +version.

> +

> +GCC is distributed in the hope that it will be useful, but WITHOUT ANY

> +WARRANTY; without even the implied warranty of MERCHANTABILITY or

> +FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

> +for more details.

> +

> +Under Section 7 of GPL version 3, you are granted additional

> +permissions described in the GCC Runtime Library Exception, version

> +3.1, as published by the Free Software Foundation.

> +

> +You should have received a copy of the GNU General Public License and

> +a copy of the GCC Runtime Library Exception along with this program;

> +see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see

> +<http://www.gnu.org/licenses/>.  */

> +

> +/* Define the symbol gating the LSE implementations.  */

> +_Bool __aarch64_have_lse_atomics

> +  __attribute__((visibility("hidden"), nocommon));

> +

> +/* Disable initialization of __aarch64_have_lse_atomics during 

> bootstrap.  */

> +#ifndef inhibit_libc

> +# include <sys/auxv.h>

> +

> +/* Disable initialization if the system headers are too old.  */

> +# if defined(AT_HWCAP) && defined(HWCAP_ATOMICS)

> +

> +static void __attribute__((constructor))

> +init_have_lse_atomics (void)

> +{

> +  unsigned long hwcap = getauxval (AT_HWCAP);

> +  __aarch64_have_lse_atomics = (hwcap & HWCAP_ATOMICS) != 0;

> +}

> +

> +# endif /* HWCAP */

> +#endif /* inhibit_libc */

> diff --git a/libgcc/config.host b/libgcc/config.host

> index 728e543ea39..122113fc519 100644

> --- a/libgcc/config.host

> +++ b/libgcc/config.host

> @@ -350,12 +350,14 @@ aarch64*-*-elf | aarch64*-*-rtems*)

>  	extra_parts="$extra_parts crtbegin.o crtend.o crti.o crtn.o"

>  	extra_parts="$extra_parts crtfastmath.o"

>  	tmake_file="${tmake_file} ${cpu_type}/t-aarch64"

> +	tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc"

>  	tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm"

>  	md_unwind_header=aarch64/aarch64-unwind.h

>  	;;

>  aarch64*-*-freebsd*)

>  	extra_parts="$extra_parts crtfastmath.o"

>  	tmake_file="${tmake_file} ${cpu_type}/t-aarch64"

> +	tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc"

>  	tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm"

>  	md_unwind_header=aarch64/freebsd-unwind.h

>  	;;

> @@ -367,12 +369,14 @@ aarch64*-*-netbsd*)

>  	;;

>  aarch64*-*-fuchsia*)

>  	tmake_file="${tmake_file} ${cpu_type}/t-aarch64"

> +	tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc"

>  	tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp"

>  	;;

>  aarch64*-*-linux*)

>  	extra_parts="$extra_parts crtfastmath.o"

>  	md_unwind_header=aarch64/linux-unwind.h

>  	tmake_file="${tmake_file} ${cpu_type}/t-aarch64"

> +	tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc"

>  	tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm"

>  	;;

>  alpha*-*-linux*)

> diff --git a/libgcc/config/aarch64/lse.S b/libgcc/config/aarch64/lse.S

> new file mode 100644

> index 00000000000..c24a39242ca

> --- /dev/null

> +++ b/libgcc/config/aarch64/lse.S

> @@ -0,0 +1,235 @@

> +/* Out-of-line LSE atomics for AArch64 architecture.

> +   Copyright (C) 2018 Free Software Foundation, Inc.

> +   Contributed by Linaro Ltd.

> +

> +This file is part of GCC.

> +

> +GCC is free software; you can redistribute it and/or modify it under

> +the terms of the GNU General Public License as published by the Free

> +Software Foundation; either version 3, or (at your option) any later

> +version.

> +

> +GCC is distributed in the hope that it will be useful, but WITHOUT ANY

> +WARRANTY; without even the implied warranty of MERCHANTABILITY or

> +FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

> +for more details.

> +

> +Under Section 7 of GPL version 3, you are granted additional

> +permissions described in the GCC Runtime Library Exception, version

> +3.1, as published by the Free Software Foundation.

> +

> +You should have received a copy of the GNU General Public License and

> +a copy of the GCC Runtime Library Exception along with this program;

> +see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see

> +<http://www.gnu.org/licenses/>.  */

> +

> +/*

> + * The problem that we are trying to solve is operating system 

> deployment

> + * of ARMv8.1-Atomics, also known as Large System Extensions (LSE).

> + *

> + * There are a number of potential solutions for this problem which 

> have

> + * been proposed and rejected for various reasons.  To recap:

> + *

> + * (1) Multiple builds.  The dynamic linker will examine 

> /lib64/atomics/

> + * if HWCAP_ATOMICS is set, allowing entire libraries to be 

> overwritten.

> + * However, not all Linux distributions are happy with multiple 

> builds,

> + * and anyway it has no effect on main applications.

> + *

> + * (2) IFUNC.  We could put these functions into libgcc_s.so, and have

> + * a single copy of each function for all DSOs.  However, ARM is 

> concerned

> + * that the branch-to-indirect-branch that is implied by using a PLT,

> + * as required by IFUNC, is too much overhead for smaller cpus.

> + *

> + * (3) Statically predicted direct branches.  This is the approach 

> that

> + * is taken here.  These functions are linked into every DSO that uses 

> them.

> + * All of the symbols are hidden, so that the functions are called via 

> a

> + * direct branch.  The choice of LSE vs non-LSE is done via one byte 

> load

> + * followed by a well-predicted direct branch.  The functions are 

> compiled

> + * separately to minimize code size.

> + */

> +

> +/* Tell the assembler to accept LSE instructions.  */

> +	.arch armv8-a+lse

> +

> +/* Declare the symbol gating the LSE implementations.  */

> +	.hidden	__aarch64_have_lse_atomics

> +

> +/* Turn size and memory model defines into mnemonic fragments.  */

> +#if SIZE == 1

> +# define S     b

> +# define UXT   uxtb

> +#elif SIZE == 2

> +# define S     h

> +# define UXT   uxth

> +#elif SIZE == 4 || SIZE == 8 || SIZE == 16

> +# define S

> +# define UXT   mov

> +#else

> +# error

> +#endif

> +

> +#if MODEL == 1

> +# define SUFF  _relax

> +# define A

> +# define L

> +#elif MODEL == 2

> +# define SUFF  _acq

> +# define A     a

> +# define L

> +#elif MODEL == 3

> +# define SUFF  _rel

> +# define A

> +# define L     l

> +#elif MODEL == 4

> +# define SUFF  _acq_rel

> +# define A     a

> +# define L     l

> +#else

> +# error

> +#endif

> +

> +/* Concatenate symbols.  */

> +#define glue2_(A, B)		A ## B

> +#define glue2(A, B)		glue2_(A, B)

> +#define glue3_(A, B, C)		A ## B ## C

> +#define glue3(A, B, C)		glue3_(A, B, C)

> +#define glue4_(A, B, C, D)	A ## B ## C ## D

> +#define glue4(A, B, C, D)	glue4_(A, B, C, D)

> +

> +/* Select the size of a register, given a regno.  */

> +#define x(N)			glue2(x, N)

> +#define w(N)			glue2(w, N)

> +#if SIZE < 8

> +# define s(N)			w(N)

> +#else

> +# define s(N)			x(N)

> +#endif

> +

> +#define NAME(BASE)		glue4(__aarch64_, BASE, SIZE, SUFF)

> +#define LDXR			glue4(ld, A, xr, S)

> +#define STXR			glue4(st, L, xr, S)

> +

> +/* Temporary registers used.  Other than these, only the return value

> +   register (x0) and the flags are modified.  */

> +#define tmp0	16

> +#define tmp1	17

> +#define tmp2	15

> +

> +/* Start and end a function.  */

> +.macro	STARTFN name

> +	.text

> +	.balign	16

> +	.globl	\name

> +	.hidden	\name

> +	.type	\name, %function

> +	.cfi_startproc

> +\name:

> +.endm

> +

> +.macro	ENDFN name

> +	.cfi_endproc

> +	.size	\name, . - \name

> +.endm

> +

> +/* Branch to LABEL if LSE is disabled.  */

> +.macro	JUMP_IF_NOT_LSE label

> +	adrp	x(tmp0), __aarch64_have_lse_atomics

> +	ldrb	w(tmp0), [x(tmp0), :lo12:__aarch64_have_lse_atomics]

> +	cbz	w(tmp0), \label

> +.endm

> +

> +#ifdef L_cas

> +

> +STARTFN	NAME(cas)

> +	JUMP_IF_NOT_LSE	8f

> +

> +#if SIZE < 16

> +#define CAS	glue4(cas, A, L, S)

> +

> +	CAS		s(0), s(1), [x2]	/* LSE path: single compare-and-swap instruction.  */

> +	ret

> +

> +8:	UXT		s(tmp0), s(0)	/* Non-LSE: keep the expected value, zero-extended for sizes 1 and 2.  */

> +0:	LDXR		s(0), [x2]

> +	cmp		s(0), s(tmp0)

> +	bne		1f	/* Mismatch: return the loaded value without storing.  */

> +	STXR		w(tmp1), s(1), [x2]

> +	cbnz		w(tmp1), 0b	/* Exclusive store failed: retry.  */

> +1:	ret

> +

> +#else

> +#define LDXP	glue3(ld, A, xp)

> +#define STXP	glue3(st, L, xp)

> +#define CASP	glue3(casp, A, L)

> +

> +	CASP		x0, x1, x2, x3, [x4]

> +	ret

> +

> +8:	mov		x(tmp0), x0	/* Non-LSE: keep the expected pair in tmp0:tmp1.  */

> +	mov		x(tmp1), x1

> +0:	LDXP		x0, x1, [x4]

> +	cmp		x0, x(tmp0)

> +	ccmp		x1, x(tmp1), #0, eq

> +	bne		1f

> +	STXP		w(tmp2), x2, x3, [x4]	/* Store the desired pair x2:x3, matching CASP, not the expected pair.  */

> +	cbnz		w(tmp2), 0b	/* Exclusive store failed: retry.  */

> +1:	ret

> +

> +#endif

> +

> +ENDFN	NAME(cas)

> +#endif

> +

> +#ifdef L_swp

> +#define SWP	glue4(swp, A, L, S)

> +

> +STARTFN	NAME(swp)

> +	JUMP_IF_NOT_LSE	8f

> +

> +	SWP		s(0), s(0), [x1]

> +	ret

> +

> +8:	mov		s(tmp0), s(0)

> +0:	LDXR		s(0), [x1]

> +	STXR		w(tmp1), s(tmp0), [x1]

> +	cbnz		w(tmp1), 0b

> +	ret

> +

> +ENDFN	NAME(swp)

> +#endif

> +

> +#if defined(L_ldadd) || defined(L_ldclr) \

> +    || defined(L_ldeor) || defined(L_ldset)

> +

> +#ifdef L_ldadd

> +#define LDNM	ldadd

> +#define OP	add

> +#elif defined(L_ldclr)

> +#define LDNM	ldclr

> +#define OP	bic

> +#elif defined(L_ldeor)

> +#define LDNM	ldeor

> +#define OP	eor

> +#elif defined(L_ldset)

> +#define LDNM	ldset

> +#define OP	orr

> +#else

> +#error

> +#endif

> +#define LDOP	glue4(LDNM, A, L, S)

> +

> +STARTFN	NAME(LDNM)

> +	JUMP_IF_NOT_LSE	8f

> +

> +	LDOP		s(0), s(0), [x1]	/* LSE path: single load-and-operate instruction.  */

> +	ret

> +

> +8:	mov		s(tmp0), s(0)	/* Non-LSE: keep the operand, x0 is reused for the loaded value.  */

> +0:	LDXR		s(0), [x1]

> +	OP		s(tmp1), s(0), s(tmp0)

> +	STXR		w(tmp2), s(tmp1), [x1]	/* Status goes in tmp2 so it does not alias the data register tmp1.  */

> +	cbnz		w(tmp2), 0b	/* Exclusive store failed: retry.  */

> +	ret

> +

> +ENDFN	NAME(LDNM)

> +#endif

> diff --git a/libgcc/config/aarch64/t-lse b/libgcc/config/aarch64/t-lse

> new file mode 100644

> index 00000000000..c7f4223cd45

> --- /dev/null

> +++ b/libgcc/config/aarch64/t-lse

> @@ -0,0 +1,44 @@

> +# Out-of-line LSE atomics for AArch64 architecture.

> +# Copyright (C) 2018 Free Software Foundation, Inc.

> +# Contributed by Linaro Ltd.

> +#

> +# This file is part of GCC.

> +#

> +# GCC is free software; you can redistribute it and/or modify it

> +# under the terms of the GNU General Public License as published by

> +# the Free Software Foundation; either version 3, or (at your option)

> +# any later version.

> +#

> +# GCC is distributed in the hope that it will be useful, but

> +# WITHOUT ANY WARRANTY; without even the implied warranty of

> +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU

> +# General Public License for more details.

> +#

> +# You should have received a copy of the GNU General Public License

> +# along with GCC; see the file COPYING3.  If not see

> +# <http://www.gnu.org/licenses/>.

> +

> +# Compare-and-swap has 5 sizes and 4 memory models.

> +S0 := $(foreach s, 1 2 4 8 16, $(addsuffix _$(s), cas))

> +O0 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S0)))

> +

> +# Swap, Load-and-operate have 4 sizes and 4 memory models

> +S1 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), swp ldadd ldclr ldeor 

> ldset))

> +O1 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S1)))

> +

> +LSE_OBJS := $(O0) $(O1)

> +

> +libgcc-objects += $(LSE_OBJS) lse-init$(objext)

> +

> +empty      =

> +space      = $(empty) $(empty)

> +PAT_SPLIT  = $(subst _,$(space),$(*F))

> +PAT_BASE   = $(word 1,$(PAT_SPLIT))

> +PAT_N      = $(word 2,$(PAT_SPLIT))

> +PAT_M      = $(word 3,$(PAT_SPLIT))

> +

> +lse-init$(objext): $(srcdir)/config/aarch64/lse-init.c

> +	$(gcc_compile) -c $<

> +

> +$(LSE_OBJS): $(srcdir)/config/aarch64/lse.S

> +	$(gcc_compile) -DL_$(PAT_BASE) -DSIZE=$(PAT_N) -DMODEL=$(PAT_M) -c $<
diff mbox series

Patch

diff --git a/libgcc/config/aarch64/lse-init.c b/libgcc/config/aarch64/lse-init.c
new file mode 100644
index 00000000000..51fb21d45c9
--- /dev/null
+++ b/libgcc/config/aarch64/lse-init.c
@@ -0,0 +1,45 @@ 
+/* Out-of-line LSE atomics for AArch64 architecture, Init.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+   Contributed by Linaro Ltd.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+/* Define the symbol gating the LSE implementations.  */
+_Bool __aarch64_have_lse_atomics
+  __attribute__((visibility("hidden"), nocommon));
+
+/* Disable initialization of __aarch64_have_lse_atomics during bootstrap.  */
+#ifndef inhibit_libc
+# include <sys/auxv.h>
+
+/* Disable initialization if the system headers are too old.  */
+# if defined(AT_HWCAP) && defined(HWCAP_ATOMICS)
+
+static void __attribute__((constructor))
+init_have_lse_atomics (void)
+{
+  unsigned long hwcap = getauxval (AT_HWCAP);
+  __aarch64_have_lse_atomics = (hwcap & HWCAP_ATOMICS) != 0;
+}
+
+# endif /* HWCAP */
+#endif /* inhibit_libc */
diff --git a/libgcc/config.host b/libgcc/config.host
index 728e543ea39..122113fc519 100644
--- a/libgcc/config.host
+++ b/libgcc/config.host
@@ -350,12 +350,14 @@  aarch64*-*-elf | aarch64*-*-rtems*)
 	extra_parts="$extra_parts crtbegin.o crtend.o crti.o crtn.o"
 	extra_parts="$extra_parts crtfastmath.o"
 	tmake_file="${tmake_file} ${cpu_type}/t-aarch64"
+	tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc"
 	tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm"
 	md_unwind_header=aarch64/aarch64-unwind.h
 	;;
 aarch64*-*-freebsd*)
 	extra_parts="$extra_parts crtfastmath.o"
 	tmake_file="${tmake_file} ${cpu_type}/t-aarch64"
+	tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc"
 	tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm"
 	md_unwind_header=aarch64/freebsd-unwind.h
 	;;
@@ -367,12 +369,14 @@  aarch64*-*-netbsd*)
 	;;
 aarch64*-*-fuchsia*)
 	tmake_file="${tmake_file} ${cpu_type}/t-aarch64"
+	tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc"
 	tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp"
 	;;
 aarch64*-*-linux*)
 	extra_parts="$extra_parts crtfastmath.o"
 	md_unwind_header=aarch64/linux-unwind.h
 	tmake_file="${tmake_file} ${cpu_type}/t-aarch64"
+	tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc"
 	tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm"
 	;;
 alpha*-*-linux*)
diff --git a/libgcc/config/aarch64/lse.S b/libgcc/config/aarch64/lse.S
new file mode 100644
index 00000000000..c24a39242ca
--- /dev/null
+++ b/libgcc/config/aarch64/lse.S
@@ -0,0 +1,235 @@ 
+/* Out-of-line LSE atomics for AArch64 architecture.
+   Copyright (C) 2018 Free Software Foundation, Inc.
+   Contributed by Linaro Ltd.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+/*
+ * The problem that we are trying to solve is operating system deployment
+ * of ARMv8.1-Atomics, also known as Large System Extensions (LSE).
+ *
+ * There are a number of potential solutions for this problem which have
+ * been proposed and rejected for various reasons.  To recap:
+ *
+ * (1) Multiple builds.  The dynamic linker will examine /lib64/atomics/
+ * if HWCAP_ATOMICS is set, allowing entire libraries to be overwritten.
+ * However, not all Linux distributions are happy with multiple builds,
+ * and anyway it has no effect on main applications.
+ *
+ * (2) IFUNC.  We could put these functions into libgcc_s.so, and have
+ * a single copy of each function for all DSOs.  However, ARM is concerned
+ * that the branch-to-indirect-branch that is implied by using a PLT,
+ * as required by IFUNC, is too much overhead for smaller cpus.
+ *
+ * (3) Statically predicted direct branches.  This is the approach that
+ * is taken here.  These functions are linked into every DSO that uses them.
+ * All of the symbols are hidden, so that the functions are called via a
+ * direct branch.  The choice of LSE vs non-LSE is done via one byte load
+ * followed by a well-predicted direct branch.  The functions are compiled
+ * separately to minimize code size.
+ */
+
+/* Let the assembler accept the LSE mnemonics (cas, casp, swp, ld<op>).  */
+	.arch armv8-a+lse
+
+/* The flag tested by JUMP_IF_NOT_LSE; it has hidden visibility.  */
+	.hidden	__aarch64_have_lse_atomics
+
+/* Turn size and memory model defines into mnemonic fragments.  */
+#if SIZE == 1
+# define S     b	/* Byte suffix appended to the mnemonics.  */
+# define UXT   uxtb	/* Zero-extends the 8-bit expected value in cas.  */
+#elif SIZE == 2
+# define S     h	/* Halfword suffix appended to the mnemonics.  */
+# define UXT   uxth	/* Zero-extends the 16-bit expected value in cas.  */
+#elif SIZE == 4 || SIZE == 8 || SIZE == 16
+# define S		/* No suffix; the w/x register selects the width.  */
+# define UXT   mov	/* Full-width value: a plain copy suffices.  */
+#else
+# error
+#endif
+
+#if MODEL == 1
+# define SUFF  _relax	/* Neither "a" nor "l" is added to the mnemonics.  */
+# define A
+# define L
+#elif MODEL == 2
+# define SUFF  _acq	/* "a" on exclusive loads and on the LSE mnemonics.  */
+# define A     a
+# define L
+#elif MODEL == 3
+# define SUFF  _rel	/* "l" on exclusive stores and on the LSE mnemonics.  */
+# define A
+# define L     l
+#elif MODEL == 4
+# define SUFF  _acq_rel	/* Both "a" and "l" are added.  */
+# define A     a
+# define L     l
+#else
+# error
+#endif
+
+/* Concatenate symbols.  */
+#define glue2_(A, B)		A ## B
+#define glue2(A, B)		glue2_(A, B)	/* Arguments expand, then paste.  */
+#define glue3_(A, B, C)		A ## B ## C
+#define glue3(A, B, C)		glue3_(A, B, C)
+#define glue4_(A, B, C, D)	A ## B ## C ## D
+#define glue4(A, B, C, D)	glue4_(A, B, C, D)
+
+/* Select the size of a register, given a regno.  */
+#define x(N)			glue2(x, N)	/* 64-bit view: x(tmp0) -> x16  */
+#define w(N)			glue2(w, N)	/* 32-bit view: w(tmp0) -> w16  */
+#if SIZE < 8
+# define s(N)			w(N)		/* Operand view for 1/2/4 bytes.  */
+#else
+# define s(N)			x(N)		/* Operand view for 8/16 bytes.  */
+#endif
+
+#define NAME(BASE)		glue4(__aarch64_, BASE, SIZE, SUFF) /* __aarch64_<op><size><model>  */
+#define LDXR			glue4(ld, A, xr, S)	/* ld[a]xr[b|h]  */
+#define STXR			glue4(st, L, xr, S)	/* st[l]xr[b|h]  */
+
+/* Temporary registers used.  Other than these, only the result register(s)
+   (x0, plus x1 for the 16-byte cas) and the flags are modified.  */
+#define tmp0	16
+#define tmp1	17
+#define tmp2	15
+
+/* Start a function: 16-byte aligned in .text, global but hidden, CFI open.  */
+.macro	STARTFN name
+	.text
+	.p2align	4
+	.type	\name, %function
+	.global	\name
+	.hidden	\name
+	.cfi_startproc
+\name:
+.endm
+
+.macro	ENDFN name
+	.size	\name, . - \name	/* Size = bytes emitted since the label.  */
+	.cfi_endproc
+.endm
+
+/* Branch to TARGET if __aarch64_have_lse_atomics is zero.  Clobbers tmp0.  */
+.macro	JUMP_IF_NOT_LSE target
+	adrp	x(tmp0), __aarch64_have_lse_atomics	/* Page address of the flag.  */
+	ldrb	w(tmp0), [x(tmp0), :lo12:__aarch64_have_lse_atomics]	/* Flag byte.  */
+	cbz	w(tmp0), \target
+.endm
+
+#ifdef L_cas
+/* NAME(cas): compare-and-swap.  x0 (x0:x1 if 16 bytes) = expected value,
+   x1 (x2:x3) = new value, x2 (x4) = address.  Returns the value found.  */
+STARTFN	NAME(cas)
+	JUMP_IF_NOT_LSE	8f
+#if SIZE < 16
+#define CAS	glue4(cas, A, L, S)
+
+	CAS		s(0), s(1), [x2]
+	ret
+
+8:	UXT		s(tmp0), s(0)		/* tmp0 = expected value  */
+0:	LDXR		s(0), [x2]		/* x0 = current memory value  */
+	cmp		s(0), s(tmp0)
+	bne		1f			/* Mismatch: return, no store.  */
+	STXR		w(tmp1), s(1), [x2]	/* Try to store the new value.  */
+	cbnz		w(tmp1), 0b		/* Store failed: retry.  */
+1:	ret
+
+#else
+#define LDXP	glue3(ld, A, xp)
+#define STXP	glue3(st, L, xp)
+#define CASP	glue3(casp, A, L)
+
+	CASP		x0, x1, x2, x3, [x4]
+	ret
+
+8:	mov		x(tmp0), x0		/* tmp0:tmp1 = expected pair  */
+	mov		x(tmp1), x1
+0:	LDXP		x0, x1, [x4]		/* x0:x1 = current memory pair  */
+	cmp		x0, x(tmp0)
+	ccmp		x1, x(tmp1), #0, eq	/* Second half only if first matched.  */
+	bne		1f			/* Mismatch: return, no store.  */
+	STXP		w(tmp2), x2, x3, [x4]	/* Store the NEW pair x2:x3.  */
+	cbnz		w(tmp2), 0b		/* Store failed: retry.  */
+1:	ret
+
+#endif
+
+ENDFN	NAME(cas)
+#endif
+
+#ifdef L_swp
+#define SWP	glue4(swp, A, L, S)
+/* NAME(swp): store x0 to [x1] atomically; return the old contents in x0.  */
+STARTFN	NAME(swp)
+	JUMP_IF_NOT_LSE	8f
+
+	SWP		s(0), s(0), [x1]
+	ret
+
+8:	mov		s(tmp0), s(0)		/* tmp0 = value to store  */
+0:	LDXR		s(0), [x1]		/* x0 = previous contents  */
+	STXR		w(tmp1), s(tmp0), [x1]
+	cbnz		w(tmp1), 0b		/* Store failed: retry.  */
+	ret
+
+ENDFN	NAME(swp)
+#endif
+
+#if defined(L_ldadd) || defined(L_ldclr) \
+    || defined(L_ldeor) || defined(L_ldset)
+#ifdef L_ldadd
+#define LDNM	ldadd
+#define OP	add
+#elif defined(L_ldclr)
+#define LDNM	ldclr
+#define OP	bic
+#elif defined(L_ldeor)
+#define LDNM	ldeor
+#define OP	eor
+#elif defined(L_ldset)
+#define LDNM	ldset
+#define OP	orr
+#else
+#error
+#endif
+#define LDOP	glue4(LDNM, A, L, S)
+/* NAME(LDNM): atomically replace [x1] with ([x1] OP x0) and return the
+   previous contents of [x1] in x0.  */
+STARTFN	NAME(LDNM)
+	JUMP_IF_NOT_LSE	8f
+
+	LDOP		s(0), s(0), [x1]
+	ret
+
+8:	mov		s(tmp0), s(0)		/* tmp0 = operand  */
+0:	LDXR		s(0), [x1]		/* x0 = previous contents  */
+	OP		s(tmp1), s(0), s(tmp0)	/* tmp1 = value to store  */
+	STXR		w(tmp2), s(tmp1), [x1]	/* Status must not alias the data.  */
+	cbnz		w(tmp2), 0b		/* Store failed: retry.  */
+	ret
+
+ENDFN	NAME(LDNM)
+#endif
diff --git a/libgcc/config/aarch64/t-lse b/libgcc/config/aarch64/t-lse
new file mode 100644
index 00000000000..c7f4223cd45
--- /dev/null
+++ b/libgcc/config/aarch64/t-lse
@@ -0,0 +1,44 @@ 
+# Out-of-line LSE atomics for AArch64 architecture.
+# Copyright (C) 2018 Free Software Foundation, Inc.
+# Contributed by Linaro Ltd.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+# General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3.  If not see
+# <http://www.gnu.org/licenses/>.
+
+# Compare-and-swap: 5 sizes x 4 memory models, named cas_<size>_<model>.
+S0 := $(foreach s, 1 2 4 8 16, $(addsuffix _$(s), cas))
+O0 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S0)))
+
+# Swap and load-and-operate: 4 sizes x 4 models, named <op>_<size>_<model>.
+S1 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), swp ldadd ldclr ldeor ldset))
+O1 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S1)))
+
+LSE_OBJS := $(O0) $(O1)
+libgcc-objects += $(LSE_OBJS) lse-init$(objext)
+
+# Split $(*F) at '_' into words: 1 = operation, 2 = size, 3 = memory model.
+empty      =
+space      = $(empty) $(empty)
+PAT_SPLIT  = $(subst _,$(space),$(*F))
+PAT_BASE   = $(word 1,$(PAT_SPLIT))
+PAT_N      = $(word 2,$(PAT_SPLIT))
+PAT_M      = $(word 3,$(PAT_SPLIT))
+
+lse-init$(objext): $(srcdir)/config/aarch64/lse-init.c
+	$(gcc_compile) -c $<
+# Each helper object is lse.S built with -DL_<op>, -DSIZE and -DMODEL.
+$(LSE_OBJS): $(srcdir)/config/aarch64/lse.S
+	$(gcc_compile) -DL_$(PAT_BASE) -DSIZE=$(PAT_N) -DMODEL=$(PAT_M) -c $<