Message ID | 20181101214648.29432-5-richard.henderson@linaro.org |
---|---|
State | Superseded |
Headers | show |
Series | LSE atomics out-of-line | expand |
Hi Richard, On 11/1/18 9:46 PM, Richard Henderson wrote: > This is the libgcc part of the interface -- providing the functions. > Rationale is provided at the top of libgcc/config/aarch64/lse.S. > > * config/aarch64/lse-init.c: New file. > * config/aarch64/lse.S: New file. > * config/aarch64/t-lse: New file. > * config.host: Add t-lse to all aarch64 tuples. > --- > libgcc/config/aarch64/lse-init.c | 45 ++++++ > libgcc/config.host | 4 + > libgcc/config/aarch64/lse.S | 238 +++++++++++++++++++++++++++++++ > libgcc/config/aarch64/t-lse | 44 ++++++ > 4 files changed, 331 insertions(+) > create mode 100644 libgcc/config/aarch64/lse-init.c > create mode 100644 libgcc/config/aarch64/lse.S > create mode 100644 libgcc/config/aarch64/t-lse > > diff --git a/libgcc/config/aarch64/lse-init.c > b/libgcc/config/aarch64/lse-init.c > new file mode 100644 > index 00000000000..03b4e1e8ea8 > --- /dev/null > +++ b/libgcc/config/aarch64/lse-init.c > @@ -0,0 +1,45 @@ > +/* Out-of-line LSE atomics for AArch64 architecture, Init. > + Copyright (C) 2018 Free Software Foundation, Inc. > + Contributed by Linaro Ltd. > + > +This file is part of GCC. > + > +GCC is free software; you can redistribute it and/or modify it under > +the terms of the GNU General Public License as published by the Free > +Software Foundation; either version 3, or (at your option) any later > +version. > + > +GCC is distributed in the hope that it will be useful, but WITHOUT ANY > +WARRANTY; without even the implied warranty of MERCHANTABILITY or > +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License > +for more details. > + > +Under Section 7 of GPL version 3, you are granted additional > +permissions described in the GCC Runtime Library Exception, version > +3.1, as published by the Free Software Foundation. 
> + > +You should have received a copy of the GNU General Public License and > +a copy of the GCC Runtime Library Exception along with this program; > +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see > +<http://www.gnu.org/licenses/>. */ > + > +/* Define the symbol gating the LSE implementations. */ > +extern _Bool __aa64_have_atomics > + __attribute__((visibility("hidden"), nocommon)); > + Bootstrapping this patch series on an Armv8-A system with OOL atomics enabled by default gave me link errors when building libgomp about __aa64_have_atomics being undefined. I haven't followed the series from the start so maybe I'm missing some things, but I don't see where this variable is supposed to "live"? Removing the 'extern' from here allows the bootstrap to proceed but it fails at a later stage with bizzare errors like: In file included from build/gencondmd.c:51: $SRC/gcc/config/aarch64/constraints.md: In function ‘bool satisfies_constraint_S(rtx)’: $SRC/gcc/config/aarch64/constraints.md:120:10: error: ‘C’ was not declared in this scope; did you mean ‘PC’? 120 | (define_constraint "Y" | ^ | PC which looks like a miscompilation of sorts. Thanks, Kyrill > +/* Disable initialization of __aa64_have_atomics during bootstrap. */ > +#ifndef inhibit_libc > +# include <sys/auxv.h> > + > +/* Disable initialization if the system headers are too old. 
*/ > +# if defined(AT_HWCAP) && defined(HWCAP_ATOMICS) > + > +static void __attribute__((constructor)) > +init_have_atomics (void) > +{ > + unsigned long hwcap = getauxval (AT_HWCAP); > + __aa64_have_atomics = (hwcap & HWCAP_ATOMICS) != 0; > +} > + > +# endif /* HWCAP */ > +#endif /* inhibit_libc */ > diff --git a/libgcc/config.host b/libgcc/config.host > index 029f6569caf..7e9a8b6bc8f 100644 > --- a/libgcc/config.host > +++ b/libgcc/config.host > @@ -340,23 +340,27 @@ aarch64*-*-elf | aarch64*-*-rtems*) > extra_parts="$extra_parts crtbegin.o crtend.o crti.o crtn.o" > extra_parts="$extra_parts crtfastmath.o" > tmake_file="${tmake_file} ${cpu_type}/t-aarch64" > + tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc" > tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm" > md_unwind_header=aarch64/aarch64-unwind.h > ;; > aarch64*-*-freebsd*) > extra_parts="$extra_parts crtfastmath.o" > tmake_file="${tmake_file} ${cpu_type}/t-aarch64" > + tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc" > tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm" > md_unwind_header=aarch64/freebsd-unwind.h > ;; > aarch64*-*-fuchsia*) > tmake_file="${tmake_file} ${cpu_type}/t-aarch64" > + tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc" > tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp" > ;; > aarch64*-*-linux*) > extra_parts="$extra_parts crtfastmath.o" > md_unwind_header=aarch64/linux-unwind.h > tmake_file="${tmake_file} ${cpu_type}/t-aarch64" > + tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc" > tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm" > ;; > alpha*-*-linux*) > diff --git a/libgcc/config/aarch64/lse.S b/libgcc/config/aarch64/lse.S > new file mode 100644 > index 00000000000..3e42a6569af > --- /dev/null > +++ b/libgcc/config/aarch64/lse.S > @@ -0,0 +1,238 @@ > +/* Out-of-line LSE atomics for AArch64 architecture. > + Copyright (C) 2018 Free Software Foundation, Inc. 
> + Contributed by Linaro Ltd. > + > +This file is part of GCC. > + > +GCC is free software; you can redistribute it and/or modify it under > +the terms of the GNU General Public License as published by the Free > +Software Foundation; either version 3, or (at your option) any later > +version. > + > +GCC is distributed in the hope that it will be useful, but WITHOUT ANY > +WARRANTY; without even the implied warranty of MERCHANTABILITY or > +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License > +for more details. > + > +Under Section 7 of GPL version 3, you are granted additional > +permissions described in the GCC Runtime Library Exception, version > +3.1, as published by the Free Software Foundation. > + > +You should have received a copy of the GNU General Public License and > +a copy of the GCC Runtime Library Exception along with this program; > +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see > +<http://www.gnu.org/licenses/>. */ > + > +/* > + * The problem that we are trying to solve is operating system deployment > + * of ARMv8.1-Atomics, also known as Large System Exensions (LSE). > + * > + * There are a number of potential solutions for this problem which have > + * been proposed and rejected for various reasons. To recap: > + * > + * (1) Multiple builds. The dynamic linker will examine /lib64/atomics/ > + * if HWCAP_ATOMICS is set, allowing entire libraries to be overwritten. > + * However, not all Linux distributions are happy with multiple builds, > + * and anyway it has no effect on main applications. > + * > + * (2) IFUNC. We could put these functions into libgcc_s.so, and have > + * a single copy of each function for all DSOs. However, ARM is > concerned > + * that the branch-to-indirect-branch that is implied by using a PLT, > + * as required by IFUNC, is too much overhead for smaller cpus. > + * > + * (3) Statically predicted direct branches. This is the approach that > + * is taken here. 
These functions are linked into every DSO that > uses them. > + * All of the symbols are hidden, so that the functions are called via a > + * direct branch. The choice of LSE vs non-LSE is done via one byte load > + * followed by a well-predicted direct branch. The functions are > compiled > + * separately to minimize code size. > + */ > + > +/* Tell the assembler to accept LSE instructions. */ > + .arch armv8-a+lse > + > +/* Declare the symbol gating the LSE implementations. */ > + .hidden __aa64_have_atomics > + > +/* Turn size and memory model defines into mnemonic fragments. */ > +#if SIZE == 1 > +# define S b > +# define MASK , uxtb > +#elif SIZE == 2 > +# define S h > +# define MASK , uxth > +#elif SIZE == 4 || SIZE == 8 || SIZE == 16 > +# define S > +# define MASK > +#else > +# error > +#endif > + > +#if MODEL == 1 > +# define SUFF _relax > +# define A > +# define L > +#elif MODEL == 2 > +# define SUFF _acq > +# define A a > +# define L > +#elif MODEL == 3 > +# define SUFF _rel > +# define A > +# define L l > +#elif MODEL == 4 > +# define SUFF _acq_rel > +# define A a > +# define L l > +#else > +# error > +#endif > + > +/* Concatenate symbols. */ > +#define glue2_(A, B) A ## B > +#define glue2(A, B) glue2_(A, B) > +#define glue3_(A, B, C) A ## B ## C > +#define glue3(A, B, C) glue3_(A, B, C) > +#define glue4_(A, B, C, D) A ## B ## C ## D > +#define glue4(A, B, C, D) glue4_(A, B, C, D) > + > +/* Select the size of a register, given a regno. */ > +#define x(N) glue2(x, N) > +#define w(N) glue2(w, N) > +#if SIZE < 8 > +# define s(N) w(N) > +#else > +# define s(N) x(N) > +#endif > + > +#define NAME(BASE) glue4(__aa64_, BASE, SIZE, SUFF) > +#define LDXR glue4(ld, A, xr, S) > +#define STXR glue4(st, L, xr, S) > + > +/* Temporary registers used. Other than these, only the return value > + register (x0) and the flags are modified. */ > +#define tmp0 16 > +#define tmp1 17 > +#define tmp2 15 > + > +/* Start and end a function. 
*/ > +.macro STARTFN name > + .text > + .balign 16 > + .globl \name > + .hidden \name > + .type \name, %function > +\name: > +.endm > + > +.macro ENDFN name > + .size \name, . - \name > +.endm > + > +/* Branch to LABEL if LSE is enabled. > + The branch should be easily predicted, in that it will, after > constructors, > + always branch the same way. The expectation is that systems that > implement > + ARMv8.1-Atomics are "beefier" than those that omit the extension. > + By arranging for the fall-through path to use load-store-exclusive > insns, > + we aid the branch predictor of the smallest cpus. */ > +.macro JUMP_IF_LSE label > + adrp x(tmp0), __aa64_have_atomics > + ldrb w(tmp0), [x(tmp0), :lo12:__aa64_have_atomics] > + cbnz w(tmp0), \label > +.endm > + > +#ifdef L_cas > + > +STARTFN NAME(cas) > + JUMP_IF_LSE 8f > + > +#if SIZE < 16 > +#define CAS glue4(cas, A, L, S) > + > + mov s(tmp0), s(0) > +0: LDXR s(0), [x2] > + cmp s(0), s(tmp0) MASK > + bne 1f > + STXR w(tmp1), s(1), [x2] > + cbnz w(tmp1), 0b > +1: ret > + > +8: CAS w(0), w(1), [x2] > + ret > + > +#else > +#define LDXP glue3(ld, A, xp) > +#define STXP glue3(st, L, xp) > +#define CASP glue3(casp, A, L) > + > + mov x(tmp0), x0 > + mov x(tmp1), x1 > +0: LDXP x0, x1, [x4] > + cmp x0, x(tmp0) > + ccmp x1, x(tmp1), #0, eq > + bne 1f > + STXP w(tmp2), x(tmp0), x(tmp1), [x4] > + cbnz w(tmp2), 0b > +1: ret > + > +8: CASP x0, x1, x2, x3, [x4] > + ret > + > +#endif > + > +ENDFN NAME(cas) > +#endif > + > +#ifdef L_swp > +#define SWP glue4(swp, A, L, S) > + > +STARTFN NAME(swp) > + JUMP_IF_LSE 8f > + > + mov s(tmp0), s(0) > +0: LDXR s(0), [x1] > + STXR w(tmp1), s(tmp0), [x1] > + cbnz w(tmp1), 0b > + ret > + > +8: SWP w(0), w(0), [x1] > + ret > + > +ENDFN NAME(swp) > +#endif > + > +#if defined(L_ldadd) || defined(L_ldclr) \ > + || defined(L_ldeor) || defined(L_ldset) > + > +#ifdef L_ldadd > +#define LDNM ldadd > +#define OP add > +#elif defined(L_ldclr) > +#define LDNM ldclr > +#define OP bic > +#elif defined(L_ldeor) 
> +#define LDNM ldeor > +#define OP eor > +#elif defined(L_ldset) > +#define LDNM ldset > +#define OP orr > +#else > +#error > +#endif > +#define LDOP glue4(LDNM, A, L, S) > + > +STARTFN NAME(LDNM) > + JUMP_IF_LSE 8f > + > + mov s(tmp0), s(0) > +0: LDXR s(0), [x1] > + OP s(tmp1), s(0), s(tmp0) > + STXR w(tmp1), s(tmp1), [x1] > + cbnz w(tmp1), 0b > + ret > + > +8: LDOP s(0), s(0), [x1] > + ret > + > +ENDFN NAME(LDNM) > +#endif > diff --git a/libgcc/config/aarch64/t-lse b/libgcc/config/aarch64/t-lse > new file mode 100644 > index 00000000000..c7f4223cd45 > --- /dev/null > +++ b/libgcc/config/aarch64/t-lse > @@ -0,0 +1,44 @@ > +# Out-of-line LSE atomics for AArch64 architecture. > +# Copyright (C) 2018 Free Software Foundation, Inc. > +# Contributed by Linaro Ltd. > +# > +# This file is part of GCC. > +# > +# GCC is free software; you can redistribute it and/or modify it > +# under the terms of the GNU General Public License as published by > +# the Free Software Foundation; either version 3, or (at your option) > +# any later version. > +# > +# GCC is distributed in the hope that it will be useful, but > +# WITHOUT ANY WARRANTY; without even the implied warranty of > +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > +# General Public License for more details. > +# > +# You should have received a copy of the GNU General Public License > +# along with GCC; see the file COPYING3. If not see > +# <http://www.gnu.org/licenses/>. > + > +# Compare-and-swap has 5 sizes and 4 memory models. 
> +S0 := $(foreach s, 1 2 4 8 16, $(addsuffix _$(s), cas)) > +O0 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S0))) > + > +# Swap, Load-and-operate have 4 sizes and 4 memory models > +S1 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), swp ldadd ldclr ldeor > ldset)) > +O1 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S1))) > + > +LSE_OBJS := $(O0) $(O1) > + > +libgcc-objects += $(LSE_OBJS) lse-init$(objext) > + > +empty = > +space = $(empty) $(empty) > +PAT_SPLIT = $(subst _,$(space),$(*F)) > +PAT_BASE = $(word 1,$(PAT_SPLIT)) > +PAT_N = $(word 2,$(PAT_SPLIT)) > +PAT_M = $(word 3,$(PAT_SPLIT)) > + > +lse-init$(objext): $(srcdir)/config/aarch64/lse-init.c > + $(gcc_compile) -c $< > + > +$(LSE_OBJS): $(srcdir)/config/aarch64/lse.S > + $(gcc_compile) -DL_$(PAT_BASE) -DSIZE=$(PAT_N) > -DMODEL=$(PAT_M) -c $< > -- > 2.17.2 >
On 9/5/19 3:00 AM, Kyrill Tkachov wrote: >> +/* Define the symbol gating the LSE implementations. */ >> +extern _Bool __aa64_have_atomics >> + __attribute__((visibility("hidden"), nocommon)); >> + > > Bootstrapping this patch series on an Armv8-A system with OOL atomics enabled > by default gave me link errors > > when building libgomp about __aa64_have_atomics being undefined. > > I haven't followed the series from the start so maybe I'm missing some things, > but I don't see where this variable is supposed to "live"? Removing the extern here is the correct fix. Obviously the v3 patch set conversion from C to assembly wasn't properly tested, or I made some last-minute changes before posting. Time has erased that memory. > Removing the 'extern' from here allows the bootstrap to proceed but it fails at > a later stage with bizarre errors like: > > In file included from build/gencondmd.c:51: > $SRC/gcc/config/aarch64/constraints.md: In function ‘bool > satisfies_constraint_S(rtx)’: > $SRC/gcc/config/aarch64/constraints.md:120:10: error: ‘C’ was not declared in > this scope; did you mean ‘PC’? > 120 | (define_constraint "Y" > | ^ > | PC > > which looks like a miscompilation of sorts. I noticed a couple of typos in the assembly that used the wrong register form (wN vs xN) on the LSE instructions. By chance were you testing on a system with LSE instructions enabled? r~
On 9/5/19 1:12 PM, Richard Henderson wrote: > On 9/5/19 3:00 AM, Kyrill Tkachov wrote: >>> +/* Define the symbol gating the LSE implementations. */ >>> +extern _Bool __aa64_have_atomics >>> + __attribute__((visibility("hidden"), nocommon)); >>> + >> Bootstrapping this patch series on an Armv8-A system with OOL atomics enabled >> by default gave me link errors >> >> when building libgomp about __aa64_have_atomics being undefined. >> >> I haven't followed the series from the start so maybe I'm missing some things, >> but I don't see where this variable is supposed to "live"? > Removing the extern here is the correct fix. > > Obviously the v3 patch set conversion from C to assembly wasn't properly > tested, or I made some last-minute changes before posting. Time has erased > that memory. > >> Removing the 'extern' from here allows the bootstrap to proceed but it fails at >> a later stage with bizarre errors like: >> >> In file included from build/gencondmd.c:51: >> $SRC/gcc/config/aarch64/constraints.md: In function ‘bool >> satisfies_constraint_S(rtx)’: >> $SRC/gcc/config/aarch64/constraints.md:120:10: error: ‘C’ was not declared in >> this scope; did you mean ‘PC’? >> 120 | (define_constraint "Y" >> | ^ >> | PC >> >> which looks like a miscompilation of sorts. > I noticed a couple of typos in the assembly that used the wrong register form > (wN vs xN) on the LSE instructions. By chance were you testing on a system > with LSE instructions enabled? No, it was an Armv8.0-A system without LSE. If you need help with testing I'd be happy to help! Kyrill > > > r~
diff --git a/libgcc/config/aarch64/lse-init.c b/libgcc/config/aarch64/lse-init.c new file mode 100644 index 00000000000..03b4e1e8ea8 --- /dev/null +++ b/libgcc/config/aarch64/lse-init.c @@ -0,0 +1,45 @@ +/* Out-of-line LSE atomics for AArch64 architecture, Init. + Copyright (C) 2018 Free Software Foundation, Inc. + Contributed by Linaro Ltd. + +This file is part of GCC. + +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* Define the symbol gating the LSE implementations. */ +extern _Bool __aa64_have_atomics + __attribute__((visibility("hidden"), nocommon)); + +/* Disable initialization of __aa64_have_atomics during bootstrap. */ +#ifndef inhibit_libc +# include <sys/auxv.h> + +/* Disable initialization if the system headers are too old. 
*/ +# if defined(AT_HWCAP) && defined(HWCAP_ATOMICS) + +static void __attribute__((constructor)) +init_have_atomics (void) +{ + unsigned long hwcap = getauxval (AT_HWCAP); + __aa64_have_atomics = (hwcap & HWCAP_ATOMICS) != 0; +} + +# endif /* HWCAP */ +#endif /* inhibit_libc */ diff --git a/libgcc/config.host b/libgcc/config.host index 029f6569caf..7e9a8b6bc8f 100644 --- a/libgcc/config.host +++ b/libgcc/config.host @@ -340,23 +340,27 @@ aarch64*-*-elf | aarch64*-*-rtems*) extra_parts="$extra_parts crtbegin.o crtend.o crti.o crtn.o" extra_parts="$extra_parts crtfastmath.o" tmake_file="${tmake_file} ${cpu_type}/t-aarch64" + tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc" tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm" md_unwind_header=aarch64/aarch64-unwind.h ;; aarch64*-*-freebsd*) extra_parts="$extra_parts crtfastmath.o" tmake_file="${tmake_file} ${cpu_type}/t-aarch64" + tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc" tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm" md_unwind_header=aarch64/freebsd-unwind.h ;; aarch64*-*-fuchsia*) tmake_file="${tmake_file} ${cpu_type}/t-aarch64" + tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc" tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp" ;; aarch64*-*-linux*) extra_parts="$extra_parts crtfastmath.o" md_unwind_header=aarch64/linux-unwind.h tmake_file="${tmake_file} ${cpu_type}/t-aarch64" + tmake_file="${tmake_file} ${cpu_type}/t-lse t-slibgcc-libgcc" tmake_file="${tmake_file} ${cpu_type}/t-softfp t-softfp t-crtfm" ;; alpha*-*-linux*) diff --git a/libgcc/config/aarch64/lse.S b/libgcc/config/aarch64/lse.S new file mode 100644 index 00000000000..3e42a6569af --- /dev/null +++ b/libgcc/config/aarch64/lse.S @@ -0,0 +1,238 @@ +/* Out-of-line LSE atomics for AArch64 architecture. + Copyright (C) 2018 Free Software Foundation, Inc. + Contributed by Linaro Ltd. + +This file is part of GCC. 
+ +GCC is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 3, or (at your option) any later +version. + +GCC is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +Under Section 7 of GPL version 3, you are granted additional +permissions described in the GCC Runtime Library Exception, version +3.1, as published by the Free Software Foundation. + +You should have received a copy of the GNU General Public License and +a copy of the GCC Runtime Library Exception along with this program; +see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +<http://www.gnu.org/licenses/>. */ + +/* + * The problem that we are trying to solve is operating system deployment + * of ARMv8.1-Atomics, also known as Large System Extensions (LSE). + * + * There are a number of potential solutions for this problem which have + * been proposed and rejected for various reasons. To recap: + * + * (1) Multiple builds. The dynamic linker will examine /lib64/atomics/ + * if HWCAP_ATOMICS is set, allowing entire libraries to be overwritten. + * However, not all Linux distributions are happy with multiple builds, + * and anyway it has no effect on main applications. + * + * (2) IFUNC. We could put these functions into libgcc_s.so, and have + * a single copy of each function for all DSOs. However, ARM is concerned + * that the branch-to-indirect-branch that is implied by using a PLT, + * as required by IFUNC, is too much overhead for smaller cpus. + * + * (3) Statically predicted direct branches. This is the approach that + * is taken here. These functions are linked into every DSO that uses them. + * All of the symbols are hidden, so that the functions are called via a + * direct branch. 
The choice of LSE vs non-LSE is done via one byte load + * followed by a well-predicted direct branch. The functions are compiled + * separately to minimize code size. + */ + +/* Tell the assembler to accept LSE instructions. */ + .arch armv8-a+lse + +/* Declare the symbol gating the LSE implementations. */ + .hidden __aa64_have_atomics + +/* Turn size and memory model defines into mnemonic fragments. */ +#if SIZE == 1 +# define S b +# define MASK , uxtb +#elif SIZE == 2 +# define S h +# define MASK , uxth +#elif SIZE == 4 || SIZE == 8 || SIZE == 16 +# define S +# define MASK +#else +# error +#endif + +#if MODEL == 1 +# define SUFF _relax +# define A +# define L +#elif MODEL == 2 +# define SUFF _acq +# define A a +# define L +#elif MODEL == 3 +# define SUFF _rel +# define A +# define L l +#elif MODEL == 4 +# define SUFF _acq_rel +# define A a +# define L l +#else +# error +#endif + +/* Concatenate symbols. */ +#define glue2_(A, B) A ## B +#define glue2(A, B) glue2_(A, B) +#define glue3_(A, B, C) A ## B ## C +#define glue3(A, B, C) glue3_(A, B, C) +#define glue4_(A, B, C, D) A ## B ## C ## D +#define glue4(A, B, C, D) glue4_(A, B, C, D) + +/* Select the size of a register, given a regno. */ +#define x(N) glue2(x, N) +#define w(N) glue2(w, N) +#if SIZE < 8 +# define s(N) w(N) +#else +# define s(N) x(N) +#endif + +#define NAME(BASE) glue4(__aa64_, BASE, SIZE, SUFF) +#define LDXR glue4(ld, A, xr, S) +#define STXR glue4(st, L, xr, S) + +/* Temporary registers used. Other than these, only the return value + register (x0) and the flags are modified. */ +#define tmp0 16 +#define tmp1 17 +#define tmp2 15 + +/* Start and end a function. */ +.macro STARTFN name + .text + .balign 16 + .globl \name + .hidden \name + .type \name, %function +\name: +.endm + +.macro ENDFN name + .size \name, . - \name +.endm + +/* Branch to LABEL if LSE is enabled. + The branch should be easily predicted, in that it will, after constructors, + always branch the same way. 
The expectation is that systems that implement + ARMv8.1-Atomics are "beefier" than those that omit the extension. + By arranging for the fall-through path to use load-store-exclusive insns, + we aid the branch predictor of the smallest cpus. */ +.macro JUMP_IF_LSE label + adrp x(tmp0), __aa64_have_atomics + ldrb w(tmp0), [x(tmp0), :lo12:__aa64_have_atomics] + cbnz w(tmp0), \label +.endm + +#ifdef L_cas + +STARTFN NAME(cas) + JUMP_IF_LSE 8f + +#if SIZE < 16 +#define CAS glue4(cas, A, L, S) + + mov s(tmp0), s(0) +0: LDXR s(0), [x2] + cmp s(0), s(tmp0) MASK + bne 1f + STXR w(tmp1), s(1), [x2] + cbnz w(tmp1), 0b +1: ret + +8: CAS w(0), w(1), [x2] + ret + +#else +#define LDXP glue3(ld, A, xp) +#define STXP glue3(st, L, xp) +#define CASP glue3(casp, A, L) + + mov x(tmp0), x0 + mov x(tmp1), x1 +0: LDXP x0, x1, [x4] + cmp x0, x(tmp0) + ccmp x1, x(tmp1), #0, eq + bne 1f + STXP w(tmp2), x(tmp0), x(tmp1), [x4] + cbnz w(tmp2), 0b +1: ret + +8: CASP x0, x1, x2, x3, [x4] + ret + +#endif + +ENDFN NAME(cas) +#endif + +#ifdef L_swp +#define SWP glue4(swp, A, L, S) + +STARTFN NAME(swp) + JUMP_IF_LSE 8f + + mov s(tmp0), s(0) +0: LDXR s(0), [x1] + STXR w(tmp1), s(tmp0), [x1] + cbnz w(tmp1), 0b + ret + +8: SWP w(0), w(0), [x1] + ret + +ENDFN NAME(swp) +#endif + +#if defined(L_ldadd) || defined(L_ldclr) \ + || defined(L_ldeor) || defined(L_ldset) + +#ifdef L_ldadd +#define LDNM ldadd +#define OP add +#elif defined(L_ldclr) +#define LDNM ldclr +#define OP bic +#elif defined(L_ldeor) +#define LDNM ldeor +#define OP eor +#elif defined(L_ldset) +#define LDNM ldset +#define OP orr +#else +#error +#endif +#define LDOP glue4(LDNM, A, L, S) + +STARTFN NAME(LDNM) + JUMP_IF_LSE 8f + + mov s(tmp0), s(0) +0: LDXR s(0), [x1] + OP s(tmp1), s(0), s(tmp0) + STXR w(tmp1), s(tmp1), [x1] + cbnz w(tmp1), 0b + ret + +8: LDOP s(0), s(0), [x1] + ret + +ENDFN NAME(LDNM) +#endif diff --git a/libgcc/config/aarch64/t-lse b/libgcc/config/aarch64/t-lse new file mode 100644 index 00000000000..c7f4223cd45 --- 
/dev/null +++ b/libgcc/config/aarch64/t-lse @@ -0,0 +1,44 @@ +# Out-of-line LSE atomics for AArch64 architecture. +# Copyright (C) 2018 Free Software Foundation, Inc. +# Contributed by Linaro Ltd. +# +# This file is part of GCC. +# +# GCC is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# GCC is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with GCC; see the file COPYING3. If not see +# <http://www.gnu.org/licenses/>. + +# Compare-and-swap has 5 sizes and 4 memory models. +S0 := $(foreach s, 1 2 4 8 16, $(addsuffix _$(s), cas)) +O0 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S0))) + +# Swap, Load-and-operate have 4 sizes and 4 memory models +S1 := $(foreach s, 1 2 4 8, $(addsuffix _$(s), swp ldadd ldclr ldeor ldset)) +O1 := $(foreach m, 1 2 3 4, $(addsuffix _$(m)$(objext), $(S1))) + +LSE_OBJS := $(O0) $(O1) + +libgcc-objects += $(LSE_OBJS) lse-init$(objext) + +empty = +space = $(empty) $(empty) +PAT_SPLIT = $(subst _,$(space),$(*F)) +PAT_BASE = $(word 1,$(PAT_SPLIT)) +PAT_N = $(word 2,$(PAT_SPLIT)) +PAT_M = $(word 3,$(PAT_SPLIT)) + +lse-init$(objext): $(srcdir)/config/aarch64/lse-init.c + $(gcc_compile) -c $< + +$(LSE_OBJS): $(srcdir)/config/aarch64/lse.S + $(gcc_compile) -DL_$(PAT_BASE) -DSIZE=$(PAT_N) -DMODEL=$(PAT_M) -c $<