Message ID: 20190909202153.144970-1-arnd@arndb.de
State:      New
Series:     arm64: fix unreachable code issue with cmpxchg
On Mon, Sep 9, 2019 at 1:21 PM Arnd Bergmann <arnd@arndb.de> wrote:
>
> On arm64 build with clang, sometimes the __cmpxchg_mb is not inlined
> when CONFIG_OPTIMIZE_INLINING is set.
> Clang then fails a compile-time assertion, because it cannot tell at
> compile time what the size of the argument is:
>
> mm/memcontrol.o: In function `__cmpxchg_mb':
> memcontrol.c:(.text+0x1a4c): undefined reference to `__compiletime_assert_175'
> memcontrol.c:(.text+0x1a4c): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `__compiletime_assert_175'
>
> Mark all of the cmpxchg() style functions as __always_inline to
> ensure that the compiler can see the result.

Acked-by: Nick Desaulniers <ndesaulniers@google.com>

>
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>
> ---
>  arch/arm64/include/asm/cmpxchg.h | 15 ++++++++-------
>  1 file changed, 8 insertions(+), 7 deletions(-)
>
> [...]

--
Thanks,
~Nick Desaulniers
On Mon, Sep 9, 2019 at 2:06 PM Nick Desaulniers <ndesaulniers@google.com> wrote:
>
> On Mon, Sep 9, 2019 at 1:21 PM Arnd Bergmann <arnd@arndb.de> wrote:
> >
> > On arm64 build with clang, sometimes the __cmpxchg_mb is not inlined
> > when CONFIG_OPTIMIZE_INLINING is set.
> > Clang then fails a compile-time assertion, because it cannot tell at
> > compile time what the size of the argument is:
> >
> > mm/memcontrol.o: In function `__cmpxchg_mb':
> > memcontrol.c:(.text+0x1a4c): undefined reference to `__compiletime_assert_175'
> > memcontrol.c:(.text+0x1a4c): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `__compiletime_assert_175'
> >
> > Mark all of the cmpxchg() style functions as __always_inline to
> > ensure that the compiler can see the result.
>
> Acked-by: Nick Desaulniers <ndesaulniers@google.com>

Also, I think a Link tag may be appropriate as I believe it fixes this
report:

Reported-by: Nathan Chancellor <natechancellor@gmail.com>
Link: https://github.com/ClangBuiltLinux/linux/issues/648

> >
> > Signed-off-by: Arnd Bergmann <arnd@arndb.de>
> > ---
> >  arch/arm64/include/asm/cmpxchg.h | 15 ++++++++-------
> >  1 file changed, 8 insertions(+), 7 deletions(-)
> >
> > [...]

--
Thanks,
~Nick Desaulniers
On Mon, Sep 09, 2019 at 10:21:35PM +0200, Arnd Bergmann wrote:
> On arm64 build with clang, sometimes the __cmpxchg_mb is not inlined
> when CONFIG_OPTIMIZE_INLINING is set.
> Clang then fails a compile-time assertion, because it cannot tell at
> compile time what the size of the argument is:
>
> mm/memcontrol.o: In function `__cmpxchg_mb':
> memcontrol.c:(.text+0x1a4c): undefined reference to `__compiletime_assert_175'
> memcontrol.c:(.text+0x1a4c): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `__compiletime_assert_175'
>
> Mark all of the cmpxchg() style functions as __always_inline to
> ensure that the compiler can see the result.
>
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>

Reviewed-by: Nathan Chancellor <natechancellor@gmail.com>
Tested-by: Nathan Chancellor <natechancellor@gmail.com>
On Mon, Sep 09, 2019 at 10:21:35PM +0200, Arnd Bergmann wrote:
> On arm64 build with clang, sometimes the __cmpxchg_mb is not inlined
> when CONFIG_OPTIMIZE_INLINING is set.

Hmm. Given that CONFIG_OPTIMIZE_INLINING has also been shown to break
assignment of local 'register' variables on GCC, perhaps we should just
disable that option for arm64 (at least) since we don't have any toolchains
that seem to like it very much! I'd certainly prefer that over playing
whack-a-mole with __always_inline.

Will
On Tue, Sep 10, 2019 at 9:46 AM Will Deacon <will@kernel.org> wrote:
>
> On Mon, Sep 09, 2019 at 10:21:35PM +0200, Arnd Bergmann wrote:
> > On arm64 build with clang, sometimes the __cmpxchg_mb is not inlined
> > when CONFIG_OPTIMIZE_INLINING is set.
>
> Hmm. Given that CONFIG_OPTIMIZE_INLINING has also been shown to break
> assignment of local 'register' variables on GCC, perhaps we should just
> disable that option for arm64 (at least) since we don't have any toolchains
> that seem to like it very much! I'd certainly prefer that over playing
> whack-a-mole with __always_inline.

Right, but I can also see good reasons to keep going:

- In theory, CONFIG_OPTIMIZE_INLINING is the right thing to do -- the
  compilers also make some particularly bad decisions around inlining when
  each inline turns into an __always_inline, as has been the case in Linux
  for a long time. I think in most cases, we get better object code with
  CONFIG_OPTIMIZE_INLINING and in the cases where this is worse, it may be
  better to fix the compiler. The new "asm_inline" macro should also help
  with that.

- The x86 folks have apparently whacked most of the moles already, see this
  commit from 2008:

    commit 3f9b5cc018566ad9562df0648395649aebdbc5e0
    Author: Ingo Molnar <mingo@elte.hu>
    Date:   Fri Jul 18 16:30:05 2008 +0200

        x86: re-enable OPTIMIZE_INLINING

        re-enable OPTIMIZE_INLINING more widely. Jeff Dike fixed the remaining
        outstanding issue in this commit:

        | commit 4f81c5350b44bcc501ab6f8a089b16d064b4d2f6
        | Author: Jeff Dike <jdike@addtoit.com>
        | Date:   Mon Jul 7 13:36:56 2008 -0400
        |
        | [UML] fix gcc ICEs and unresolved externs
        [...]
        | This patch reintroduces unit-at-a-time for gcc >= 4.0, bringing back the
        | possibility of Uli's crash. If that happens, we'll debug it.

        it's still default-off and thus opt-in.

- The inlining decisions of gcc and clang are already very different, and
  the bugs we are finding around that are much more common than the
  difference between CONFIG_OPTIMIZE_INLINING=y/n on a given compiler.

      Arnd
On Mon, Sep 09, 2019 at 10:21:35PM +0200, Arnd Bergmann wrote:
> On arm64 build with clang, sometimes the __cmpxchg_mb is not inlined
> when CONFIG_OPTIMIZE_INLINING is set.
> Clang then fails a compile-time assertion, because it cannot tell at
> compile time what the size of the argument is:
>
> mm/memcontrol.o: In function `__cmpxchg_mb':
> memcontrol.c:(.text+0x1a4c): undefined reference to `__compiletime_assert_175'
> memcontrol.c:(.text+0x1a4c): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `__compiletime_assert_175'
>
> Mark all of the cmpxchg() style functions as __always_inline to
> ensure that the compiler can see the result.
>
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>
> ---

I was able to reproduce this with the following:

$ git describe HEAD
next-20190904

$ clang --version
Android (5821526 based on r365631) clang version 9.0.6 (https://android.googlesource.com/toolchain/llvm-project 85305eaf1e90ff529d304abac8a979e1d967f0a2) (based on LLVM 9.0.6svn)
Target: x86_64-unknown-linux-gnu
Thread model: posix
InstalledDir: /home/andrewm/android-clang/clang-r365631/bin

$ make O=~/linux-build/ ARCH=arm64 CROSS_COMPILE=aarch64-linux-gnu- CC=clang -j56 allyesconfig Image

(I was unable to reproduce with defconfig+OPTIMIZE_INLINING). However...

> arch/arm64/include/asm/cmpxchg.h | 15 ++++++++-------
> 1 file changed, 8 insertions(+), 7 deletions(-)
>
> diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
> index a1398f2f9994..fd64dc8a235f 100644
> --- a/arch/arm64/include/asm/cmpxchg.h
> +++ b/arch/arm64/include/asm/cmpxchg.h
> @@ -19,7 +19,7 @@
>  * acquire+release for the latter.
>  */
> #define __XCHG_CASE(w, sfx, name, sz, mb, nop_lse, acq, acq_lse, rel, cl) \
> -static inline u##sz __xchg_case_##name##sz(u##sz x, volatile void *ptr) \
> +static __always_inline u##sz __xchg_case_##name##sz(u##sz x, volatile void *ptr)\

This hunk isn't needed, there is no BUILD_BUG here.

> { \
> 	u##sz ret; \
> 	unsigned long tmp; \
> @@ -62,7 +62,7 @@ __XCHG_CASE( , , mb_, 64, dmb ish, nop, , a, l, "memory")
> #undef __XCHG_CASE
>
> #define __XCHG_GEN(sfx) \
> -static inline unsigned long __xchg##sfx(unsigned long x, \
> +static __always_inline unsigned long __xchg##sfx(unsigned long x, \
> 					volatile void *ptr, \
> 					int size) \
> { \
> @@ -103,8 +103,9 @@ __XCHG_GEN(_mb)
> #define arch_xchg_release(...)	__xchg_wrapper(_rel, __VA_ARGS__)
> #define arch_xchg(...)		__xchg_wrapper( _mb, __VA_ARGS__)
>
> -#define __CMPXCHG_CASE(name, sz) \
> -static inline u##sz __cmpxchg_case_##name##sz(volatile void *ptr, \
> +#define __CMPXCHG_CASE(name, sz) \
> +static __always_inline u##sz \
> +__cmpxchg_case_##name##sz(volatile void *ptr, \

This hunk isn't needed, there is no BUILD_BUG here.

> 					u##sz old, \
> 					u##sz new) \
> { \
> @@ -148,7 +149,7 @@ __CMPXCHG_DBL(_mb)
> #undef __CMPXCHG_DBL
>
> #define __CMPXCHG_GEN(sfx) \
> -static inline unsigned long __cmpxchg##sfx(volatile void *ptr, \
> +static __always_inline unsigned long __cmpxchg##sfx(volatile void *ptr, \
> 					unsigned long old, \
> 					unsigned long new, \
> 					int size) \
> @@ -230,7 +231,7 @@ __CMPXCHG_GEN(_mb)
> })
>
> #define __CMPWAIT_CASE(w, sfx, sz) \
> -static inline void __cmpwait_case_##sz(volatile void *ptr, \
> +static __always_inline void __cmpwait_case_##sz(volatile void *ptr, \
> 					unsigned long val) \

This hunk isn't needed, there is no BUILD_BUG here.

> { \
> 	unsigned long tmp; \
> @@ -255,7 +256,7 @@ __CMPWAIT_CASE( , , 64);
> #undef __CMPWAIT_CASE
>
> #define __CMPWAIT_GEN(sfx) \
> -static inline void __cmpwait##sfx(volatile void *ptr, \
> +static __always_inline void __cmpwait##sfx(volatile void *ptr, \
> 				unsigned long val, \
> 				int size) \
> { \

Alternatively is it possible to replace the BUILD_BUGs with something else?

I think because we use BUILD_BUG at the end of a switch statement, we make
the assumption that size is known at compile time, for this reason we should
ensure the function containing the BUILD_BUG is __always_inline.

Looking across the kernel where BUILD_BUG is used as a default in a switch
statement ($ git grep -B 3 BUILD_BUG\( | grep default), most instances are
within macros, but many are found in an __always_inline function:

arch/x86/kvm/cpuid.h
mm/kasan/generic.c

Though some are not:

include/linux/signal.h
arch/arm64/include/asm/arm_dsu_pmu.h

I wonder if there may be a latent mole ready to whack with pmu.h?

Anyway with just the three remaining hunks:

Reviewed-by: Andrew Murray <andrew.murray@arm.com>
Tested-by: Andrew Murray <andrew.murray@arm.com>

> --
> 2.20.0
>
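To make the constraint Andrew describes concrete, here is a minimal,
stand-alone sketch in plain C. The names (xchg_sketch,
size_not_constant_at_compile_time) are made up for illustration and the
__atomic_exchange_n() builtin stands in for the arm64 assembly; this is not
the kernel's implementation, just the shape of the pattern: a size dispatch
whose default branch references a function that is never defined, so the
program only links (with optimisation) if every caller passes a
compile-time-constant size and the branch can be folded away.

/*
 * build_bug_sketch.c - hypothetical illustration, not kernel code.
 */
#include <stdio.h>

#define __always_inline inline __attribute__((__always_inline__))

/* Declared but never defined: the link-time tripwire, playing the role of
 * the kernel's __compiletime_assert_NNN() helper. */
extern void size_not_constant_at_compile_time(void);

static __always_inline unsigned long xchg_sketch(unsigned long x,
						 volatile void *ptr, int size)
{
	switch (size) {
	case 4:
		return __atomic_exchange_n((volatile unsigned int *)ptr,
					   (unsigned int)x, __ATOMIC_SEQ_CST);
	case 8:
		return __atomic_exchange_n((volatile unsigned long *)ptr,
					   x, __ATOMIC_SEQ_CST);
	default:
		size_not_constant_at_compile_time();	/* ~ BUILD_BUG() */
		return 0;
	}
}

int main(void)
{
	unsigned long v = 1;

	/* sizeof(v) is constant, so with the function inlined and -O2 the
	 * default branch is dead code and the undefined symbol is never
	 * referenced. If the compiler emitted xchg_sketch() out of line,
	 * "size" would be a runtime value and the link could fail, which is
	 * the analogue of the clang failure in the report. */
	printf("old value: %lu\n", xchg_sketch(2, &v, sizeof(v)));
	return 0;
}

Building this with "gcc -O2 build_bug_sketch.c" is expected to link cleanly;
the undefined function is only a problem if the dispatch survives to the
object file.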
On Tue, Sep 10, 2019 at 11:23 AM Andrew Murray <andrew.murray@arm.com> wrote:
> >
> > arch/arm64/include/asm/cmpxchg.h | 15 ++++++++-------
> > 1 file changed, 8 insertions(+), 7 deletions(-)
> >
> > diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
> > index a1398f2f9994..fd64dc8a235f 100644
> > --- a/arch/arm64/include/asm/cmpxchg.h
> > +++ b/arch/arm64/include/asm/cmpxchg.h
> > @@ -19,7 +19,7 @@
> >  * acquire+release for the latter.
> >  */
> > #define __XCHG_CASE(w, sfx, name, sz, mb, nop_lse, acq, acq_lse, rel, cl) \
> > -static inline u##sz __xchg_case_##name##sz(u##sz x, volatile void *ptr) \
> > +static __always_inline u##sz __xchg_case_##name##sz(u##sz x, volatile void *ptr)\
>
> This hunk isn't needed, there is no BUILD_BUG here.

Right, I noticed this, but it seemed like a good idea regardless given the
small size of the function compared with the overhead of a function call. We
clearly want these to be inlined all the time.

Same for the others.

> Alternatively is it possible to replace the BUILD_BUGs with something else?
>
> I think because we use BUILD_BUG at the end of a switch statement, we make
> the assumption that size is known at compile time, for this reason we should
> ensure the function containing the BUILD_BUG is __always_inline.
>
> Looking across the kernel where BUILD_BUG is used as a default in a switch
> statement ($ git grep -B 3 BUILD_BUG\( | grep default), most instances are
> within macros, but many are found in an __always_inline function:
>
> arch/x86/kvm/cpuid.h
> mm/kasan/generic.c
>
> Though some are not:
>
> include/linux/signal.h
> arch/arm64/include/asm/arm_dsu_pmu.h
>
> I wonder if there may be a latent mole ready to whack with pmu.h?

Right, it can't hurt to annotate those as well. I actually have another
fixup for linux/signal.h that I would have to revisit at some point.
See https://bugs.llvm.org/show_bug.cgi?id=38789, I think this is
fixed with clang-9 now, but maybe not with clang-8.

      Arnd
On Tue, Sep 10, 2019 at 6:38 PM Arnd Bergmann <arnd@arndb.de> wrote:
>
> On Tue, Sep 10, 2019 at 11:23 AM Andrew Murray <andrew.murray@arm.com> wrote:
> > >
> > > arch/arm64/include/asm/cmpxchg.h | 15 ++++++++-------
> > > 1 file changed, 8 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
> > > index a1398f2f9994..fd64dc8a235f 100644
> > > --- a/arch/arm64/include/asm/cmpxchg.h
> > > +++ b/arch/arm64/include/asm/cmpxchg.h
> > > @@ -19,7 +19,7 @@
> > >  * acquire+release for the latter.
> > >  */
> > > #define __XCHG_CASE(w, sfx, name, sz, mb, nop_lse, acq, acq_lse, rel, cl) \
> > > -static inline u##sz __xchg_case_##name##sz(u##sz x, volatile void *ptr) \
> > > +static __always_inline u##sz __xchg_case_##name##sz(u##sz x, volatile void *ptr)\
> >
> > This hunk isn't needed, there is no BUILD_BUG here.
>
> Right, I noticed this, but it seemed like a good idea regardless given the small
> size of the function compared with the overhead of a function call. We clearly
> want these to be inlined all the time.

Generally speaking, this should be judged by the compiler, not by humans.

If the function size is quite small compared with the cost of a function
call, the compiler will decide to inline it anyway. (If the compiler's
inlining heuristic is not good, we should fix the compiler.)

So, I personally agree with Andrew Murray. We should use __always_inline
only when we must do so.

Masahiro Yamada

> [...]

--
Best Regards
Masahiro Yamada
On Tue, Sep 10, 2019 at 11:38:37AM +0200, Arnd Bergmann wrote:
> On Tue, Sep 10, 2019 at 11:23 AM Andrew Murray <andrew.murray@arm.com> wrote:
> > >
> > > arch/arm64/include/asm/cmpxchg.h | 15 ++++++++-------
> > > 1 file changed, 8 insertions(+), 7 deletions(-)
> > >
> > > diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
> > > index a1398f2f9994..fd64dc8a235f 100644
> > > --- a/arch/arm64/include/asm/cmpxchg.h
> > > +++ b/arch/arm64/include/asm/cmpxchg.h
> > > @@ -19,7 +19,7 @@
> > >  * acquire+release for the latter.
> > >  */
> > > #define __XCHG_CASE(w, sfx, name, sz, mb, nop_lse, acq, acq_lse, rel, cl) \
> > > -static inline u##sz __xchg_case_##name##sz(u##sz x, volatile void *ptr) \
> > > +static __always_inline u##sz __xchg_case_##name##sz(u##sz x, volatile void *ptr)\
> >
> > This hunk isn't needed, there is no BUILD_BUG here.
>
> Right, I noticed this, but it seemed like a good idea regardless given the small
> size of the function compared with the overhead of a function call. We clearly
> want these to be inlined all the time.
>
> Same for the others.

I'm not so sure - isn't the point of something like OPTIMIZE_INLINING to give
more freedom to the tooling (and by virtue of the option - the user)? Surely
any decent optimising compiler will do the right thing by inlining small
trivial functions that are annotated with inline? And if not, the compiler
should be fixed, not the kernel - unless of course it causes an issue - and
then we should fix those specific cases.

There must be dozens of trivial functions that are marked with inline; I
don't think it would make sense to mark those as __always_inline. For example
the atomics in atomic_lse.h are trivial but only marked inline. We obviously
want them inlined, though I don't think we should babysit the compiler to do
the right thing.

(Also the commit message implies that all the hunks are required to fix this
particular issue, which they are not.)

Thanks,

Andrew Murray

> [...]
On Tue, Sep 10, 2019 at 10:04:24AM +0200, Arnd Bergmann wrote:
> On Tue, Sep 10, 2019 at 9:46 AM Will Deacon <will@kernel.org> wrote:
> >
> > On Mon, Sep 09, 2019 at 10:21:35PM +0200, Arnd Bergmann wrote:
> > > On arm64 build with clang, sometimes the __cmpxchg_mb is not inlined
> > > when CONFIG_OPTIMIZE_INLINING is set.
> >
> > Hmm. Given that CONFIG_OPTIMIZE_INLINING has also been shown to break
> > assignment of local 'register' variables on GCC, perhaps we should just
> > disable that option for arm64 (at least) since we don't have any toolchains
> > that seem to like it very much! I'd certainly prefer that over playing
> > whack-a-mole with __always_inline.
>
> Right, but I can also see good reasons to keep going:
>
> - In theory, CONFIG_OPTIMIZE_INLINING is the right thing to do -- the compilers
>   also make some particularly bad decisions around inlining when each inline
>   turns into an __always_inline, as has been the case in Linux for a long time.
>   I think in most cases, we get better object code with CONFIG_OPTIMIZE_INLINING
>   and in the cases where this is worse, it may be better to fix the compiler.
>   The new "asm_inline" macro should also help with that.

Sure, in theory, but it looks like there isn't a single arm64 compiler out
there which gets it right.

> - The x86 folks have apparently whacked most of the moles already, see this
>   commit from 2008:
>
>     commit 3f9b5cc018566ad9562df0648395649aebdbc5e0
>     Author: Ingo Molnar <mingo@elte.hu>
>     Date:   Fri Jul 18 16:30:05 2008 +0200
>
>         x86: re-enable OPTIMIZE_INLINING
>
>         re-enable OPTIMIZE_INLINING more widely. Jeff Dike fixed the remaining
>         outstanding issue in this commit:
>
>         | commit 4f81c5350b44bcc501ab6f8a089b16d064b4d2f6
>         | Author: Jeff Dike <jdike@addtoit.com>
>         | Date:   Mon Jul 7 13:36:56 2008 -0400
>         |
>         | [UML] fix gcc ICEs and unresolved externs
>         [...]
>         | This patch reintroduces unit-at-a-time for gcc >= 4.0, bringing back the
>         | possibility of Uli's crash. If that happens, we'll debug it.
>
>         it's still default-off and thus opt-in.

This appears to be fixing an ICE, whereas the issue reported recently for
arm64 gcc was silent miscompilation of atomics in some cases. Unfortunately,
I can't seem to find the thread :/ Mark, you were on that one too, right?

> - The inlining decisions of gcc and clang are already very different, and
>   the bugs we are finding around that are much more common than
>   the difference between CONFIG_OPTIMIZE_INLINING=y/n on a
>   given compiler.

Sorry, not sure what you're getting at here.

Anyway, the second version of your patch looks fine, but I would still
prefer to go the extra mile and disable CONFIG_OPTIMIZE_INLINING altogether
given that I don't think it's a safe option to enable for us.

Will
On Tue, Sep 10, 2019 at 3:24 PM Will Deacon <will@kernel.org> wrote:
> On Tue, Sep 10, 2019 at 10:04:24AM +0200, Arnd Bergmann wrote:
> > On Tue, Sep 10, 2019 at 9:46 AM Will Deacon <will@kernel.org> wrote:
> > - In theory, CONFIG_OPTIMIZE_INLINING is the right thing to do -- the compilers
> >   also make some particularly bad decisions around inlining when each inline
> >   turns into an __always_inline, as has been the case in Linux for a long time.
> >   I think in most cases, we get better object code with CONFIG_OPTIMIZE_INLINING
> >   and in the cases where this is worse, it may be better to fix the compiler.
> >   The new "asm_inline" macro should also help with that.
>
> Sure, in theory, but it looks like there isn't a single arm64 compiler out
> there which gets it right.

I don't see anything architecture specific in here. When the option was made
generic instead of x86 specific, I fixed a ton of bugs that showed up all
over the place. If we don't want it on arm64, I'd suggest making it a
per-architecture opt-in instead of an opt-out.

> > > | commit 4f81c5350b44bcc501ab6f8a089b16d064b4d2f6
> > > | Author: Jeff Dike <jdike@addtoit.com>
> > > | Date:   Mon Jul 7 13:36:56 2008 -0400
> > > |
> > > | [UML] fix gcc ICEs and unresolved externs
> > > [...]
> > > | This patch reintroduces unit-at-a-time for gcc >= 4.0, bringing back the
> > > | possibility of Uli's crash. If that happens, we'll debug it.
> > >
> > > it's still default-off and thus opt-in.
>
> This appears to be fixing an ICE, whereas the issue reported recently for
> arm64 gcc was silent miscompilation of atomics in some cases. Unfortunately,
> I can't seem to find the thread :/ Mark, you were on that one too, right?

Sorry, that reference was unclear: I meant the text for commit 3f9b5cc01856,
which in turn contains a citation of the earlier commit 4f81c5350b44bc.

> > - The inlining decisions of gcc and clang are already very different, and
> >   the bugs we are finding around that are much more common than
> >   the difference between CONFIG_OPTIMIZE_INLINING=y/n on a
> >   given compiler.
>
> Sorry, not sure what you're getting at here.
>
> Anyway, the second version of your patch looks fine, but I would still
> prefer to go the extra mile and disable CONFIG_OPTIMIZE_INLINING altogether
> given that I don't think it's a safe option to enable for us.

The point is that function inlining frequently causes all kinds of problems
when code was written in a way that is not entirely reproducible but depends
on the behavior of a particular implementation. I've fixed lots of bugs based
on any of these:

- gcc-4.0 and higher started ignoring 'inline' without
  __attribute__((always_inline)), so a workaround got applied in 2.6.26, and
  this turned into CONFIG_OPTIMIZE_INLINING=n later

- gcc -O2 makes different decisions compared to -Os and -O3, which is an
  endless source of "uninitialized variable" warnings and similar problems

- Some configuration options like KASAN grow the code to result in less
  inlining

- clang and gcc behave completely differently

- gcc is traditionally bad at guessing the size of inline assembly to make a
  good decision

- newer compilers tend to get better at identifying which functions benefit
  from inlining, which changes the balance

CONFIG_OPTIMIZE_INLINING clearly adds to that mess, but it's not the worst
part. The only real solution tends to be to write portable and correct code
rather than making assumptions about compiler behavior.

      Arnd
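As a hypothetical illustration of the "uninitialized variable" class of
warning mentioned in the list above (the function and variable names here
are made up, and whether a given compiler warns genuinely varies with
optimisation level and inlining decisions, which is the point being made):

/* Classic -Wmaybe-uninitialized false-positive shape: `ret` is only ever
 * read under the same condition that sets it, but the compiler may or may
 * not be able to prove that, depending on how much it inlines and folds. */
static int get_value(int have_value, int *out)
{
	int ret;

	if (have_value)
		ret = 42;

	/* ... other code the optimiser may or may not see through ... */

	if (have_value) {
		*out = ret;	/* may warn at -Os or -O3 but not -O2, or vice versa */
		return 0;
	}
	return -1;
}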
On Tue, Sep 10, 2019 at 08:46:07AM +0100, Will Deacon wrote:
> On Mon, Sep 09, 2019 at 10:21:35PM +0200, Arnd Bergmann wrote:
> > On arm64 build with clang, sometimes the __cmpxchg_mb is not inlined
> > when CONFIG_OPTIMIZE_INLINING is set.
>
> Hmm. Given that CONFIG_OPTIMIZE_INLINING has also been shown to break
> assignment of local 'register' variables on GCC, perhaps we should just
> disable that option for arm64 (at least) since we don't have any toolchains
> that seem to like it very much! I'd certainly prefer that over playing
> whack-a-mole with __always_inline.

I assume we're referring to stuff such as the following?

https://www.spinics.net/lists/arm-kernel/msg730329.html

Are these breakages limited to the out-of-line hacks made for LL/SC atomics,
or were there other breakages elsewhere? Now that the out-of-line hacks have
gone, I wonder if this is actually still a problem anymore.

In any case, isn't the right thing to do there to add __always_inline to
functions that use the register keyword and are currently only annotated
inline?

I'm happy to look into this if there is likely to be some benefit in turning
on CONFIG_OPTIMIZE_INLINING.

Thanks,

Andrew Murray

> Will
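For readers following along, the construct under discussion looks roughly
like the following sketch: a hypothetical hypervisor-call wrapper (the
function name is invented, the clobber list is simplified, and this is not
the old arm64 LL/SC code that regressed). Local "register ... asm(...)"
variables pin arguments to specific registers around an inline asm; the
binding is only guaranteed for the asm operands themselves, which is why
such wrappers are usually expected to be inlined into their callers rather
than emitted as standalone functions. It only compiles for an AArch64
target.

/* Hypothetical sketch; illustrates the register-pinning pattern only. */
static inline unsigned long hypothetical_hvc_call(unsigned long function_id,
						  unsigned long arg)
{
	/* Pin the arguments to the registers the calling convention expects. */
	register unsigned long x0 asm("x0") = function_id;
	register unsigned long x1 asm("x1") = arg;

	asm volatile("hvc #0"
		     : "+r" (x0)
		     : "r" (x1)
		     : "memory");

	return x0;	/* result comes back in x0 */
}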
diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h
index a1398f2f9994..fd64dc8a235f 100644
--- a/arch/arm64/include/asm/cmpxchg.h
+++ b/arch/arm64/include/asm/cmpxchg.h
@@ -19,7 +19,7 @@
  * acquire+release for the latter.
  */
 #define __XCHG_CASE(w, sfx, name, sz, mb, nop_lse, acq, acq_lse, rel, cl) \
-static inline u##sz __xchg_case_##name##sz(u##sz x, volatile void *ptr) \
+static __always_inline u##sz __xchg_case_##name##sz(u##sz x, volatile void *ptr)\
 { \
 	u##sz ret; \
 	unsigned long tmp; \
@@ -62,7 +62,7 @@ __XCHG_CASE( , , mb_, 64, dmb ish, nop, , a, l, "memory")
 #undef __XCHG_CASE
 
 #define __XCHG_GEN(sfx) \
-static inline unsigned long __xchg##sfx(unsigned long x, \
+static __always_inline unsigned long __xchg##sfx(unsigned long x, \
 					volatile void *ptr, \
 					int size) \
 { \
@@ -103,8 +103,9 @@ __XCHG_GEN(_mb)
 #define arch_xchg_release(...)	__xchg_wrapper(_rel, __VA_ARGS__)
 #define arch_xchg(...)		__xchg_wrapper( _mb, __VA_ARGS__)
 
-#define __CMPXCHG_CASE(name, sz) \
-static inline u##sz __cmpxchg_case_##name##sz(volatile void *ptr, \
+#define __CMPXCHG_CASE(name, sz) \
+static __always_inline u##sz \
+__cmpxchg_case_##name##sz(volatile void *ptr, \
 					u##sz old, \
 					u##sz new) \
 { \
@@ -148,7 +149,7 @@ __CMPXCHG_DBL(_mb)
 #undef __CMPXCHG_DBL
 
 #define __CMPXCHG_GEN(sfx) \
-static inline unsigned long __cmpxchg##sfx(volatile void *ptr, \
+static __always_inline unsigned long __cmpxchg##sfx(volatile void *ptr, \
 					unsigned long old, \
 					unsigned long new, \
 					int size) \
@@ -230,7 +231,7 @@ __CMPXCHG_GEN(_mb)
 })
 
 #define __CMPWAIT_CASE(w, sfx, sz) \
-static inline void __cmpwait_case_##sz(volatile void *ptr, \
+static __always_inline void __cmpwait_case_##sz(volatile void *ptr, \
 					unsigned long val) \
 { \
 	unsigned long tmp; \
@@ -255,7 +256,7 @@ __CMPWAIT_CASE( , , 64);
 #undef __CMPWAIT_CASE
 
 #define __CMPWAIT_GEN(sfx) \
-static inline void __cmpwait##sfx(volatile void *ptr, \
+static __always_inline void __cmpwait##sfx(volatile void *ptr, \
 				unsigned long val, \
 				int size) \
 { \
On arm64 builds with clang, __cmpxchg_mb is sometimes not inlined when
CONFIG_OPTIMIZE_INLINING is set. Clang then fails a compile-time assertion,
because it cannot tell at compile time what the size of the argument is:

mm/memcontrol.o: In function `__cmpxchg_mb':
memcontrol.c:(.text+0x1a4c): undefined reference to `__compiletime_assert_175'
memcontrol.c:(.text+0x1a4c): relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `__compiletime_assert_175'

Mark all of the cmpxchg() style functions as __always_inline to ensure that
the compiler can resolve the size check at compile time.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
---
 arch/arm64/include/asm/cmpxchg.h | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

--
2.20.0
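As background for the link error quoted above, the kernel's compile-time
assertions boil down to roughly the following shape. This is a paraphrased,
simplified sketch with invented macro names, not the verbatim macros from
include/linux/compiler*.h, and the "175" suffix is simply the unique value
the build happened to assign in this report:

/* Rough sketch of the mechanism behind the error message. */
#define compiletime_assert_sketch(condition, prefix, suffix)		\
	do {								\
		extern void prefix##suffix(void); /* never defined */	\
		if (!(condition))					\
			prefix##suffix();				\
	} while (0)

/* A BUILD_BUG()-style "this must be unreachable" use: */
#define BUILD_BUG_SKETCH()	\
	compiletime_assert_sketch(0, __compiletime_assert_, 175)

When the enclosing switch on the operand size is folded at compile time, the
call above is dead code and the undefined symbol is never referenced. When
__cmpxchg_mb() is emitted out of line instead, the call survives and the link
fails with "undefined reference to `__compiletime_assert_175'", as in the
log. (The real kernel macro also tags the declaration with an error attribute
where the compiler supports it, so GCC typically diagnoses this earlier, at
compile time.)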