diff mbox series

[v5,2/4] elf: Do not change stack permission on dlopen/dlmopen

Message ID 20241128173851.1920696-3-adhemerval.zanella@linaro.org
State Superseded
Headers show
Series Improve executable stack handling | expand

Commit Message

Adhemerval Zanella Nov. 28, 2024, 5:36 p.m. UTC
If some shared library loaded with dlopen/dlmopen requires an executable
stack, either implicitly because of a missing GNU_STACK ELF header
(where the ABI default flags implies in the executable bit) or explicitly
because of the executable bit from GNU_STACK; the loader will try to set
the both the main thread and all thread stacks (from the pthread cache)
as executable.

Besides the issue where any __nptl_change_stack_perm failure does not
undo the previous executable transition (meaning that if the library
fails to load, there can be thread stacks with executable stacks), this
behavior was used on recent CVE [1] as a vector for RCE.

This patch changes that if a shared library requires an executable
stack, and the current stack is not executable, dlopen fails.  The
change is done only for dynamically loaded modules, if the program
or any dependency requires an executable stack, the loader will still
change the main thread before program execution and any thread created
with default stack configuration.

[1] https://www.qualys.com/2023/07/19/cve-2023-38408/rce-openssh-forwarded-ssh-agent.txt

Checked on x86_64-linux-gnu and i686-linux-gnu.
---
 NEWS                                   |   6 ++
 elf/dl-load.c                          |  13 +--
 elf/dl-support.c                       |   4 -
 elf/rtld.c                             |   6 --
 elf/tst-execstack.c                    | 142 ++++++++++---------------
 nptl/allocatestack.c                   |  19 ----
 sysdeps/generic/ldsodefs.h             |  22 +---
 sysdeps/mach/hurd/dl-execstack.c       |   1 -
 sysdeps/nptl/pthreadP.h                |   6 --
 sysdeps/unix/sysv/linux/Versions       |   3 -
 sysdeps/unix/sysv/linux/dl-execstack.c |  67 +-----------
 sysdeps/unix/sysv/linux/mips/Makefile  |   7 ++
 12 files changed, 80 insertions(+), 216 deletions(-)

Comments

Florian Weimer Nov. 29, 2024, 7:51 p.m. UTC | #1
* Adhemerval Zanella:

> If some shared library loaded with dlopen/dlmopen requires an executable
> stack, either implicitly because of a missing GNU_STACK ELF header
> (where the ABI default flags implies in the executable bit) or explicitly
> because of the executable bit from GNU_STACK; the loader will try to set
> the both the main thread and all thread stacks (from the pthread cache)
> as executable.
>
> Besides the issue where any __nptl_change_stack_perm failure does not
> undo the previous executable transition (meaning that if the library
> fails to load, there can be thread stacks with executable stacks), this
> behavior was used on recent CVE [1] as a vector for RCE.

> [1] https://www.qualys.com/2023/07/19/cve-2023-38408/rce-openssh-forwarded-ssh-agent.txt

It's no longer recent, I would say.

> diff --git a/elf/dl-load.c b/elf/dl-load.c
> index e986d7faab..f525eec662 100644
> --- a/elf/dl-load.c
> +++ b/elf/dl-load.c
> @@ -1282,12 +1282,13 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd,
>    if (__glibc_unlikely ((stack_flags &~ GL(dl_stack_flags)) & PF_X))
>      {
>        /* The stack is presently not executable, but this module
> -	 requires that it be executable.  */
> -#if PTHREAD_IN_LIBC
> -      errval = _dl_make_stacks_executable (stack_endp);
> -#else
> -      errval = (*GL(dl_make_stack_executable_hook)) (stack_endp);
> -#endif
> +	 requires that it be executable.  Only tries to change the
> +	 stack protection during process startup.  */
> +      if ((mode & __RTLD_DLOPEN) == 0)
> +	errval = _dl_make_stack_executable (stack_endp);
> +      else
> +	errval = EINVAL;
> +
>        if (errval)
>  	{
>  	  errstring = N_("\

The specified error message is the same for the initial vs dlopen
failure case, but I think that's okay because the full dlopen failure
message will look different because it includes the DSO name.

> diff --git a/elf/tst-execstack.c b/elf/tst-execstack.c
> index 560b353918..cd758c089e 100644
> --- a/elf/tst-execstack.c
> +++ b/elf/tst-execstack.c
> @@ -9,6 +9,11 @@
>  #include <error.h>
>  #include <stackinfo.h>
>  
> +#include <stdlib.h>
> +#include <support/check.h>
> +#include <support/xthread.h>
> +#include <support/xdlfcn.h>

These cleanups seem mostly unrelated?

And why doesn't the dlopen call fail?


> diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
> index 91447a5e77..b897da7e7b 100644
> --- a/sysdeps/generic/ldsodefs.h
> +++ b/sysdeps/generic/ldsodefs.h
> @@ -413,13 +413,6 @@ struct rtld_global
>  #endif
>  #include <dl-procruntime.c>
>  
> -#if !PTHREAD_IN_LIBC
> -  /* If loading a shared object requires that we make the stack executable
> -     when it was not, we do it by calling this function.
> -     It returns an errno code or zero on success.  */
> -  EXTERN int (*_dl_make_stack_executable_hook) (void **);
> -#endif
> -

Huh.  Hurd doesn't even use that, presumably because stacks are always
executable?

> diff --git a/sysdeps/unix/sysv/linux/dl-execstack.c b/sysdeps/unix/sysv/linux/dl-execstack.c
> index b986898598..68db6737f0 100644
> --- a/sysdeps/unix/sysv/linux/dl-execstack.c
> +++ b/sysdeps/unix/sysv/linux/dl-execstack.c

> -static int
> -make_main_stack_executable (void **stack_endp)
> +int
> +_dl_make_stack_executable (void **stack_endp)
>  {
>    /* This gives us the highest/lowest page that needs to be changed.  */
>    uintptr_t page = ((uintptr_t) *stack_endp
> @@ -52,57 +43,3 @@ make_main_stack_executable (void **stack_endp)
>  
>    return 0;
>  }

Okay.  We only need this for the main thread because pthread_create
creates new stacks directly with the right permissions.


> diff --git a/sysdeps/unix/sysv/linux/mips/Makefile b/sysdeps/unix/sysv/linux/mips/Makefile
> index d5725c69d8..05ec9150b2 100644
> --- a/sysdeps/unix/sysv/linux/mips/Makefile
> +++ b/sysdeps/unix/sysv/linux/mips/Makefile
> @@ -61,6 +61,7 @@ ifeq ($(subdir),elf)
>  # this test is expected to fail.
>  ifneq ($(mips-has-gnustack),yes)
>  test-xfail-check-execstack = yes
> +CFLAGS-tst-execstack.c += -DDEFAULT_RWX_STACK=1
>  endif
>  endif

Is the xfail still needed?

>  
> @@ -68,6 +69,12 @@ ifeq ($(subdir),stdlib)
>  gen-as-const-headers += ucontext_i.sym
>  endif
>  
> +ifeq ($(subdir),nptl)
> +ifeq ($(mips-force-execstack),yes)
> +CFLAGS-tst-execstack-threads.c += -DDEFAULT_RWX_STACK=1
> +endif
> +endif
> +
>  ifeq ($(mips-force-execstack),yes)
>  CFLAGS-.o += -Wa,-execstack
>  CFLAGS-.os += -Wa,-execstack

Why is setting DEFAULT_RWX_STACK only needed on MIPS, but not on i386?
Does it reflect the combined kernel/toolchain default?

Thanks,
Florian
Adhemerval Zanella Dec. 2, 2024, 8:35 p.m. UTC | #2
On 29/11/24 16:51, Florian Weimer wrote:
> * Adhemerval Zanella:
> 
>> If some shared library loaded with dlopen/dlmopen requires an executable
>> stack, either implicitly because of a missing GNU_STACK ELF header
>> (where the ABI default flags implies in the executable bit) or explicitly
>> because of the executable bit from GNU_STACK; the loader will try to set
>> the both the main thread and all thread stacks (from the pthread cache)
>> as executable.
>>
>> Besides the issue where any __nptl_change_stack_perm failure does not
>> undo the previous executable transition (meaning that if the library
>> fails to load, there can be thread stacks with executable stacks), this
>> behavior was used on recent CVE [1] as a vector for RCE.
> 
>> [1] https://www.qualys.com/2023/07/19/cve-2023-38408/rce-openssh-forwarded-ssh-agent.txt
> 
> It's no longer recent, I would say.
> 
>> diff --git a/elf/dl-load.c b/elf/dl-load.c
>> index e986d7faab..f525eec662 100644
>> --- a/elf/dl-load.c
>> +++ b/elf/dl-load.c
>> @@ -1282,12 +1282,13 @@ _dl_map_object_from_fd (const char *name, const char *origname, int fd,
>>    if (__glibc_unlikely ((stack_flags &~ GL(dl_stack_flags)) & PF_X))
>>      {
>>        /* The stack is presently not executable, but this module
>> -	 requires that it be executable.  */
>> -#if PTHREAD_IN_LIBC
>> -      errval = _dl_make_stacks_executable (stack_endp);
>> -#else
>> -      errval = (*GL(dl_make_stack_executable_hook)) (stack_endp);
>> -#endif
>> +	 requires that it be executable.  Only tries to change the
>> +	 stack protection during process startup.  */
>> +      if ((mode & __RTLD_DLOPEN) == 0)
>> +	errval = _dl_make_stack_executable (stack_endp);
>> +      else
>> +	errval = EINVAL;
>> +
>>        if (errval)
>>  	{
>>  	  errstring = N_("\
> 
> The specified error message is the same for the initial vs dlopen
> failure case, but I think that's okay because the full dlopen failure
> message will look different because it includes the DSO name.
> 

I can change the error message if it would preferable, but I agree that it
should give is enough information to diagnosticate the failure.

>> diff --git a/elf/tst-execstack.c b/elf/tst-execstack.c
>> index 560b353918..cd758c089e 100644
>> --- a/elf/tst-execstack.c
>> +++ b/elf/tst-execstack.c
>> @@ -9,6 +9,11 @@
>>  #include <error.h>
>>  #include <stackinfo.h>
>>  
>> +#include <stdlib.h>
>> +#include <support/check.h>
>> +#include <support/xthread.h>
>> +#include <support/xdlfcn.h>
> 
> These cleanups seem mostly unrelated?

Right, I will split the patch with one to move the test to libsupport and
another one with the required changes for the patch itself.

> 
> And why doesn't the dlopen call fail?
> 

It does, the tests changes the expected results as:

185   /* Loading this module should force stacks to become executable.  */
186 #if USE_PTHREADS
187   const char *soname = "tst-execstack-threads-mod.so";
188 #else
189   const char *soname = "tst-execstack-mod.so";
190 #endif
191   void *h = dlopen (soname, RTLD_LAZY);
192 #if !DEFAULT_RWX_STACK
193   TEST_VERIFY_EXIT (h == NULL);
194 #else
195   TEST_VERIFY_EXIT (h != NULL);
[...]
244   return ! allow_execstack;

So for DEFAULT_RWX_STACK (the default), the test expects that dlopen fails.

> 
>> diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
>> index 91447a5e77..b897da7e7b 100644
>> --- a/sysdeps/generic/ldsodefs.h
>> +++ b/sysdeps/generic/ldsodefs.h
>> @@ -413,13 +413,6 @@ struct rtld_global
>>  #endif
>>  #include <dl-procruntime.c>
>>  
>> -#if !PTHREAD_IN_LIBC
>> -  /* If loading a shared object requires that we make the stack executable
>> -     when it was not, we do it by calling this function.
>> -     It returns an errno code or zero on success.  */
>> -  EXTERN int (*_dl_make_stack_executable_hook) (void **);
>> -#endif
>> -
> 
> Huh.  Hurd doesn't even use that, presumably because stacks are always
> executable?

No idea of default Hurd ABI, but I would guess it is default to executable since
the glibc build sets GNU_STACK to RWE.

> 
>> diff --git a/sysdeps/unix/sysv/linux/dl-execstack.c b/sysdeps/unix/sysv/linux/dl-execstack.c
>> index b986898598..68db6737f0 100644
>> --- a/sysdeps/unix/sysv/linux/dl-execstack.c
>> +++ b/sysdeps/unix/sysv/linux/dl-execstack.c
> 
>> -static int
>> -make_main_stack_executable (void **stack_endp)
>> +int
>> +_dl_make_stack_executable (void **stack_endp)
>>  {
>>    /* This gives us the highest/lowest page that needs to be changed.  */
>>    uintptr_t page = ((uintptr_t) *stack_endp
>> @@ -52,57 +43,3 @@ make_main_stack_executable (void **stack_endp)
>>  
>>    return 0;
>>  }
> 
> Okay.  We only need this for the main thread because pthread_create
> creates new stacks directly with the right permissions.

Yes, using GL(dl_stack_flags) that should be set only during loading
(maybe move it to GLRO, not sure if we can).

> 
> 
>> diff --git a/sysdeps/unix/sysv/linux/mips/Makefile b/sysdeps/unix/sysv/linux/mips/Makefile
>> index d5725c69d8..05ec9150b2 100644
>> --- a/sysdeps/unix/sysv/linux/mips/Makefile
>> +++ b/sysdeps/unix/sysv/linux/mips/Makefile
>> @@ -61,6 +61,7 @@ ifeq ($(subdir),elf)
>>  # this test is expected to fail.
>>  ifneq ($(mips-has-gnustack),yes)
>>  test-xfail-check-execstack = yes
>> +CFLAGS-tst-execstack.c += -DDEFAULT_RWX_STACK=1
>>  endif
>>  endif
> 
> Is the xfail still needed?

Without --enable-kernel=4.8, the mips seems to require an executable stack for
some fp emulation:

sysdeps/unix/sysv/linux/mips/configure.ac
138 # Check if we are supposed to run on kernels older than 4.8.0. If so,
139 # force executable stack to avoid potential runtime problems with fpu
140 # emulation.

And kernel seems to still enforce it some, even on some recent kernels (6.3.0-2-5kc-malta):

$ readelf -lW elf/tst-execstack | grep GNU_STACK
  GNU_STACK      0x000000 0x0000000000000000 0x0000000000000000 0x000000 0x000000 RW  0x10

$ gdb --args ./elf/tst-execstack --direct
[...]
(gdb) b __start
(gdb) r
[...]

Breakpoint 2, __start () at ../sysdeps/mips/start.
S:81
81		SETUP_GPX64($25,$0)
(gdb) info proc
process 792

$ cat /proc/792/maps  | grep "\[stack\]"
fffffdc000-ffffffd000 rwxp 00000000 00:00 0                              [stack]

I think once we move to a minimal kernel of at least 4.8 for mips we can clean this up.

> 
>>  
>> @@ -68,6 +69,12 @@ ifeq ($(subdir),stdlib)
>>  gen-as-const-headers += ucontext_i.sym
>>  endif
>>  
>> +ifeq ($(subdir),nptl)
>> +ifeq ($(mips-force-execstack),yes)
>> +CFLAGS-tst-execstack-threads.c += -DDEFAULT_RWX_STACK=1
>> +endif
>> +endif
>> +
>>  ifeq ($(mips-force-execstack),yes)
>>  CFLAGS-.o += -Wa,-execstack
>>  CFLAGS-.os += -Wa,-execstack
> 
> Why is setting DEFAULT_RWX_STACK only needed on MIPS, but not on i386?
> Does it reflect the combined kernel/toolchain default?

Yes, and DEFAULT_RWX_STACK is only needed on mips because during
_dl_map_object_from_fd mips without $(mips-force-execstack) will have
GL(dl_stack_flags) equal to PF_X|PF_W|PF_R (due the -Wa,-execstack);
different than i686 (where with a recent toolchain by default it will
be just PF_W|PF_R).
Florian Weimer Dec. 2, 2024, 8:45 p.m. UTC | #3
* Adhemerval Zanella Netto:

>> Huh.  Hurd doesn't even use that, presumably because stacks are always
>> executable?
>
> No idea of default Hurd ABI, but I would guess it is default to
> executable since the glibc build sets GNU_STACK to RWE.

I assumed it's heavily using nested functions which have their address
taken.

>> Okay.  We only need this for the main thread because pthread_create
>> creates new stacks directly with the right permissions.
>
> Yes, using GL(dl_stack_flags) that should be set only during loading
> (maybe move it to GLRO, not sure if we can).

I think we can move it to GLRO, I was about to suggest that in a
follow-up patch.

>>> diff --git a/sysdeps/unix/sysv/linux/mips/Makefile b/sysdeps/unix/sysv/linux/mips/Makefile
>>> index d5725c69d8..05ec9150b2 100644
>>> --- a/sysdeps/unix/sysv/linux/mips/Makefile
>>> +++ b/sysdeps/unix/sysv/linux/mips/Makefile
>>> @@ -61,6 +61,7 @@ ifeq ($(subdir),elf)
>>>  # this test is expected to fail.
>>>  ifneq ($(mips-has-gnustack),yes)
>>>  test-xfail-check-execstack = yes
>>> +CFLAGS-tst-execstack.c += -DDEFAULT_RWX_STACK=1
>>>  endif
>>>  endif
>> 
>> Is the xfail still needed?
>
> Without --enable-kernel=4.8, the mips seems to require an executable stack for
> some fp emulation:
>
> sysdeps/unix/sysv/linux/mips/configure.ac
> 138 # Check if we are supposed to run on kernels older than 4.8.0. If so,
> 139 # force executable stack to avoid potential runtime problems with fpu
> 140 # emulation.
>
> And kernel seems to still enforce it some, even on some recent kernels (6.3.0-2-5kc-malta):
>
> $ readelf -lW elf/tst-execstack | grep GNU_STACK
>   GNU_STACK      0x000000 0x0000000000000000 0x0000000000000000 0x000000 0x000000 RW  0x10
>
> $ gdb --args ./elf/tst-execstack --direct
> [...]
> (gdb) b __start
> (gdb) r
> [...]

Ahh, different test (check-execstack vs tst-execstack).

>> Why is setting DEFAULT_RWX_STACK only needed on MIPS, but not on i386?
>> Does it reflect the combined kernel/toolchain default?
>
> Yes, and DEFAULT_RWX_STACK is only needed on mips because during
> _dl_map_object_from_fd mips without $(mips-force-execstack) will have
> GL(dl_stack_flags) equal to PF_X|PF_W|PF_R (due the -Wa,-execstack);
> different than i686 (where with a recent toolchain by default it will
> be just PF_W|PF_R).

Okay.

Thanks,
Florian
Adhemerval Zanella Dec. 20, 2024, 5:20 p.m. UTC | #4
On 02/12/24 17:45, Florian Weimer wrote:
> * Adhemerval Zanella Netto:
> 
>>> Huh.  Hurd doesn't even use that, presumably because stacks are always
>>> executable?
>>
>> No idea of default Hurd ABI, but I would guess it is default to
>> executable since the glibc build sets GNU_STACK to RWE.
> 
> I assumed it's heavily using nested functions which have their address
> taken.
> 
>>> Okay.  We only need this for the main thread because pthread_create
>>> creates new stacks directly with the right permissions.
>>
>> Yes, using GL(dl_stack_flags) that should be set only during loading
>> (maybe move it to GLRO, not sure if we can).
> 
> I think we can move it to GLRO, I was about to suggest that in a
> follow-up patch.
> 
>>>> diff --git a/sysdeps/unix/sysv/linux/mips/Makefile b/sysdeps/unix/sysv/linux/mips/Makefile
>>>> index d5725c69d8..05ec9150b2 100644
>>>> --- a/sysdeps/unix/sysv/linux/mips/Makefile
>>>> +++ b/sysdeps/unix/sysv/linux/mips/Makefile
>>>> @@ -61,6 +61,7 @@ ifeq ($(subdir),elf)
>>>>  # this test is expected to fail.
>>>>  ifneq ($(mips-has-gnustack),yes)
>>>>  test-xfail-check-execstack = yes
>>>> +CFLAGS-tst-execstack.c += -DDEFAULT_RWX_STACK=1
>>>>  endif
>>>>  endif
>>>
>>> Is the xfail still needed?
>>
>> Without --enable-kernel=4.8, the mips seems to require an executable stack for
>> some fp emulation:
>>
>> sysdeps/unix/sysv/linux/mips/configure.ac
>> 138 # Check if we are supposed to run on kernels older than 4.8.0. If so,
>> 139 # force executable stack to avoid potential runtime problems with fpu
>> 140 # emulation.
>>
>> And kernel seems to still enforce it some, even on some recent kernels (6.3.0-2-5kc-malta):
>>
>> $ readelf -lW elf/tst-execstack | grep GNU_STACK
>>   GNU_STACK      0x000000 0x0000000000000000 0x0000000000000000 0x000000 0x000000 RW  0x10
>>
>> $ gdb --args ./elf/tst-execstack --direct
>> [...]
>> (gdb) b __start
>> (gdb) r
>> [...]
> 
> Ahh, different test (check-execstack vs tst-execstack).
> 
>>> Why is setting DEFAULT_RWX_STACK only needed on MIPS, but not on i386?
>>> Does it reflect the combined kernel/toolchain default?
>>
>> Yes, and DEFAULT_RWX_STACK is only needed on mips because during
>> _dl_map_object_from_fd mips without $(mips-force-execstack) will have
>> GL(dl_stack_flags) equal to PF_X|PF_W|PF_R (due the -Wa,-execstack);
>> different than i686 (where with a recent toolchain by default it will
>> be just PF_W|PF_R).
> 
> Okay.

Florian, do you think this patch still required additional work?
diff mbox series

Patch

diff --git a/NEWS b/NEWS
index dae2332eab..8cb5597631 100644
--- a/NEWS
+++ b/NEWS
@@ -49,6 +49,12 @@  Deprecated and removed features, and other changes affecting compatibility:
 
 * The nios2*-*-linux-gnu configurations are no longer supported.
 
+* dlopen and dlmopen no longer make the stack executable if a shared
+  library requires it, either implicitly because of a missing GNU_STACK ELF
+  header (and default ABI permission having the executable bit set) or
+  explicitly because of the executable bit in GNU_STACK, and the stack is
+  not already executable.
+
 Changes to build and runtime requirements:
 
 * On recent Linux kernels with vDSO getrandom support, getrandom does not
diff --git a/elf/dl-load.c b/elf/dl-load.c
index e986d7faab..f525eec662 100644
--- a/elf/dl-load.c
+++ b/elf/dl-load.c
@@ -1282,12 +1282,13 @@  _dl_map_object_from_fd (const char *name, const char *origname, int fd,
   if (__glibc_unlikely ((stack_flags &~ GL(dl_stack_flags)) & PF_X))
     {
       /* The stack is presently not executable, but this module
-	 requires that it be executable.  */
-#if PTHREAD_IN_LIBC
-      errval = _dl_make_stacks_executable (stack_endp);
-#else
-      errval = (*GL(dl_make_stack_executable_hook)) (stack_endp);
-#endif
+	 requires that it be executable.  Only tries to change the
+	 stack protection during process startup.  */
+      if ((mode & __RTLD_DLOPEN) == 0)
+	errval = _dl_make_stack_executable (stack_endp);
+      else
+	errval = EINVAL;
+
       if (errval)
 	{
 	  errstring = N_("\
diff --git a/elf/dl-support.c b/elf/dl-support.c
index ee590edf93..fe1f8c8f6a 100644
--- a/elf/dl-support.c
+++ b/elf/dl-support.c
@@ -178,10 +178,6 @@  size_t _dl_stack_cache_actsize;
 uintptr_t _dl_in_flight_stack;
 int _dl_stack_cache_lock;
 #else
-/* If loading a shared object requires that we make the stack executable
-   when it was not, we do it by calling this function.
-   It returns an errno code or zero on success.  */
-int (*_dl_make_stack_executable_hook) (void **) = _dl_make_stack_executable;
 void (*_dl_init_static_tls) (struct link_map *) = &_dl_nothread_init_static_tls;
 #endif
 struct dl_scope_free_list *_dl_scope_free_list;
diff --git a/elf/rtld.c b/elf/rtld.c
index b8cc3f605f..3b232f8525 100644
--- a/elf/rtld.c
+++ b/elf/rtld.c
@@ -1335,12 +1335,6 @@  dl_main (const ElfW(Phdr) *phdr,
 
   __tls_pre_init_tp ();
 
-#if !PTHREAD_IN_LIBC
-  /* The explicit initialization here is cheaper than processing the reloc
-     in the _rtld_local definition's initializer.  */
-  GL(dl_make_stack_executable_hook) = &_dl_make_stack_executable;
-#endif
-
   /* Process the environment variable which control the behaviour.  */
   skip_env = process_envvars (&state);
 
diff --git a/elf/tst-execstack.c b/elf/tst-execstack.c
index 560b353918..cd758c089e 100644
--- a/elf/tst-execstack.c
+++ b/elf/tst-execstack.c
@@ -9,6 +9,11 @@ 
 #include <error.h>
 #include <stackinfo.h>
 
+#include <stdlib.h>
+#include <support/check.h>
+#include <support/xthread.h>
+#include <support/xdlfcn.h>
+
 static void
 print_maps (void)
 {
@@ -20,11 +25,21 @@  print_maps (void)
 #endif
 }
 
-static void deeper (void (*f) (void));
+#ifndef DEFAULT_RWX_STACK
+# define DEFAULT_RWX_STACK 0
+#else
+static void
+deeper (void (*f) (void))
+{
+  char stack[1100 * 1024];
+  explicit_bzero (stack, sizeof stack);
+  (*f) ();
+  memfrob (stack, sizeof stack);
+}
+#endif
 
 #if USE_PTHREADS
-# include <pthread.h>
-
+# if DEFAULT_RWX_STACK
 static void *
 tryme_thread (void *f)
 {
@@ -32,16 +47,21 @@  tryme_thread (void *f)
 
   return 0;
 }
+# endif
 
 static pthread_barrier_t startup_barrier, go_barrier;
 static void *
 waiter_thread (void *arg)
 {
-  void **f = arg;
-  pthread_barrier_wait (&startup_barrier);
-  pthread_barrier_wait (&go_barrier);
+  xpthread_barrier_wait (&startup_barrier);
+  xpthread_barrier_wait (&go_barrier);
 
+# if DEFAULT_RWX_STACK
+  void **f = arg;
   (*((void (*) (void)) *f)) ();
+# else
+  abort ();
+# endif
 
   return 0;
 }
@@ -83,52 +103,36 @@  do_test (void)
 
   printf ("executable stacks %sallowed\n", allow_execstack ? "" : "not ");
 
+#if USE_PTHREADS || DEFAULT_RWX_STACK
   static void *f;		/* Address of this is used in other threads. */
+#endif
 
 #if USE_PTHREADS
   /* Create some threads while stacks are nonexecutable.  */
   #define N 5
-  pthread_t thr[N];
 
-  pthread_barrier_init (&startup_barrier, NULL, N + 1);
-  pthread_barrier_init (&go_barrier, NULL, N + 1);
+  xpthread_barrier_init (&startup_barrier, NULL, N + 1);
+  xpthread_barrier_init (&go_barrier, NULL, N + 1);
 
   for (int i = 0; i < N; ++i)
-    {
-      int rc = pthread_create (&thr[i], NULL, &waiter_thread, &f);
-      if (rc)
-	error (1, rc, "pthread_create");
-    }
+    xpthread_create (NULL, &waiter_thread, &f);
 
   /* Make sure they are all there using their stacks.  */
-  pthread_barrier_wait (&startup_barrier);
+  xpthread_barrier_wait (&startup_barrier);
   puts ("threads waiting");
 #endif
 
   print_maps ();
 
-#if USE_PTHREADS
+#if USE_PTHREADS && DEFAULT_RWX_STACK
   void *old_stack_addr, *new_stack_addr;
   size_t stack_size;
   pthread_t me = pthread_self ();
   pthread_attr_t attr;
-  int ret = 0;
-
-  ret = pthread_getattr_np (me, &attr);
-  if (ret)
-    {
-      printf ("before execstack: pthread_getattr_np returned error: %s\n",
-	      strerror (ret));
-      return 1;
-    }
 
-  ret = pthread_attr_getstack (&attr, &old_stack_addr, &stack_size);
-  if (ret)
-    {
-      printf ("before execstack: pthread_attr_getstack returned error: %s\n",
-	      strerror (ret));
-      return 1;
-    }
+  TEST_VERIFY_EXIT (pthread_getattr_np (me, &attr) == 0);
+  TEST_VERIFY_EXIT (pthread_attr_getstack (&attr, &old_stack_addr,
+					    &stack_size) == 0);
 # if _STACK_GROWS_DOWN
     old_stack_addr += stack_size;
 # else
@@ -143,18 +147,12 @@  do_test (void)
   const char *soname = "tst-execstack-mod.so";
 #endif
   void *h = dlopen (soname, RTLD_LAZY);
-  if (h == NULL)
-    {
-      printf ("cannot load: %s\n", dlerror ());
-      return allow_execstack;
-    }
+#if !DEFAULT_RWX_STACK
+  TEST_VERIFY_EXIT (h == NULL);
+#else
+  TEST_VERIFY_EXIT (h != NULL);
 
-  f = dlsym (h, "tryme");
-  if (f == NULL)
-    {
-      printf ("symbol not found: %s\n", dlerror ());
-      return 1;
-    }
+  f = xdlsym (h, "tryme");
 
   /* Test if that really made our stack executable.
      The `tryme' function should crash if not.  */
@@ -163,28 +161,15 @@  do_test (void)
 
   print_maps ();
 
-#if USE_PTHREADS
-  ret = pthread_getattr_np (me, &attr);
-  if (ret)
-    {
-      printf ("after execstack: pthread_getattr_np returned error: %s\n",
-	      strerror (ret));
-      return 1;
-    }
-
-  ret = pthread_attr_getstack (&attr, &new_stack_addr, &stack_size);
-  if (ret)
-    {
-      printf ("after execstack: pthread_attr_getstack returned error: %s\n",
-	      strerror (ret));
-      return 1;
-    }
-
-# if _STACK_GROWS_DOWN
+# if USE_PTHREADS
+  TEST_VERIFY_EXIT (pthread_getattr_np (me, &attr) == 0);
+  TEST_VERIFY_EXIT (pthread_attr_getstack (&attr, &new_stack_addr,
+					    &stack_size) == 0);
+#  if _STACK_GROWS_DOWN
     new_stack_addr += stack_size;
-# else
+#  else
     new_stack_addr -= stack_size;
-# endif
+#  endif
 
   /* It is possible that the dlopen'd module may have been mmapped just below
      the stack.  The stack size is taken as MIN(stack rlimit size, end of last
@@ -194,48 +179,29 @@  do_test (void)
      stacksize and stackaddr respectively.  If the size changes due to the
      above, then both stacksize and stackaddr can change, but the stack bottom
      should remain the same, which is computed as stackaddr + stacksize.  */
-  if (old_stack_addr != new_stack_addr)
-    {
-      printf ("Stack end changed, old: %p, new: %p\n",
-	      old_stack_addr, new_stack_addr);
-      return 1;
-    }
+  TEST_VERIFY_EXIT (old_stack_addr == new_stack_addr);
   printf ("Stack address remains the same: %p\n", old_stack_addr);
-#endif
+# endif
 
   /* Test that growing the stack region gets new executable pages too.  */
   deeper ((void (*) (void)) f);
 
   print_maps ();
 
-#if USE_PTHREADS
+# if USE_PTHREADS
   /* Test that a fresh thread now gets an executable stack.  */
-  {
-    pthread_t th;
-    int rc = pthread_create (&th, NULL, &tryme_thread, f);
-    if (rc)
-      error (1, rc, "pthread_create");
-  }
+  xpthread_create (NULL, &tryme_thread, f);
 
   puts ("threads go");
   /* The existing threads' stacks should have been changed.
      Let them run to test it.  */
-  pthread_barrier_wait (&go_barrier);
+  xpthread_barrier_wait (&go_barrier);
 
   pthread_exit ((void *) (long int) (! allow_execstack));
+# endif
 #endif
 
   return ! allow_execstack;
 }
 
-static void
-deeper (void (*f) (void))
-{
-  char stack[1100 * 1024];
-  explicit_bzero (stack, sizeof stack);
-  (*f) ();
-  memfrob (stack, sizeof stack);
-}
-
-
 #include <support/test-driver.c>
diff --git a/nptl/allocatestack.c b/nptl/allocatestack.c
index d9adb5856c..9662b43afe 100644
--- a/nptl/allocatestack.c
+++ b/nptl/allocatestack.c
@@ -448,25 +448,6 @@  allocate_stack (const struct pthread_attr *attr, struct pthread **pdp,
 
 	  lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
 
-
-	  /* There might have been a race.  Another thread might have
-	     caused the stacks to get exec permission while this new
-	     stack was prepared.  Detect if this was possible and
-	     change the permission if necessary.  */
-	  if (__builtin_expect ((GL(dl_stack_flags) & PF_X) != 0
-				&& (prot & PROT_EXEC) == 0, 0))
-	    {
-	      int err = __nptl_change_stack_perm (pd);
-	      if (err != 0)
-		{
-		  /* Free the stack memory we just allocated.  */
-		  (void) __munmap (mem, size);
-
-		  return err;
-		}
-	    }
-
-
 	  /* Note that all of the stack and the thread descriptor is
 	     zeroed.  This means we do not have to initialize fields
 	     with initial value zero.  This is specifically true for
diff --git a/sysdeps/generic/ldsodefs.h b/sysdeps/generic/ldsodefs.h
index 91447a5e77..b897da7e7b 100644
--- a/sysdeps/generic/ldsodefs.h
+++ b/sysdeps/generic/ldsodefs.h
@@ -413,13 +413,6 @@  struct rtld_global
 #endif
 #include <dl-procruntime.c>
 
-#if !PTHREAD_IN_LIBC
-  /* If loading a shared object requires that we make the stack executable
-     when it was not, we do it by calling this function.
-     It returns an errno code or zero on success.  */
-  EXTERN int (*_dl_make_stack_executable_hook) (void **);
-#endif
-
   /* Prevailing state of the stack, PF_X indicating it's executable.  */
   EXTERN ElfW(Word) _dl_stack_flags;
 
@@ -716,17 +709,10 @@  extern const ElfW(Phdr) *_dl_phdr;
 extern size_t _dl_phnum;
 #endif
 
-#if PTHREAD_IN_LIBC
-/* This function changes the permissions of all stacks (not just those
-   of the main stack).  */
-int _dl_make_stacks_executable (void **stack_endp) attribute_hidden;
-#else
-/* This is the initial value of GL(dl_make_stack_executable_hook).
-   A threads library can change it.  The ld.so implementation changes
-   the permissions of the main stack only.  */
-extern int _dl_make_stack_executable (void **stack_endp);
-rtld_hidden_proto (_dl_make_stack_executable)
-#endif
+/* This function changes the permission of the memory region pointed
+   by STACK_ENDP to executable and update the internal memory protection
+   flags for future thread stack creation.  */
+int _dl_make_stack_executable (void **stack_endp) attribute_hidden;
 
 /* Variable pointing to the end of the stack (or close to it).  This value
    must be constant over the runtime of the application.  Some programs
diff --git a/sysdeps/mach/hurd/dl-execstack.c b/sysdeps/mach/hurd/dl-execstack.c
index 31371bc6e3..0222430131 100644
--- a/sysdeps/mach/hurd/dl-execstack.c
+++ b/sysdeps/mach/hurd/dl-execstack.c
@@ -47,4 +47,3 @@  _dl_make_stack_executable (void **stack_endp)
   return ENOSYS;
 #endif
 }
-rtld_hidden_def (_dl_make_stack_executable)
diff --git a/sysdeps/nptl/pthreadP.h b/sysdeps/nptl/pthreadP.h
index c2db165052..a8e09bf754 100644
--- a/sysdeps/nptl/pthreadP.h
+++ b/sysdeps/nptl/pthreadP.h
@@ -289,12 +289,6 @@  extern _Noreturn void __syscall_do_cancel (void) attribute_hidden;
 extern void __nptl_free_tcb (struct pthread *pd);
 libc_hidden_proto (__nptl_free_tcb)
 
-/* Change the permissions of a thread stack.  Called from
-   _dl_make_stacks_executable and pthread_create.  */
-int
-__nptl_change_stack_perm (struct pthread *pd);
-rtld_hidden_proto (__nptl_change_stack_perm)
-
 /* longjmp handling.  */
 extern void __pthread_cleanup_upto (__jmp_buf target, char *targetframe);
 libc_hidden_proto (__pthread_cleanup_upto)
diff --git a/sysdeps/unix/sysv/linux/Versions b/sysdeps/unix/sysv/linux/Versions
index 213ff5f1fe..55d565545a 100644
--- a/sysdeps/unix/sysv/linux/Versions
+++ b/sysdeps/unix/sysv/linux/Versions
@@ -360,7 +360,4 @@  ld {
     __rseq_offset;
     __rseq_size;
   }
-  GLIBC_PRIVATE {
-    __nptl_change_stack_perm;
-  }
 }
diff --git a/sysdeps/unix/sysv/linux/dl-execstack.c b/sysdeps/unix/sysv/linux/dl-execstack.c
index b986898598..68db6737f0 100644
--- a/sysdeps/unix/sysv/linux/dl-execstack.c
+++ b/sysdeps/unix/sysv/linux/dl-execstack.c
@@ -16,19 +16,10 @@ 
    License along with the GNU C Library; if not, see
    <https://www.gnu.org/licenses/>.  */
 
-#include <errno.h>
 #include <ldsodefs.h>
-#include <libintl.h>
-#include <list.h>
-#include <pthreadP.h>
-#include <stackinfo.h>
-#include <stdbool.h>
-#include <sys/mman.h>
-#include <sysdep.h>
-#include <unistd.h>
 
-static int
-make_main_stack_executable (void **stack_endp)
+int
+_dl_make_stack_executable (void **stack_endp)
 {
   /* This gives us the highest/lowest page that needs to be changed.  */
   uintptr_t page = ((uintptr_t) *stack_endp
@@ -52,57 +43,3 @@  make_main_stack_executable (void **stack_endp)
 
   return 0;
 }
-
-int
-_dl_make_stacks_executable (void **stack_endp)
-{
-  /* First the main thread's stack.  */
-  int err = make_main_stack_executable (stack_endp);
-  if (err != 0)
-    return err;
-
-  lll_lock (GL (dl_stack_cache_lock), LLL_PRIVATE);
-
-  list_t *runp;
-  list_for_each (runp, &GL (dl_stack_used))
-    {
-      err = __nptl_change_stack_perm (list_entry (runp, struct pthread, list));
-      if (err != 0)
-	break;
-    }
-
-  /* Also change the permission for the currently unused stacks.  This
-     might be wasted time but better spend it here than adding a check
-     in the fast path.  */
-  if (err == 0)
-    list_for_each (runp, &GL (dl_stack_cache))
-      {
-	err = __nptl_change_stack_perm (list_entry (runp, struct pthread,
-						    list));
-	if (err != 0)
-	  break;
-      }
-
-  lll_unlock (GL (dl_stack_cache_lock), LLL_PRIVATE);
-
-  return err;
-}
-
-int
-__nptl_change_stack_perm (struct pthread *pd)
-{
-#if _STACK_GROWS_DOWN
-  void *stack = pd->stackblock + pd->guardsize;
-  size_t len = pd->stackblock_size - pd->guardsize;
-#elif _STACK_GROWS_UP
-  void *stack = pd->stackblock;
-  size_t len = (uintptr_t) pd - pd->guardsize - (uintptr_t) pd->stackblock;
-#else
-# error "Define either _STACK_GROWS_DOWN or _STACK_GROWS_UP"
-#endif
-  if (__mprotect (stack, len, PROT_READ | PROT_WRITE | PROT_EXEC) != 0)
-    return errno;
-
-  return 0;
-}
-rtld_hidden_def (__nptl_change_stack_perm)
diff --git a/sysdeps/unix/sysv/linux/mips/Makefile b/sysdeps/unix/sysv/linux/mips/Makefile
index d5725c69d8..05ec9150b2 100644
--- a/sysdeps/unix/sysv/linux/mips/Makefile
+++ b/sysdeps/unix/sysv/linux/mips/Makefile
@@ -61,6 +61,7 @@  ifeq ($(subdir),elf)
 # this test is expected to fail.
 ifneq ($(mips-has-gnustack),yes)
 test-xfail-check-execstack = yes
+CFLAGS-tst-execstack.c += -DDEFAULT_RWX_STACK=1
 endif
 endif
 
@@ -68,6 +69,12 @@  ifeq ($(subdir),stdlib)
 gen-as-const-headers += ucontext_i.sym
 endif
 
+ifeq ($(subdir),nptl)
+ifeq ($(mips-force-execstack),yes)
+CFLAGS-tst-execstack-threads.c += -DDEFAULT_RWX_STACK=1
+endif
+endif
+
 ifeq ($(mips-force-execstack),yes)
 CFLAGS-.o += -Wa,-execstack
 CFLAGS-.os += -Wa,-execstack